[
  {
    "path": ".clang-format",
    "content": "Language: Cpp\nBasedOnStyle: GNU\nSortIncludes:    false\nAllowShortIfStatementsOnASingleLine: false\nBreakBeforeBraces: Linux\nTabWidth:        4\nIndentWidth:     4\nColumnLimit:     89\nSpaceBeforeParens:\n    ControlStatements\nSpacesInCStyleCastParentheses: false\nSpaceAfterCStyleCast: true\nIndentCaseLabels: true\nAlignAfterOpenBracket: DontAlign\nBinPackArguments: true\nBinPackParameters: true\nAlwaysBreakAfterReturnType: AllDefinitions\n\nStatementMacros: [\"PyObject_HEAD\", \"Py_BEGIN_ALLOW_THREADS\", \"Py_END_ALLOW_THREADS\"]\nAlignConsecutiveMacros: true\n"
  },
  {
    "path": ".github/PULL_REQUEST_TEMPLATE.md",
    "content": "## Description\n\nThanks for contributing to tskit! :heart:\nA guide to the PR process is [here](https://tskit.dev/tskit/docs/stable/development.html#git-workflow)\nPlease replace this text with a summary of the change and which issue is fixed, if any. Please also include relevant motivation and context.\n\nFixes #(issue) <- Putting the issue number here will auto-close the issue when this PR is merged \n\n# PR Checklist:\n\n- [ ] Tests that fully cover new/changed functionality.\n- [ ] Documentation including tutorial content if appropriate.\n- [ ] Changelogs, if there are API changes.\n"
  },
  {
    "path": ".github/workflows/docs.yml",
    "content": "name: Build Docs\non:\n  pull_request:\n  merge_group:\n  push:\n    branches: [main]\n    tags:\n      - '*'\n\nenv:\n  FORCE_COLOR: 1\n\njobs:\n  Docs:\n    uses: tskit-dev/.github/.github/workflows/docs.yml@v15\n    with:\n      pyproject-directory: python\n      additional-apt-packages: doxygen\n      pre-build-command: cd docs/doxygen && doxygen\n"
  },
  {
    "path": ".github/workflows/lint.yml",
    "content": "name: Lint\n\non:\n  pull_request:\n  merge_group:\n\njobs:\n  Lint:\n    uses: tskit-dev/.github/.github/workflows/lint.yml@v15\n    with:\n      pyproject-directory: python\n"
  },
  {
    "path": ".github/workflows/release-c.yml",
    "content": "name: Publish C API release\n\non:\n  push:\n    branches: [main, test]\n    tags: ['*']\n\nenv:\n  FORCE_COLOR: 1\n\njobs:\n  build:\n    runs-on: ubuntu-24.04\n    steps:\n      - name: Checkout\n        uses: actions/checkout@v6.0.2\n      - name: Install uv\n        uses: astral-sh/setup-uv@v6\n        with:\n          version: \"0.10.0\"\n      - name: Install system deps\n        run: |\n          sudo apt-get update\n          sudo apt-get install -y ninja-build libcunit1-dev\n      - name: Install meson\n        run: uv tool install meson==1.10.1\n      - name: Build tarball\n        run: |\n          git rm -rf c/tests/meson-subproject\n          git config --global user.email \"CI@CI.com\"\n          git config --global user.name \"Mr Robot\"\n          git add -A\n          git commit -m \"dummy commit to make meson not add in the symlinked directory\"\n          meson c build-gcc\n          meson dist -C build-gcc\n      - name: C Release\n        uses: softprops/action-gh-release@v2.5.0\n        if: startsWith(github.ref, 'refs/tags/') && contains(github.event.ref, 'C_')\n        with:\n          draft: True\n          files: build-gcc/meson-dist/*\n"
  },
  {
    "path": ".github/workflows/tests.yml",
    "content": "name: Tests\n\non:\n  pull_request:\n  merge_group:\n  push:\n    branches: [main, test]\n\nenv:\n  FORCE_COLOR: 1\n\njobs:\n\n  packaging:\n    name: Python packaging\n    uses: tskit-dev/.github/.github/workflows/python-packaging.yml@v15\n    with:\n      pyproject-directory: python\n      cli-test-cmd: tskit --help\n\n  test-c:\n    name: C tests\n    uses: tskit-dev/.github/.github/workflows/c-tests.yml@v15\n    with:\n      library-directory: c\n    secrets: inherit\n\n  test-python-c:\n    name: Python-C tests\n    uses: tskit-dev/.github/.github/workflows/python-c-tests.yml@v15\n    with:\n      tests: python/tests/test_python_c.py python/tests/test_dict_encoding.py\n      pyproject-directory: python\n    secrets: inherit\n\n\n  test:\n    name: Python\n    uses: tskit-dev/.github/.github/workflows/python-tests.yml@v15\n    with:\n      os: ${{ matrix.os }}\n      python-version: ${{ matrix.python }}\n      pyproject-directory: python\n      coverage-directory: python/tskit\n    secrets: inherit\n    strategy:\n      matrix:\n        python: [ 3.11, 3.13 ]\n        os:  [ macos-latest, ubuntu-24.04, windows-latest ]\n\n\n  msys2:\n    runs-on: windows-latest\n    strategy:\n      matrix:\n        include:\n        - { sys: mingw32, env: i686 }\n        - { sys: mingw64, env: x86_64 }\n    name: Windows (${{ matrix.sys }}, ${{ matrix.env }})\n    defaults:\n      run:\n        shell: msys2 {0}\n    steps:\n    - name: Cancel Previous Runs\n      uses: styfle/cancel-workflow-action@0.13.0\n      with:\n        access_token: ${{ github.token }}\n\n    - name: 'Checkout'\n      uses: actions/checkout@v6.0.2\n\n    - name: Setup MSYS2 ${{matrix.sys}}\n      uses: msys2/setup-msys2@v2.27.0\n      with:\n        msystem: ${{matrix.sys}}\n        update: true\n        install: >-\n          git\n          mingw-w64-${{matrix.env}}-toolchain\n          mingw-w64-${{matrix.env}}-ninja\n          mingw-w64-${{matrix.env}}-meson\n          
mingw-w64-${{matrix.env}}-cunit\n\n    - name: Build\n      working-directory: c\n      run: |\n        meson build -Dbuild_examples=false\n        ninja -C build\n\n    - name: Run tests\n      working-directory: c\n      run: |\n        ninja -C build test\n\n  bespoke-python-test:\n    name: Bespoke Python tests\n    runs-on: ubuntu-24.04\n\n    steps:\n      - name: Cancel Previous Runs\n        uses: styfle/cancel-workflow-action@0.13.0\n        with:\n          access_token: ${{ github.token }}\n\n      - name: Checkout\n        uses: actions/checkout@v6.0.2\n        with:\n          submodules: true\n\n      - name: Install uv and set the python version\n        uses: astral-sh/setup-uv@v6\n        with:\n          python-version: 3.11\n          version: \"0.10.0\"\n\n      - name: Install Python dependencies\n        working-directory: python\n        run: uv sync --locked --group test --no-default-groups\n\n      - name: Minidom test\n        working-directory: python\n        # Importing either IPython or pytest causes import of xml.dom.minidom\n        # So to actually test that tskit imports it, we need a minimal test\n        run: |\n          uv run --locked --group test --no-default-groups \\\n            python -c \"import tskit;tskit.Tree.generate_star(5).tree_sequence.draw_svg(path='test.svg')\"\n\n      - name: Run JIT code coverage\n        run: |\n          NUMBA_DISABLE_JIT=1 uv run --locked --project=python --no-default-groups\\\n            pytest --cov=python/tskit --cov-report=xml --cov-branch \\\n            python/tests/test_jit.py\n\n      - name: Upload coverage to Codecov\n        uses: codecov/codecov-action@v5.5.2\n        with:\n          token: ${{ secrets.CODECOV_TOKEN }}\n          fail_ci_if_error: true\n          files: coverage.xml\n          disable_search: true\n          verbose: true\n          flags: python-tests-no-jit\n\n      - name: Build example LWT interface code and test\n        working-directory: 
python/lwt_interface/\n        run: |\n          make allchecks\n          uv run --project=../ --group=test pytest -vs\n\n      - name: Build cython example LWT interface code and run\n        working-directory: python/lwt_interface/cython_example\n        run: make\n\n\n  bespoke-c-test:\n    name: Bespoke C tests\n    runs-on: ubuntu-24.04\n\n    steps:\n      - name: Cancel Previous Runs\n        uses: styfle/cancel-workflow-action@0.13.0\n        with:\n          access_token: ${{ github.token }}\n\n      - name: Checkout\n        uses: actions/checkout@v6.0.2\n        with:\n          submodules: true\n\n      - name: Install system deps\n        run: |\n          sudo apt-get update\n          sudo apt-get install -y libcunit1-dev ninja-build clang\n\n      - name: Install uv\n        uses: astral-sh/setup-uv@v6\n        with:\n          version: \"0.10.0\"\n\n      - name: Install uv deps\n        run: |\n          uv tool install meson==1.10.1\n\n      - name: Configure code\n        run: CFLAGS=-D_TSK_BIG_TABLES CPPFLAGS=-D_TSK_BIG_TABLES meson setup build-bt c/\n\n      - name: Compile\n        run: ninja -C build-bt\n\n      - name: Run tests\n        run: ninja -C build-bt test\n\n\n      - name: Test building with meson subproject\n        run: |\n          meson build-subproject c/tests/meson-subproject\n          ninja -C build-subproject\n          ./build-subproject/example\n\n      - name: Install shared library and hand-compile program.\n        run: |\n          meson build-install c --prefix=/usr\n          sudo ninja -C build-install install\n          clang c/examples/api_structure.c -I c/subprojects/kastore -o api_structure -ltskit\n          ./api_structure\n\n      - name: Run example make file\n        run: |\n          make -C c/examples\n\n"
  },
  {
    "path": ".github/workflows/wheels.yml",
    "content": "name: Publish Python release\n\non:\n  push:\n    branches: [test-publish]\n  release:\n    types: [published]\n\njobs:\n  build-wheels:\n    if: \"!startsWith(github.ref, 'refs/tags/C_')\"\n    uses: tskit-dev/.github/.github/workflows/build-wheels.yml@v15\n    with:\n      pyproject-directory: python\n\n  publish:\n    runs-on: ubuntu-24.04\n    environment: release\n    needs: [ 'build-wheels' ]\n    permissions:\n      id-token: write\n    steps:\n      - name: Download artifacts\n        uses: actions/download-artifact@v7.0.0\n        with:\n          pattern: build-*\n          path: dist\n          merge-multiple: true\n\n      - name: Show artifacts\n        run: ls -lah dist\n\n      - name: Publish distribution to Test PyPI\n        if: github.event_name == 'push' && github.ref_name == 'test-publish'\n        uses: pypa/gh-action-pypi-publish@v1.13.0\n        with:\n          repository-url: https://test.pypi.org/legacy/\n          verbose: true\n\n      - name: Publish distribution to Production PyPI\n        if: github.event_name == 'release'\n        uses: pypa/gh-action-pypi-publish@v1.13.0\n"
  },
  {
    "path": ".gitignore",
    "content": "build-gcc\n.DS_Store\npython/benchmark/*.trees\npython/benchmark/*.json\npython/benchmark/*.html\n.venv\n.env\n.vscode\nenv\n"
  },
  {
    "path": "CONTRIBUTING.md",
    "content": "# Contributing\n\nTskit is a free and open-source project that welcomes contributions from everyone.\nThe [Developer documentation](https://tskit.dev/tskit/docs/latest/development.html)\nwill help you get started. \n\nWe have an active slack group where tskit and associated projects are discussed.\nIf you wish to join email [admin@tskit.dev](mailto:admin@tskit.dev).\n\nWe ask all users to follow our [code of conduct](https://github.com/tskit-dev/.github/blob/main/CODE_OF_CONDUCT.md)\nwhen interacting with the project.\n"
  },
  {
    "path": "LICENSE",
    "content": "MIT License\n\nCopyright (c) 2018-2019 Tskit Developers\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "README.md",
    "content": "# tskit  <img align=\"right\" width=\"145\" height=\"90\" src=\"https://github.com/tskit-dev/administrative/blob/main/tskit_logo.svg\">\n\n[![License](https://img.shields.io/github/license/tskit-dev/tskit)](https://github.com/tskit-dev/tskit/blob/main/LICENSE)\n[![Contributors](https://img.shields.io/github/contributors/tskit-dev/tskit)](https://github.com/tskit-dev/tskit/graphs/contributors)\n[![Commit activity](https://img.shields.io/github/commit-activity/m/tskit-dev/tskit)](https://github.com/tskit-dev/tskit/commits/main)\n[![Coverage](https://codecov.io/gh/tskit-dev/tskit/branch/main/graph/badge.svg)](https://codecov.io/gh/tskit-dev/tskit)\n![OS](https://img.shields.io/badge/OS-linux%20%7C%20OSX%20%7C%20win--64-steelblue)\n\n[Documentation (stable)](https://tskit.dev/tskit/docs/stable/) • [Documentation (latest)](https://tskit.dev/tskit/docs/latest/)\n\n[![Docs Build](https://github.com/tskit-dev/tskit/actions/workflows/docs.yml/badge.svg)](https://github.com/tskit-dev/tskit/actions/workflows/docs.yml)[![Tests](https://github.com/tskit-dev/tskit/actions/workflows/tests.yml/badge.svg)](https://github.com/tskit-dev/tskit/actions/workflows/tests.yml)\n\n\nThe succinct tree sequence (`tskit`) format is an efficient way of representing\nthe genetic history - sometimes known as an\n[Ancestral Recombination Graph or ARG](https://doi.org/10.1093/genetics/iyae100) -\nof a set of related DNA sequences. 
`Tskit` is used\nby a number of software libraries and programs (such as\n[msprime](https://github.com/tskit-dev/msprime),\n[SLiM](https://github.com/MesserLab/SLiM),\n[fwdpp](http://molpopgen.github.io/fwdpp/), and\n[tsinfer](https://tskit.dev/tsinfer/docs/stable/)) that either simulate or infer\nthe evolutionary ancestry of genetic sequences.\n\nThe `tskit` library provides the underlying functionality used to load, examine, and\nmanipulate ARGs in the tree sequence format, including efficient access to the\nsequence of correlated trees along a genome and general methods to calculate\ngenetic statistics. `Tskit` often forms part of an installation of other\nsoftware packages such as those listed above. Please see the\n[documentation](https://tskit.dev/tskit/docs/stable/) for further details, which\nincludes\n[installation instructions](https://tskit.dev/tskit/docs/stable/installation.html).\n\nTo get started with tskit, tutorials and other content are at http://tskit.dev. For help\nand support from the community you can use\n[discussions](https://github.com/tskit-dev/tskit/discussions) here on github, or raise an\nissue for a specific bug or feature request.\n\nWe warmly welcome contributions from the community. Raise an issue if you have an\nidea you'd like to work on, or submit a PR for comments and help.\n\nThe base `tskit` library provides both a [Python](https://tskit.dev/tskit/docs/stable/python-api.html)\nand [C](https://tskit.dev/tskit/docs/stable/c-api.html) API. 
A Rust API is provided in the\n[tskit-rust](https://github.com/tskit-dev/tskit-rust) repository.\n\n\n#### Python API\n[![PyPI version](https://img.shields.io/pypi/v/tskit.svg)](https://pypi.org/project/tskit/)\n[![Supported Python Versions](https://img.shields.io/pypi/pyversions/tskit.svg)](https://pypi.org/project/tskit/)\n[![Wheel](https://img.shields.io/pypi/wheel/tskit)](https://pypi.org/project/tskit/)\n[![Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)\n\n\nMost users of `tskit` will use the python API as it provides a convenient, high-level API\nto access, analyse and create tree sequences. Full documentation is\n[here](https://tskit.dev/tskit/docs/stable/python-api.html).   \n\n#### C API\n[![C99](https://img.shields.io/badge/Language-C99-steelblue.svg)](https://en.wikipedia.org/wiki/C99)\n\n\nThe `tskit` C API provides comprehensive, low-level methods for manipulating and\nprocessing tree-sequences. Written to the C99 standard and fully thread-safe, it can be\nused with either C or C++. Full documentation is\n[here](https://tskit.dev/tskit/docs/stable/c-api.html).\n\n## Installation\n\n```bash\npython -m pip install tskit\n# or\nconda install -c conda-forge tskit\n```\n"
  },
  {
    "path": "c/.gitignore",
    "content": "build\n.*.swp\n.*.swo\n"
  },
  {
    "path": "c/CHANGELOG.rst",
    "content": "--------------------\n[1.3.2] - 2026-XX-XX\n--------------------\n\nIn development\n\n- Add ``tsk_json_struct_metadata_get_blob`` function\n  (:user:`benjeffery`, :pr:`3306`)\n\n--------------------\n[1.3.1] - 2026-03-06\n--------------------\n\nMaintenance release.\n\n- Update to kastore 2.1.2\n- Fix doc typo for file uuid (:pr:`3399`)\n- Migrate linting to clang-format 21.1.8 (:pr:`3389`)\n- Support compile time setting of debug stream (:pr:`3364`)\n\n--------------------\n[1.3.0] - 2025-11-27\n--------------------\n\n**Breaking changes**\n\n- ``trees.c`` now depends on ``genotypes.c`` (via ``tskit/genotypes.h``) and must\n  be built and linked together with it.\n  (:user:`benjeffery`, :pr:`3324`)\n\n\n**Features**\n\n- ``tsk_variant_init`` and associated variant decoding methods now\n  fully support ``TSK_ISOLATED_NOT_MISSING`` not being set for internal nodes.\n  (:user:`benjeffery`, :pr:`3313`)\n\n- Add ``tsk_treeseq_decode_alignments`` to decode full-length reference-based\n  sequence alignments for specified nodes over a genomic interval, respecting\n  ``TSK_ISOLATED_NOT_MISSING`` semantics.\n  (:user:`benjeffery`, :pr:`3324`, :issue:`3319`)\n\n\n--------------------\n[1.2.0] - 2025-09-24\n--------------------\n\n**Breaking changes**\n\n- Remove ``tsk_diff_iter_t`` and associated functions.\n  (:user:`benjeffery`, :pr:`3221`, :issue:`2797`).\n\n- ``tsk_treeseq_init`` now requires that mutation parents in the table collection\n  are correct and consistent with the topology of the tree at each mutation site.\n  Returns ``TSK_ERR_BAD_MUTATION_PARENT`` if this is not the case, or\n  ``TSK_ERR_MUTATION_PARENT_AFTER_CHILD`` if the mutations are not in an order\n  compatible with the correct mutation parent.\n  (:user:`benjeffery`, :issue:`2729`, :issue:`2732`, :pr:`3212`).\n\n**Features**\n\n- Add ``TSK_TS_INIT_COMPUTE_MUTATION_PARENTS`` to ``tsk_treeseq_init``\n  to compute mutation parents from the tree sequence topology.\n  Note that the 
mutations must be in the correct order.\n  (:user:`benjeffery`, :issue:`2757`, :pr:`3212`).\n\n- Add ``TSK_CHECK_MUTATION_PARENTS`` option to ``tsk_table_collection_check_integrity``\n  to check that mutation parents are consistent with the tree sequence topology.\n  This option implies ``TSK_CHECK_TREES``.\n  (:user:`benjeffery`, :issue:`2729`, :issue:`2732`, :pr:`3212`).\n\n- Add the ``TSK_NO_CHECK_INTEGRITY`` option to ``tsk_table_collection_compute_mutation_parents``\n  to skip the integrity checks that are normally run when computing mutation parents.\n  This is useful for speeding up the computation of mutation parents when the\n  tree sequence is certainly known to be valid.\n  (:user:`benjeffery`, :pr:`3212`).\n\n- Mutations returned by ``tsk_treeseq_get_mutation`` now include pre-computed\n  ``inherited_state`` and ``inherited_state_length`` fields. The inherited state\n  is computed during tree sequence initialization and represents the state that\n  existed at the site before each mutation occurred (either the ancestral state\n  if the mutation is the root mutation or the derived state of the parent mutation).\n  Note that this breaks ABI compatibility due to the addition of these fields\n  to the ``tsk_mutation_t`` struct.\n  (:user:`benjeffery`, :pr:`3277`, :issue:`2631`).\n\n\n\n--------------------\n[1.1.4] - 2025-03-31\n--------------------\n\n**Changes**\n\n- Added the TSK_TRACE_ERRORS macro to enable tracing of errors in the C library.\n  This is useful for debugging as errors will print to stderr when set.\n  (:user:`jeromekelleher`, :pr:`3095`).\n\n--------------------\n[1.1.3] - 2024-10-16\n--------------------\n\n**Features**\n\n- Add the `tsk_treeseq_extend_haplotypes` method that can compress a tree sequence\n  by extending edges into adjacent trees and thus creating unary nodes in those\n  trees (:user:`petrelharp`, :user:`hfr1tze`, :user:`avabamf`, :pr:`2651`, :pr:`2938`).\n\n--------------------\n[1.1.2] - 
2023-05-17\n--------------------\n\n**Performance improvements**\n\n- tsk_tree_seek is now much faster at seeking to arbitrary points along\n  the sequence from the null tree (:user:`molpopgen`, :pr:`2661`).\n\n**Features**\n\n- The struct ``tsk_treeseq_t`` now has the variables ``min_time`` and ``max_time``,\n  which are the minimum and maximum among the node times and mutation times,\n  respectively. ``min_time`` and ``max_time`` can be accessed using the functions\n  ``tsk_treeseq_get_min_time`` and ``tsk_treeseq_get_max_time``, respectively.\n  (:user:`szhan`, :pr:`2612`, :issue:`2271`)\n\n- Add the `TSK_SIMPLIFY_NO_FILTER_NODES` option to simplify to allow unreferenced\n  nodes be kept in the output (:user:`jeromekelleher`, :user:`hyanwong`,\n  :issue:`2606`, :pr:`2619`).\n\n- Add the `TSK_SIMPLIFY_NO_UPDATE_SAMPLE_FLAGS` option to simplify which ensures\n  no node sample flags are changed to allow calling code to manage sample status.\n  (:user:`jeromekelleher`, :issue:`2662`, :pr:`2663`).\n\n- Guarantee that unfiltered tables are not written to unnecessarily\n  during simplify (:user:`jeromekelleher`, :pr:`2619`).\n\n- Add `x_table_keep_rows` methods to provide efficient in-place table subsetting\n  (:user:`jeromekelleher`, :pr:`2700`).\n\n- Add `tsk_tree_seek_index` function\n\n--------------------\n[1.1.1] - 2022-07-29\n--------------------\n\n**Bug fixes**\n\n- Fix segfault in tsk_variant_restricted_copy in tree sequences with large\n  numbers of alleles or very long alleles\n  (:user:`jeromekelleher`, :pr:`2437`, :issue:`2429`).\n\n--------------------\n[1.1.0] - 2022-07-14\n--------------------\n\n**Features**\n\n- Add ``num_children`` to ``tsk_tree_t`` an array which contains counts of the number of child\n  nodes of each node in the tree. 
(:user:`GertjanBisschop`, :issue:`2274`, :pr:`2316`)\n\n- Add ``edge`` to ``tsk_tree_t`` an array which contains the ``edge_id`` of the edge encoding\n  the relationship between the child node and its parent for each (child) node in the tree.\n  (:user:`GertjanBisschop`, :issue:`2304`, :pr:`2340`)\n\n\n**Changes**\n\n- Reduce the maximum number of rows in a table by 1. This removes edge cases so that a ``tsk_id_t`` can be\n  used to count the number of rows. (:user:`benjeffery`, :issue:`2336`, :pr:`2337`)\n\n- Samples are now copied by ``tsk_variant_restricted_copy``. (:user:`benjeffery`, :issue:`2400`, :pr:`2401`)\n\n\n--------------------\n[1.0.0] - 2022-05-24\n--------------------\n\nThis major release marks the point at which the documented API becomes stable and supported.\n\n**Breaking changes**\n\n- Change the type of genotypes to ``int32_t``, removing the TSK_16_BIT_GENOTYPES flag option.\n  (:user:`benjeffery`, :issue:`463`, :pr:`2108`)\n\n- ``tsk_variant_t`` now includes its ``tsk_site_t`` rather than pointing to it.\n  (:user:`benjeffery`, :issue:`2161`, :pr:`2162`)\n\n- Rename ``TSK_TAKE_TABLES`` to ``TSK_TAKE_OWNERSHIP``.\n  (:user:`benjeffery`, :issue:`2221`, :pr:`2222`)\n\n- ``TSK_DEBUG``, ``TSK_NO_INIT``, ``TSK_NO_CHECK_INTEGRITY`` and ``TSK_TAKE_OWNERSHIP`` have moved to ``core.h``\n  (:user:`benjeffery`, :issue:`2218`, :pr:`2230`))\n\n- Rename several flags:\n     - All flags to ``simplify`` for example ``TSK_KEEP_INPUT_ROOTS`` becomes ``TSK_SIMPLIFY_KEEP_INPUT_ROOTS``.\n     - All flags to ``subset`` for example ``TSK_KEEP_UNREFERENCED`` becomes ``TSK_SUBSET_KEEP_UNREFERENCED``.\n     - ``TSK_BUILD_INDEXES`` -> ``TSK_TS_INIT_BUILD_INDEXES``\n     - ``TSK_NO_METADATA`` -> ``TSK_TABLE_NO_METADATA``\n     - ``TSK_NO_EDGE_METADATA`` -> ``TSK_TC_NO_EDGE_METADATA``\n\n  (:user:`benjeffery`, :issue:`1720`, :pr:`2226`, :pr:`2229`, :pr:`2224`)\n\n- Remove the generic ``TSK_ERR_OUT_OF_BOUNDS`` - replacing with specific errors.\n  Remove 
``TSK_ERR_NON_SINGLE_CHAR_MUTATION`` which was unused.\n  (:user:`benjeffery`, :pr:`2260`)\n\n- Reorder stats API methods to place ``result`` as the last argument. (:user:`benjeffery`, :pr:`2292`, :issue:`2285`)\n\n**Features**\n\n- Make dumping of tables and tree sequences to disk a zero-copy operation.\n  (:user:`benjeffery`, :issue:`2111`, :pr:`2124`)\n\n- Add ``edge`` attribute to ``mutation_t`` struct and make available in tree sequence.\n  (:user:`jeromekelleher`, :issue:`685`, :pr:`2279`)\n\n- Reduce peak memory usage in ``tsk_treeseq_simplify``.\n  (:user:`jeromekelleher`, :issue:`2287`, :pr:`2288`)\n\n----------------------\n[0.99.15] - 2021-12-07\n----------------------\n\n**Breaking changes**\n\n- The ``tables`` argument to ``tsk_treeseq_init`` is no longer ``const``, to allow for future no-copy tree sequence creation.\n  (:user:`benjeffery`, :issue:`1718`, :pr:`1719`)\n- Additional consistency checks for mutation tables are now run by ``tsk_table_collection_check_integrity``\n  even when ``TSK_CHECK_MUTATION_ORDERING`` is not passed in. (:user:`petrelharp`, :issue:`1713`, :pr:`1722`)\n\n- ``num_tracked_samples`` and ``num_samples`` in ``tsk_tree_t`` are now typed as ``tsk_size_t``\n  (:user:`benjeffery`, :issue:`1723`, :pr:`1727`)\n\n- The previously deprecated option ``TSK_SAMPLE_COUNTS`` has been removed. (:user:`benjeffery`, :issue:`1744`, :pr:`1761`).\n- Individuals are no longer guaranteed or required to be topologically sorted in a tree sequence.\n  ``tsk_table_collection_sort`` no longer sorts individuals.\n  (:user:`benjeffery`, :issue:`1774`, :pr:`1789`)\n\n- The ``tsk_tree_t.left_root`` member has been removed. Client code can be updated\n  most easily by using the equivalent ``tsk_tree_get_left_root`` function. However,\n  it may be worth considering updating code to use either the standard traversal\n  functions (which automatically iterate over roots) or to use the ``virtual_root``\n  member (which may lead to more concise code). 
(:user:`jeromekelleher`, :issue:`1796`,\n  :pr:`1862`)\n\n- Rename ``tsk_tree_t.left`` and ``tsk_tree_t.right`` members to\n  ``tsk_tree_t.interval.left`` and ``tsk_tree_t.interval.right`` respectively.\n  (:user:`jeromekelleher`, :issue:`1686`, :pr:`1913`)\n\n- ``kastore`` is now vendored into this repo instead of being a git submodule. Developers need to run\n  ``git submodule update``. (:user:`jeromekelleher`, :issue:`1687`, :pr:`1973`)\n\n- ``Tree`` arrays such as ``left_sib``, ``right_child`` etc. now have an additional\n  \"virtual root\" node at the end. (:user:`jeromekelleher`, :issue:`1691`, :pr:`1704`)\n\n- ``marked`` and ``mark`` have been removed from ``tsk_tree_t``. (:user:`jeromekelleher`, :pr:`1936`)\n\n**Features**\n\n- Add ``tsk_table_collection_individual_topological_sort`` to sort the individuals as this is no longer done by the\n  default sort. (:user:`benjeffery`, :issue:`1774`, :pr:`1789`)\n\n- The default behaviour for table size growth is now to double the current size of the table,\n  up to a threshold. To keep the previous behaviour, use (e.g.)\n  ``tsk_edge_table_set_max_rows_increment(tables->edges, 1024)``, which results in adding\n  space for 1024 additional rows each time we run out of space in the edge table.\n  (:user:`benjeffery`, :issue:`5`, :pr:`1683`)\n- ``tsk_table_collection_check_integrity`` now has a ``TSK_CHECK_MIGRATION_ORDERING`` flag. (:user:`petrelharp`, :pr:`1722`)\n\n- The default behaviour for ragged column growth is now to double the current size of the column,\n  up to a threshold. 
To keep the previous behaviour, use (e.g.)\n  ``tsk_node_table_set_max_metadata_length_increment(tables->nodes, 1024)``, which results in adding\n  space for 1024 additional entries each time we run out of space in the ragged column.\n  (:user:`benjeffery`, :issue:`1703`, :pr:`1709`)\n\n- Support for compiling the C library on Windows using msys2 (:user:`jeromekelleher`,\n  :pr:`1742`).\n\n- Add ``time_units`` to ``tsk_table_collection_t`` to describe the units of the time dimension of the\n  tree sequence. This is then used to generate an error if ``time_units`` is ``uncalibrated`` when\n  using the branch lengths in statistics. (:user:`benjeffery`, :issue:`1644`, :pr:`1760`)\n\n- Add the ``TSK_LOAD_SKIP_TABLES`` option to load just the top-level information from a\n  file. Also add the ``TSK_CMP_IGNORE_TABLES`` option to compare only the top-level\n  information in two table collections. (:user:`clwgg`, :pr:`1882`, :issue:`1854`).\n\n- Add reference sequence.\n  (:user:`jeromekelleher`, :user:`benjeffery`, :issue:`146`, :pr:`1911`, :pr:`1944`, :pr:`1911`)\n\n- Add the ``TSK_LOAD_SKIP_REFERENCE_SEQUENCE`` option to load a table collection\n  without the reference sequence. Also add the TSK_CMP_IGNORE_REFERENCE_SEQUENCE\n  option to compare two table collections without comparing their reference\n  sequence. (:user:`clwgg`, :pr:`2019`, :issue:`1971`).\n\n- Add a \"virtual root\" to ``Tree`` arrays such as ``left_sib``, ``right_child`` etc.\n  The virtual root is appended to each array, has all real roots as its children,\n  but is not the parent of any node. Simplifies traversal algorithms.\n  (:user:`jeromekelleher`, :issue:`1691`, :pr:`1704`)\n\n- Add ``num_edges`` to ``tsk_tree_t`` to count the edges that define the topology of\n  the tree. (:user:`jeromekelleher`, :pr:`1704`)\n\n- Add the ``tsk_tree_get_size_bound`` function which returns an upper bound on the number of nodes reachable from\n  the roots of a tree. 
Useful for tree stack allocations (:user:`jeromekelleher`, :pr:`1704`).\n\n- Add ``MetadataSchema.permissive_json`` for an easy way to get the simplest schema.\n\n\n----------------------\n[0.99.14] - 2021-09-03\n----------------------\n\n**Breaking changes**\n\n- 64 bits are now used to store the sizes of ragged table columns such as metadata,\n  allowing them to hold more data. As such ``tsk_size_t`` is now 64 bits wide.\n  This change is fully backwards and forwards compatible for all tree-sequences whose\n  ragged column sizes fit into 32 bits. New tree-sequences with\n  large offset arrays that require 64 bits will fail to load in previous versions with\n  error ``TSK_ERR_BAD_COLUMN_TYPE``.\n  (:user:`jeromekelleher`, :issue:`343`, :issue:`1527`, :issue:`1528`, :issue:`1530`,\n  :issue:`1554`, :issue:`1573`, :issue:`1589`,:issue:`1598`,:issue:`1628`, :pr:`1571`,\n  :pr:`1579`, :pr:`1585`, :pr:`1590`, :pr:`1602`, :pr:`1618`, :pr:`1620`, :pr:`1652`).\n\n**Features**\n\n- Add `tsk_X_table_update_row` methods which allow modifying single rows of tables\n  (:user:`jeromekelleher`, :issue:`1545`, :pr:`1552`).\n\n----------------------\n[0.99.13] - 2021-07-08\n----------------------\n**Fixes**\n\n- Fix segfault when very large columns overflow\n  (:user:`bhaller`, :user:`benjeffery`, :issue:`1509`, :pr:`1511`).\n\n----------------------\n[0.99.12] - 2021-05-14\n----------------------\n\n**Breaking changes**\n\n- Removed ``TSK_NO_BUILD_INDEXES``.\n  Not building indexes is now the default behaviour of `tsk_table_collection_dump` and related functions.\n  (:user:`molpopgen`, :issue:`1327`, :pr:`1337`).\n\n**Features**\n\n- Add ``tsk_*_table_extend`` methods to append to a table from another\n  (:user:`benjeffery`, :issue:`1271`, :pr:`1287`).\n\n**Fixes**\n\n----------------------\n[0.99.11] - 2021-03-16\n----------------------\n\n**Features**\n\n- Add ``parents`` to the individual table to enable recording of pedigrees\n  (:user:`ivan-krukov`, :user:`benjeffery`, 
:issue:`852`, :pr:`1125`, :pr:`866`, :pr:`1153`, :pr:`1177`, :pr:`1199`).\n\n- Added a ``tsk_table_collection_canonicalise`` method, that allows checking for equality between\n  tables that are equivalent up to reordering (:user:`petrelharp`, :user:`mufernando`, :pr:`1108`).\n\n- Removed a previous requirement on ``tsk_table_collection_union``, allowing for unioning of\n  new information both above and below shared history (:user:`petrelharp`, :user:`mufernando`, :pr:`1108`).\n\n- Support migrations in tsk_table_collection_sort. (:user:`jeromekelleher`,\n  :issue:`22`, :issue:`117`, :pr:`1131`).\n\n**Breaking changes**\n\n- Method ``tsk_individual_table_add_row`` has an extra arguments ``parents`` and ``parents_length``.\n\n- Add an ``options`` argument to ``tsk_table_collection_subset`` (:user:`petrelharp`, :pr:`1108`),\n  to allow for retaining the order of populations.\n\n- Mutation error codes have changed\n\n**Changes**\n\n- Allow mutations that have the same derived state as their parent mutation.\n  (:user:`benjeffery`, :issue:`1180`, :pr:`1233`)\n\n- File minor version change to support individual parents\n\n----------------------\n[0.99.10] - 2021-01-25\n----------------------\n\nMinor bugfix on internal APIs\n\n---------------------\n[0.99.9] - 2021-01-22\n---------------------\n\n**Features**\n\n- Add ``TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS`` flag to simplify, which allows the user to\n  keep unary nodes only if they belong to a tabled individual. 
This is useful for\n  simplification in forwards simulations (:user:`hyanwong`, :issue:`1113`, :pr:`1119`).\n\n\n---------------------\n[0.99.8] - 2020-11-27\n---------------------\n\n**Features**\n\n- Add ``tsk_treeseq_genetic_relatedness`` for calculating genetic relatedness between\n  pairs of sets of nodes (:user:`brieuclehmann`, :issue:`1021`, :pr:`1023`, :issue:`974`,\n  :issue:`973`, :pr:`898`).\n\n- Exposed ``tsk_table_collection_set_indexes`` to the API\n  (:user:`benjeffery`, :issue:`870`, :pr:`921`).\n\n**Breaking changes**\n\n- Added an ``options`` argument to ``tsk_table_collection_equals``\n  and table equality methods to allow for more flexible equality criteria\n  (e.g., ignore top-level metadata and schema or provenance tables).\n  Existing code should add an extra final parameter ``0`` to retain the\n  current behaviour (:user:`mufernando`, :user:`jeromekelleher`,\n  :issue:`896`, :pr:`897`, :issue:`913`, :pr:`917`).\n\n- Changed default behaviour of ``tsk_table_collection_clear`` to not clear\n  provenances and added ``options`` argument to optionally clear provenances\n  and schemas (:user:`benjeffery`, :issue:`929`, :pr:`1001`).\n\n- Renamed ``ts.trait_regression`` to ``ts.trait_linear_model``.\n\n---------------------\n[0.99.7] - 2020-09-29\n---------------------\n\n- Added ``TSK_INCLUDE_TERMINAL`` option to ``tsk_diff_iter_init`` to output the last edges\n  at the end of a tree sequence (:user:`hyanwong`, :issue:`783`, :pr:`787`).\n\n- Added ``tsk_bug_assert`` for assertions that should be compiled into release binaries\n  (:user:`benjeffery`, :pr:`860`).\n\n---------------------\n[0.99.6] - 2020-09-04\n---------------------\n\n**Bugfixes**\n\n- :issue:`823` - Fix mutation time error when using\n  ``tsk_table_collection_simplify`` with ``TSK_SIMPLIFY_KEEP_INPUT_ROOTS``\n  (:user:`petrelharp`, :pr:`823`).\n\n---------------------\n[0.99.5] - 2020-08-27\n---------------------\n\n**Breaking changes**\n\n- The macro ``TSK_IMPUTE_MISSING_DATA`` is 
renamed to ``TSK_ISOLATED_NOT_MISSING``\n  (:user:`benjeffery`, :issue:`716`, :pr:`794`)\n\n**New features**\n\n- Add a ``TSK_SIMPLIFY_KEEP_INPUT_ROOTS`` option to simplify which, if enabled, adds edges\n  from the MRCAs of samples in the simplified tree sequence back to the roots\n  in the input tree sequence (:user:`jeromekelleher`, :issue:`775`, :pr:`782`).\n\n**Bugfixes**\n\n- :issue:`777` - Mutations over isolated samples were incorrectly decoded as\n  missing data. (:user:`jeromekelleher`, :pr:`778`)\n\n- :issue:`776` - Fix a segfault when a partial list of samples\n  was provided to the ``variants`` iterator. (:user:`jeromekelleher`, :pr:`778`)\n\n---------------------\n[0.99.4] - 2020-08-12\n---------------------\n\n**Note**\n\n- The ``TSK_VERSION_PATCH`` macro was incorrectly set to ``4`` for 0.99.3, so both\n  0.99.4 and 0.99.3 have the same value.\n\n**Changes**\n\n- Mutation times can be a mixture of known and unknown as long as for each\n  individual site  they are either all known or all unknown (:user:`benjeffery`, :pr:`761`).\n\n**Bugfixes**\n\n- Fix for including core.h under C++ (:user:`petrelharp`, :pr:`755`).\n\n---------------------\n[0.99.3] - 2020-07-27\n---------------------\n\n**Breaking changes**\n\n- ``tsk_mutation_table_add_row`` has an extra ``time`` argument. If the time\n  is unknown ``TSK_UNKNOWN_TIME`` should be passed.\n  (:user:`benjeffery`, :pr:`672`)\n\n- Change genotypes from unsigned to signed to accommodate missing data\n  (see :issue:`144` for discussion). This only affects users of the\n  ``tsk_vargen_t`` class. Genotypes are now stored as int8_t and int16_t\n  types rather than the former unsigned types. The field names in the\n  genotypes union of the ``tsk_variant_t`` struct returned by ``tsk_vargen_next``\n  have been renamed to ``i8`` and ``i16`` accordingly; care should be\n  taken when updating client code to ensure that types are correct. 
The number\n  of distinct alleles supported by 8 bit genotypes has therefore dropped\n  from 255 to 127, with a similar reduction for 16 bit genotypes.\n\n- Change the ``tsk_vargen_init`` method to take an extra parameter ``alleles``.\n  To keep the current behaviour, set this parameter to NULL.\n\n- Edges can now have metadata. Hence edge methods now take two extra arguments:\n  metadata and metadata length. The file format has also changed to accommodate this,\n  but is backwards compatible. Edge metadata can be disabled for a table collection with\n  the TSK_NO_EDGE_METADATA flag.\n  (:user:`benjeffery`, :pr:`496`, :pr:`712`)\n\n- Migrations can now have metadata. Hence migration methods now take two extra arguments:\n  metadata and metadata length. The file format has also changed to accommodate this,\n  but is backwards compatible.\n  (:user:`benjeffery`, :pr:`505`)\n\n- The text dump of tables with metadata now includes the metadata schema as a header.\n  (:user:`benjeffery`, :pr:`493`)\n\n- Bad tree topologies are detected earlier, so that it is no longer possible\n  to create a tsk_treeseq_t object which contains a parent with contradictory\n  children on an interval. Previously an error occurred when some operation\n  building the trees was attempted (:user:`jeromekelleher`, :pr:`709`).\n\n**New features**\n\n- New methods to perform set operations on table collections.\n  ``tsk_table_collection_subset`` subsets and reorders table collections by nodes\n  (:user:`mufernando`, :user:`petrelharp`, :pr:`663`, :pr:`690`).\n  ``tsk_table_collection_union`` forms the node-wise union of two table collections\n  (:user:`mufernando`, :user:`petrelharp`, :issue:`381`, :pr:`623`).\n\n- Mutations now have an optional double-precision floating-point ``time`` column.\n  If not specified, this defaults to a particular NaN value (``TSK_UNKNOWN_TIME``)\n  indicating that the time is unknown. 
For a tree sequence to be considered valid\n  it must meet new criteria for mutation times, see :ref:`sec_mutation_requirements`.\n  Add ``tsk_table_collection_compute_mutation_times`` and new flag to\n  ``tsk_table_collection_check_integrity``:``TSK_CHECK_MUTATION_TIME``. Table sorting\n  orders mutations by non-increasing time per-site, which is also a requirement for a\n  valid tree sequence.\n  (:user:`benjeffery`, :pr:`672`)\n\n- Add ``metadata`` and ``metadata_schema`` fields to table collection, with accessors on\n  tree sequence. These store arbitrary bytes and are optional in the file format.\n  (:user:`benjeffery`, :pr:`641`)\n\n- Add the ``TSK_SIMPLIFY_KEEP_UNARY`` option to simplify (:user:`gtsambos`). See :issue:`1`\n  and :pr:`143`.\n\n- Add a ``set_root_threshold`` option to tsk_tree_t which allows us to set the\n  number of samples a node must be an ancestor of to be considered a root\n  (:pr:`462`).\n\n- Change the semantics of tsk_tree_t so that sample counts are always\n  computed, and add a new ``TSK_NO_SAMPLE_COUNTS`` option to turn this\n  off (:pr:`462`).\n\n- Tables with metadata now have an optional ``metadata_schema`` field that can contain\n  arbitrary bytes. 
(:user:`benjeffery`, :pr:`493`)\n\n- Tables loaded from a file can now be edited in the same way as any other\n  table collection (:user:`jeromekelleher`, :issue:`536`, :pr:`530`).\n\n- Support for reading/writing to arbitrary file streams with the loadf/dumpf\n  variants for tree sequence and table collection load/dump\n  (:user:`jeromekelleher`, :user:`grahamgower`, :issue:`565`, :pr:`599`).\n\n- Add low-level sorting API and ``TSK_NO_CHECK_INTEGRITY`` flag\n  (:user:`jeromekelleher`, :pr:`627`, :issue:`626`).\n\n- Add extension of Kendall-Colijn tree distance metric for tree sequences\n  computed by ``tsk_treeseq_kc_distance``\n  (:user:`daniel-goldstein`, :pr:`548`)\n\n**Deprecated**\n\n- The ``TSK_SAMPLE_COUNTS`` option is now ignored and will print out a warning\n  if used (:pr:`462`).\n\n---------------------\n[0.99.2] - 2019-03-27\n---------------------\n\nBugfix release. Changes:\n\n- Fix incorrect errors on tbl_collection_dump (#132)\n- Catch table overflows (#157)\n\n---------------------\n[0.99.1] - 2019-01-24\n---------------------\n\nRefinements to the C API as we move towards 1.0.0. Changes:\n\n- Change the ``_tbl_`` abbreviation to ``_table_`` to improve readability.\n  Hence, we now have, e.g., ``tsk_node_table_t`` etc.\n- Change ``tsk_tbl_size_t`` to ``tsk_size_t``.\n- Standardise public API to use ``tsk_size_t`` and ``tsk_id_t`` as appropriate.\n- Add ``tsk_flags_t`` typedef and consistently use this as the type used to\n  encode bitwise flags. To avoid confusion, functions now have an ``options``\n  parameter.\n- Rename ``tsk_table_collection_position_t`` to ``tsk_bookmark_t``.\n- Rename ``tsk_table_collection_reset_position`` to ``tsk_table_collection_truncate``\n  and ``tsk_table_collection_record_position`` to ``tsk_table_collection_record_num_rows``.\n- Generalise ``tsk_table_collection_sort`` to take a bookmark as start argument.\n- Relax restriction that nodes in the ``samples`` argument to simplify must\n  currently be marked as samples. 
(https://github.com/tskit-dev/tskit/issues/72)\n- Allow ``tsk_table_collection_simplify`` to take a NULL samples argument to\n  specify \"all samples in the current tables\".\n- Add support for building as a meson subproject.\n\n---------------------\n[0.99.0] - 2019-01-14\n---------------------\n\nInitial alpha version of the tskit C API tagged. Version 0.99.x\nrepresents the series of releases leading to version 1.0.0 which\nwill be the first stable release. After 1.0.0, semver rules\nregarding API/ABI breakage will apply; however, in the 0.99.x\nseries arbitrary changes may happen.\n\n--------------------\n[0.0.0] - 2019-01-10\n--------------------\n\nInitial extraction of tskit code from msprime. Relicense to MIT.\nCode copied at hash 29921408661d5fe0b1a82b1ca302a8b87510fd23\n"
  },
  {
    "path": "c/VERSION.txt",
    "content": "1.3.1"
  },
  {
    "path": "c/examples/Makefile",
    "content": "# Simple Makefile for building examples.\n# This will build the examples in the current directory by compiling in the\n# full tskit source into each of the examples. This is *not* recommended for\n# real projects!\n#\n# To use, type \"make\" in this directory. If you have GSL installed you\n# should then get two example programs built.\n#\n# **Note**: This repo uses git submodules, and these must be checked out\n# correctly for this makefile to work, e.g.:\n#\n# $ git clone git@github.com:tskit-dev/tskit.git --recurse-submodules\n#\n# See the documentation (https://tskit.dev/tskit/docs/stable/c-api.html)\n# for more details on how to use the C API, and the tskit build examples\n# repo (https://github.com/tskit-dev/tskit-build-examples) for examples\n# of how to set up a production-ready build with tskit.\n#\n\nCFLAGS=-I../ -I../subprojects/kastore\nTSKIT_SOURCE=../tskit/*.c ../subprojects/kastore/kastore.c\n\ntargets = api_structure error_handling \\\n\thaploid_wright_fisher streaming \\\n\ttree_iteration tree_traversal \\\n\ttake_ownership \\\n\tjson_struct_metadata\n\nall: $(targets)\n\n$(targets): %: %.c\n\t${CC} ${CFLAGS} -o $@ $< ${TSKIT_SOURCE} -lm\n\nclean:\n\trm -f $(targets)\n\n"
  },
  {
    "path": "c/examples/api_structure.c",
    "content": "#include <stdio.h>\n#include <stdlib.h>\n#include <tskit/tables.h>\n\n#define check_tsk_error(val)                                                            \\\n    if (val < 0) {                                                                      \\\n        fprintf(stderr, \"line %d: %s\", __LINE__, tsk_strerror(val));                    \\\n        exit(EXIT_FAILURE);                                                             \\\n    }\n\nint\nmain(int argc, char **argv)\n{\n    int j, ret;\n    tsk_edge_table_t edges;\n\n    ret = tsk_edge_table_init(&edges, 0);\n    check_tsk_error(ret);\n    for (j = 0; j < 5; j++) {\n        ret = tsk_edge_table_add_row(&edges, 0, 1, j + 1, j, NULL, 0);\n        check_tsk_error(ret);\n    }\n    tsk_edge_table_print_state(&edges, stdout);\n    tsk_edge_table_free(&edges);\n\n    return EXIT_SUCCESS;\n}\n"
  },
  {
    "path": "c/examples/cpp_sorting_example.cpp",
    "content": "#include <cstddef>\n#include <vector>\n#include <algorithm>\n#include <stdexcept>\n#include <sstream>\n#include <iostream>\n#include <type_traits>\n#include <tskit.h>\n\nstatic void\nhandle_tskit_return_code(int code)\n{\n    if (code != 0) {\n        std::ostringstream o;\n        o << tsk_strerror(code);\n        throw std::runtime_error(o.str());\n    }\n}\n\nstruct edge_plus_time {\n    double time;\n    tsk_id_t parent, child;\n    double left, right;\n};\n\nint\nsort_edges(tsk_table_sorter_t *sorter, tsk_size_t start)\n{\n    if (sorter->tables->edges.metadata_length != 0) {\n        throw std::invalid_argument(\n            \"the sorter does not currently handle edge metadata\");\n    }\n    if (start != 0) {\n        throw std::invalid_argument(\"the sorter requires start==0\");\n    }\n\n    std::vector<edge_plus_time> temp;\n    temp.reserve(static_cast<std::size_t>(sorter->tables->edges.num_rows));\n\n    auto edges = &sorter->tables->edges;\n    auto nodes = &sorter->tables->nodes;\n\n    for (tsk_size_t i = 0; i < sorter->tables->edges.num_rows; ++i) {\n        temp.push_back(edge_plus_time{ nodes->time[edges->parent[i]], edges->parent[i],\n            edges->child[i], edges->left[i], edges->right[i] });\n    }\n\n    std::sort(begin(temp), end(temp),\n        [](const edge_plus_time &lhs, const edge_plus_time &rhs) {\n            if (lhs.time == rhs.time) {\n                if (lhs.parent == rhs.parent) {\n                    if (lhs.child == rhs.child) {\n                        return lhs.left < rhs.left;\n                    }\n                    return lhs.child < rhs.child;\n                }\n                return lhs.parent < rhs.parent;\n            }\n            return lhs.time < rhs.time;\n        });\n\n    for (std::size_t i = 0; i < temp.size(); ++i) {\n        edges->left[i] = temp[i].left;\n        edges->right[i] = temp[i].right;\n        edges->parent[i] = temp[i].parent;\n        edges->child[i] = temp[i].child;\n  
  }\n\n    return 0;\n}\n\nint\nmain(int argc, char **argv)\n{\n    if (argc != 3) {\n        std::cerr << \"Usage: \" << argv[0] << \" input.trees output.trees\\n\";\n        std::exit(0);\n    }\n    const char *infile = argv[1];\n    const char *outfile = argv[2];\n\n    tsk_table_collection_t tables;\n    auto ret = tsk_table_collection_load(&tables, infile, 0);\n    handle_tskit_return_code(ret);\n\n    tsk_table_sorter_t sorter;\n    ret = tsk_table_sorter_init(&sorter, &tables, 0);\n    handle_tskit_return_code(ret);\n    sorter.sort_edges = sort_edges;\n    try {\n        ret = tsk_table_sorter_run(&sorter, NULL);\n    } catch (std::exception &e) {\n        std::cerr << e.what() << '\\n';\n        std::exit(1);\n    }\n    handle_tskit_return_code(ret);\n    ret = tsk_table_collection_dump(&tables, outfile, 0);\n    handle_tskit_return_code(ret);\n    ret = tsk_table_collection_free(&tables);\n    handle_tskit_return_code(ret);\n}\n\n"
  },
  {
    "path": "c/examples/error_handling.c",
    "content": "#include <stdio.h>\n#include <stdlib.h>\n#include <tskit.h>\n\nint\nmain(int argc, char **argv)\n{\n    int ret;\n    tsk_treeseq_t ts;\n\n    if (argc != 2) {\n        fprintf(stderr, \"usage: <tree sequence file>\");\n        exit(EXIT_FAILURE);\n    }\n    ret = tsk_treeseq_load(&ts, argv[1], 0);\n    if (ret < 0) {\n        /* Error condition. Free and exit */\n        tsk_treeseq_free(&ts);\n        fprintf(stderr, \"%s\", tsk_strerror(ret));\n        exit(EXIT_FAILURE);\n    }\n    printf(\"Loaded tree sequence with %lld nodes and %lld edges from %s\\n\",\n        (long long) tsk_treeseq_get_num_nodes(&ts),\n        (long long) tsk_treeseq_get_num_edges(&ts), argv[1]);\n    tsk_treeseq_free(&ts);\n\n    return EXIT_SUCCESS;\n}\n"
  },
  {
    "path": "c/examples/haploid_wright_fisher.c",
    "content": "#include <stdio.h>\n#include <stdlib.h>\n#include <assert.h>\n#include <err.h>\n\n#include <tskit/tables.h>\n\n#define check_tsk_error(val)                                                            \\\n    if (val < 0) {                                                                      \\\n        errx(EXIT_FAILURE, \"line %d: %s\", __LINE__, tsk_strerror(val));                 \\\n    }\n\nvoid\nsimulate(tsk_table_collection_t *tables, int N, int T, int simplify_interval)\n{\n    tsk_id_t *buffer, *parents, *children, child, left_parent, right_parent;\n    double breakpoint;\n    int ret, j, t, b;\n\n    assert(simplify_interval != 0); // leads to division by zero\n    buffer = malloc(2 * N * sizeof(tsk_id_t));\n    if (buffer == NULL) {\n        errx(EXIT_FAILURE, \"Out of memory\");\n    }\n    tables->sequence_length = 1.0;\n    parents = buffer;\n    for (j = 0; j < N; j++) {\n        parents[j]\n            = tsk_node_table_add_row(&tables->nodes, 0, T, TSK_NULL, TSK_NULL, NULL, 0);\n        check_tsk_error(parents[j]);\n    }\n    b = 0;\n    for (t = T - 1; t >= 0; t--) {\n        /* Alternate between using the first and last N values in the buffer */\n        parents = buffer + (b * N);\n        b = (b + 1) % 2;\n        children = buffer + (b * N);\n        for (j = 0; j < N; j++) {\n            child = tsk_node_table_add_row(\n                &tables->nodes, 0, t, TSK_NULL, TSK_NULL, NULL, 0);\n            check_tsk_error(child);\n            /* NOTE: the use of rand() is discouraged for\n             * research code and proper random number generator\n             * libraries should be preferred.\n             */\n            left_parent = parents[(size_t) ((rand() / (1. + RAND_MAX)) * N)];\n            right_parent = parents[(size_t) ((rand() / (1. + RAND_MAX)) * N)];\n            do {\n                breakpoint = rand() / (1. 
+ RAND_MAX);\n            } while (breakpoint == 0); /* tiny proba of breakpoint being 0 */\n            ret = tsk_edge_table_add_row(\n                &tables->edges, 0, breakpoint, left_parent, child, NULL, 0);\n            check_tsk_error(ret);\n            ret = tsk_edge_table_add_row(\n                &tables->edges, breakpoint, 1, right_parent, child, NULL, 0);\n            check_tsk_error(ret);\n            children[j] = child;\n        }\n        if (t % simplify_interval == 0) {\n            printf(\"Simplify at generation %lld: (%lld nodes %lld edges)\", (long long) t,\n                (long long) tables->nodes.num_rows, (long long) tables->edges.num_rows);\n            /* Note: Edges must be sorted for simplify to work, and we use a brute force\n             * approach of sorting each time here for simplicity. This is inefficient. */\n            ret = tsk_table_collection_sort(tables, NULL, 0);\n            check_tsk_error(ret);\n            ret = tsk_table_collection_simplify(tables, children, N, 0, NULL);\n            check_tsk_error(ret);\n            printf(\" -> (%lld nodes %lld edges)\\n\", (long long) tables->nodes.num_rows,\n                (long long) tables->edges.num_rows);\n            for (j = 0; j < N; j++) {\n                children[j] = j;\n            }\n        }\n    }\n    free(buffer);\n}\n\nint\nmain(int argc, char **argv)\n{\n    int ret;\n    tsk_table_collection_t tables;\n\n    if (argc != 6) {\n        errx(EXIT_FAILURE, \"usage: N T simplify-interval output-file seed\");\n    }\n    ret = tsk_table_collection_init(&tables, 0);\n    check_tsk_error(ret);\n    srand((unsigned) atoi(argv[5]));\n    simulate(&tables, atoi(argv[1]), atoi(argv[2]), atoi(argv[3]));\n\n    /* Sort and index so that the result can be opened as a tree sequence */\n    ret = tsk_table_collection_sort(&tables, NULL, 0);\n    check_tsk_error(ret);\n    ret = tsk_table_collection_build_index(&tables, 0);\n    check_tsk_error(ret);\n    ret = 
tsk_table_collection_dump(&tables, argv[4], 0);\n    check_tsk_error(ret);\n\n    tsk_table_collection_free(&tables);\n    return 0;\n}\n"
  },
  {
    "path": "c/examples/json_struct_metadata.c",
    "content": "#include <stdio.h>\n#include <stdlib.h>\n#include <err.h>\n#include <string.h>\n#include <tskit.h>\n\n// these are properties of the ``json+struct`` codec, documented in tskit\n#define JSON_STRUCT_HEADER_SIZE 21\n\nconst uint8_t json_struct_codec_magic[4] = { 'J', 'B', 'L', 'B' };\nconst uint8_t json_struct_codec_version = 1;\n\n// little-endian read of a uint64_t from an address\nstatic uint64_t\nload_u64_le(const uint8_t *p)\n{\n    uint64_t value = (uint64_t) p[0];\n    value |= (uint64_t) p[1] << 8;\n    value |= (uint64_t) p[2] << 16;\n    value |= (uint64_t) p[3] << 24;\n    value |= (uint64_t) p[4] << 32;\n    value |= (uint64_t) p[5] << 40;\n    value |= (uint64_t) p[6] << 48;\n    value |= (uint64_t) p[7] << 56;\n    return value;\n}\n\n// little-endian write of a uint64_t to an address\nstatic void\nset_u64_le(uint8_t *dest, uint64_t value)\n{\n    dest[0] = (uint8_t) (value & 0xFF);\n    dest[1] = (uint8_t) ((value >> 8) & 0xFF);\n    dest[2] = (uint8_t) ((value >> 16) & 0xFF);\n    dest[3] = (uint8_t) ((value >> 24) & 0xFF);\n    dest[4] = (uint8_t) ((value >> 32) & 0xFF);\n    dest[5] = (uint8_t) ((value >> 40) & 0xFF);\n    dest[6] = (uint8_t) ((value >> 48) & 0xFF);\n    dest[7] = (uint8_t) ((value >> 56) & 0xFF);\n}\n\n// Extract the json and binary payloads from the `json+struct` codec data buffer.\n// Note that the output pointers `json` and `binary` reference memory\n// inside the `metadata` buffer passed in.\nvoid\njson_struct_codec_get_components(uint8_t *metadata, tsk_size_t metadata_length,\n    uint8_t **json, tsk_size_t *json_length, uint8_t **binary, tsk_size_t *binary_length)\n{\n    // check the structure of the codec header and the sizes it specifies\n    if (metadata == NULL || json == NULL || json_length == NULL || binary == NULL\n        || binary_length == NULL)\n        errx(EXIT_FAILURE, \"bad parameter value.\");\n    if (metadata_length < JSON_STRUCT_HEADER_SIZE)\n        errx(EXIT_FAILURE, \"metadata 
truncated.\");\n    if (memcmp(metadata, json_struct_codec_magic, sizeof(json_struct_codec_magic)) != 0)\n        errx(EXIT_FAILURE, \"bad magic bytes.\");\n\n    uint8_t version = metadata[4];\n    if (version != json_struct_codec_version)\n        errx(EXIT_FAILURE, \"bad version number.\");\n\n    uint64_t json_length_u64 = load_u64_le(metadata + 5);\n    uint64_t binary_length_u64 = load_u64_le(metadata + 13);\n    if (json_length_u64 > UINT64_MAX - (uint64_t) JSON_STRUCT_HEADER_SIZE)\n        errx(EXIT_FAILURE, \"invalid length.\");\n\n    // determine the number of padding bytes and do more safety checks\n    uint64_t length = (uint64_t) JSON_STRUCT_HEADER_SIZE + json_length_u64;\n    uint64_t padding_length = (8 - (length & 0x07)) % 8;\n    if (padding_length > UINT64_MAX - length)\n        errx(EXIT_FAILURE, \"invalid length.\");\n\n    length += padding_length;\n    if (binary_length_u64 > UINT64_MAX - length)\n        errx(EXIT_FAILURE, \"invalid length.\");\n\n    length += binary_length_u64;\n    if ((uint64_t) metadata_length != length)\n        errx(EXIT_FAILURE, \"unexpected size.\");\n\n    uint8_t *padding_start = metadata + JSON_STRUCT_HEADER_SIZE + json_length_u64;\n    for (uint64_t j = 0; j < padding_length; ++j)\n        if (*(padding_start + j) != 0)\n            errx(EXIT_FAILURE, \"padding bytes are nonzero.\");\n\n    // the structure of the codec data seems valid; return components\n    *json = metadata + JSON_STRUCT_HEADER_SIZE;\n    *json_length = (tsk_size_t) json_length_u64;\n\n    *binary = metadata + JSON_STRUCT_HEADER_SIZE + json_length_u64 + padding_length;\n    *binary_length = (tsk_size_t) binary_length_u64;\n}\n\n// malloc and return a data buffer for the `json+struct` codec\n// that contains the given components\nvoid\njson_struct_codec_create_buffer(const uint8_t *json, tsk_size_t json_length,\n    const uint8_t *binary, tsk_size_t binary_length, uint8_t **buffer,\n    tsk_size_t *buffer_length)\n{\n    // figure out the 
total length of the codec's data and allocate the buffer for it\n    tsk_size_t header_length = JSON_STRUCT_HEADER_SIZE;\n    tsk_size_t padding_length = (8 - ((header_length + json_length) & 0x07)) % 8;\n    tsk_size_t total_length\n        = header_length + json_length + padding_length + binary_length;\n    uint8_t *bytes = malloc(total_length);\n    if (!bytes)\n        errx(EXIT_FAILURE, \"memory for buffer could not be allocated.\");\n\n    // then set up the bytes for the codec header\n    memcpy(bytes, json_struct_codec_magic, 4);\n    bytes[4] = json_struct_codec_version;\n    set_u64_le(bytes + 5, (uint64_t) json_length);\n    set_u64_le(bytes + 13, (uint64_t) binary_length);\n\n    // copy in the JSON and binary data, separated by the padding bytes; the goal of the\n    // padding bytes is to ensure that the binary data is 8-byte-aligned relative to the\n    // start of the buffer\n    memcpy(bytes + header_length, json, json_length);\n    memset(bytes + header_length + json_length, 0, padding_length);\n    memcpy(bytes + header_length + json_length + padding_length, binary, binary_length);\n\n    // return the buffer and its length; the caller takes ownership of the buffer\n    *buffer = bytes;\n    *buffer_length = total_length;\n}\n\nint\nmain(int argc, char **argv)\n{\n    // we start with JSON and binary payloads that we encode into a new buffer\n    // note that the JSON payload does not have to end with a trailing NULL\n    const char json_payload[] = { '{', '\"', 'a', '\"', ':', '1', '}' };\n    const uint8_t binary_payload[] = { 0x01, 0x02, 0x03, 0x04 };\n    uint8_t *metadata;\n    tsk_size_t metadata_length;\n\n    json_struct_codec_create_buffer((const uint8_t *) json_payload, sizeof(json_payload),\n        binary_payload, sizeof(binary_payload), &metadata, &metadata_length);\n\n    // then we decode that buffer to recover the json and binary data\n    uint8_t *decoded_json, *decoded_binary;\n    tsk_size_t decoded_json_length, 
decoded_binary_length;\n\n    json_struct_codec_get_components(metadata, metadata_length, &decoded_json,\n        &decoded_json_length, &decoded_binary, &decoded_binary_length);\n\n    // print the recovered data to demonstrate that the round-trip worked\n    // note that the JSON data is not NULL-terminated unless you put a NULL there!\n    printf(\"JSON: %.*s\\n\", (int) decoded_json_length, decoded_json);\n\n    printf(\"Binary data:\");\n    for (tsk_size_t j = 0; j < decoded_binary_length; j++)\n        printf(\" %#04x\", decoded_binary[j]);\n    printf(\"\\n\");\n\n    free(metadata);\n    return EXIT_SUCCESS;\n}\n"
  },
  {
    "path": "c/examples/multichrom_wright_fisher.c",
    "content": "#include <stdio.h>\n#include <stdlib.h>\n#include <assert.h>\n#include <err.h>\n#include <string.h>\n\n#include <pthread.h>\n#include <tskit/tables.h>\n\n#define check_tsk_error(val)                                                            \\\n    if (val < 0) {                                                                      \\\n        errx(EXIT_FAILURE, \"line %d: %s\\n\", __LINE__, tsk_strerror(val));               \\\n    }\n\nstatic void\ninit_tables(tsk_table_collection_t *tcs, int num_chroms)\n{\n    int j, ret;\n\n    for (j = 0; j < num_chroms; j++) {\n        ret = tsk_table_collection_init(&tcs[j], 0);\n        check_tsk_error(ret);\n        if (j > 0) {\n            tsk_node_table_free(&tcs[j].nodes);\n        }\n    }\n}\n\nstatic void\nfree_tables(tsk_table_collection_t *tcs, int num_chroms)\n{\n    int j;\n\n    for (j = 0; j < num_chroms; j++) {\n        if (j > 0) {\n            /* Must not double free node table columns. */\n            memset(&tcs[j].nodes, 0, sizeof(tcs[j].nodes));\n        }\n        tsk_table_collection_free(&tcs[j]);\n    }\n}\n\nstatic void\njoin_tables(tsk_table_collection_t *tcs, int num_chroms)\n{\n    int j, ret;\n\n    for (j = 1; j < num_chroms; j++) {\n        ret = tsk_edge_table_extend(\n            &tcs[0].edges, &tcs[j].edges, tcs[j].edges.num_rows, NULL, 0);\n        check_tsk_error(ret);\n    }\n    /* Get all the squashable edges next to each other */\n    ret = tsk_table_collection_sort(&tcs[0], NULL, 0);\n    check_tsk_error(ret);\n    ret = tsk_edge_table_squash(&tcs[0].edges);\n    check_tsk_error(ret);\n    /* We need to sort again after squash */\n    ret = tsk_table_collection_sort(&tcs[0], NULL, 0);\n    check_tsk_error(ret);\n    ret = tsk_table_collection_build_index(&tcs[0], 0);\n    check_tsk_error(ret);\n}\n\nstruct chunk_work {\n    int chunk;\n    tsk_table_collection_t *tc;\n    int *samples;\n    int N;\n};\n\nvoid *\nsimplify_chunk(void *arg)\n{\n    int ret;\n    struct 
chunk_work *work = (struct chunk_work *) arg;\n    tsk_size_t edges_before = work->tc->edges.num_rows;\n\n    ret = tsk_table_collection_sort(work->tc, NULL, 0);\n    check_tsk_error(ret);\n    ret = tsk_table_collection_simplify(work->tc, work->samples, work->N,\n        TSK_SIMPLIFY_NO_FILTER_NODES | TSK_SIMPLIFY_NO_UPDATE_SAMPLE_FLAGS, NULL);\n    check_tsk_error(ret);\n    /* NOTE: this printf makes helgrind complain */\n    printf(\"\\tchunk %d: %lld -> %lld\\n\", work->chunk, (long long) edges_before,\n        (long long) work->tc->edges.num_rows);\n\n    return NULL;\n}\n\nvoid\nsort_and_simplify_all(tsk_table_collection_t *tcs, int num_chroms, int *samples, int N)\n{\n    int j, ret;\n    struct chunk_work work[num_chroms];\n    pthread_t threads[num_chroms];\n\n    for (j = 1; j < num_chroms; j++) {\n        tcs[j].nodes = tcs[0].nodes;\n    }\n\n    for (j = 0; j < num_chroms; j++) {\n        work[j].chunk = j;\n        work[j].tc = &tcs[j];\n        work[j].samples = samples;\n        work[j].N = N;\n\n        ret = pthread_create(&threads[j], NULL, simplify_chunk, (void *) &work[j]);\n        if (ret != 0) {\n            errx(EXIT_FAILURE, \"Pthread create failed\");\n        }\n        /* simplify_chunk((void *) &work[j]); */\n    }\n    for (j = 0; j < num_chroms; j++) {\n        ret = pthread_join(threads[j], NULL);\n        if (ret != 0) {\n            errx(EXIT_FAILURE, \"Pthread join failed\");\n        }\n    }\n}\n\nvoid\nsimplify_tables(tsk_table_collection_t *tcs, int num_chroms, int *samples, int N)\n{\n    int j, k, num_edges, ret;\n    const tsk_size_t num_nodes = tcs[0].nodes.num_rows;\n    tsk_bool_t *keep_nodes = malloc(num_nodes * sizeof(*keep_nodes));\n    tsk_id_t *node_id_map = malloc(num_nodes * sizeof(*node_id_map));\n    tsk_id_t *edge_child, *edge_parent;\n\n    if (keep_nodes == NULL || node_id_map == NULL) {\n        errx(EXIT_FAILURE, \"Out of memory\");\n    }\n\n    printf(\"Simplify %lld nodes\\n\", (long long) 
tcs[0].nodes.num_rows);\n    sort_and_simplify_all(tcs, num_chroms, samples, N);\n\n    for (j = 0; j < num_nodes; j++) {\n        keep_nodes[j] = false;\n        tcs[0].nodes.flags[j] &= (~TSK_NODE_IS_SAMPLE);\n    }\n    for (j = 0; j < N; j++) {\n        keep_nodes[samples[j]] = true;\n        tcs[0].nodes.flags[samples[j]] |= TSK_NODE_IS_SAMPLE;\n    }\n\n    for (j = 0; j < num_chroms; j++) {\n        edge_child = tcs[j].edges.child;\n        edge_parent = tcs[j].edges.parent;\n        num_edges = tcs[j].edges.num_rows;\n        for (k = 0; k < num_edges; k++) {\n            keep_nodes[edge_child[k]] = true;\n            keep_nodes[edge_parent[k]] = true;\n        }\n    }\n    tsk_node_table_keep_rows(&tcs[0].nodes, keep_nodes, 0, node_id_map);\n    printf(\"\\tdone: %lld nodes\\n\", (long long) tcs[0].nodes.num_rows);\n\n    /* Remap node references */\n    for (j = 0; j < num_chroms; j++) {\n        edge_child = tcs[j].edges.child;\n        edge_parent = tcs[j].edges.parent;\n        num_edges = tcs[j].edges.num_rows;\n        for (k = 0; k < num_edges; k++) {\n            edge_child[k] = node_id_map[edge_child[k]];\n            edge_parent[k] = node_id_map[edge_parent[k]];\n        }\n        ret = tsk_table_collection_check_integrity(&tcs[j], 0);\n        check_tsk_error(ret);\n    }\n    for (j = 0; j < N; j++) {\n        samples[j] = node_id_map[samples[j]];\n    }\n    free(keep_nodes);\n    free(node_id_map);\n}\n\nvoid\nsimulate(\n    tsk_table_collection_t *tcs, int num_chroms, int N, int T, int simplify_interval)\n{\n    tsk_id_t *buffer, *parents, *children, child, left_parent, right_parent;\n    bool left_is_first;\n    double chunk_left, chunk_right;\n    int ret, j, t, b, k;\n\n    assert(simplify_interval != 0); // leads to division by zero\n    buffer = malloc(2 * N * sizeof(tsk_id_t));\n    if (buffer == NULL) {\n        errx(EXIT_FAILURE, \"Out of memory\");\n    }\n    for (k = 0; k < num_chroms; k++) {\n        tcs[k].sequence_length = 
num_chroms;\n    }\n    parents = buffer;\n    for (j = 0; j < N; j++) {\n        parents[j]\n            = tsk_node_table_add_row(&tcs[0].nodes, 0, T, TSK_NULL, TSK_NULL, NULL, 0);\n        check_tsk_error(parents[j]);\n    }\n    b = 0;\n    for (t = T - 1; t >= 0; t--) {\n        /* Alternate between using the first and last N values in the buffer */\n        parents = buffer + (b * N);\n        b = (b + 1) % 2;\n        children = buffer + (b * N);\n        for (j = 0; j < N; j++) {\n            child = tsk_node_table_add_row(\n                &tcs[0].nodes, 0, t, TSK_NULL, TSK_NULL, NULL, 0);\n            check_tsk_error(child);\n            /* NOTE: the use of rand() is discouraged for\n             * research code and proper random number generator\n             * libraries should be preferred.\n             */\n            left_parent = parents[(size_t) ((rand() / (1. + RAND_MAX)) * N)];\n            right_parent = parents[(size_t) ((rand() / (1. + RAND_MAX)) * N)];\n            left_is_first = rand() < 0.5;\n            chunk_left = 0.0;\n            for (k = 0; k < num_chroms; k++) {\n                chunk_right = chunk_left + rand() / (1. + RAND_MAX);\n                /* a very tiny chance that right and left are equal */\n                if (chunk_right > chunk_left) {\n                    ret = tsk_edge_table_add_row(&tcs[k].edges, chunk_left, chunk_right,\n                        left_is_first ? left_parent : right_parent, child, NULL, 0);\n                    check_tsk_error(ret);\n                }\n                chunk_left += 1.0;\n                if (chunk_right < chunk_left) {\n                    ret = tsk_edge_table_add_row(&tcs[k].edges, chunk_right, chunk_left,\n                        left_is_first ? 
right_parent : left_parent, child, NULL, 0);\n                    check_tsk_error(ret);\n                }\n            }\n            children[j] = child;\n        }\n        if (t % simplify_interval == 0) {\n            simplify_tables(tcs, num_chroms, children, N);\n        }\n    }\n    /* Set the sample flags for final generation */\n    for (j = 0; j < N; j++) {\n        tcs[0].nodes.flags[children[j]] = TSK_NODE_IS_SAMPLE;\n    }\n    free(buffer);\n}\n\nint\nmain(int argc, char **argv)\n{\n    int ret;\n    int num_chroms;\n\n    if (argc != 7) {\n        errx(EXIT_FAILURE, \"usage: N T simplify-interval output seed num-chroms\");\n    }\n\n    num_chroms = atoi(argv[6]);\n    tsk_table_collection_t tcs[num_chroms];\n\n    srand((unsigned) atoi(argv[5]));\n    init_tables(tcs, num_chroms);\n    simulate(tcs, num_chroms, atoi(argv[1]), atoi(argv[2]), atoi(argv[3]));\n    join_tables(tcs, num_chroms);\n    ret = tsk_table_collection_dump(&tcs[0], argv[4], 0);\n    check_tsk_error(ret);\n    free_tables(tcs, num_chroms);\n\n    return 0;\n}\n"
  },
  {
    "path": "c/examples/multichrom_wright_fisher_singlethreaded.c",
    "content": "#include <stdio.h>\n#include <stdlib.h>\n#include <assert.h>\n#include <err.h>\n#include <string.h>\n\n#include <tskit/tables.h>\n\n#define check_tsk_error(val)                                                            \\\n    if (val < 0) {                                                                      \\\n        errx(EXIT_FAILURE, \"line %d: %s\\n\", __LINE__, tsk_strerror(val));               \\\n    }\n\nvoid\nsimulate(\n    tsk_table_collection_t *tables, int num_chroms, int N, int T, int simplify_interval)\n{\n    tsk_id_t *buffer, *parents, *children, child, left_parent, right_parent;\n    bool left_is_first;\n    double chunk_left, chunk_right;\n    int ret, j, t, b, k;\n\n    assert(simplify_interval != 0); // leads to division by zero\n    buffer = malloc(2 * N * sizeof(tsk_id_t));\n    if (buffer == NULL) {\n        errx(EXIT_FAILURE, \"Out of memory\");\n    }\n    tables->sequence_length = num_chroms;\n    parents = buffer;\n    for (j = 0; j < N; j++) {\n        parents[j]\n            = tsk_node_table_add_row(&tables->nodes, 0, T, TSK_NULL, TSK_NULL, NULL, 0);\n        check_tsk_error(parents[j]);\n    }\n    b = 0;\n    for (t = T - 1; t >= 0; t--) {\n        /* Alternate between using the first and last N values in the buffer */\n        parents = buffer + (b * N);\n        b = (b + 1) % 2;\n        children = buffer + (b * N);\n        for (j = 0; j < N; j++) {\n            child = tsk_node_table_add_row(\n                &tables->nodes, 0, t, TSK_NULL, TSK_NULL, NULL, 0);\n            check_tsk_error(child);\n            /* NOTE: the use of rand() is discouraged for\n             * research code and proper random number generator\n             * libraries should be preferred.\n             */\n            left_parent = parents[(size_t) ((rand() / (1. + RAND_MAX)) * N)];\n            right_parent = parents[(size_t) ((rand() / (1. 
+ RAND_MAX)) * N)];\n            left_is_first = rand() < 0.5;\n            chunk_left = 0.0;\n            for (k = 0; k < num_chroms; k++) {\n                chunk_right = chunk_left + rand() / (1. + RAND_MAX);\n                /* a very tiny chance that right and left are equal */\n                if (chunk_right > chunk_left) {\n                    ret = tsk_edge_table_add_row(&tables->edges, chunk_left, chunk_right,\n                        left_is_first ? left_parent : right_parent, child, NULL, 0);\n                    check_tsk_error(ret);\n                }\n                chunk_left += 1.0;\n                if (chunk_right < chunk_left) {\n                    ret = tsk_edge_table_add_row(&tables->edges, chunk_right, chunk_left,\n                        left_is_first ? right_parent : left_parent, child, NULL, 0);\n                    check_tsk_error(ret);\n                }\n            }\n            children[j] = child;\n        }\n        if (t % simplify_interval == 0) {\n            printf(\"Simplify at generation %lld: (%lld nodes %lld edges)\", (long long) t,\n                (long long) tables->nodes.num_rows, (long long) tables->edges.num_rows);\n            /* Note: Edges must be sorted for simplify to work, and we use a brute force\n             * approach of sorting each time here for simplicity. This is inefficient. 
*/\n            ret = tsk_table_collection_sort(tables, NULL, 0);\n            check_tsk_error(ret);\n            ret = tsk_table_collection_simplify(tables, children, N, 0, NULL);\n            check_tsk_error(ret);\n            printf(\" -> (%lld nodes %lld edges)\\n\", (long long) tables->nodes.num_rows,\n                (long long) tables->edges.num_rows);\n            for (j = 0; j < N; j++) {\n                children[j] = j;\n            }\n        }\n    }\n    /* Set the sample flags for final generation */\n    for (j = 0; j < N; j++) {\n        tables->nodes.flags[children[j]] = TSK_NODE_IS_SAMPLE;\n    }\n    free(buffer);\n}\n\nint\nmain(int argc, char **argv)\n{\n    int ret;\n    tsk_table_collection_t tables;\n\n    if (argc != 7) {\n        errx(EXIT_FAILURE, \"usage: N T simplify-interval output seed num-chroms\");\n    }\n    ret = tsk_table_collection_init(&tables, 0);\n    check_tsk_error(ret);\n    srand((unsigned) atoi(argv[5]));\n    simulate(&tables, atoi(argv[6]), atoi(argv[1]), atoi(argv[2]), atoi(argv[3]));\n\n    /* Sort and index so that the result can be opened as a tree sequence */\n    ret = tsk_table_collection_sort(&tables, NULL, 0);\n    check_tsk_error(ret);\n    ret = tsk_table_collection_build_index(&tables, 0);\n    check_tsk_error(ret);\n    ret = tsk_table_collection_dump(&tables, argv[4], 0);\n    check_tsk_error(ret);\n\n    tsk_table_collection_free(&tables);\n    return 0;\n}\n"
  },
  {
    "path": "c/examples/streaming.c",
    "content": "#include <stdio.h>\n#include <stdlib.h>\n#include <tskit/tables.h>\n\n#define check_tsk_error(val)                                                            \\\n    if (val < 0) {                                                                      \\\n        fprintf(stderr, \"Error: line %d: %s\\n\", __LINE__, tsk_strerror(val));           \\\n        exit(EXIT_FAILURE);                                                             \\\n    }\n\nint\nmain(int argc, char **argv)\n{\n    int ret;\n    int j = 0;\n    tsk_table_collection_t tables;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    check_tsk_error(ret);\n\n    while (true) {\n        ret = tsk_table_collection_loadf(&tables, stdin, TSK_NO_INIT);\n        if (ret == TSK_ERR_EOF) {\n            break;\n        }\n        check_tsk_error(ret);\n        fprintf(stderr, \"Tree sequence %d had %lld mutations\\n\", j,\n            (long long) tables.mutations.num_rows);\n        ret = tsk_mutation_table_truncate(&tables.mutations, 0);\n        check_tsk_error(ret);\n        ret = tsk_table_collection_dumpf(&tables, stdout, 0);\n        check_tsk_error(ret);\n        j++;\n    }\n    tsk_table_collection_free(&tables);\n    return EXIT_SUCCESS;\n}\n"
  },
  {
    "path": "c/examples/take_ownership.c",
    "content": "#include <err.h>\n#include <stdlib.h>\n#include <tskit/tables.h>\n#include <tskit/trees.h>\n\n#define check_tsk_error(val)                                                            \\\n    if (val < 0) {                                                                      \\\n        errx(EXIT_FAILURE, \"line %d: %s\", __LINE__, tsk_strerror(val));                 \\\n    }\n\nint\nmain(int argc, char **argv)\n{\n    tsk_table_collection_t *tables;\n    tsk_treeseq_t treeseq;\n    int rv;\n\n    tables = malloc(sizeof(*tables));\n    rv = tsk_table_collection_init(tables, 0);\n    check_tsk_error(rv);\n\n    /* NOTE: you must set sequence length AFTER initialization */\n    tables->sequence_length = 1.0;\n\n    /* Do your regular table operations */\n    rv = tsk_node_table_add_row(&tables->nodes, 0, 0.0, -1, -1, NULL, 0);\n    check_tsk_error(rv);\n\n    /* Initalize the tree sequence, transferring all responsibility\n     * for the table collection's memory managment\n     */\n    rv = tsk_treeseq_init(\n        &treeseq, tables, TSK_TS_INIT_BUILD_INDEXES | TSK_TAKE_OWNERSHIP);\n    check_tsk_error(rv);\n\n    /* WARNING: calling tsk_table_collection_free is now a memory error! */\n    tsk_treeseq_free(&treeseq);\n}\n"
  },
  {
    "path": "c/examples/tree_iteration.c",
    "content": "#include <stdio.h>\n#include <stdlib.h>\n#include <err.h>\n\n#include <tskit.h>\n\n#define check_tsk_error(val)                                                            \\\n    if (val < 0) {                                                                      \\\n        errx(EXIT_FAILURE, \"line %d: %s\", __LINE__, tsk_strerror(val));                 \\\n    }\n\nint\nmain(int argc, char **argv)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_tree_t tree;\n\n    if (argc != 2) {\n        errx(EXIT_FAILURE, \"usage: <tree sequence file>\");\n    }\n    ret = tsk_treeseq_load(&ts, argv[1], 0);\n    check_tsk_error(ret);\n    ret = tsk_tree_init(&tree, &ts, 0);\n    check_tsk_error(ret);\n\n    printf(\"Iterate forwards\\n\");\n    for (ret = tsk_tree_first(&tree); ret == TSK_TREE_OK; ret = tsk_tree_next(&tree)) {\n        printf(\"\\ttree %lld has %lld roots\\n\", (long long) tree.index,\n            (long long) tsk_tree_get_num_roots(&tree));\n    }\n    check_tsk_error(ret);\n\n    printf(\"Iterate backwards\\n\");\n    for (ret = tsk_tree_last(&tree); ret == TSK_TREE_OK; ret = tsk_tree_prev(&tree)) {\n        printf(\"\\ttree %lld has %lld roots\\n\", (long long) tree.index,\n            (long long) tsk_tree_get_num_roots(&tree));\n    }\n    check_tsk_error(ret);\n\n    tsk_tree_free(&tree);\n    tsk_treeseq_free(&ts);\n    return 0;\n}\n"
  },
  {
    "path": "c/examples/tree_traversal.c",
    "content": "#include <stdio.h>\n#include <stdlib.h>\n#include <err.h>\n\n#include <tskit.h>\n\n#define check_tsk_error(val)                                                            \\\n    if (val < 0) {                                                                      \\\n        errx(EXIT_FAILURE, \"line %d: %s\", __LINE__, tsk_strerror(val));                 \\\n    }\n\nstatic void\ntraverse_standard(const tsk_tree_t *tree)\n{\n    int ret;\n    tsk_size_t num_nodes, j;\n    tsk_id_t *nodes = malloc(tsk_tree_get_size_bound(tree) * sizeof(*nodes));\n\n    if (nodes == NULL) {\n        errx(EXIT_FAILURE, \"Out of memory\");\n    }\n    ret = tsk_tree_preorder(tree, nodes, &num_nodes);\n    check_tsk_error(ret);\n    for (j = 0; j < num_nodes; j++) {\n        printf(\"Visit preorder %lld\\n\", (long long) nodes[j]);\n    }\n\n    ret = tsk_tree_postorder(tree, nodes, &num_nodes);\n    check_tsk_error(ret);\n    for (j = 0; j < num_nodes; j++) {\n        printf(\"Visit postorder %lld\\n\", (long long) nodes[j]);\n    }\n\n    free(nodes);\n}\n\nstatic void\n_traverse(const tsk_tree_t *tree, tsk_id_t u, int depth)\n{\n    tsk_id_t v;\n    int j;\n\n    for (j = 0; j < depth; j++) {\n        printf(\"    \");\n    }\n    printf(\"Visit recursive %lld\\n\", (long long) u);\n    for (v = tree->left_child[u]; v != TSK_NULL; v = tree->right_sib[v]) {\n        _traverse(tree, v, depth + 1);\n    }\n}\n\nstatic void\ntraverse_recursive(const tsk_tree_t *tree)\n{\n    _traverse(tree, tree->virtual_root, -1);\n}\n\nstatic void\ntraverse_stack(const tsk_tree_t *tree)\n{\n    int stack_top;\n    tsk_id_t u, v;\n    tsk_id_t *stack = malloc(tsk_tree_get_size_bound(tree) * sizeof(*stack));\n\n    if (stack == NULL) {\n        errx(EXIT_FAILURE, \"Out of memory\");\n    }\n    stack_top = 0;\n    stack[stack_top] = tree->virtual_root;\n    while (stack_top >= 0) {\n        u = stack[stack_top];\n        stack_top--;\n        printf(\"Visit stack %lld\\n\", (long long) 
u);\n        /* Put nodes on the stack right-to-left, so we visit in left-to-right */\n        for (v = tree->right_child[u]; v != TSK_NULL; v = tree->left_sib[v]) {\n            stack_top++;\n            stack[stack_top] = v;\n        }\n    }\n    free(stack);\n}\n\nstatic void\ntraverse_upwards(const tsk_tree_t *tree)\n{\n    const tsk_id_t *samples = tsk_treeseq_get_samples(tree->tree_sequence);\n    tsk_size_t num_samples = tsk_treeseq_get_num_samples(tree->tree_sequence);\n    tsk_size_t j;\n    tsk_id_t u;\n\n    for (j = 0; j < num_samples; j++) {\n        u = samples[j];\n        while (u != TSK_NULL) {\n            printf(\"Visit upwards: %lld\\n\", (long long) u);\n            u = tree->parent[u];\n        }\n    }\n}\n\nint\nmain(int argc, char **argv)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_tree_t tree;\n\n    if (argc != 2) {\n        errx(EXIT_FAILURE, \"usage: <tree sequence file>\");\n    }\n    ret = tsk_treeseq_load(&ts, argv[1], 0);\n    check_tsk_error(ret);\n    ret = tsk_tree_init(&tree, &ts, 0);\n    check_tsk_error(ret);\n    ret = tsk_tree_first(&tree);\n    check_tsk_error(ret);\n\n    traverse_standard(&tree);\n\n    traverse_recursive(&tree);\n\n    traverse_stack(&tree);\n\n    traverse_upwards(&tree);\n\n    tsk_tree_free(&tree);\n    tsk_treeseq_free(&ts);\n    return 0;\n}\n"
  },
  {
    "path": "c/meson.build",
    "content": "project('tskit', ['c', 'cpp'],\n    version: files('VERSION.txt'),    \n    default_options: ['c_std=c99', 'cpp_std=c++11']\n)\n\ndebug_c_args = []\nif get_option('buildtype').startswith('debug')\n    debug_c_args = ['-DTSK_TRACE_ERRORS']\nendif\n\nkastore_proj = subproject('kastore')\nkastore_dep = kastore_proj.get_variable('kastore_dep')\nkastore_inc = kastore_proj.get_variable('kastore_inc')\n\ncc = meson.get_compiler('c')\nm_dep = cc.find_library('m', required: false)\nlib_deps = [m_dep, kastore_dep]\n\nextra_c_args = [\n    '-Wall', '-Wextra', '-Werror', '-Wpedantic', '-W',\n    '-Wmissing-prototypes',  '-Wstrict-prototypes',\n    '-Wconversion', '-Wshadow', '-Wpointer-arith', '-Wcast-align',\n    '-Wcast-qual', '-Wwrite-strings', '-Wnested-externs',\n    '-fshort-enums', '-fno-common'] + debug_c_args\n\nlib_sources = [\n    'tskit/core.c', 'tskit/tables.c', 'tskit/trees.c',\n    'tskit/genotypes.c', 'tskit/stats.c', 'tskit/convert.c', 'tskit/haplotype_matching.c']\nlib_headers = [\n    'tskit/core.h', 'tskit/tables.h', 'tskit/trees.h',\n    'tskit/genotypes.h', 'tskit/stats.h', 'tskit/convert.h', 'tskit/haplotype_matching.h']\n\n# Subprojects use the static library for simplicity.\ntskit_inc = [kastore_inc, include_directories(['.'])]\ntskit_lib = static_library('tskit',\n    sources: lib_sources, dependencies: lib_deps)\ntskit_dep = declare_dependency(include_directories:tskit_inc, link_with: tskit_lib)\n\nif not meson.is_subproject()\n\n    # Shared library install target.\n    shared_library('tskit',\n        sources: lib_sources, dependencies: lib_deps, c_args: extra_c_args, install: true)\n    install_headers('tskit.h')\n    install_headers(lib_headers, subdir: 'tskit')\n\n    cunit_dep = dependency('cunit')\n    # We don't specify extra C args here as CUnit won't pass the checks.\n    test_lib = static_library('testlib',\n        sources: ['tests/testlib.c'], dependencies: [cunit_dep, kastore_dep, tskit_dep])\n\n    test_core = 
executable('test_core',\n        sources: ['tests/test_core.c'],\n        link_with: [tskit_lib, test_lib],\n        c_args: extra_c_args+['-DMESON_PROJECT_VERSION=\"@0@\"'.format(meson.project_version())],\n        dependencies: kastore_dep,\n        )\n    test('core', test_core)\n\n    test_tables = executable('test_tables',\n        sources: ['tests/test_tables.c'],\n        link_with: [tskit_lib, test_lib], c_args: extra_c_args, dependencies: kastore_dep)\n    test('tables', test_tables)\n\n    test_trees = executable('test_trees',\n        sources: ['tests/test_trees.c'],\n        link_with: [tskit_lib, test_lib], c_args: extra_c_args, dependencies: kastore_dep)\n    test('trees', test_trees)\n\n    test_genotypes = executable('test_genotypes',\n        sources: ['tests/test_genotypes.c'],\n        link_with: [tskit_lib, test_lib], c_args: extra_c_args, dependencies: kastore_dep)\n    test('genotypes', test_genotypes)\n\n    test_convert = executable('test_convert',\n        sources: ['tests/test_convert.c'],\n        link_with: [tskit_lib, test_lib], c_args: extra_c_args, dependencies: kastore_dep)\n    test('convert', test_convert)\n\n    test_stats = executable('test_stats',\n        sources: ['tests/test_stats.c'],\n        link_with: [tskit_lib, test_lib], c_args: extra_c_args, dependencies: kastore_dep)\n    test('stats', test_stats)\n\n    test_haplotype_matching = executable('test_haplotype_matching',\n        sources: ['tests/test_haplotype_matching.c'],\n        link_with: [tskit_lib, test_lib], c_args: extra_c_args, dependencies: kastore_dep)\n    test('haplotype_matching', test_haplotype_matching)\n\n    test_file_format = executable('test_file_format',\n        sources: ['tests/test_file_format.c'],\n        link_with: [tskit_lib, test_lib], c_args: extra_c_args, dependencies: kastore_dep)\n    test('file_format', test_file_format)\n\n    test_minimal_cpp = executable('test_minimal_cpp',\n        sources: ['tests/test_minimal_cpp.cpp'], 
link_with: [tskit_lib],\n        dependencies: kastore_dep)\n    test('minimal_cpp', test_minimal_cpp)\n\n    if get_option('build_examples')\n      # These example programs use less portable features,\n      # and we don't want to always compile them. Use, e.g.,\n      # meson build -Dbuild_examples=false\n      executable('api_structure',\n          sources: ['examples/api_structure.c'], \n          link_with: [tskit_lib], dependencies: lib_deps)\n      executable('error_handling',\n          sources: ['examples/error_handling.c'], \n          link_with: [tskit_lib], dependencies: lib_deps)\n      executable('tree_iteration',\n          sources: ['examples/tree_iteration.c'], \n          link_with: [tskit_lib], dependencies: lib_deps)\n      executable('tree_traversal',\n          sources: ['examples/tree_traversal.c'], \n          link_with: [tskit_lib], dependencies: lib_deps)\n      executable('streaming',\n          sources: ['examples/streaming.c'], \n          link_with: [tskit_lib], dependencies: lib_deps)\n      executable('cpp_sorting_example',\n          sources: ['examples/cpp_sorting_example.cpp'], \n          link_with: [tskit_lib], dependencies: lib_deps)\n      executable('haploid_wright_fisher',\n          sources: ['examples/haploid_wright_fisher.c'], \n          link_with: [tskit_lib], dependencies: lib_deps)\n      executable('multichrom_wright_fisher_singlethreaded',\n          sources: ['examples/multichrom_wright_fisher_singlethreaded.c'], \n          link_with: [tskit_lib], dependencies: lib_deps)\n      executable('json_struct_metadata',\n          sources: ['examples/json_struct_metadata.c'], \n          link_with: [tskit_lib], dependencies: lib_deps)\n\n      thread_dep = dependency('threads')\n      executable('multichrom_wright_fisher',\n          sources: ['examples/multichrom_wright_fisher.c'], \n          link_with: [tskit_lib], dependencies: [m_dep, kastore_dep, thread_dep])\n    endif\nendif\n"
  },
  {
    "path": "c/meson_options.txt",
    "content": "option('build_examples', type : 'boolean', value : true)\n"
  },
  {
    "path": "c/subprojects/kastore/README.md",
    "content": "This directory is an abbreviated version of the kastore distribution source.\n\nAll files should be updated when we are updating to a new kastore version.\n"
  },
  {
    "path": "c/subprojects/kastore/VERSION.txt",
    "content": "2.1.2\n"
  },
  {
    "path": "c/subprojects/kastore/kastore.c",
    "content": "#include <stdio.h>\n#include <string.h>\n#include <stdlib.h>\n#include <assert.h>\n#include <errno.h>\n#include <stdbool.h>\n\n#include \"kastore.h\"\n\n/* Private flag used to indicate when we have opened the file ourselves\n * and need to free it. */\n/* Note: we use 1<<14 to keep this flag at the end of the flag space,\n * and this is the highest bit that can be guaranteed to fit into\n * an int. */\n#define OWN_FILE (1 << 14)\n\nconst char *\nkas_strerror(int err)\n{\n    const char *ret = \"Unknown error\";\n\n    switch (err) {\n        case KAS_ERR_GENERIC:\n            ret = \"Generic error; please file a bug report\";\n            break;\n        case KAS_ERR_IO:\n            if (errno != 0) {\n                ret = strerror(errno);\n            } else {\n                ret = \"I/O error with errno unset. Please file a bug report\";\n            }\n            break;\n        case KAS_ERR_BAD_MODE:\n            ret = \"Bad open mode; must be \\\"r\\\", \\\"w\\\", or \\\"a\\\"\";\n            break;\n        case KAS_ERR_BAD_FLAGS:\n            ret = \"Unknown flags specified. Only (KAS_GET_TAKES_OWNERSHIP and/or\"\n                  \"KAS_READ_ALL) or 0 can be specified \"\n                  \"for open, and KAS_BORROWS_ARRAY or 0 for put\";\n            break;\n        case KAS_ERR_NO_MEMORY:\n            ret = \"Out of memory\";\n            break;\n        case KAS_ERR_BAD_FILE_FORMAT:\n            ret = \"File not in KAS format\";\n            break;\n        case KAS_ERR_VERSION_TOO_OLD:\n            ret = \"File format version is too old. Please upgrade using \"\n                  \"'kas upgrade <filename>'\";\n            break;\n        case KAS_ERR_VERSION_TOO_NEW:\n            ret = \"File format version is too new. 
Please upgrade your \"\n                  \"kastore library version\";\n            break;\n        case KAS_ERR_BAD_TYPE:\n            ret = \"Unknown data type\";\n            break;\n        case KAS_ERR_DUPLICATE_KEY:\n            ret = \"Duplicate key provided\";\n            break;\n        case KAS_ERR_KEY_NOT_FOUND:\n            ret = \"Key not found\";\n            break;\n        case KAS_ERR_EMPTY_KEY:\n            ret = \"Keys cannot be empty\";\n            break;\n        case KAS_ERR_ILLEGAL_OPERATION:\n            ret = \"Cannot perform the requested operation in the current mode\";\n            break;\n        case KAS_ERR_TYPE_MISMATCH:\n            ret = \"Mismatch between requested and stored types for array\";\n            break;\n        case KAS_ERR_EOF:\n            ret = \"End of file\";\n            break;\n    }\n    return ret;\n}\n\nkas_version_t\nkas_version(void)\n{\n    kas_version_t version;\n\n    version.major = KAS_VERSION_MAJOR;\n    version.minor = KAS_VERSION_MINOR;\n    version.patch = KAS_VERSION_PATCH;\n    return version;\n}\n\nstatic size_t\ntype_size(int type)\n{\n    const size_t type_size_map[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 };\n    assert(type < KAS_NUM_TYPES);\n    return type_size_map[type];\n}\n\n/* Compare item keys lexicographically. */\nstatic int\ncompare_items(const void *a, const void *b)\n{\n    const kaitem_t *ia = (const kaitem_t *) a;\n    const kaitem_t *ib = (const kaitem_t *) b;\n    size_t len = ia->key_len < ib->key_len ? ia->key_len : ib->key_len;\n    int ret = memcmp(ia->key, ib->key, len);\n    if (ret == 0) {\n        ret = (ia->key_len > ib->key_len) - (ia->key_len < ib->key_len);\n    }\n    return ret;\n}\n\n/* When a read error occurs we don't know whether this is because the file\n * ended unexpectedly or an IO error occured. 
If the file ends unexpectedly\n * this is a file format error.\n */\nstatic int KAS_WARN_UNUSED\nkastore_get_read_io_error(kastore_t *self)\n{\n    int ret = KAS_ERR_IO;\n\n    if (feof(self->file) || errno == 0) {\n        ret = KAS_ERR_BAD_FILE_FORMAT;\n    }\n    return ret;\n}\n\nstatic int KAS_WARN_UNUSED\nkastore_write_header(kastore_t *self)\n{\n    int ret = 0;\n    char header[KAS_HEADER_SIZE];\n    uint16_t version_major = KAS_FILE_VERSION_MAJOR;\n    uint16_t version_minor = KAS_FILE_VERSION_MINOR;\n    uint32_t num_items = (uint32_t) self->num_items;\n    uint64_t file_size = (uint64_t) self->file_size;\n\n    memset(header, 0, sizeof(header));\n    memcpy(header, KAS_MAGIC, 8);\n    memcpy(header + 8, &version_major, 2);\n    memcpy(header + 10, &version_minor, 2);\n    memcpy(header + 12, &num_items, 4);\n    memcpy(header + 16, &file_size, 8);\n    /* Rest of header is reserved */\n    if (fwrite(header, KAS_HEADER_SIZE, 1, self->file) != 1) {\n        ret = KAS_ERR_IO;\n        goto out;\n    }\nout:\n    return ret;\n}\n\nstatic int KAS_WARN_UNUSED\nkastore_read_header(kastore_t *self)\n{\n    int ret = 0;\n    char header[KAS_HEADER_SIZE];\n    uint16_t version_major, version_minor;\n    uint32_t num_items;\n    uint64_t file_size;\n    size_t count;\n\n    count = fread(header, 1, KAS_HEADER_SIZE, self->file);\n    if (count == 0 && feof(self->file)) {\n        ret = KAS_ERR_EOF;\n        goto out;\n    } else if (count != KAS_HEADER_SIZE) {\n        ret = kastore_get_read_io_error(self);\n        goto out;\n    }\n    if (strncmp(header, KAS_MAGIC, 8) != 0) {\n        ret = KAS_ERR_BAD_FILE_FORMAT;\n        goto out;\n    }\n    memcpy(&version_major, header + 8, 2);\n    memcpy(&version_minor, header + 10, 2);\n    memcpy(&num_items, header + 12, 4);\n    memcpy(&file_size, header + 16, 8);\n    self->file_version[0] = (int) version_major;\n    self->file_version[1] = (int) version_minor;\n    if (self->file_version[0] < KAS_FILE_VERSION_MAJOR) 
{\n        ret = KAS_ERR_VERSION_TOO_OLD;\n        goto out;\n    } else if (self->file_version[0] > KAS_FILE_VERSION_MAJOR) {\n        ret = KAS_ERR_VERSION_TOO_NEW;\n        goto out;\n    }\n    self->num_items = num_items;\n    self->file_size = (size_t) file_size;\n    if (self->file_size < KAS_HEADER_SIZE) {\n        ret = KAS_ERR_BAD_FILE_FORMAT;\n        goto out;\n    }\nout:\n    return ret;\n}\n\n/* Compute the locations of the keys and arrays in the file. */\nstatic void\nkastore_pack_items(kastore_t *self)\n{\n    size_t j, offset, remainder;\n\n    /* Pack the keys */\n    offset = KAS_HEADER_SIZE + self->num_items * KAS_ITEM_DESCRIPTOR_SIZE;\n    for (j = 0; j < self->num_items; j++) {\n        self->items[j].key_start = offset;\n        offset += self->items[j].key_len;\n    }\n    /* Pack the arrays */\n    for (j = 0; j < self->num_items; j++) {\n        remainder = offset % KAS_ARRAY_ALIGN;\n        if (remainder != 0) {\n            offset += KAS_ARRAY_ALIGN - remainder;\n        }\n        self->items[j].array_start = offset;\n        offset += self->items[j].array_len * type_size(self->items[j].type);\n    }\n    self->file_size = offset;\n}\n\nstatic int KAS_WARN_UNUSED\nkastore_write_descriptors(kastore_t *self)\n{\n    int ret = 0;\n    size_t j;\n    uint8_t type;\n    uint64_t key_start, key_len, array_start, array_len;\n    char descriptor[KAS_ITEM_DESCRIPTOR_SIZE];\n\n    for (j = 0; j < self->num_items; j++) {\n        memset(descriptor, 0, KAS_ITEM_DESCRIPTOR_SIZE);\n        type = (uint8_t) self->items[j].type;\n        key_start = (uint64_t) self->items[j].key_start;\n        key_len = (uint64_t) self->items[j].key_len;\n        array_start = (uint64_t) self->items[j].array_start;\n        array_len = (uint64_t) self->items[j].array_len;\n        memcpy(descriptor, &type, 1);\n        /* Bytes 1-8 are reserved */\n        memcpy(descriptor + 8, &key_start, 8);\n        memcpy(descriptor + 16, &key_len, 8);\n        memcpy(descriptor 
+ 24, &array_start, 8);\n        memcpy(descriptor + 32, &array_len, 8);\n        /* Rest of descriptor is reserved */\n        if (fwrite(descriptor, sizeof(descriptor), 1, self->file) != 1) {\n            ret = KAS_ERR_IO;\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int KAS_WARN_UNUSED\nkastore_read_descriptors(kastore_t *self)\n{\n    int ret = KAS_ERR_BAD_FILE_FORMAT;\n    size_t j;\n    uint8_t type;\n    uint64_t key_start, key_len, array_start, array_len;\n    char *descriptor;\n    size_t descriptor_offset, offset, remainder, size, count;\n    char *read_buffer = NULL;\n\n    size = self->num_items * KAS_ITEM_DESCRIPTOR_SIZE;\n    if (size + KAS_HEADER_SIZE > self->file_size) {\n        goto out;\n    }\n    read_buffer = (char *) malloc(size);\n    if (read_buffer == NULL) {\n        ret = KAS_ERR_NO_MEMORY;\n        goto out;\n    }\n    count = fread(read_buffer, size, 1, self->file);\n    if (count == 0) {\n        ret = kastore_get_read_io_error(self);\n        goto out;\n    }\n\n    descriptor_offset = 0;\n    for (j = 0; j < self->num_items; j++) {\n        descriptor = read_buffer + descriptor_offset;\n        descriptor_offset += KAS_ITEM_DESCRIPTOR_SIZE;\n        memcpy(&type, descriptor, 1);\n        memcpy(&key_start, descriptor + 8, 8);\n        memcpy(&key_len, descriptor + 16, 8);\n        memcpy(&array_start, descriptor + 24, 8);\n        memcpy(&array_len, descriptor + 32, 8);\n\n        if (type >= KAS_NUM_TYPES) {\n            ret = KAS_ERR_BAD_TYPE;\n            goto out;\n        }\n        self->items[j].type = (int) type;\n        if (key_start + key_len > self->file_size) {\n            goto out;\n        }\n        self->items[j].key_start = (size_t) key_start;\n        self->items[j].key_len = (size_t) key_len;\n        if (array_start + array_len * type_size(type) > self->file_size) {\n            goto out;\n        }\n        self->items[j].array_start = (size_t) array_start;\n        
self->items[j].array_len = (size_t) array_len;\n    }\n\n    /* Check the integrity of the key and array packing. Keys must\n     * be packed sequentially starting immediately after the descriptors. */\n    offset = KAS_HEADER_SIZE + self->num_items * KAS_ITEM_DESCRIPTOR_SIZE;\n    for (j = 0; j < self->num_items; j++) {\n        if (self->items[j].key_start != offset) {\n            ret = KAS_ERR_BAD_FILE_FORMAT;\n            goto out;\n        }\n        offset += self->items[j].key_len;\n    }\n    for (j = 0; j < self->num_items; j++) {\n        /* Arrays are 8 byte aligned and adjacent */\n        remainder = offset % KAS_ARRAY_ALIGN;\n        if (remainder != 0) {\n            offset += KAS_ARRAY_ALIGN - remainder;\n        }\n        if (self->items[j].array_start != offset) {\n            ret = KAS_ERR_BAD_FILE_FORMAT;\n            goto out;\n        }\n        offset += self->items[j].array_len * type_size(self->items[j].type);\n    }\n    if (offset != self->file_size) {\n        ret = KAS_ERR_BAD_FILE_FORMAT;\n        goto out;\n    }\n    ret = 0;\nout:\n    kas_safe_free(read_buffer);\n    return ret;\n}\n\nstatic int KAS_WARN_UNUSED\nkastore_write_data(kastore_t *self)\n{\n    int ret = 0;\n    size_t j, size, offset, padding;\n    char pad[KAS_ARRAY_ALIGN] = { 0, 0, 0, 0, 0, 0, 0 };\n    const void *write_array;\n\n    offset = KAS_HEADER_SIZE + self->num_items * KAS_ITEM_DESCRIPTOR_SIZE;\n\n    /* Write the keys. */\n    for (j = 0; j < self->num_items; j++) {\n        assert(offset == self->items[j].key_start);\n        if (fwrite(self->items[j].key, self->items[j].key_len, 1, self->file) != 1) {\n            ret = KAS_ERR_IO;\n            goto out;\n        }\n        offset += self->items[j].key_len;\n    }\n    /* Write the arrays. 
*/\n    for (j = 0; j < self->num_items; j++) {\n        padding = self->items[j].array_start - offset;\n        assert(padding < KAS_ARRAY_ALIGN);\n        if (padding > 0 && fwrite(pad, padding, 1, self->file) != 1) {\n            ret = KAS_ERR_IO;\n            goto out;\n        }\n        size = self->items[j].array_len * type_size(self->items[j].type);\n        write_array = self->items[j].borrowed_array != NULL\n                          ? self->items[j].borrowed_array\n                          : self->items[j].array;\n        assert(write_array != NULL);\n        if (size > 0 && fwrite(write_array, size, 1, self->file) != 1) {\n            ret = KAS_ERR_IO;\n            goto out;\n        }\n        offset = self->items[j].array_start + size;\n    }\nout:\n    return ret;\n}\n\nstatic int KAS_WARN_UNUSED\nkastore_read_file(kastore_t *self)\n{\n    int ret = 0;\n    size_t count, size, offset, j;\n    bool read_all = !!(self->flags & KAS_READ_ALL);\n\n    offset = KAS_HEADER_SIZE + self->num_items * KAS_ITEM_DESCRIPTOR_SIZE;\n\n    /* Read in up to the start of first array. This will contain all the keys. 
*/\n    size = self->items[0].array_start;\n\n    assert(size > offset);\n    size -= offset;\n\n    self->key_read_buffer = (char *) malloc(size);\n    if (self->key_read_buffer == NULL) {\n        ret = KAS_ERR_NO_MEMORY;\n        goto out;\n    }\n    count = fread(self->key_read_buffer, size, 1, self->file);\n    if (count == 0) {\n        ret = kastore_get_read_io_error(self);\n        goto out;\n    }\n    /* Assign the pointers for the keys and arrays */\n    for (j = 0; j < self->num_items; j++) {\n        /* keys are already loaded in the read buffer */\n        self->items[j].key = self->key_read_buffer + self->items[j].key_start - offset;\n        if (read_all) {\n            if (j == self->num_items - 1) {\n                size = self->file_size - self->items[j].array_start;\n            } else {\n                size = self->items[j + 1].array_start - self->items[j].array_start;\n            }\n            self->items[j].array = (char *) malloc(size == 0 ? 1 : size);\n            if (self->items[j].array == NULL) {\n                ret = KAS_ERR_NO_MEMORY;\n                goto out;\n            }\n            if (size > 0) {\n                count = fread(self->items[j].array, size, 1, self->file);\n                if (count == 0) {\n                    ret = kastore_get_read_io_error(self);\n                    goto out;\n                }\n            }\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int KAS_WARN_UNUSED\nkastore_read_item(kastore_t *self, kaitem_t *item)\n{\n    int ret = 0;\n    int err;\n    size_t size = item->array_len * type_size(item->type);\n    size_t count;\n\n    item->array = malloc(size == 0 ? 
1 : size);\n    if (item->array == NULL) {\n        ret = KAS_ERR_NO_MEMORY;\n        goto out;\n    }\n    if (size > 0) {\n        err = fseek(self->file, self->file_offset + (long) item->array_start, SEEK_SET);\n        if (err != 0) {\n            ret = KAS_ERR_IO;\n            goto out;\n        }\n        count = fread(item->array, size, 1, self->file);\n        if (count == 0) {\n            ret = kastore_get_read_io_error(self);\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int KAS_WARN_UNUSED\nkastore_write_file(kastore_t *self)\n{\n    int ret = 0;\n\n    qsort(self->items, self->num_items, sizeof(kaitem_t), compare_items);\n    kastore_pack_items(self);\n    ret = kastore_write_header(self);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = kastore_write_descriptors(self);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = kastore_write_data(self);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nstatic int KAS_WARN_UNUSED\nkastore_read(kastore_t *self)\n{\n    int ret = 0;\n\n    if (!(self->flags & KAS_READ_ALL)) {\n        /* Record the current file offset, in case this is a multi-store file,\n         * so that we can seek to the correct location in kastore_read_item().\n         */\n        self->file_offset = ftell(self->file);\n        if (self->file_offset == -1) {\n            ret = KAS_ERR_IO;\n            goto out;\n        }\n    }\n    ret = kastore_read_header(self);\n    if (ret != 0) {\n        goto out;\n    }\n    if (self->num_items > 0) {\n        self->items = (kaitem_t *) calloc(self->num_items, sizeof(*self->items));\n        if (self->items == NULL) {\n            ret = KAS_ERR_NO_MEMORY;\n            goto out;\n        }\n        ret = kastore_read_descriptors(self);\n        if (ret != 0) {\n            goto out;\n        }\n        ret = kastore_read_file(self);\n        if (ret != 0) {\n            goto out;\n        }\n    } else if (self->file_size != 
KAS_HEADER_SIZE) {\n        ret = KAS_ERR_BAD_FILE_FORMAT;\n        goto out;\n    }\nout:\n    return ret;\n}\n\nstatic int KAS_WARN_UNUSED\nkastore_insert_all(kastore_t *self, kastore_t *other)\n{\n    size_t j;\n    int ret = 0;\n    kaitem_t item;\n\n    for (j = 0; j < other->num_items; j++) {\n        item = other->items[j];\n        ret = kastore_put(\n            self, item.key, item.key_len, item.array, item.array_len, item.type, 0);\n        if (ret != 0) {\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nint KAS_WARN_UNUSED\nkastore_open(kastore_t *self, const char *filename, const char *mode, int flags)\n{\n    int ret = 0;\n    const char *file_mode;\n    bool appending = false;\n    kastore_t tmp;\n    FILE *file;\n    int err;\n\n    memset(self, 0, sizeof(*self));\n    memset(&tmp, 0, sizeof(tmp));\n    if (strlen(mode) != 1) {\n        ret = KAS_ERR_BAD_MODE;\n        goto out;\n    }\n    if (strncmp(mode, \"r\", 1) == 0) {\n        file_mode = \"rb\";\n    } else if (strncmp(mode, \"w\", 1) == 0) {\n        file_mode = \"wb\";\n    } else if (strncmp(mode, \"a\", 1) == 0) {\n        mode = \"w\";\n        file_mode = \"wb\";\n        appending = true;\n    } else {\n        ret = KAS_ERR_BAD_MODE;\n        goto out;\n    }\n    if (appending) {\n        ret = kastore_open(&tmp, filename, \"r\", KAS_READ_ALL);\n        if (ret != 0) {\n            goto out;\n        }\n        /* tmp will now have read all of the data into memory. We can now\n         * close its file. We have to do this for Windows. 
*/\n        err = fclose(tmp.file);\n        tmp.file = NULL;\n        if (err != 0) {\n            ret = KAS_ERR_IO;\n            goto out;\n        }\n    }\n    file = fopen(filename, file_mode);\n    if (file == NULL) {\n        ret = KAS_ERR_IO;\n        goto out;\n    }\n    ret = kastore_openf(self, file, mode, flags);\n    if (ret != 0) {\n        (void) fclose(file);\n    } else {\n        self->flags |= OWN_FILE;\n        if (appending) {\n            ret = kastore_insert_all(self, &tmp);\n        }\n    }\nout:\n    if (appending) {\n        kastore_close(&tmp);\n    }\n    return ret;\n}\n\nint KAS_WARN_UNUSED\nkastore_openf(kastore_t *self, FILE *file, const char *mode, int flags)\n{\n    int ret = 0;\n\n    memset(self, 0, sizeof(*self));\n    if (strlen(mode) != 1) {\n        ret = KAS_ERR_BAD_MODE;\n        goto out;\n    }\n    if (strncmp(mode, \"r\", 1) == 0) {\n        self->mode = KAS_READ;\n    } else if (strncmp(mode, \"w\", 1) == 0) {\n        self->mode = KAS_WRITE;\n    } else {\n        ret = KAS_ERR_BAD_MODE;\n        goto out;\n    }\n\n    if (flags > (KAS_READ_ALL | KAS_GET_TAKES_OWNERSHIP) || flags < 0) {\n        ret = KAS_ERR_BAD_FLAGS;\n        goto out;\n    }\n\n    self->flags = flags;\n    self->file = file;\n    if (self->mode == KAS_READ) {\n        ret = kastore_read(self);\n    }\nout:\n    return ret;\n}\n\nint KAS_WARN_UNUSED\nkastore_close(kastore_t *self)\n{\n    int ret = 0;\n    int err;\n    size_t j;\n\n    if (self->mode == KAS_WRITE) {\n        if (self->file != NULL) {\n            ret = kastore_write_file(self);\n            if (ret != 0) {\n                /* Ignore errors on close now */\n                if (self->flags & OWN_FILE) {\n                    fclose(self->file);\n                }\n                self->file = NULL;\n            }\n        }\n        if (self->items != NULL) {\n            /* We only alloc memory for the keys and arrays in write mode */\n            for (j = 0; j < 
self->num_items; j++) {\n                kas_safe_free(self->items[j].key);\n                kas_safe_free(self->items[j].array);\n            }\n        }\n    } else {\n        kas_safe_free(self->key_read_buffer);\n        if (self->items != NULL) {\n            for (j = 0; j < self->num_items; j++) {\n                kas_safe_free(self->items[j].array);\n            }\n        }\n    }\n    kas_safe_free(self->items);\n    if (self->file != NULL && (self->flags & OWN_FILE)) {\n        err = fclose(self->file);\n        if (err != 0) {\n            ret = KAS_ERR_IO;\n        }\n    }\n    memset(self, 0, sizeof(*self));\n    return ret;\n}\n\nstatic int\nkastore_find_item(kastore_t *self, const char *key, size_t key_len, kaitem_t **item)\n{\n    int ret = KAS_ERR_KEY_NOT_FOUND;\n    kaitem_t search;\n    search.key = (char *) malloc(key_len);\n    search.key_len = key_len;\n\n    if (self->mode != KAS_READ) {\n        ret = KAS_ERR_ILLEGAL_OPERATION;\n        goto out;\n    }\n    if (search.key == NULL) {\n        ret = KAS_ERR_NO_MEMORY;\n        goto out;\n    }\n    memcpy(search.key, key, key_len);\n    *item = bsearch(\n        &search, self->items, self->num_items, sizeof(kaitem_t), compare_items);\n    if (*item == NULL) {\n        goto out;\n    }\n    ret = 0;\nout:\n    kas_safe_free(search.key);\n    return ret;\n}\n\nint KAS_WARN_UNUSED\nkastore_contains(kastore_t *self, const char *key, size_t key_len)\n{\n    kaitem_t *item;\n    int ret = kastore_find_item(self, key, key_len, &item);\n    if (ret == 0) {\n        ret = 1;\n    } else if (ret == KAS_ERR_KEY_NOT_FOUND) {\n        ret = 0;\n    }\n    return ret;\n}\n\nint KAS_WARN_UNUSED\nkastore_containss(kastore_t *self, const char *key)\n{\n    return kastore_contains(self, key, strlen(key));\n}\n\nint KAS_WARN_UNUSED\nkastore_get(kastore_t *self, const char *key, size_t key_len, void **array,\n    size_t *array_len, int *type)\n{\n    kaitem_t *item;\n    int ret = kastore_find_item(self, key, 
key_len, &item);\n    if (ret != 0) {\n        goto out;\n    }\n    if (item->array == NULL) {\n        ret = kastore_read_item(self, item);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    *array = item->array;\n    *array_len = item->array_len;\n    *type = item->type;\n    if (self->flags & KAS_GET_TAKES_OWNERSHIP) {\n        item->array = NULL;\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nint KAS_WARN_UNUSED\nkastore_gets(\n    kastore_t *self, const char *key, void **array, size_t *array_len, int *type)\n{\n    return kastore_get(self, key, strlen(key), array, array_len, type);\n}\n\nstatic int KAS_WARN_UNUSED\nkastore_gets_type(\n    kastore_t *self, const char *key, void **array, size_t *array_len, int type)\n{\n    int loaded_type = -1;\n    int ret;\n\n    ret = kastore_get(self, key, strlen(key), array, array_len, &loaded_type);\n    if (ret != 0) {\n        goto out;\n    }\n    if (type != loaded_type) {\n        ret = KAS_ERR_TYPE_MISMATCH;\n        goto out;\n    }\nout:\n    return ret;\n}\n\nint KAS_WARN_UNUSED\nkastore_gets_int8(kastore_t *self, const char *key, int8_t **array, size_t *array_len)\n{\n    return kastore_gets_type(self, key, (void **) array, array_len, KAS_INT8);\n}\n\nint KAS_WARN_UNUSED\nkastore_gets_uint8(kastore_t *self, const char *key, uint8_t **array, size_t *array_len)\n{\n    return kastore_gets_type(self, key, (void **) array, array_len, KAS_UINT8);\n}\n\nint KAS_WARN_UNUSED\nkastore_gets_int16(kastore_t *self, const char *key, int16_t **array, size_t *array_len)\n{\n    return kastore_gets_type(self, key, (void **) array, array_len, KAS_INT16);\n}\n\nint KAS_WARN_UNUSED\nkastore_gets_uint16(\n    kastore_t *self, const char *key, uint16_t **array, size_t *array_len)\n{\n    return kastore_gets_type(self, key, (void **) array, array_len, KAS_UINT16);\n}\n\nint KAS_WARN_UNUSED\nkastore_gets_int32(kastore_t *self, const char *key, int32_t **array, size_t *array_len)\n{\n    return 
kastore_gets_type(self, key, (void **) array, array_len, KAS_INT32);\n}\n\nint KAS_WARN_UNUSED\nkastore_gets_uint32(\n    kastore_t *self, const char *key, uint32_t **array, size_t *array_len)\n{\n    return kastore_gets_type(self, key, (void **) array, array_len, KAS_UINT32);\n}\n\nint KAS_WARN_UNUSED\nkastore_gets_int64(kastore_t *self, const char *key, int64_t **array, size_t *array_len)\n{\n    return kastore_gets_type(self, key, (void **) array, array_len, KAS_INT64);\n}\n\nint KAS_WARN_UNUSED\nkastore_gets_uint64(\n    kastore_t *self, const char *key, uint64_t **array, size_t *array_len)\n{\n    return kastore_gets_type(self, key, (void **) array, array_len, KAS_UINT64);\n}\n\nint KAS_WARN_UNUSED\nkastore_gets_float32(kastore_t *self, const char *key, float **array, size_t *array_len)\n{\n    return kastore_gets_type(self, key, (void **) array, array_len, KAS_FLOAT32);\n}\n\nint KAS_WARN_UNUSED\nkastore_gets_float64(kastore_t *self, const char *key, double **array, size_t *array_len)\n{\n    return kastore_gets_type(self, key, (void **) array, array_len, KAS_FLOAT64);\n}\n\nstatic int KAS_WARN_UNUSED\nkastore_put_item(kastore_t *self, kaitem_t **ret_item, const char *key, size_t key_len,\n    int type, int KAS_UNUSED(flags))\n{\n    int ret = 0;\n    kaitem_t *new_item;\n    kaitem_t *p;\n    size_t j;\n\n    if (self->mode != KAS_WRITE) {\n        ret = KAS_ERR_ILLEGAL_OPERATION;\n        goto out;\n    }\n    if (type < 0 || type >= KAS_NUM_TYPES) {\n        ret = KAS_ERR_BAD_TYPE;\n        goto out;\n    }\n    if (key_len == 0) {\n        ret = KAS_ERR_EMPTY_KEY;\n        goto out;\n    }\n    /* This isn't terribly efficient, but we're not expecting large\n     * numbers of items. 
*/\n    p = (kaitem_t *) realloc(self->items, (self->num_items + 1) * sizeof(*self->items));\n    if (p == NULL) {\n        ret = KAS_ERR_NO_MEMORY;\n        goto out;\n    }\n    self->items = p;\n    new_item = self->items + self->num_items;\n\n    memset(new_item, 0, sizeof(*new_item));\n    new_item->type = type;\n    new_item->key_len = key_len;\n    new_item->key = (char *) malloc(key_len);\n    if (new_item->key == NULL) {\n        kas_safe_free(new_item->key);\n        ret = KAS_ERR_NO_MEMORY;\n        goto out;\n    }\n    self->num_items++;\n    memcpy(new_item->key, key, key_len);\n\n    /* Check if this key is already in here. OK, this is a quadratic time\n     * algorithm, but we're not expecting to have lots of items (< 100). In\n     * this case, the simple algorithm is probably better. If/when we ever\n     * deal with more items than this, then we will need a better algorithm.\n     */\n    for (j = 0; j < self->num_items - 1; j++) {\n        if (compare_items(new_item, self->items + j) == 0) {\n            /* Free the key memory and remove this item */\n            self->num_items--;\n            kas_safe_free(new_item->key);\n            ret = KAS_ERR_DUPLICATE_KEY;\n            goto out;\n        }\n    }\n    *ret_item = new_item;\nout:\n    return ret;\n}\n\nstatic int KAS_WARN_UNUSED\nkastore_bput(kastore_t *self, const char *key, size_t key_len, const void *array,\n    size_t array_len, int type, int flags)\n{\n    int ret = 0;\n    kaitem_t *item;\n    ret = kastore_put_item(self, &item, key, key_len, type, flags);\n    if (ret != 0) {\n        goto out;\n    }\n    if (array == NULL) {\n        /* Both can't be null, so assign a dummy array */\n        item->array = malloc(1);\n    } else {\n        item->borrowed_array = array;\n    }\n    item->borrowed_array = array;\n    item->array_len = array_len;\nout:\n    return ret;\n}\n\nint KAS_WARN_UNUSED\nkastore_put(kastore_t *self, const char *key, size_t key_len, const void *array,\n    
size_t array_len, int type, int flags)\n{\n    int ret;\n    size_t array_size;\n    void *array_copy = NULL;\n\n    if (flags != KAS_BORROWS_ARRAY && flags != 0) {\n        ret = KAS_ERR_BAD_FLAGS;\n        goto out;\n    }\n\n    if (type < 0 || type >= KAS_NUM_TYPES) {\n        ret = KAS_ERR_BAD_TYPE;\n        goto out;\n    }\n    if (flags & KAS_BORROWS_ARRAY) {\n        ret = kastore_bput(self, key, key_len, array, array_len, type, flags);\n    } else {\n        array_size = type_size(type) * array_len;\n        array_copy = malloc(array_size == 0 ? 1 : array_size);\n        if (array_copy == NULL) {\n            ret = KAS_ERR_NO_MEMORY;\n            goto out;\n        }\n        memcpy(array_copy, array, array_size);\n        ret = kastore_oput(self, key, key_len, array_copy, array_len, type, flags);\n        if (ret == 0) {\n            /* Kastore has taken ownership of the array, so we don't need to free it */\n            array_copy = NULL;\n        }\n    }\nout:\n    kas_safe_free(array_copy);\n    return ret;\n}\n\nint KAS_WARN_UNUSED\nkastore_oput(kastore_t *self, const char *key, size_t key_len, void *array,\n    size_t array_len, int type, int flags)\n{\n    int ret = 0;\n    kaitem_t *item;\n\n    if (flags != 0) {\n        ret = KAS_ERR_BAD_FLAGS;\n        goto out;\n    }\n\n    ret = kastore_put_item(self, &item, key, key_len, type, flags);\n    if (ret != 0) {\n        goto out;\n    }\n    item->array = array;\n    item->array_len = array_len;\nout:\n    return ret;\n}\n\nint KAS_WARN_UNUSED\nkastore_puts(kastore_t *self, const char *key, const void *array, size_t array_len,\n    int type, int flags)\n{\n    return kastore_put(self, key, strlen(key), array, array_len, type, flags);\n}\n\nint KAS_WARN_UNUSED\nkastore_puts_int8(\n    kastore_t *self, const char *key, const int8_t *array, size_t array_len, int flags)\n{\n    return kastore_puts(self, key, (const void *) array, array_len, KAS_INT8, flags);\n}\n\nint 
KAS_WARN_UNUSED\nkastore_puts_uint8(\n    kastore_t *self, const char *key, const uint8_t *array, size_t array_len, int flags)\n{\n    return kastore_puts(self, key, (const void *) array, array_len, KAS_UINT8, flags);\n}\n\nint KAS_WARN_UNUSED\nkastore_puts_int16(\n    kastore_t *self, const char *key, const int16_t *array, size_t array_len, int flags)\n{\n    return kastore_puts(self, key, (const void *) array, array_len, KAS_INT16, flags);\n}\n\nint KAS_WARN_UNUSED\nkastore_puts_uint16(\n    kastore_t *self, const char *key, const uint16_t *array, size_t array_len, int flags)\n{\n    return kastore_puts(self, key, (const void *) array, array_len, KAS_UINT16, flags);\n}\n\nint KAS_WARN_UNUSED\nkastore_puts_int32(\n    kastore_t *self, const char *key, const int32_t *array, size_t array_len, int flags)\n{\n    return kastore_puts(self, key, (const void *) array, array_len, KAS_INT32, flags);\n}\n\nint KAS_WARN_UNUSED\nkastore_puts_uint32(\n    kastore_t *self, const char *key, const uint32_t *array, size_t array_len, int flags)\n{\n    return kastore_puts(self, key, (const void *) array, array_len, KAS_UINT32, flags);\n}\n\nint KAS_WARN_UNUSED\nkastore_puts_int64(\n    kastore_t *self, const char *key, const int64_t *array, size_t array_len, int flags)\n{\n    return kastore_puts(self, key, (const void *) array, array_len, KAS_INT64, flags);\n}\n\nint KAS_WARN_UNUSED\nkastore_puts_uint64(\n    kastore_t *self, const char *key, const uint64_t *array, size_t array_len, int flags)\n{\n    return kastore_puts(self, key, (const void *) array, array_len, KAS_UINT64, flags);\n}\n\nint KAS_WARN_UNUSED\nkastore_puts_float32(\n    kastore_t *self, const char *key, const float *array, size_t array_len, int flags)\n{\n    return kastore_puts(self, key, (const void *) array, array_len, KAS_FLOAT32, flags);\n}\n\nint KAS_WARN_UNUSED\nkastore_puts_float64(\n    kastore_t *self, const char *key, const double *array, size_t array_len, int flags)\n{\n    return kastore_puts(self, 
key, (const void *) array, array_len, KAS_FLOAT64, flags);\n}\n\nint KAS_WARN_UNUSED\nkastore_oputs(\n    kastore_t *self, const char *key, void *array, size_t array_len, int type, int flags)\n{\n    return kastore_oput(self, key, strlen(key), array, array_len, type, flags);\n}\n\nint KAS_WARN_UNUSED\nkastore_oputs_int8(\n    kastore_t *self, const char *key, int8_t *array, size_t array_len, int flags)\n{\n    return kastore_oputs(self, key, (void *) array, array_len, KAS_INT8, flags);\n}\n\nint KAS_WARN_UNUSED\nkastore_oputs_uint8(\n    kastore_t *self, const char *key, uint8_t *array, size_t array_len, int flags)\n{\n    return kastore_oputs(self, key, (void *) array, array_len, KAS_UINT8, flags);\n}\n\nint KAS_WARN_UNUSED\nkastore_oputs_int16(\n    kastore_t *self, const char *key, int16_t *array, size_t array_len, int flags)\n{\n    return kastore_oputs(self, key, (void *) array, array_len, KAS_INT16, flags);\n}\n\nint KAS_WARN_UNUSED\nkastore_oputs_uint16(\n    kastore_t *self, const char *key, uint16_t *array, size_t array_len, int flags)\n{\n    return kastore_oputs(self, key, (void *) array, array_len, KAS_UINT16, flags);\n}\n\nint KAS_WARN_UNUSED\nkastore_oputs_int32(\n    kastore_t *self, const char *key, int32_t *array, size_t array_len, int flags)\n{\n    return kastore_oputs(self, key, (void *) array, array_len, KAS_INT32, flags);\n}\n\nint KAS_WARN_UNUSED\nkastore_oputs_uint32(\n    kastore_t *self, const char *key, uint32_t *array, size_t array_len, int flags)\n{\n    return kastore_oputs(self, key, (void *) array, array_len, KAS_UINT32, flags);\n}\n\nint KAS_WARN_UNUSED\nkastore_oputs_int64(\n    kastore_t *self, const char *key, int64_t *array, size_t array_len, int flags)\n{\n    return kastore_oputs(self, key, (void *) array, array_len, KAS_INT64, flags);\n}\n\nint KAS_WARN_UNUSED\nkastore_oputs_uint64(\n    kastore_t *self, const char *key, uint64_t *array, size_t array_len, int flags)\n{\n    return kastore_oputs(self, key, (void *) array, 
array_len, KAS_UINT64, flags);\n}\n\nint KAS_WARN_UNUSED\nkastore_oputs_float32(\n    kastore_t *self, const char *key, float *array, size_t array_len, int flags)\n{\n    return kastore_oputs(self, key, (void *) array, array_len, KAS_FLOAT32, flags);\n}\n\nint KAS_WARN_UNUSED\nkastore_oputs_float64(\n    kastore_t *self, const char *key, double *array, size_t array_len, int flags)\n{\n    return kastore_oputs(self, key, (void *) array, array_len, KAS_FLOAT64, flags);\n}\n\nvoid\nkastore_print_state(kastore_t *self, FILE *out)\n{\n    kaitem_t *item;\n    size_t j;\n\n    fprintf(out, \"============================\\n\");\n    fprintf(out, \"kastore state\\n\");\n    fprintf(out, \"file_version = %d.%d\\n\", self->file_version[0], self->file_version[1]);\n    fprintf(out, \"mode  = %d\\n\", self->mode);\n    fprintf(out, \"flags = %d\\n\", self->flags);\n    fprintf(out, \"num_items = %zu\\n\", self->num_items);\n    fprintf(out, \"file_size = %zu\\n\", self->file_size);\n    fprintf(out, \"own_file  = %d\\n\", !!(self->flags & OWN_FILE));\n    fprintf(out, \"file = '%p'\\n\", (void *) self->file);\n    fprintf(out, \"============================\\n\");\n    for (j = 0; j < self->num_items; j++) {\n        item = self->items + j;\n        fprintf(out,\n            \"%.*s: type=%d, key_start=%zu, key_len=%zu, key=%p, \"\n            \"array_start=%zu, array_len=%zu, array=%p\\n\",\n            (int) item->key_len, item->key, item->type, item->key_start, item->key_len,\n            (void *) item->key, item->array_start, item->array_len,\n            (void *) item->array);\n    }\n    fprintf(out, \"============================\\n\");\n}\n"
  },
  {
    "path": "c/subprojects/kastore/kastore.h",
    "content": "/**\n * @file kastore.h\n * @brief Public API for kastore.\n *\n * This is the API documentation for kastore.\n */\n#ifndef KASTORE_H\n#define KASTORE_H\n\n#ifdef __cplusplus\nextern \"C\" {\n#endif\n\n#ifdef __GNUC__\n#define KAS_WARN_UNUSED __attribute__((warn_unused_result))\n#define KAS_UNUSED(x) KAS_UNUSED_##x __attribute__((__unused__))\n#else\n#define KAS_WARN_UNUSED\n#define KAS_UNUSED(x) KAS_UNUSED_##x\n#endif\n\n#include <stdbool.h>\n#include <stdint.h>\n#include <stddef.h>\n#include <stdio.h>\n\n/**\n@defgroup ERROR_GROUP Error return values.\n@{\n*/\n// clang-format off\n/**\nGeneric error thrown when no other message can be generated.\n*/\n#define KAS_ERR_GENERIC                               -1\n/**\nAn error occured during IO.\n*/\n#define KAS_ERR_IO                                    -2\n/**\nAn unrecognised mode string was passed to open().\n*/\n#define KAS_ERR_BAD_MODE                              -3\n/**\nOut-of-memory condition.\n*/\n#define KAS_ERR_NO_MEMORY                             -4\n/**\nAttempt to read an unknown file format.\n*/\n#define KAS_ERR_BAD_FILE_FORMAT                       -5\n/**\nThe file is in kastore format, but the version is too old for this\nversion of the library to read.\n*/\n#define KAS_ERR_VERSION_TOO_OLD                       -6\n/**\nThe file is in kastore format, but the version is too new for this\nversion of the library to read.\n*/\n#define KAS_ERR_VERSION_TOO_NEW                       -7\n/**\nAn unknown type key was specified.\n*/\n#define KAS_ERR_BAD_TYPE                              -8\n/**\nA zero-length key was specified.\n*/\n#define KAS_ERR_EMPTY_KEY                             -9\n/**\nA duplicate key was specified.\n*/\n#define KAS_ERR_DUPLICATE_KEY                         -10\n/**\nThe requested key does not exist in the store.\n*/\n#define KAS_ERR_KEY_NOT_FOUND                         -11\n/**\nThe requestion function cannot be called in the current mode.\n*/\n#define 
KAS_ERR_ILLEGAL_OPERATION                     -12\n/**\nThe requested type does not match the type of the stored values.\n*/\n#define KAS_ERR_TYPE_MISMATCH                         -13\n/**\nEnd of file was reached while reading data.\n*/\n#define KAS_ERR_EOF                                   -14\n/**\nUnknown flags were provided to open.\n*/\n#define KAS_ERR_BAD_FLAGS                             -15\n/** @} */\n\n/* Flags for open */\n#define KAS_READ_ALL                       (1 << 0)\n#define KAS_GET_TAKES_OWNERSHIP            (1 << 1)\n\n/* Flags for put */\n#define KAS_BORROWS_ARRAY          (1 << 8)\n\n\n/**\n@defgroup TYPE_GROUP Data types.\n@{\n*/\n#define KAS_INT8                0\n#define KAS_UINT8               1\n#define KAS_INT16               2\n#define KAS_UINT16              3\n#define KAS_INT32               4\n#define KAS_UINT32              5\n#define KAS_INT64               6\n#define KAS_UINT64              7\n#define KAS_FLOAT32             8\n#define KAS_FLOAT64             9\n/** @} */\n\n#define KAS_NUM_TYPES           10\n\n#define KAS_READ                1\n#define KAS_WRITE               2\n\n/**\n@defgroup FILE_VERSION_GROUP File version macros.\n@{\n*/\n/**\nThe file version major number. Incremented when any breaking changes are made\nto the file format.\n*/\n#define KAS_FILE_VERSION_MAJOR  1\n/**\nThe file version minor number. Incremented when non-breaking backward-compatible\nchanges are made to the file format.\n*/\n#define KAS_FILE_VERSION_MINOR  0\n/** @} */\n\n/**\n@defgroup API_VERSION_GROUP API version macros.\n@{\n*/\n/**\nThe library major version. Incremented when breaking changes to the API or ABI are\nintroduced. This includes any changes to the signatures of functions and the\nsizes and types of externally visible structs.\n*/\n#define KAS_VERSION_MAJOR   2\n/**\nThe library minor version. 
Incremented when non-breaking backward-compatible changes\nto the API or ABI are introduced, i.e., the addition of a new function.\n*/\n#define KAS_VERSION_MINOR   1\n/**\nThe library patch version. Incremented when any changes not relevant to the\nAPI or ABI are introduced, i.e., internal refactors or bugfixes.\n*/\n#define KAS_VERSION_PATCH   2\n/** @} */\n\n#define KAS_HEADER_SIZE             64\n#define KAS_ITEM_DESCRIPTOR_SIZE    64\n#define KAS_MAGIC                   \"\\211KAS\\r\\n\\032\\n\"\n#define KAS_ARRAY_ALIGN             8\n// clang-format on\n\n#ifndef KAS_BUG_ASSERT_MESSAGE\n#define KAS_BUG_ASSERT_MESSAGE                                                          \\\n    \"If you are using kastore directly please open an issue on\"                         \\\n    \" GitHub, ideally with a reproducible example.\"                                     \\\n    \" (https://github.com/tskit-dev/kastore/issues) If you are\"                         \\\n    \" using software that uses kastore, please report an issue\"                         \\\n    \" to that software's issue tracker, at least initially.\"\n#endif\n\n/**\nWe often wish to assert a condition that is unexpected, but using the normal `assert`\nmeans compiling without NDEBUG. This macro still asserts when NDEBUG is defined.\n*/\n#define kas_bug_assert(condition)                                                       \\\n    do {                                                                                \\\n        if (!(condition)) {                                                             \\\n            fprintf(stderr, \"Bug detected in %s at line %d. 
%s\\n\", __FILE__, __LINE__,  \\\n                KAS_BUG_ASSERT_MESSAGE);                                                \\\n            abort();                                                                    \\\n        }                                                                               \\\n    } while (0)\n\ntypedef struct {\n    int type;\n    size_t key_len;\n    size_t array_len;\n    char *key;\n    /* Used when KAS_BORROWS_ARRAY is set */\n    const void *borrowed_array;\n    void *array;\n    size_t key_start;\n    size_t array_start;\n} kaitem_t;\n\n/**\n@brief A file-backed store of key-array values.\n*/\ntypedef struct {\n    int flags;\n    int mode;\n    int file_version[2];\n    size_t num_items;\n    kaitem_t *items;\n    FILE *file;\n    size_t file_size;\n    long file_offset;\n    char *key_read_buffer;\n} kastore_t;\n\n/**\n@brief Library version information.\n*/\ntypedef struct {\n    /** @brief The major version number. */\n    int major;\n    /** @brief The minor version number. */\n    int minor;\n    /** @brief The patch version number. */\n    int patch;\n} kas_version_t;\n\n/**\n@brief Open a store from a given file in read (\"r\"), write (\"w\") or\nappend (\"a\") mode.\n\n@rst\nIn read mode, a store can be queried using the :ref:`get functions\n<sec_c_api_get>` and any attempts to write to the store will return an error.\nIn write and append mode, the store can be written to using the :ref:`put\nfunctions <sec_c_api_put>` and any attempt to read will return an error.\n\nAfter :c:func:`kastore_open` has been called on a particular store,\n:c:func:`kastore_close` must be called to avoid leaking memory. This must also\nbe done when :c:func:`kastore_open` returns an error.\n\nWhen opened in read-mode, the default is to read key/array values from file\non demand. This is useful when a subset of the data is required and we don't\nwish to read the entire file. 
If the entire file is to be read, the\n``KAS_READ_ALL`` flag may be specified to improve performance.\n\n**Flags**\n\nKAS_READ_ALL\n    If this option is specified, read the entire file at\n    open time. This will give slightly better performance as the file can\n    be read sequentially in a single pass.\n\nKAS_GET_TAKES_OWNERSHIP\n    If this option is specified, all ``get`` operations will transfer\n    ownership of the array to the caller. ``kastore`` will not ``free``\n    the array memory and this is the responsibility of the caller.\n    If ``get`` is called on the same key multiple times, a new buffer will be\n    returned each time. Note that second and subsequent ``get`` calls\n    on a given key will result in ``seek`` operations even when the\n    KAS_READ_ALL flag is set, and will therefore fail on unseekable\n    streams.\n\n@endrst\n\n@param self A pointer to a kastore object.\n@param filename The file path to open.\n@param mode The open mode: can be read (\"r\"), write (\"w\") or append (\"a\").\n@param flags The open flags.\n@return Return 0 on success or a negative value on failure.\n*/\nint kastore_open(kastore_t *self, const char *filename, const char *mode, int flags);\n\n/**\n@brief Open a store from a given FILE pointer.\n\n@rst\nBehaviour, mode and flags follow that of :c:func:`kastore_open`,\nexcept append mode is not supported.\nThe ``file`` argument must be opened in an appropriate mode (e.g. \"r\"\nfor a kastore in \"r\" mode).  Files open with other modes will result\nin KAS_ERR_IO being returned when read/write operations are attempted.\n\nThe FILE will not be closed when :c:func:`kastore_close` is called.\nIf the KAS_READ_ALL flag is supplied, no ``seek`` operations will be\nperformed on the FILE and so streams such as stdin, FIFOs etc are\nsupported. 
The FILE pointer will be positioned exactly at the end\nof the kastore encoded bytes once reading is completed, and reading\nmultiple stores from the same FILE sequentially is fully supported.\n@endrst\n\n@param self A pointer to a kastore object.\n@param file The FILE* to read/write the store from/to.\n@param mode The open mode: can be read (\"r\") or write (\"w\").\n@param flags The open flags.\n@return Return 0 on success or a negative value on failure.\n*/\nint kastore_openf(kastore_t *self, FILE *file, const char *mode, int flags);\n\n/**\n@brief Close an opened store, freeing all resources.\n\nAny store that has been opened must be closed to avoid memory leaks\n(including cases in which errors have occurred). It is not an error to\ncall ``kastore_close`` multiple times on the same object, but\n``kastore_open`` must be called before ``kastore_close``.\n\n@param self A pointer to a kastore object.\n@return Return 0 on success or a negative value on failure.\n*/\nint kastore_close(kastore_t *self);\n\n/**\n@brief Return 1 if the store contains the specified key and 0 if it does not.\n\n@rst\nQueries the store for the specified key and returns 1 if it exists. If the\nkey does not exist, 0 is returned. If an error occurs (for example, if querying\nthe store while it is in write-mode), a negative value is returned.\n\nFor keys that are standard NULL terminated strings, the :c:func:`kastore_containss`\nfunction may be more convenient.\n@endrst\n\n@param self A pointer to a kastore object.\n@param key The key.\n@param key_len The length of the key.\n@return Return 1 if the key is present and 0 if it is not. If an error occurs,\n    return a negative value.\n*/\nint kastore_contains(kastore_t *self, const char *key, size_t key_len);\n\n/**\n@brief Return 1 if the store contains the specified NULL terminated key\nand 0 if it does not.\n\n@rst\nQueries the store for the specified key, which must be a NULL terminated string,\nand returns 1 if it exists. 
If the\nkey does not exist, 0 is returned. If an error occurs (for example, if querying\nthe store while it is in write-mode), a negative value is returned.\n@endrst\n\n@param self A pointer to a kastore object.\n@param key The key.\n@return Return 1 if the key is present and 0 if it is not. If an error occurs,\n    return a negative value.\n*/\nint kastore_containss(kastore_t *self, const char *key);\n\n/**\n@brief Get the array for the specified key.\n\n@rst\nQueries the store for the specified key and stores pointers to the memory for\nthe corresponding array, the number of elements in this array and the type of\nthe array in the specified destination pointers. This is the most general form\nof ``get`` query in kastore, as non NULL-terminated strings can be used as\nkeys and the resulting array is returned in a generic pointer. When standard C\nstrings are used as keys and the type of the array is known, it is more\nconvenient to use the :ref:`typed variants <sec_c_api_typed_get>` of this function.\n\nThe returned array points to memory that is internally managed by the store\nand must not be freed or modified. 
The pointer is guaranteed to be valid\nuntil :c:func:`kastore_close` is called.\n@endrst\n\n@param self A pointer to a kastore object.\n@param key The key.\n@param key_len The length of the key.\n@param array The destination pointer for the array.\n@param array_len The destination pointer for the number of elements\nin the array.\n@param type The destination pointer for the type code of the array.\n@return Return 0 on success or a negative value on failure.\n*/\nint kastore_get(kastore_t *self, const char *key, size_t key_len, void **array,\n    size_t *array_len, int *type);\n\n/**\n@brief Get the array for the specified NULL-terminated key.\n\n@rst\nAs for :c:func:`kastore_get()` except the key is a NULL-terminated string.\n@endrst\n\n@param self A pointer to a kastore object.\n@param key The key.\n@param array The destination pointer for the array.\n@param array_len The destination pointer for the number of elements\nin the array.\n@param type The destination pointer for the type code of the array.\n@return Return 0 on success or a negative value on failure.\n*/\nint kastore_gets(\n    kastore_t *self, const char *key, void **array, size_t *array_len, int *type);\n\n/**\n@defgroup TYPED_GETS_GROUP Typed get functions.\n@{\n*/\n\nint kastore_gets_int8(\n    kastore_t *self, const char *key, int8_t **array, size_t *array_len);\nint kastore_gets_uint8(\n    kastore_t *self, const char *key, uint8_t **array, size_t *array_len);\nint kastore_gets_int16(\n    kastore_t *self, const char *key, int16_t **array, size_t *array_len);\nint kastore_gets_uint16(\n    kastore_t *self, const char *key, uint16_t **array, size_t *array_len);\nint kastore_gets_int32(\n    kastore_t *self, const char *key, int32_t **array, size_t *array_len);\nint kastore_gets_uint32(\n    kastore_t *self, const char *key, uint32_t **array, size_t *array_len);\nint kastore_gets_int64(\n    kastore_t *self, const char *key, int64_t **array, size_t *array_len);\nint kastore_gets_uint64(\n    
kastore_t *self, const char *key, uint64_t **array, size_t *array_len);\nint kastore_gets_float32(\n    kastore_t *self, const char *key, float **array, size_t *array_len);\nint kastore_gets_float64(\n    kastore_t *self, const char *key, double **array, size_t *array_len);\n\n/** @} */\n\n/**\n@brief Insert the specified key-array pair into the store.\n\n@rst\nA key with the specified length is inserted into the store and associated with\nan array of the specified type and number of elements. The contents of the\nspecified key and array are copied unless the KAS_BORROWS_ARRAY flag is specified.\nIf KAS_BORROWS_ARRAY is specified the array buffer must persist until the\nkastore is closed.\nKeys can be any sequence of bytes but must be at least one byte long and be\nunique. There is no restriction on the contents of arrays. This is the most\ngeneral form of ``put`` operation in kastore; when the type of the array\nis known and the keys are standard C strings, it is usually more convenient\nto use the :ref:`typed variants <sec_c_api_typed_put>` of this function.\n@endrst\n\n@param self A pointer to a kastore object.\n@param key The key.\n@param key_len The length of the key.\n@param array The array.\n@param array_len The number of elements in the array.\n@param type The type of the array.\n@param flags The insertion flags, only KAS_BORROWS_ARRAY or 0 is valid.\n@return Return 0 on success or a negative value on failure.\n*/\nint kastore_put(kastore_t *self, const char *key, size_t key_len, const void *array,\n    size_t array_len, int type, int flags);\n/**\n@brief Insert the specified NULL terminated key and array pair into the store.\n\n@rst\nAs for :c:func:`kastore_put` except the key must be a NULL-terminated C string.\n@endrst\n\n@param self A pointer to a kastore object.\n@param key The key.\n@param array The array.\n@param array_len The number of elements in the array.\n@param type The type of the array.\n@param flags The insertion flags, only 
KAS_BORROWS_ARRAY or 0 is valid.\n@return Return 0 on success or a negative value on failure.\n*/\nint kastore_puts(kastore_t *self, const char *key, const void *array, size_t array_len,\n    int type, int flags);\n\n/**\n @defgroup TYPED_PUTS_GROUP Typed put functions.\n @{\n */\n\nint kastore_puts_int8(\n    kastore_t *self, const char *key, const int8_t *array, size_t array_len, int flags);\nint kastore_puts_uint8(\n    kastore_t *self, const char *key, const uint8_t *array, size_t array_len, int flags);\nint kastore_puts_int16(\n    kastore_t *self, const char *key, const int16_t *array, size_t array_len, int flags);\nint kastore_puts_uint16(kastore_t *self, const char *key, const uint16_t *array,\n    size_t array_len, int flags);\nint kastore_puts_int32(\n    kastore_t *self, const char *key, const int32_t *array, size_t array_len, int flags);\nint kastore_puts_uint32(kastore_t *self, const char *key, const uint32_t *array,\n    size_t array_len, int flags);\nint kastore_puts_int64(\n    kastore_t *self, const char *key, const int64_t *array, size_t array_len, int flags);\nint kastore_puts_uint64(kastore_t *self, const char *key, const uint64_t *array,\n    size_t array_len, int flags);\nint kastore_puts_float32(\n    kastore_t *self, const char *key, const float *array, size_t array_len, int flags);\nint kastore_puts_float64(\n    kastore_t *self, const char *key, const double *array, size_t array_len, int flags);\n\n/** @} */\n\n/**\n@brief Insert the specified key-array pair into the store, transferring ownership\nof the malloced array buffer to the store (own-put).\n\n@rst\nA key with the specified length is inserted into the store and associated with\nan array of the specified type and number of elements. The contents of the\nspecified key are copied, but the array buffer is taken directly and freed when\nthe store is closed. The array buffer must be a pointer returned by ``malloc``\nor ``calloc``. 
Ownership of the buffer is not taken unless the function returns\nsuccessfully.\n\nApart from taking ownership of the array buffer, the semantics of this\nfunction are identical to :c:func:`kastore_put`.\n@endrst\n\n@param self A pointer to a kastore object.\n@param key The key.\n@param key_len The length of the key.\n@param array The array. Must be a pointer returned by malloc/calloc.\n@param array_len The number of elements in the array.\n@param type The type of the array.\n@param flags The insertion flags. Currently unused.\n@return Return 0 on success or a negative value on failure.\n*/\nint kastore_oput(kastore_t *self, const char *key, size_t key_len, void *array,\n    size_t array_len, int type, int flags);\n/**\n@brief Insert the specified NULL terminated key and array pair into the store,\ntransferring ownership of the malloced array buffer to the store (own-put).\n\n@rst\nAs for :c:func:`kastore_oput` except the key must be NULL-terminated C string.\n@endrst\n\n@param self A pointer to a kastore object.\n@param key The key.\n@param array The array. Must be a pointer returned by malloc/calloc.\n@param array_len The number of elements in the array.\n@param type The type of the array.\n@param flags The insertion flags. 
Currently unused.\n@return Return 0 on success or a negative value on failure.\n*/\nint kastore_oputs(kastore_t *self, const char *key, void *array, size_t array_len,\n    int type, int flags);\n\n/**\n @defgroup TYPED_OPUTS_GROUP Typed own-and-put functions.\n @{\n */\n\nint kastore_oputs_int8(\n    kastore_t *self, const char *key, int8_t *array, size_t array_len, int flags);\nint kastore_oputs_uint8(\n    kastore_t *self, const char *key, uint8_t *array, size_t array_len, int flags);\nint kastore_oputs_int16(\n    kastore_t *self, const char *key, int16_t *array, size_t array_len, int flags);\nint kastore_oputs_uint16(\n    kastore_t *self, const char *key, uint16_t *array, size_t array_len, int flags);\nint kastore_oputs_int32(\n    kastore_t *self, const char *key, int32_t *array, size_t array_len, int flags);\nint kastore_oputs_uint32(\n    kastore_t *self, const char *key, uint32_t *array, size_t array_len, int flags);\nint kastore_oputs_int64(\n    kastore_t *self, const char *key, int64_t *array, size_t array_len, int flags);\nint kastore_oputs_uint64(\n    kastore_t *self, const char *key, uint64_t *array, size_t array_len, int flags);\nint kastore_oputs_float32(\n    kastore_t *self, const char *key, float *array, size_t array_len, int flags);\nint kastore_oputs_float64(\n    kastore_t *self, const char *key, double *array, size_t array_len, int flags);\n\n/** @} */\n\nvoid kastore_print_state(kastore_t *self, FILE *out);\n\n/**\n@brief Returns a description of the specified error code.\n\n@param err The error code.\n@return String describing the error code.\n*/\nconst char *kas_strerror(int err);\n\n/**\n@brief Returns the API version.\n\n@rst\nThe API follows the `semver convention <https://semver.org/>`_, where the\nmajor, minor and patch numbers have specific meanings. 
The versioning\nscheme here also takes into account ABI compatibility.\n@endrst\n*/\nkas_version_t kas_version(void);\n\n#define kas_safe_free(pointer)                                                          \\\n    do {                                                                                \\\n        if (pointer != NULL) {                                                          \\\n            free(pointer);                                                              \\\n            pointer = NULL;                                                             \\\n        }                                                                               \\\n    } while (0)\n\n#ifdef __cplusplus\n}\n#endif\n\n#endif\n"
  },
  {
    "path": "c/subprojects/kastore/meson.build",
    "content": "project('kastore', ['c', 'cpp'],\n  version: files('VERSION.txt'),\n  default_options: [\n    'c_std=c99', \n    'cpp_std=c++11', \n    'warning_level=3', \n    'werror=true'])\n\nif not meson.is_subproject()\n    add_global_arguments([\n        '-W', '-Wmissing-prototypes',  '-Wstrict-prototypes',\n        '-Wconversion', '-Wshadow', '-Wpointer-arith', '-Wcast-align',\n        '-Wcast-qual', '-Wwrite-strings', '-Wnested-externs',\n        '-fshort-enums', '-fno-common'], language : 'c')\nendif\n\n# Subprojects should compile in the static library for simplicity.\nkastore_inc = include_directories('.')\nkastore = static_library('kastore', 'kastore.c')\nkastore_dep = declare_dependency(link_with : kastore, include_directories: kastore_inc)\n\nif not meson.is_subproject()\n\n    # The shared library can be installed into the system.\n    install_headers('kastore.h')\n    shared_library('kastore', 'kastore.c', install: true)\n    executable('example', ['example.c'], link_with: kastore)\n\n    cunit_dep = dependency('cunit')\n    src_root = meson.project_source_root()\n\n    tests_exe = executable('tests', ['tests.c', 'kastore.c'], dependencies: cunit_dep,\n      c_args: ['-DMESON_VERSION=\"@0@\"'.format(meson.project_version())])\n    test('tests', tests_exe,\n      env: ['KAS_TEST_DATA_PREFIX=' + src_root + '/test-data/'])\n\n    cpp_tests_exe = executable('cpp_tests', ['cpp_tests.cpp'], link_with: kastore)\n    test('cpp_tests', cpp_tests_exe)\n\n    malloc_tests_exe = executable('malloc_tests', ['malloc_tests.c', 'kastore.c'],\n        dependencies: cunit_dep,\n        link_args:['-Wl,--wrap=malloc', '-Wl,--wrap=realloc', '-Wl,--wrap=calloc'])\n    test('malloc_tests', malloc_tests_exe, workdir: src_root)\n\n    io_tests_exe = executable('io_tests', ['io_tests.c', 'kastore.c'],\n        dependencies: cunit_dep,\n        link_args:[\n            '-Wl,--wrap=fwrite',\n            '-Wl,--wrap=fread',\n            '-Wl,--wrap=fclose',\n            
'-Wl,--wrap=ftell',\n            '-Wl,--wrap=fseek'])\n    test('io_tests', io_tests_exe, workdir: src_root)\nendif\n"
  },
  {
    "path": "c/tests/meson-subproject/example.c",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2022 Tskit Developers\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n/* Simple example testing that we compile and link in tskit and kastore\n * when we use meson submodules.\n */\n#include <stdio.h>\n#include <tskit.h>\n#include <assert.h>\n#include <string.h>\n\nvoid\ntest_kas_strerror()\n{\n    printf(\"test_kas_strerror\\n\");\n    const char *str = kas_strerror(KAS_ERR_NO_MEMORY);\n    assert(strcmp(str, \"Out of memory\") == 0);\n}\n\nvoid\ntest_strerror()\n{\n    printf(\"test_strerror\\n\");\n    const char *str = tsk_strerror(TSK_ERR_NO_MEMORY);\n    assert(strcmp(str, \"Out of memory. 
(TSK_ERR_NO_MEMORY)\") == 0);\n}\n\nvoid\ntest_load_error()\n{\n    printf(\"test_open_error\\n\");\n    tsk_treeseq_t ts;\n    int ret = tsk_treeseq_load(&ts, \"no such file\", 0);\n    assert(ret == TSK_ERR_IO);\n    tsk_treeseq_free(&ts);\n}\n\nvoid\ntest_table_basics()\n{\n    printf(\"test_table_basics\\n\");\n    tsk_table_collection_t tables;\n    int ret = tsk_table_collection_init(&tables, 0);\n    assert(ret == 0);\n\n    ret = tsk_node_table_add_row(&tables.nodes, 0, 1.0, TSK_NULL, TSK_NULL, NULL, 0);\n    assert(ret == 0);\n    ret = tsk_node_table_add_row(&tables.nodes, 0, 2.0, TSK_NULL, TSK_NULL, NULL, 0);\n    assert(ret == 1);\n    assert(tables.nodes.num_rows == 2);\n\n    tsk_table_collection_free(&tables);\n}\n\nint\nmain()\n{\n    test_kas_strerror();\n    test_strerror();\n    test_load_error();\n    test_table_basics();\n    return 0;\n}\n"
  },
  {
    "path": "c/tests/meson-subproject/meson.build",
    "content": "project('example', 'c')\n\ntskit_proj = subproject('tskit')\ntskit_dep = tskit_proj.get_variable('tskit_dep')\n\nexecutable('example',\n  'example.c',\n   dependencies : [tskit_dep],\n   install : true)\n\n"
  },
  {
    "path": "c/tests/test_convert.c",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2022 Tskit Developers\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n#include \"testlib.h\"\n#include <tskit/convert.h>\n\n#include <unistd.h>\n#include <stdlib.h>\n\nstatic void\ntest_single_tree_newick(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n    size_t buffer_size = 1024;\n    char newick[buffer_size];\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0)\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK)\n\n    ret = tsk_convert_newick(&t, 0, 0, TSK_NEWICK_LEGACY_MS_LABELS, buffer_size, newick);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    /* Seems odd, but this is what a single node newick tree looks like.\n     * Newick parsers seems to accept it in any case */\n    
CU_ASSERT_STRING_EQUAL(newick, \"1;\");\n\n    ret = tsk_convert_newick(&t, 0, 0, 0, buffer_size, newick);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_STRING_EQUAL(newick, \"n0;\");\n\n    ret = tsk_convert_newick(&t, 4, 0, TSK_NEWICK_LEGACY_MS_LABELS, buffer_size, newick);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_STRING_EQUAL(newick, \"(1:1,2:1);\");\n    ret = tsk_convert_newick(&t, 4, 0, 0, buffer_size, newick);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_STRING_EQUAL(newick, \"(n0:1,n1:1);\");\n\n    ret = tsk_convert_newick(&t, 6, 0, TSK_NEWICK_LEGACY_MS_LABELS, buffer_size, newick);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_STRING_EQUAL(newick, \"((1:1,2:1):2,(3:2,4:2):1);\");\n\n    ret = tsk_convert_newick(&t, 6, 0, 0, buffer_size, newick);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_STRING_EQUAL(newick, \"((n0:1,n1:1):2,(n2:2,n3:2):1);\");\n\n    tsk_tree_free(&t);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_newick_errors(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n    size_t j, len;\n    size_t buffer_size = 1024;\n    char newick[buffer_size];\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0)\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK)\n\n    ret = tsk_convert_newick(&t, -1, 1, 0, buffer_size, newick);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    ret = tsk_convert_newick(&t, 7, 1, 0, buffer_size, newick);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    ret = tsk_convert_newick(&t, 6, 0, 0, buffer_size, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_convert_newick(&t, 6, 0, 0, buffer_size, newick);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    len = 1 + strlen(newick);\n    for (j = 0; j < len; j++) {\n        ret = 
tsk_convert_newick(&t, 6, 0, 0, j, newick);\n        CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BUFFER_OVERFLOW);\n    }\n    ret = tsk_convert_newick(&t, 6, 0, TSK_NEWICK_LEGACY_MS_LABELS, len, newick);\n\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_STRING_EQUAL(newick, \"((1:1,2:1):2,(3:2,4:2):1);\");\n\n    tsk_tree_free(&t);\n    tsk_treeseq_free(&ts);\n}\n\nint\nmain(int argc, char **argv)\n{\n    CU_TestInfo tests[] = {\n        { \"test_single_tree_newick\", test_single_tree_newick },\n        { \"test_single_tree_newick_errors\", test_single_tree_newick_errors },\n        { NULL, NULL },\n    };\n    return test_main(tests, argc, argv);\n}\n"
  },
  {
    "path": "c/tests/test_core.c",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2024 Tskit Developers\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n#include \"testlib.h\"\n#include <tskit/core.h>\n#include <math.h>\n#include <string.h>\n\n#include <unistd.h>\n\nstatic void\ntest_strerror(void)\n{\n    int j;\n    const char *msg;\n    int max_error_code = 8192; /* totally arbitrary */\n\n    for (j = 0; j < max_error_code; j++) {\n        msg = tsk_strerror(-j);\n        CU_ASSERT_FATAL(msg != NULL);\n        CU_ASSERT(strlen(msg) > 0);\n    }\n    CU_ASSERT_STRING_EQUAL(\n        tsk_strerror(0), \"Normal exit condition. 
This is not an error!\");\n}\n\nstatic void\ntest_strerror_kastore(void)\n{\n    int kastore_errors[]\n        = { KAS_ERR_NO_MEMORY, KAS_ERR_KEY_NOT_FOUND, KAS_ERR_BAD_FILE_FORMAT };\n    size_t j;\n    int err;\n\n    for (j = 0; j < sizeof(kastore_errors) / sizeof(*kastore_errors); j++) {\n        err = tsk_set_kas_error(kastore_errors[j]);\n        CU_ASSERT_TRUE(tsk_is_kas_error(err));\n        CU_ASSERT_EQUAL_FATAL(tsk_get_kas_error(err), kastore_errors[j]);\n        CU_ASSERT_STRING_EQUAL(tsk_strerror(err), kas_strerror(kastore_errors[j]));\n    }\n}\n\nstatic void\ntest_generate_uuid(void)\n{\n    size_t uuid_size = 36;\n    char uuid[uuid_size + 1];\n    char other_uuid[uuid_size + 1];\n    int ret;\n\n    ret = tsk_generate_uuid(uuid, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(strlen(uuid), uuid_size);\n    CU_ASSERT_EQUAL(uuid[8], '-');\n    CU_ASSERT_EQUAL(uuid[13], '-');\n    CU_ASSERT_EQUAL(uuid[18], '-');\n    CU_ASSERT_EQUAL(uuid[23], '-');\n\n    ret = tsk_generate_uuid(other_uuid, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(strlen(other_uuid), uuid_size);\n    CU_ASSERT_STRING_NOT_EQUAL(uuid, other_uuid);\n}\n\nstatic void\ntest_double_round(void)\n{\n    struct test_case {\n        double source;\n        unsigned int num_digits;\n        double result;\n    };\n    struct test_case test_cases[] = {\n        { 1.555, 3, 1.555 },\n        { 1.5555, 2, 1.56 },\n        /* catch the halfway between integers case */\n        { 1.5555, 3, 1.556 },\n\n        { 1.5111, 3, 1.511 },\n        { 1.5112, 3, 1.511 },\n        { 3.141592653589793, 0, 3.0 },\n        { 3.141592653589793, 1, 3.1 },\n        { 3.141592653589793, 2, 3.14 },\n        { 3.141592653589793, 3, 3.142 },\n        { 3.141592653589793, 4, 3.1416 },\n        { 3.141592653589793, 5, 3.14159 },\n        { 3.141592653589793, 6, 3.141593 },\n        { 3.141592653589793, 7, 3.1415927 },\n        { 3.141592653589793, 8, 3.14159265 },\n        { 
3.141592653589793, 9, 3.141592654 },\n        { 3.141592653589793, 10, 3.1415926536 },\n        { 3.141592653589793, 11, 3.14159265359 },\n        { 3.141592653589793, 12, 3.14159265359 },\n        { 3.141592653589793, 13, 3.1415926535898 },\n        { 3.141592653589793, 14, 3.14159265358979 },\n        { 3.141592653589793, 15, 3.141592653589793 },\n        { 3.141592653589793, 16, 3.141592653589793 },\n        { 3.141592653589793, 17, 3.141592653589793 },\n        { 3.141592653589793, 18, 3.141592653589793 },\n        { 3.141592653589793, 19, 3.141592653589793 },\n        /* We have tiny differences in precision at k=20; not worth worrying about. */\n        { 3.141592653589793, 21, 3.141592653589793 },\n        { 3.141592653589793, 22, 3.141592653589793 },\n        { 3.141592653589793, 23, 3.141592653589793 },\n\n        { 0.3333333333333333, 0, 0.0 },\n        { 0.3333333333333333, 1, 0.3 },\n        { 0.3333333333333333, 2, 0.33 },\n        { 0.3333333333333333, 3, 0.333 },\n        { 0.3333333333333333, 4, 0.3333 },\n        { 0.3333333333333333, 5, 0.33333 },\n        { 0.3333333333333333, 6, 0.333333 },\n        { 0.3333333333333333, 7, 0.3333333 },\n        { 0.3333333333333333, 8, 0.33333333 },\n        { 0.3333333333333333, 9, 0.333333333 },\n        { 0.3333333333333333, 10, 0.3333333333 },\n        { 0.3333333333333333, 11, 0.33333333333 },\n        { 0.3333333333333333, 12, 0.333333333333 },\n        { 0.3333333333333333, 13, 0.3333333333333 },\n        { 0.3333333333333333, 14, 0.33333333333333 },\n        { 0.3333333333333333, 15, 0.333333333333333 },\n        { 0.3333333333333333, 16, 0.3333333333333333 },\n        { 0.3333333333333333, 17, 0.3333333333333333 },\n        { 0.3333333333333333, 18, 0.3333333333333333 },\n        { 0.3333333333333333, 19, 0.3333333333333333 },\n        { 0.3333333333333333, 20, 0.3333333333333333 },\n        { 0.3333333333333333, 21, 0.3333333333333333 },\n        { 0.3333333333333333, 22, 0.3333333333333333 },\n       
 { 0.3333333333333333, 23, 0.3333333333333333 },\n\n        { 0.6666666666666666, 0, 1.0 },\n        { 0.6666666666666666, 1, 0.7 },\n        { 0.6666666666666666, 2, 0.67 },\n        { 0.6666666666666666, 3, 0.667 },\n        { 0.6666666666666666, 4, 0.6667 },\n        { 0.6666666666666666, 5, 0.66667 },\n        { 0.6666666666666666, 6, 0.666667 },\n        { 0.6666666666666666, 7, 0.6666667 },\n        { 0.6666666666666666, 8, 0.66666667 },\n        { 0.6666666666666666, 9, 0.666666667 },\n        { 0.6666666666666666, 10, 0.6666666667 },\n        { 0.6666666666666666, 11, 0.66666666667 },\n        { 0.6666666666666666, 12, 0.666666666667 },\n        { 0.6666666666666666, 13, 0.6666666666667 },\n        { 0.6666666666666666, 14, 0.66666666666667 },\n        { 0.6666666666666666, 15, 0.666666666666667 },\n        { 0.6666666666666666, 16, 0.6666666666666666 },\n        { 0.6666666666666666, 17, 0.6666666666666666 },\n        { 0.6666666666666666, 18, 0.6666666666666666 },\n        { 0.6666666666666666, 19, 0.6666666666666666 },\n        { 0.6666666666666666, 20, 0.6666666666666666 },\n        { 0.6666666666666666, 21, 0.6666666666666666 },\n        { 0.6666666666666666, 22, 0.6666666666666666 },\n        { 0.6666666666666666, 23, 0.6666666666666666 },\n\n        { 0.07692307692307693, 0, 0.0 },\n        { 0.07692307692307693, 1, 0.1 },\n        { 0.07692307692307693, 2, 0.08 },\n        { 0.07692307692307693, 3, 0.077 },\n        { 0.07692307692307693, 4, 0.0769 },\n        { 0.07692307692307693, 5, 0.07692 },\n        { 0.07692307692307693, 6, 0.076923 },\n        { 0.07692307692307693, 7, 0.0769231 },\n        { 0.07692307692307693, 8, 0.07692308 },\n        { 0.07692307692307693, 9, 0.076923077 },\n        { 0.07692307692307693, 10, 0.0769230769 },\n        { 0.07692307692307693, 11, 0.07692307692 },\n        { 0.07692307692307693, 12, 0.076923076923 },\n        { 0.07692307692307693, 13, 0.0769230769231 },\n        { 0.07692307692307693, 14, 0.07692307692308 
},\n        { 0.07692307692307693, 15, 0.076923076923077 },\n        { 0.07692307692307693, 16, 0.0769230769230769 },\n        { 0.07692307692307693, 17, 0.07692307692307693 },\n        { 0.07692307692307693, 18, 0.07692307692307693 },\n        { 0.07692307692307693, 19, 0.07692307692307693 },\n        { 0.07692307692307693, 20, 0.07692307692307693 },\n        /* Tiny difference in precision at k=21 */\n        { 0.07692307692307693, 22, 0.07692307692307693 },\n        { 0.07692307692307693, 23, 0.07692307692307693 },\n\n        { 1e-21, 0, 0.0 },\n        { 1e-21, 1, 0.0 },\n        { 1e-21, 2, 0.0 },\n        { 1e-21, 3, 0.0 },\n        { 1e-21, 4, 0.0 },\n        { 1e-21, 5, 0.0 },\n        { 1e-21, 6, 0.0 },\n        { 1e-21, 7, 0.0 },\n        { 1e-21, 8, 0.0 },\n        { 1e-21, 9, 0.0 },\n        { 1e-21, 10, 0.0 },\n        { 1e-21, 11, 0.0 },\n        { 1e-21, 12, 0.0 },\n        { 1e-21, 13, 0.0 },\n        { 1e-21, 14, 0.0 },\n        { 1e-21, 15, 0.0 },\n        { 1e-21, 16, 0.0 },\n        { 1e-21, 17, 0.0 },\n        { 1e-21, 18, 0.0 },\n        { 1e-21, 19, 0.0 },\n        { 1e-21, 20, 0.0 },\n        { 1e-21, 21, 1e-21 },\n        { 1e-21, 22, 1e-21 },\n        { 1e-21, 23, 1e-21 },\n\n        { 1e-10, 0, 0.0 },\n        { 1e-10, 1, 0.0 },\n        { 1e-10, 2, 0.0 },\n        { 1e-10, 3, 0.0 },\n        { 1e-10, 4, 0.0 },\n        { 1e-10, 5, 0.0 },\n        { 1e-10, 6, 0.0 },\n        { 1e-10, 7, 0.0 },\n        { 1e-10, 8, 0.0 },\n        { 1e-10, 9, 0.0 },\n        { 1e-10, 10, 1e-10 },\n        { 1e-10, 11, 1e-10 },\n        { 1e-10, 12, 1e-10 },\n        { 1e-10, 13, 1e-10 },\n        { 1e-10, 14, 1e-10 },\n        { 1e-10, 15, 1e-10 },\n        { 1e-10, 16, 1e-10 },\n        { 1e-10, 17, 1e-10 },\n        { 1e-10, 18, 1e-10 },\n        { 1e-10, 19, 1e-10 },\n        { 1e-10, 20, 1e-10 },\n        { 1e-10, 21, 1e-10 },\n        { 1e-10, 22, 1e-10 },\n        { 1e-10, 23, 1e-10 },\n\n        { 3.141592653589793e-08, 0, 0.0 },\n        { 
3.141592653589793e-08, 1, 0.0 },\n        { 3.141592653589793e-08, 2, 0.0 },\n        { 3.141592653589793e-08, 3, 0.0 },\n        { 3.141592653589793e-08, 4, 0.0 },\n        { 3.141592653589793e-08, 5, 0.0 },\n        { 3.141592653589793e-08, 6, 0.0 },\n        { 3.141592653589793e-08, 7, 0.0 },\n        { 3.141592653589793e-08, 8, 3e-08 },\n        { 3.141592653589793e-08, 9, 3.1e-08 },\n        { 3.141592653589793e-08, 10, 3.14e-08 },\n        { 3.141592653589793e-08, 11, 3.142e-08 },\n        { 3.141592653589793e-08, 12, 3.1416e-08 },\n        { 3.141592653589793e-08, 13, 3.14159e-08 },\n        { 3.141592653589793e-08, 14, 3.141593e-08 },\n        { 3.141592653589793e-08, 15, 3.1415927e-08 },\n        { 3.141592653589793e-08, 16, 3.14159265e-08 },\n        { 3.141592653589793e-08, 17, 3.141592654e-08 },\n        { 3.141592653589793e-08, 18, 3.1415926536e-08 },\n        { 3.141592653589793e-08, 19, 3.14159265359e-08 },\n        { 3.141592653589793e-08, 20, 3.14159265359e-08 },\n        { 3.141592653589793e-08, 21, 3.1415926535898e-08 },\n        /* Tiny precision mismatch at k=22 */\n        { 3.141592653589793e-08, 23, 3.141592653589793e-08 },\n\n    };\n    size_t num_test_cases = sizeof(test_cases) / sizeof(*test_cases);\n    size_t j;\n\n    for (j = 0; j < num_test_cases; j++) {\n        CU_ASSERT_EQUAL_FATAL(tsk_round(test_cases[j].source, test_cases[j].num_digits),\n            test_cases[j].result);\n    }\n}\n\nstatic void\ntest_blkalloc(void)\n{\n    tsk_blkalloc_t alloc;\n    int ret;\n    size_t j, block_size;\n    void *mem;\n\n    ret = tsk_blkalloc_init(&alloc, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    tsk_blkalloc_free(&alloc);\n\n    for (block_size = 1; block_size < 10; block_size++) {\n        ret = tsk_blkalloc_init(&alloc, block_size);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        for (j = 0; j < 10; j++) {\n            mem = tsk_blkalloc_get(&alloc, block_size);\n            CU_ASSERT_TRUE(mem != NULL);\n         
   CU_ASSERT_EQUAL(alloc.num_chunks, j + 1);\n            tsk_memset(mem, 0, block_size);\n        }\n\n        mem = tsk_blkalloc_get(&alloc, block_size + 1);\n        CU_ASSERT_EQUAL(mem, NULL);\n        mem = tsk_blkalloc_get(&alloc, block_size + 2);\n        CU_ASSERT_EQUAL(mem, NULL);\n\n        tsk_blkalloc_print_state(&alloc, _devnull);\n        tsk_blkalloc_free(&alloc);\n    }\n\n    /* Allocate awkward sized chunk */\n    ret = tsk_blkalloc_init(&alloc, 100);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    mem = tsk_blkalloc_get(&alloc, 90);\n    CU_ASSERT_FATAL(mem != NULL);\n    tsk_memset(mem, 0, 90);\n    mem = tsk_blkalloc_get(&alloc, 10);\n    CU_ASSERT_FATAL(mem != NULL);\n    tsk_memset(mem, 0, 10);\n    CU_ASSERT_EQUAL(alloc.num_chunks, 1);\n    mem = tsk_blkalloc_get(&alloc, 90);\n    CU_ASSERT_FATAL(mem != NULL);\n    tsk_memset(mem, 0, 90);\n    CU_ASSERT_EQUAL(alloc.num_chunks, 2);\n    mem = tsk_blkalloc_get(&alloc, 11);\n    CU_ASSERT_FATAL(mem != NULL);\n    tsk_memset(mem, 0, 11);\n    CU_ASSERT_EQUAL(alloc.num_chunks, 3);\n\n    tsk_blkalloc_free(&alloc);\n}\n\nstatic void\ntest_unknown_time(void)\n{\n    CU_ASSERT_TRUE(tsk_isnan(TSK_UNKNOWN_TIME));\n    CU_ASSERT_TRUE(tsk_is_unknown_time(TSK_UNKNOWN_TIME));\n    CU_ASSERT_FALSE(tsk_is_unknown_time(NAN));\n    CU_ASSERT_FALSE(tsk_is_unknown_time(0));\n    CU_ASSERT_FALSE(tsk_is_unknown_time(INFINITY));\n    CU_ASSERT_FALSE(tsk_is_unknown_time(1));\n}\n\nstatic void\ntest_malloc_zero(void)\n{\n    void *p = tsk_malloc(0);\n\n    CU_ASSERT_FATAL(p != NULL);\n    free(p);\n\n    p = tsk_calloc(0, 1);\n    CU_ASSERT_FATAL(p != NULL);\n    free(p);\n}\n\nstatic void\ntest_malloc_overflow(void)\n{\n#if TSK_MAX_SIZE > SIZE_MAX\n    tsk_size_t size_max = SIZE_MAX;\n    void *p = tsk_malloc(size_max + 1);\n    CU_ASSERT_FATAL(p == NULL);\n\n    p = tsk_calloc(size_max + 1, 1);\n    CU_ASSERT_FATAL(p == NULL);\n#endif\n}\n\nstatic void\ntest_debug_stream(void)\n{\n    FILE *f = fopen(_tmp_file_name, 
\"w\");\n    CU_ASSERT_FATAL(tsk_get_debug_stream() == stdout);\n    CU_ASSERT_FATAL(tsk_get_debug_stream() == stdout);\n\n    tsk_set_debug_stream(f);\n    CU_ASSERT_FATAL(tsk_get_debug_stream() == f);\n    tsk_set_debug_stream(stdout);\n    CU_ASSERT_FATAL(tsk_get_debug_stream() == stdout);\n\n    fclose(f);\n}\n\nstatic int\nvalidate_avl_node(tsk_avl_node_int_t *node)\n{\n    int height, lheight, rheight;\n\n    if (node == NULL) {\n        return 0;\n    }\n    lheight = validate_avl_node(node->llink);\n    rheight = validate_avl_node(node->rlink);\n    height = 1 + TSK_MAX(lheight, rheight);\n\n    if (lheight != 0 && rheight != 0) {\n        CU_ASSERT_FATAL(node->balance == rheight - lheight);\n    } else if (lheight == 0 && rheight == 0) {\n        CU_ASSERT_FATAL(height == 1);\n        CU_ASSERT_FATAL(node->balance == 0);\n    } else {\n        CU_ASSERT_FATAL(height == 2);\n        if (lheight == 0) {\n            CU_ASSERT_FATAL(node->balance == 1);\n        } else {\n            CU_ASSERT_FATAL(node->balance == -1);\n        }\n    }\n    return height;\n}\n\nstatic void\ntest_avl_empty(void)\n{\n    int height;\n    tsk_avl_tree_int_t tree;\n\n    tsk_avl_tree_int_init(&tree);\n\n    height = validate_avl_node(tree.head.rlink);\n    CU_ASSERT_EQUAL((tsk_size_t) height, tree.height);\n    CU_ASSERT_EQUAL(0, tree.size);\n    tsk_avl_tree_int_print_state(&tree, _devnull);\n\n    CU_ASSERT_EQUAL(tsk_avl_tree_int_search(&tree, -1), NULL);\n    CU_ASSERT_EQUAL(tsk_avl_tree_int_search(&tree, 0), NULL);\n    CU_ASSERT_EQUAL(tsk_avl_tree_int_search(&tree, 1), NULL);\n\n    tsk_avl_tree_int_free(&tree);\n}\n\nstatic void\nvalidate_avl(size_t num_keys, int64_t *keys)\n{\n    size_t j, k;\n    int ret, height;\n    tsk_avl_tree_int_t tree;\n    tsk_avl_node_int_t *nodes = malloc(num_keys * sizeof(*nodes));\n    tsk_avl_node_int_t **ordered_nodes = malloc(num_keys * sizeof(*ordered_nodes));\n    tsk_avl_node_int_t *node;\n    tsk_avl_node_int_t tmp_node;\n\n    
CU_ASSERT_FATAL(nodes != NULL);\n    CU_ASSERT_FATAL(ordered_nodes != NULL);\n    tsk_avl_tree_int_init(&tree);\n\n    /* Assumes the keys are unique */\n    for (j = 0; j < num_keys; j++) {\n        node = nodes + j;\n        node->key = keys[j];\n        CU_ASSERT_EQUAL(tsk_avl_tree_int_search(&tree, keys[j]), NULL);\n        ret = tsk_avl_tree_int_insert(&tree, node);\n        CU_ASSERT_FATAL(ret == 0);\n        CU_ASSERT_EQUAL(tsk_avl_tree_int_search(&tree, keys[j]), node);\n        tmp_node.key = keys[j];\n        ret = tsk_avl_tree_int_insert(&tree, &tmp_node);\n        CU_ASSERT_FATAL(ret == 1);\n\n        height = validate_avl_node(tree.head.rlink);\n        CU_ASSERT_EQUAL((tsk_size_t) height, tree.height);\n        CU_ASSERT_EQUAL(j + 1, tree.size);\n        tsk_avl_tree_int_print_state(&tree, _devnull);\n        for (k = j + 1; k < num_keys; k++) {\n            CU_ASSERT_EQUAL(tsk_avl_tree_int_search(&tree, keys[k]), NULL);\n        }\n    }\n\n    tsk_avl_tree_int_ordered_nodes(&tree, ordered_nodes);\n    for (j = 0; j < num_keys; j++) {\n        if (j > 0) {\n            CU_ASSERT_FATAL(ordered_nodes[j - 1]->key < ordered_nodes[j]->key);\n        }\n    }\n\n    tsk_avl_tree_int_free(&tree);\n    free(nodes);\n    free(ordered_nodes);\n}\n\nstatic void\ntest_avl_sequential(void)\n{\n    int64_t keys[] = { 0, 1, 2, 3, 4, 5, 6, 7 };\n    int64_t reversed_keys[] = { 7, 6, 5, 4, 3, 2, 1, 0 };\n\n    validate_avl(8, keys);\n    validate_avl(8, reversed_keys);\n}\n\nstatic void\ntest_avl_interleaved(void)\n{\n    size_t num_keys = 100;\n    size_t j;\n    int64_t *keys = malloc(num_keys * sizeof(*keys));\n\n    CU_ASSERT_FATAL(keys != NULL);\n    for (j = 0; j < num_keys; j++) {\n        keys[j] = (int64_t) j;\n        if (j % 2 == 0) {\n            keys[j] *= -1;\n        }\n    }\n    validate_avl(num_keys, keys);\n    free(keys);\n}\n\nstatic void\ntest_avl_random(void)\n{\n    /* This example goes through all the code paths in the AVL insert algorithm 
*/\n    int64_t keys[] = { 2, 79, -8, -86, 6, -29, 88, -80, 21, -26, -13, 16, -1, 3, 51, 30,\n        49, -48, -99, 57, -63, 29, 91, 87, 60, -43, -79, -12, -52, -42, 69, 89, 74, -50,\n        7, -46, -37, 34, -28, 66, -83, 31, -41, -87, -92, -11, -17, -9, 10, 98, 71, -93,\n        -66, -20, 63, -51, 33, -47, 5, -97, 90, 45, -57, 61, -6, -53, 99, -61, -19, -77,\n        53, 23, -60, 56, -56, -36, -30, 28, 35, -38, 38, 62, -68, 22, -96, -73, -89,\n        50 };\n\n    validate_avl(sizeof(keys) / sizeof(*keys), keys);\n}\n\nstatic void\ntest_bit_arrays(void)\n{\n    // NB: This test is only valid for the 32 bit implementation of bit arrays. If we\n    //     were to change the chunk size of a bit array, we'd need to update these tests\n    tsk_bitset_t arr;\n    tsk_id_t items_truth[64] = { 0 }, items[64] = { 0 };\n    tsk_size_t n_items = 0, n_items_truth = 0;\n\n    // test item retrieval\n    tsk_bitset_init(&arr, 90, 1);\n    CU_ASSERT_EQUAL_FATAL(arr.len, 1);\n    CU_ASSERT_EQUAL_FATAL(arr.row_len, 3);\n    tsk_bitset_get_items(&arr, 0, items, &n_items);\n    assert_arrays_equal(n_items_truth, items, items_truth);\n\n    for (tsk_bitset_val_t i = 0; i < 20; i++) {\n        tsk_bitset_set_bit(&arr, 0, i);\n        items_truth[n_items_truth] = (tsk_id_t) i;\n        n_items_truth++;\n    }\n    tsk_bitset_set_bit(&arr, 0, 63);\n    tsk_bitset_set_bit(&arr, 0, 65);\n\n    // these assertions are only valid for 32-bit values\n    CU_ASSERT_EQUAL_FATAL(arr.data[0], 1048575);\n    CU_ASSERT_EQUAL_FATAL(arr.data[1], 2147483648);\n    CU_ASSERT_EQUAL_FATAL(arr.data[2], 2);\n\n    // verify our assumptions about bit array counting\n    CU_ASSERT_EQUAL_FATAL(tsk_bitset_count(&arr, 0), 22);\n\n    tsk_bitset_get_items(&arr, 0, items, &n_items);\n    assert_arrays_equal(n_items_truth, items, items_truth);\n\n    tsk_memset(items, 0, 64);\n    tsk_memset(items_truth, 0, 64);\n    n_items = n_items_truth = 0;\n    tsk_bitset_free(&arr);\n\n    // create a length-2 array with 
64 bit capacity (two chunks per row)\n    tsk_bitset_init(&arr, 64, 2);\n    CU_ASSERT_EQUAL_FATAL(arr.len, 2);\n    CU_ASSERT_EQUAL_FATAL(arr.row_len, 2);\n\n    // fill the first 50 bits of the first row\n    for (tsk_bitset_val_t i = 0; i < 50; i++) {\n        tsk_bitset_set_bit(&arr, 0, i);\n        items_truth[n_items_truth] = (tsk_id_t) i;\n        n_items_truth++;\n    }\n\n    tsk_bitset_get_items(&arr, 0, items, &n_items);\n    assert_arrays_equal(n_items_truth, items, items_truth);\n\n    tsk_memset(items, 0, 64);\n    tsk_memset(items_truth, 0, 64);\n    n_items = n_items_truth = 0;\n\n    // fill bits 20-40 of the second row\n    for (tsk_bitset_val_t i = 20; i < 40; i++) {\n        tsk_bitset_set_bit(&arr, 1, i);\n        items_truth[n_items_truth] = (tsk_id_t) i;\n        n_items_truth++;\n    }\n\n    tsk_bitset_get_items(&arr, 1, items, &n_items);\n    assert_arrays_equal(n_items_truth, items, items_truth);\n\n    tsk_memset(items, 0, 64);\n    tsk_memset(items_truth, 0, 64);\n    n_items = n_items_truth = 0;\n\n    // verify our assumptions about row selection\n    CU_ASSERT_EQUAL_FATAL(arr.data[0], 4294967295); // row1 elem1\n    CU_ASSERT_EQUAL_FATAL(arr.data[1], 262143);     // row1 elem2\n    CU_ASSERT_EQUAL_FATAL(arr.data[2], 4293918720); // row2 elem1\n    CU_ASSERT_EQUAL_FATAL(arr.data[3], 255);        // row2 elem2\n\n    // subtract the second from the first row, store in first\n    tsk_bitset_subtract(&arr, 0, &arr, 1);\n\n    // verify our assumptions about subtraction\n    CU_ASSERT_EQUAL_FATAL(arr.data[0], 1048575);\n    CU_ASSERT_EQUAL_FATAL(arr.data[1], 261888);\n\n    tsk_bitset_t int_result;\n    tsk_bitset_init(&int_result, 64, 1);\n    CU_ASSERT_EQUAL_FATAL(int_result.len, 1);\n    CU_ASSERT_EQUAL_FATAL(int_result.row_len, 2);\n\n    // their intersection should be zero\n    tsk_bitset_intersect(&arr, 0, &arr, 1, &int_result);\n    CU_ASSERT_EQUAL_FATAL(int_result.data[0], 0);\n    CU_ASSERT_EQUAL_FATAL(int_result.data[1], 
0);\n\n    // now, add them back together, storing back in a\n    tsk_bitset_union(&arr, 0, &arr, 1);\n\n    // now, their intersection should be the subtracted chunk (20-40)\n    tsk_bitset_intersect(&arr, 0, &arr, 1, &int_result);\n    CU_ASSERT_EQUAL_FATAL(int_result.data[0], 4293918720);\n    CU_ASSERT_EQUAL_FATAL(int_result.data[1], 255);\n\n    tsk_bitset_free(&int_result);\n    tsk_bitset_free(&arr);\n}\n\nstatic void\ntest_meson_version(void)\n{\n    char version[100];\n\n    sprintf(\n        version, \"%d.%d.%d\", TSK_VERSION_MAJOR, TSK_VERSION_MINOR, TSK_VERSION_PATCH);\n    /* the MESON_PROJECT_VERSION define is passed in by meson when compiling */\n    CU_ASSERT_STRING_EQUAL(version, MESON_PROJECT_VERSION);\n}\n\nint\nmain(int argc, char **argv)\n{\n    CU_TestInfo tests[] = {\n        { \"test_strerror\", test_strerror },\n        { \"test_strerror_kastore\", test_strerror_kastore },\n        { \"test_generate_uuid\", test_generate_uuid },\n        { \"test_double_round\", test_double_round },\n        { \"test_blkalloc\", test_blkalloc },\n        { \"test_unknown_time\", test_unknown_time },\n        { \"test_malloc_zero\", test_malloc_zero },\n        { \"test_malloc_overflow\", test_malloc_overflow },\n        { \"test_debug_stream\", test_debug_stream },\n        { \"test_avl_empty\", test_avl_empty },\n        { \"test_avl_sequential\", test_avl_sequential },\n        { \"test_avl_interleaved\", test_avl_interleaved },\n        { \"test_avl_random\", test_avl_random },\n        { \"test_bit_arrays\", test_bit_arrays },\n        { \"test_meson_version\", test_meson_version },\n        { NULL, NULL },\n    };\n\n    return test_main(tests, argc, argv);\n}\n"
  },
  {
    "path": "c/tests/test_file_format.c",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2022 Tskit Developers\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, mergetest, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n#include \"testlib.h\"\n#include <tskit/tables.h>\n\ntypedef struct {\n    const char *name;\n    void *array;\n    tsk_size_t len;\n    int type;\n} write_table_col_t;\n\nstatic void\nwrite_table_cols(kastore_t *store, write_table_col_t *write_cols, size_t num_cols)\n{\n    size_t j;\n    int ret;\n\n    for (j = 0; j < num_cols; j++) {\n        ret = kastore_puts(store, write_cols[j].name, write_cols[j].array,\n            (size_t) write_cols[j].len, write_cols[j].type, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n    }\n}\n\nstatic void\ncopy_store_drop_columns(\n    tsk_treeseq_t *ts, size_t num_drop_cols, const char **drop_cols, const char *outfile)\n{\n    int ret = 0;\n    char tmpfile[] = \"/tmp/tsk_c_test_copy_XXXXXX\";\n    int fd;\n    kastore_t read_store, write_store;\n    kaitem_t 
*item;\n    size_t j, k;\n    bool keep;\n\n    fd = mkstemp(tmpfile);\n    CU_ASSERT_FATAL(fd != -1);\n    close(fd);\n\n    ret = tsk_treeseq_dump(ts, tmpfile, 0);\n    if (ret != 0) {\n        unlink(tmpfile);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n    }\n\n    ret = kastore_open(&read_store, tmpfile, \"r\", KAS_READ_ALL);\n    /* We can now unlink the file as either kastore has read it all, or failed */\n    unlink(tmpfile);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = kastore_open(&write_store, outfile, \"w\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Note: this API is not a documented part of kastore, so may be subject to\n     * change. */\n    for (j = 0; j < read_store.num_items; j++) {\n        item = &read_store.items[j];\n        keep = true;\n        for (k = 0; k < num_drop_cols; k++) {\n            if (strlen(drop_cols[k]) == item->key_len\n                && strncmp(drop_cols[k], item->key, item->key_len) == 0) {\n                keep = false;\n                break;\n            }\n        }\n        if (keep) {\n            ret = kastore_put(&write_store, item->key, item->key_len, item->array,\n                item->array_len, item->type, 0);\n            CU_ASSERT_EQUAL_FATAL(ret, 0);\n        }\n    }\n    kastore_close(&read_store);\n    ret = kastore_close(&write_store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n}\n\nstatic void\ntest_format_data_load_errors(void)\n{\n    size_t uuid_size = 36;\n    char uuid[uuid_size];\n    char format_name[TSK_FILE_FORMAT_NAME_LENGTH];\n    double L[2];\n    uint32_t version[2]\n        = { TSK_FILE_FORMAT_VERSION_MAJOR, TSK_FILE_FORMAT_VERSION_MINOR };\n    write_table_col_t write_cols[] = {\n        { \"format/name\", (void *) format_name, sizeof(format_name), KAS_INT8 },\n        { \"format/version\", (void *) version, 2, KAS_UINT32 },\n        { \"sequence_length\", (void *) L, 1, KAS_FLOAT64 },\n        { \"uuid\", (void *) uuid, (tsk_size_t) uuid_size, KAS_INT8 },\n    };\n    
tsk_table_collection_t tables;\n    kastore_t store;\n    size_t j;\n    int ret;\n\n    L[0] = 1;\n    L[1] = 0;\n    tsk_memcpy(format_name, TSK_FILE_FORMAT_NAME, sizeof(format_name));\n    /* Note: this will fail if we ever start parsing the form of the UUID */\n    tsk_memset(uuid, 0, uuid_size);\n\n    ret = kastore_open(&store, _tmp_file_name, \"w\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    /* We've only defined the format headers, so we should fail immediately\n     * after with required columns not found */\n    CU_ASSERT_FALSE(tsk_is_kas_error(ret));\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_REQUIRED_COL_NOT_FOUND);\n    ret = tsk_table_collection_free(&tables);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Version too old */\n    version[0] = TSK_FILE_FORMAT_VERSION_MAJOR - 1;\n    ret = kastore_open(&store, _tmp_file_name, \"w\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_FILE_VERSION_TOO_OLD);\n    ret = tsk_table_collection_free(&tables);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Version too new */\n    version[0] = TSK_FILE_FORMAT_VERSION_MAJOR + 1;\n    ret = kastore_open(&store, _tmp_file_name, \"w\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_FILE_VERSION_TOO_NEW);\n    ret = tsk_table_collection_free(&tables);\n    
CU_ASSERT_EQUAL_FATAL(ret, 0);\n    version[0] = TSK_FILE_FORMAT_VERSION_MAJOR;\n\n    /* Bad version length */\n    write_cols[1].len = 0;\n    ret = kastore_open(&store, _tmp_file_name, \"w\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_FILE_FORMAT);\n    ret = tsk_table_collection_free(&tables);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    write_cols[1].len = 2;\n\n    /* Bad format name length */\n    write_cols[0].len = 0;\n    ret = kastore_open(&store, _tmp_file_name, \"w\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_FILE_FORMAT);\n    ret = tsk_table_collection_free(&tables);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    write_cols[0].len = TSK_FILE_FORMAT_NAME_LENGTH;\n\n    /* Bad format name */\n    format_name[0] = 'X';\n    ret = kastore_open(&store, _tmp_file_name, \"w\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_FILE_FORMAT);\n    ret = tsk_table_collection_free(&tables);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    format_name[0] = 't';\n\n    /* Bad type for sequence length. 
*/\n    write_cols[2].type = KAS_FLOAT32;\n    ret = kastore_open(&store, _tmp_file_name, \"w\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    CU_ASSERT_TRUE(tsk_is_kas_error(ret));\n    CU_ASSERT_EQUAL_FATAL(ret ^ (1 << TSK_KAS_ERR_BIT), KAS_ERR_TYPE_MISMATCH);\n    ret = tsk_table_collection_free(&tables);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    write_cols[2].type = KAS_FLOAT64;\n\n    /* Bad length for sequence length. */\n    write_cols[2].len = 2;\n    ret = kastore_open(&store, _tmp_file_name, \"w\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_FILE_FORMAT);\n    ret = tsk_table_collection_free(&tables);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    write_cols[2].len = 1;\n\n    /* Bad value for sequence length. 
*/\n    L[0] = -1;\n    ret = kastore_open(&store, _tmp_file_name, \"w\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SEQUENCE_LENGTH);\n    ret = tsk_table_collection_free(&tables);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    L[0] = 1;\n\n    /* Wrong length for uuid */\n    write_cols[3].len = 1;\n    ret = kastore_open(&store, _tmp_file_name, \"w\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_FILE_FORMAT);\n    ret = tsk_table_collection_free(&tables);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    write_cols[3].len = (tsk_size_t) uuid_size;\n\n    /* Missing keys */\n    for (j = 0; j < sizeof(write_cols) / sizeof(*write_cols) - 1; j++) {\n        ret = kastore_open(&store, _tmp_file_name, \"w\", 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        write_table_cols(&store, write_cols, j);\n        ret = kastore_close(&store);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_REQUIRED_COL_NOT_FOUND);\n        ret = tsk_table_collection_free(&tables);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n    }\n}\n\nstatic void\ntest_missing_optional_column_pairs(void)\n{\n    int ret;\n    size_t j;\n    tsk_treeseq_t *ts = caterpillar_tree(5, 3, 3);\n    tsk_table_collection_t t1, t2;\n    const char *required_cols[][2] = { { \"edges/metadata\", \"edges/metadata_offset\" },\n        { \"migrations/metadata\", \"migrations/metadata_offset\" },\n        { 
\"individuals/parents\", \"individuals/parents_offset\" } };\n    const char *drop_cols[2];\n\n    ret = tsk_treeseq_copy_tables(ts, &t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (j = 0; j < sizeof(required_cols) / sizeof(*required_cols); j++) {\n        drop_cols[0] = required_cols[j][0];\n        copy_store_drop_columns(ts, 1, drop_cols, _tmp_file_name);\n        ret = tsk_table_collection_load(&t2, _tmp_file_name, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BOTH_COLUMNS_REQUIRED);\n        tsk_table_collection_free(&t2);\n\n        drop_cols[0] = required_cols[j][1];\n        copy_store_drop_columns(ts, 1, drop_cols, _tmp_file_name);\n        ret = tsk_table_collection_load(&t2, _tmp_file_name, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BOTH_COLUMNS_REQUIRED);\n        tsk_table_collection_free(&t2);\n\n        drop_cols[0] = required_cols[j][0];\n        drop_cols[1] = required_cols[j][1];\n        copy_store_drop_columns(ts, 2, drop_cols, _tmp_file_name);\n        ret = tsk_table_collection_load(&t2, _tmp_file_name, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_FALSE(tsk_table_collection_equals(&t1, &t2, 0));\n        tsk_table_collection_free(&t2);\n    }\n\n    tsk_table_collection_free(&t1);\n    tsk_treeseq_free(ts);\n    free(ts);\n}\n\nstatic void\ntest_missing_required_column_pairs(void)\n{\n    int ret;\n    size_t j;\n    tsk_treeseq_t *ts = caterpillar_tree(5, 3, 3);\n    tsk_table_collection_t t;\n    const char *required_cols[][2] = {\n        { \"individuals/location\", \"individuals/location_offset\" },\n        { \"individuals/metadata\", \"individuals/metadata_offset\" },\n        { \"mutations/derived_state\", \"mutations/derived_state_offset\" },\n        { \"mutations/metadata\", \"mutations/metadata_offset\" },\n        { \"nodes/metadata\", \"nodes/metadata_offset\" },\n        { \"populations/metadata\", \"populations/metadata_offset\" },\n        { \"provenances/record\", \"provenances/record_offset\" 
},\n        { \"provenances/timestamp\", \"provenances/timestamp_offset\" },\n        { \"sites/ancestral_state\", \"sites/ancestral_state_offset\" },\n        { \"sites/metadata\", \"sites/metadata_offset\" },\n    };\n    const char *drop_cols[2];\n\n    for (j = 0; j < sizeof(required_cols) / sizeof(*required_cols); j++) {\n        drop_cols[0] = required_cols[j][0];\n        copy_store_drop_columns(ts, 1, drop_cols, _tmp_file_name);\n        ret = tsk_table_collection_load(&t, _tmp_file_name, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_REQUIRED_COL_NOT_FOUND);\n        tsk_table_collection_free(&t);\n\n        drop_cols[0] = required_cols[j][1];\n        copy_store_drop_columns(ts, 1, drop_cols, _tmp_file_name);\n        ret = tsk_table_collection_load(&t, _tmp_file_name, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BOTH_COLUMNS_REQUIRED);\n        tsk_table_collection_free(&t);\n\n        copy_store_drop_columns(ts, 2, required_cols[j], _tmp_file_name);\n        ret = tsk_table_collection_load(&t, _tmp_file_name, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_REQUIRED_COL_NOT_FOUND);\n        tsk_table_collection_free(&t);\n    }\n\n    tsk_treeseq_free(ts);\n    free(ts);\n}\n\nstatic void\nverify_bad_offset_columns(tsk_treeseq_t *ts, const char *offset_col)\n{\n    int ret = 0;\n    kastore_t store;\n    tsk_table_collection_t tables;\n    uint32_t *offset_array, *offset_copy;\n    size_t offset_len;\n    int type;\n    uint32_t data_len;\n\n    ret = tsk_treeseq_dump(ts, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_open(&store, _tmp_file_name, \"r\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = kastore_gets(&store, offset_col, (void **) &offset_array, &offset_len, &type);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(type, KAS_UINT32);\n    offset_copy = malloc(offset_len * sizeof(*offset_array));\n    CU_ASSERT_FATAL(offset_copy != NULL);\n    tsk_memcpy(offset_copy, offset_array, offset_len * 
sizeof(*offset_array));\n    data_len = offset_array[offset_len - 1];\n    CU_ASSERT_TRUE(data_len > 0);\n    kastore_close(&store);\n\n    offset_copy[0] = UINT32_MAX;\n    copy_store_drop_columns(ts, 1, &offset_col, _tmp_file_name);\n    ret = kastore_open(&store, _tmp_file_name, \"a\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_puts(&store, offset_col, offset_copy, offset_len, KAS_UINT32, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_OFFSET);\n    tsk_table_collection_free(&tables);\n\n    offset_copy[0] = 0;\n    offset_copy[offset_len - 1] = 0;\n    copy_store_drop_columns(ts, 1, &offset_col, _tmp_file_name);\n    ret = kastore_open(&store, _tmp_file_name, \"a\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_puts(&store, offset_col, offset_copy, offset_len, KAS_UINT32, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_OFFSET);\n    tsk_table_collection_free(&tables);\n\n    offset_copy[offset_len - 1] = data_len + 1;\n    copy_store_drop_columns(ts, 1, &offset_col, _tmp_file_name);\n    ret = kastore_open(&store, _tmp_file_name, \"a\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_puts(&store, offset_col, offset_copy, offset_len, KAS_UINT32, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_OFFSET);\n    tsk_table_collection_free(&tables);\n\n    copy_store_drop_columns(ts, 1, &offset_col, _tmp_file_name);\n    ret = kastore_open(&store, _tmp_file_name, \"a\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = 
kastore_puts(&store, offset_col, NULL, 0, KAS_UINT32, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_FILE_FORMAT);\n    tsk_table_collection_free(&tables);\n\n    copy_store_drop_columns(ts, 1, &offset_col, _tmp_file_name);\n    ret = kastore_open(&store, _tmp_file_name, \"a\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_puts(&store, offset_col, offset_copy, offset_len, KAS_FLOAT32, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_COLUMN_TYPE);\n    tsk_table_collection_free(&tables);\n\n    free(offset_copy);\n}\n\nstatic void\ntest_bad_offset_columns(void)\n{\n    size_t j;\n    tsk_treeseq_t *ts = caterpillar_tree(5, 3, 3);\n    /* We exclude \"provenances/timestamp_offset\" here because there are no\n     * non-ragged columns in the provenances table, so this doesn't quite\n     * fit into the same pattern as the other tables */\n    const char *cols[] = {\n        \"edges/metadata_offset\",\n        \"migrations/metadata_offset\",\n        \"individuals/location_offset\",\n        \"individuals/parents_offset\",\n        \"individuals/metadata_offset\",\n        \"mutations/derived_state_offset\",\n        \"mutations/metadata_offset\",\n        \"nodes/metadata_offset\",\n        \"populations/metadata_offset\",\n        \"provenances/record_offset\",\n        \"sites/ancestral_state_offset\",\n        \"sites/metadata_offset\",\n    };\n\n    for (j = 0; j < sizeof(cols) / sizeof(*cols); j++) {\n        verify_bad_offset_columns(ts, cols[j]);\n    }\n    tsk_treeseq_free(ts);\n    free(ts);\n}\n\nstatic void\ntest_force_offset_64(void)\n{\n    int ret;\n    tsk_treeseq_t *ts = caterpillar_tree(5, 3, 3);\n    
tsk_table_collection_t t1;\n    tsk_table_collection_t t2;\n    kastore_t store;\n    kaitem_t *item;\n    const char *suffix;\n    const char *offset_str = \"_offset\";\n    int num_found = 0;\n    size_t j;\n\n    ret = tsk_treeseq_dump(ts, _tmp_file_name, TSK_DUMP_FORCE_OFFSET_64);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = kastore_open(&store, _tmp_file_name, \"r\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    for (j = 0; j < store.num_items; j++) {\n        item = &store.items[j];\n        /* Does the key end in \"_offset\"? */\n        if (item->key_len > strlen(offset_str)) {\n            suffix = item->key + (item->key_len - strlen(offset_str));\n            if (strncmp(suffix, offset_str, strlen(offset_str)) == 0) {\n                CU_ASSERT_EQUAL(item->type, KAS_UINT64);\n                num_found++;\n            }\n        }\n    }\n    CU_ASSERT_TRUE(num_found > 0);\n    kastore_close(&store);\n\n    ret = tsk_table_collection_load(&t1, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_treeseq_copy_tables(ts, &t2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n\n    tsk_table_collection_free(&t1);\n    tsk_table_collection_free(&t2);\n    tsk_treeseq_free(ts);\n    free(ts);\n}\n\nstatic void\ntest_missing_indexes(void)\n{\n    int ret;\n    tsk_treeseq_t *ts = caterpillar_tree(5, 3, 3);\n    tsk_table_collection_t t1, t2;\n    const char *cols[]\n        = { \"indexes/edge_insertion_order\", \"indexes/edge_removal_order\" };\n    const char *drop_cols[2];\n\n    ret = tsk_treeseq_copy_tables(ts, &t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    drop_cols[0] = cols[0];\n    copy_store_drop_columns(ts, 1, drop_cols, _tmp_file_name);\n    ret = tsk_table_collection_load(&t2, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BOTH_COLUMNS_REQUIRED);\n    tsk_table_collection_free(&t2);\n\n    drop_cols[0] = cols[1];\n    copy_store_drop_columns(ts, 1, 
drop_cols, _tmp_file_name);\n    ret = tsk_table_collection_load(&t2, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BOTH_COLUMNS_REQUIRED);\n    tsk_table_collection_free(&t2);\n\n    copy_store_drop_columns(ts, 2, cols, _tmp_file_name);\n    ret = tsk_table_collection_load(&t2, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n    CU_ASSERT_FALSE(tsk_table_collection_has_index(&t2, 0));\n    tsk_table_collection_free(&t2);\n\n    tsk_table_collection_free(&t1);\n    tsk_treeseq_free(ts);\n    free(ts);\n}\n\nstatic void\ntest_malformed_indexes(void)\n{\n    int ret;\n    tsk_treeseq_t *ts = caterpillar_tree(5, 3, 3);\n    tsk_table_collection_t tables;\n    tsk_treeseq_t ts2;\n    tsk_size_t num_edges = tsk_treeseq_get_num_edges(ts);\n    tsk_id_t *bad_index = tsk_calloc(num_edges, sizeof(tsk_id_t));\n    tsk_id_t *good_index = tsk_calloc(num_edges, sizeof(tsk_id_t));\n    kastore_t store;\n    const char *cols[]\n        = { \"indexes/edge_insertion_order\", \"indexes/edge_removal_order\" };\n\n    CU_ASSERT_FATAL(bad_index != NULL);\n    CU_ASSERT_FATAL(good_index != NULL);\n\n    /* If both columns are not the same length as the number of edges we\n     * should raise an error */\n    copy_store_drop_columns(ts, 2, cols, _tmp_file_name);\n    ret = kastore_open(&store, _tmp_file_name, \"a\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_puts(&store, cols[0], NULL, 0, TSK_ID_STORAGE_TYPE, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_puts(&store, cols[1], NULL, 0, TSK_ID_STORAGE_TYPE, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_FILE_FORMAT);\n    tsk_table_collection_free(&tables);\n\n    bad_index[0] = -1;\n\n    copy_store_drop_columns(ts, 2, cols, _tmp_file_name);\n    ret = 
kastore_open(&store, _tmp_file_name, \"a\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_puts(\n        &store, cols[0], good_index, (size_t) num_edges, TSK_ID_STORAGE_TYPE, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_puts(\n        &store, cols[1], bad_index, (size_t) num_edges, TSK_ID_STORAGE_TYPE, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_treeseq_load(&ts2, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);\n    tsk_treeseq_free(&ts2);\n\n    copy_store_drop_columns(ts, 2, cols, _tmp_file_name);\n    ret = kastore_open(&store, _tmp_file_name, \"a\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_puts(\n        &store, cols[0], bad_index, (size_t) num_edges, TSK_ID_STORAGE_TYPE, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_puts(\n        &store, cols[1], good_index, (size_t) num_edges, TSK_ID_STORAGE_TYPE, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_treeseq_load(&ts2, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);\n    tsk_treeseq_free(&ts2);\n\n    copy_store_drop_columns(ts, 1, cols, _tmp_file_name);\n    ret = kastore_open(&store, _tmp_file_name, \"a\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_puts(&store, cols[0], bad_index, (size_t) num_edges, KAS_FLOAT32, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_treeseq_load(&ts2, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_COLUMN_TYPE);\n    tsk_treeseq_free(&ts2);\n\n    free(good_index);\n    free(bad_index);\n    tsk_treeseq_free(ts);\n    free(ts);\n}\n\nstatic void\ntest_missing_reference_sequence(void)\n{\n    int ret;\n    tsk_treeseq_t *ts = caterpillar_tree(5, 3, 3);\n    tsk_table_collection_t t1, t2;\n    const 
char *cols[] = { \"reference_sequence/data\", \"reference_sequence/url\",\n        \"reference_sequence/metadata_schema\", \"reference_sequence/metadata\" };\n\n    CU_ASSERT_TRUE(tsk_treeseq_has_reference_sequence(ts));\n\n    ret = tsk_treeseq_copy_tables(ts, &t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    copy_store_drop_columns(ts, 1, cols, _tmp_file_name);\n    ret = tsk_table_collection_load(&t2, _tmp_file_name, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_has_reference_sequence(&t2));\n    tsk_table_collection_free(&t2);\n\n    copy_store_drop_columns(ts, 2, cols, _tmp_file_name);\n    ret = tsk_table_collection_load(&t2, _tmp_file_name, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_has_reference_sequence(&t2));\n    tsk_table_collection_free(&t2);\n\n    copy_store_drop_columns(ts, 3, cols, _tmp_file_name);\n    ret = tsk_table_collection_load(&t2, _tmp_file_name, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_has_reference_sequence(&t2));\n    tsk_table_collection_free(&t2);\n\n    /* Dropping all the columns gives us a NULL reference_sequence, though */\n    copy_store_drop_columns(ts, 4, cols, _tmp_file_name);\n    ret = tsk_table_collection_load(&t2, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_table_collection_has_reference_sequence(&t2));\n    tsk_table_collection_free(&t2);\n\n    tsk_table_collection_free(&t1);\n    tsk_treeseq_free(ts);\n    free(ts);\n}\n\nstatic void\ntest_bad_column_types(void)\n{\n    int ret;\n    tsk_treeseq_t *ts = caterpillar_tree(5, 3, 3);\n    tsk_table_collection_t tables;\n    tsk_size_t num_edges = tsk_treeseq_get_num_edges(ts);\n    /* make sure we have enough memory in all cases */\n    tsk_id_t *col_memory = tsk_calloc(num_edges + 1, sizeof(double));\n    kastore_t store;\n    const char *cols[1];\n\n    CU_ASSERT_FATAL(col_memory != NULL);\n\n    cols[0] = \"edges/left\";\n    copy_store_drop_columns(ts, 1, cols, _tmp_file_name);\n    ret = kastore_open(&store, _tmp_file_name, 
\"a\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_puts(&store, cols[0], col_memory, (size_t) num_edges, KAS_FLOAT32, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_COLUMN_TYPE);\n    tsk_table_collection_free(&tables);\n\n    cols[0] = \"edges/metadata_offset\";\n    copy_store_drop_columns(ts, 1, cols, _tmp_file_name);\n    ret = kastore_open(&store, _tmp_file_name, \"a\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_puts(\n        &store, cols[0], col_memory, (size_t) num_edges + 1, KAS_FLOAT32, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_COLUMN_TYPE);\n    tsk_table_collection_free(&tables);\n\n    cols[0] = \"edges/metadata\";\n    copy_store_drop_columns(ts, 1, cols, _tmp_file_name);\n    ret = kastore_open(&store, _tmp_file_name, \"a\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_puts(&store, cols[0], NULL, 0, KAS_FLOAT32, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_COLUMN_TYPE);\n    tsk_table_collection_free(&tables);\n\n    cols[0] = \"edges/metadata_schema\";\n    copy_store_drop_columns(ts, 1, cols, _tmp_file_name);\n    ret = kastore_open(&store, _tmp_file_name, \"a\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_puts(&store, cols[0], NULL, 0, KAS_FLOAT32, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 
TSK_ERR_BAD_COLUMN_TYPE);\n    tsk_table_collection_free(&tables);\n\n    cols[0] = \"reference_sequence/metadata\";\n    copy_store_drop_columns(ts, 1, cols, _tmp_file_name);\n    ret = kastore_open(&store, _tmp_file_name, \"a\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_puts(&store, cols[0], NULL, 0, KAS_FLOAT32, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_COLUMN_TYPE);\n    tsk_table_collection_free(&tables);\n\n    free(col_memory);\n    tsk_treeseq_free(ts);\n    free(ts);\n}\n\nstatic void\ntest_missing_required_columns(void)\n{\n    int ret;\n    size_t j;\n    tsk_treeseq_t *ts = caterpillar_tree(5, 3, 3);\n    tsk_table_collection_t t;\n    const char *required_cols[] = {\n        \"edges/child\",\n        \"edges/left\",\n        \"edges/parent\",\n        \"edges/right\",\n        \"format/name\",\n        \"format/version\",\n        \"individuals/flags\",\n        \"migrations/dest\",\n        \"migrations/left\",\n        \"migrations/node\",\n        \"migrations/right\",\n        \"migrations/source\",\n        \"migrations/time\",\n        \"mutations/node\",\n        \"mutations/parent\",\n        \"mutations/site\",\n        \"nodes/flags\",\n        \"nodes/individual\",\n        \"nodes/population\",\n        \"nodes/time\",\n        \"sequence_length\",\n        \"sites/position\",\n        \"uuid\",\n    };\n    const char *drop_cols[1];\n\n    for (j = 0; j < sizeof(required_cols) / sizeof(*required_cols); j++) {\n        drop_cols[0] = required_cols[j];\n        copy_store_drop_columns(ts, 1, drop_cols, _tmp_file_name);\n        ret = tsk_table_collection_load(&t, _tmp_file_name, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_REQUIRED_COL_NOT_FOUND);\n        tsk_table_collection_free(&t);\n    }\n\n    tsk_treeseq_free(ts);\n    
free(ts);\n}\n\nstatic void\ntest_metadata_schemas_optional(void)\n{\n    int ret;\n    size_t j;\n    tsk_treeseq_t *ts = caterpillar_tree(5, 3, 3);\n    tsk_table_collection_t t1, t2;\n    const char *cols[] = {\n        \"metadata\",\n        \"metadata_schema\",\n        \"reference_sequence/metadata\",\n        \"reference_sequence/metadata_schema\",\n        \"individuals/metadata_schema\",\n        \"populations/metadata_schema\",\n        \"nodes/metadata_schema\",\n        \"edges/metadata_schema\",\n        \"sites/metadata_schema\",\n        \"mutations/metadata_schema\",\n        \"migrations/metadata_schema\",\n    };\n    const char *drop_cols[1];\n\n    ret = tsk_treeseq_copy_tables(ts, &t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (j = 0; j < sizeof(cols) / sizeof(*cols); j++) {\n        drop_cols[0] = cols[j];\n        copy_store_drop_columns(ts, 1, drop_cols, _tmp_file_name);\n        ret = tsk_table_collection_load(&t2, _tmp_file_name, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        /* metadata schemas are included in data comparisons */\n        CU_ASSERT_FALSE(tsk_table_collection_equals(&t1, &t2, 0));\n        tsk_table_collection_free(&t2);\n    }\n\n    tsk_table_collection_free(&t1);\n    tsk_treeseq_free(ts);\n    free(ts);\n}\n\n/* This test is problematic on windows because of the different off_t\n * types. 
Doesn't seem worth the trouble of getting it working.\n */\nstatic void\ntest_load_bad_file_formats(void)\n{\n#if !defined(_WIN32)\n    tsk_table_collection_t tables;\n    tsk_treeseq_t ts;\n    int ret, ret2;\n    off_t offset;\n    FILE *f;\n\n    /* A zero byte file is TSK_ERR_EOF */\n    f = fopen(_tmp_file_name, \"w+\");\n    ret = tsk_table_collection_loadf(&tables, f, 0);\n    ret2 = tsk_treeseq_loadf(&ts, f, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, ret2);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EOF);\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n    fclose(f);\n\n    for (offset = 1; offset < 100; offset++) {\n        ret = tsk_table_collection_init(&tables, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        tables.sequence_length = 1.0;\n        ret = tsk_table_collection_dump(&tables, _tmp_file_name, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n        ret2 = truncate(_tmp_file_name, offset);\n        CU_ASSERT_EQUAL_FATAL(ret2, 0);\n        ret = tsk_table_collection_load(&tables, _tmp_file_name, TSK_NO_INIT);\n        CU_ASSERT_EQUAL_FATAL(ret ^ (1 << TSK_KAS_ERR_BIT), KAS_ERR_BAD_FILE_FORMAT);\n        tsk_table_collection_free(&tables);\n    }\n#endif\n}\n\nstatic void\ntest_load_errors(void)\n{\n    tsk_table_collection_t tables;\n    tsk_treeseq_t ts;\n    int ret, ret2;\n    const char *str;\n    FILE *f;\n\n    ret = tsk_table_collection_load(&tables, \"/\", 0);\n    ret2 = tsk_treeseq_load(&ts, \"/\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, ret2);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_IO);\n    str = tsk_strerror(ret);\n    CU_ASSERT_TRUE(strlen(str) > 0);\n    CU_ASSERT_STRING_EQUAL(str, strerror(EISDIR));\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n\n    ret = tsk_table_collection_load(&tables, \"/bin/theres_no_way_this_file_exists\", 0);\n    ret2 = tsk_treeseq_load(&ts, \"/bin/theres_no_way_this_file_exists\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, ret2);\n    CU_ASSERT_EQUAL_FATAL(ret, 
TSK_ERR_IO);\n    str = tsk_strerror(ret);\n    CU_ASSERT_TRUE(strlen(str) > 0);\n    CU_ASSERT_STRING_EQUAL(str, strerror(ENOENT));\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n\n    ret = tsk_table_collection_load(&tables, \"/bin/sh\", 0);\n    ret2 = tsk_treeseq_load(&ts, \"/bin/sh\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, ret2);\n    CU_ASSERT_TRUE(tsk_is_kas_error(ret));\n    CU_ASSERT_EQUAL_FATAL(ret ^ (1 << TSK_KAS_ERR_BIT), KAS_ERR_BAD_FILE_FORMAT);\n    str = tsk_strerror(ret);\n    CU_ASSERT_TRUE(strlen(str) > 0);\n    tsk_table_collection_free(&tables);\n\n    /* open a file in the wrong mode */\n    f = fopen(_tmp_file_name, \"w\");\n    ret = tsk_table_collection_loadf(&tables, f, 0);\n    ret2 = tsk_treeseq_loadf(&ts, f, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, ret2);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_IO);\n    str = tsk_strerror(ret);\n    CU_ASSERT_TRUE(strlen(str) > 0);\n    CU_ASSERT_STRING_EQUAL(str, strerror(EBADF));\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n    fclose(f);\n}\n\nstatic void\ntest_load_eof(void)\n{\n    tsk_treeseq_t *ts = caterpillar_tree(5, 3, 3);\n    tsk_table_collection_t tables;\n    int ret;\n    FILE *f;\n\n    f = fopen(_tmp_file_name, \"w+\");\n    CU_ASSERT_NOT_EQUAL(f, NULL);\n    ret = tsk_table_collection_loadf(&tables, f, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EOF);\n    fclose(f);\n    tsk_table_collection_free(&tables);\n\n    /* Reading an empty file also returns EOF */\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EOF);\n    tsk_table_collection_free(&tables);\n\n    f = fopen(_tmp_file_name, \"w+\");\n    CU_ASSERT_NOT_EQUAL(f, NULL);\n    ret = tsk_treeseq_dumpf(ts, f, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Reading from the end of the stream gives EOF */\n    ret = tsk_table_collection_loadf(&tables, f, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EOF);\n    
tsk_table_collection_free(&tables);\n\n    /* Reading the start of the stream is fine */\n    fseek(f, 0, SEEK_SET);\n    ret = tsk_table_collection_loadf(&tables, f, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_table_collection_free(&tables);\n\n    /* And we should be back to the end of the stream */\n    ret = tsk_table_collection_loadf(&tables, f, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EOF);\n    tsk_table_collection_free(&tables);\n\n    /* Trying to read the same end stream should give the same\n     * result. */\n    ret = tsk_table_collection_loadf(&tables, f, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EOF);\n    tsk_table_collection_free(&tables);\n\n    /* A previously init'd tables should be good too */\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_loadf(&tables, f, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EOF);\n    tsk_table_collection_free(&tables);\n\n    fclose(f);\n    tsk_treeseq_free(ts);\n    free(ts);\n}\n\nstatic void\ntest_dump_errors(void)\n{\n    tsk_table_collection_t tables;\n    int ret;\n    FILE *f;\n    const char *str;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1.0;\n\n    ret = tsk_table_collection_dump(&tables, \"/\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_IO);\n    str = tsk_strerror(ret);\n    CU_ASSERT_TRUE(strlen(str) > 0);\n    CU_ASSERT_STRING_EQUAL(str, strerror(EISDIR));\n\n    /* We're assuming that we don't have write access to /bin, so don't run this\n     * as root! 
*/\n    ret = tsk_table_collection_dump(&tables, \"/bin/theres_no_way_this_file_exists\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_IO);\n    str = tsk_strerror(ret);\n    CU_ASSERT_TRUE(strlen(str) > 0);\n    CU_ASSERT_TRUE(\n        (strcmp(str, strerror(EACCES)) == 0) || (strcmp(str, strerror(EPERM)) == 0));\n\n    /* open a file in the wrong mode */\n    f = fopen(_tmp_file_name, \"r\");\n    ret = tsk_table_collection_dumpf(&tables, f, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_IO);\n    str = tsk_strerror(ret);\n    CU_ASSERT_TRUE(strlen(str) > 0);\n    CU_ASSERT_STRING_EQUAL(str, strerror(EBADF));\n    fclose(f);\n\n    /* We'd like to catch close errors also, but it's hard to provoke them\n     * without intercepting calls to fclose() */\n\n    tsk_table_collection_free(&tables);\n}\n\n/* FIXME these are good tests, but we want to make them more general so that\n * they can be applied to other tables.*/\nstatic void\ntest_load_node_table_errors(void)\n{\n    char format_name[TSK_FILE_FORMAT_NAME_LENGTH];\n    size_t uuid_size = 36;\n    char uuid[uuid_size];\n    double L = 1;\n    double time = 0;\n    double flags = 0;\n    tsk_id_t population = 0;\n    tsk_id_t individual = 0;\n    int8_t metadata = 0;\n    uint32_t metadata_offset[] = { 0, 1 };\n    uint32_t version[2]\n        = { TSK_FILE_FORMAT_VERSION_MAJOR, TSK_FILE_FORMAT_VERSION_MINOR };\n    write_table_col_t write_cols[] = {\n        { \"nodes/time\", (void *) &time, 1, KAS_FLOAT64 },\n        { \"nodes/flags\", (void *) &flags, 1, TSK_FLAGS_STORAGE_TYPE },\n        { \"nodes/population\", (void *) &population, 1, TSK_ID_STORAGE_TYPE },\n        { \"nodes/individual\", (void *) &individual, 1, TSK_ID_STORAGE_TYPE },\n        { \"nodes/metadata\", (void *) &metadata, 1, KAS_UINT8 },\n        { \"nodes/metadata_offset\", (void *) metadata_offset, 2, KAS_UINT32 },\n        { \"format/name\", (void *) format_name, sizeof(format_name), KAS_INT8 },\n        { \"format/version\", (void *) version, 
2, KAS_UINT32 },\n        { \"uuid\", (void *) uuid, uuid_size, KAS_INT8 },\n        { \"sequence_length\", (void *) &L, 1, KAS_FLOAT64 },\n    };\n    tsk_table_collection_t tables;\n    kastore_t store;\n    int ret;\n\n    tsk_memcpy(format_name, TSK_FILE_FORMAT_NAME, sizeof(format_name));\n    /* Note: this will fail if we ever start parsing the form of the UUID */\n    tsk_memset(uuid, 0, uuid_size);\n\n    ret = kastore_open(&store, _tmp_file_name, \"w\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    /* We've only defined the format headers and nodes, so we should fail immediately\n     * after with key not found */\n    CU_ASSERT_FALSE(tsk_is_kas_error(ret));\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_REQUIRED_COL_NOT_FOUND);\n    ret = tsk_table_collection_free(&tables);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Wrong type for time */\n    write_cols[0].type = KAS_INT64;\n    ret = kastore_open(&store, _tmp_file_name, \"w\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_COLUMN_TYPE);\n    ret = tsk_table_collection_free(&tables);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    write_cols[0].type = KAS_FLOAT64;\n\n    /* Wrong length for flags */\n    write_cols[1].len = 0;\n    ret = kastore_open(&store, _tmp_file_name, \"w\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_FILE_FORMAT);\n    ret = tsk_table_collection_free(&tables);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    write_cols[1].len = 1;\n\n    /* Wrong length for metadata offset */\n    write_cols[5].len = 1;\n    ret = kastore_open(&store, _tmp_file_name, \"w\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));\n    ret = kastore_close(&store);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_FILE_FORMAT);\n    ret = tsk_table_collection_free(&tables);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    write_cols[5].len = 2;\n}\n\nstatic void\ntest_example_round_trip(void)\n{\n    int ret;\n    tsk_treeseq_t *ts1 = caterpillar_tree(5, 3, 3);\n    tsk_treeseq_t ts2;\n    tsk_table_collection_t t1, t2;\n    FILE *f;\n\n    ret = tsk_treeseq_copy_tables(ts1, &t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_dump(&t1, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&t2, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n\n    /* Reading multiple times into the same tables with TSK_NO_INIT is supported. 
*/\n    ret = tsk_table_collection_load(&t2, _tmp_file_name, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n    tsk_table_collection_free(&t2);\n\n    /* Do the same thing with treeseq API */\n    remove(_tmp_file_name);\n    ret = tsk_treeseq_dump(ts1, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_treeseq_load(&ts2, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, ts2.tables, 0));\n    tsk_treeseq_free(&ts2);\n\n    /* Use loadf form */\n    f = fopen(_tmp_file_name, \"w+\");\n    ret = tsk_table_collection_dumpf(&t1, f, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    fseek(f, 0, SEEK_SET);\n    ret = tsk_table_collection_loadf(&t2, f, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n    tsk_table_collection_free(&t2);\n    fclose(f);\n\n    /* Do the same thing with treeseq API */\n    f = fopen(_tmp_file_name, \"w+\");\n    ret = tsk_treeseq_dumpf(ts1, f, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    fseek(f, 0, SEEK_SET);\n    ret = tsk_treeseq_loadf(&ts2, f, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, ts2.tables, 0));\n    tsk_treeseq_free(&ts2);\n\n    fclose(f);\n    tsk_table_collection_free(&t1);\n    tsk_treeseq_free(ts1);\n    free(ts1);\n}\n\nstatic void\ntest_multiple_round_trip(void)\n{\n    int ret;\n    tsk_size_t j;\n    tsk_size_t num_examples = 10;\n    tsk_treeseq_t *ts;\n    tsk_table_collection_t in_tables[num_examples];\n    tsk_table_collection_t out_tables;\n    FILE *f = fopen(_tmp_file_name, \"w+\");\n\n    CU_ASSERT_NOT_EQUAL_FATAL(f, NULL);\n\n    for (j = 0; j < num_examples; j++) {\n        ts = caterpillar_tree(5 + j, 3 + j, 3 + j);\n        ret = tsk_treeseq_copy_tables(ts, &in_tables[j], 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_treeseq_dumpf(ts, f, 0);\n        
CU_ASSERT_EQUAL_FATAL(ret, 0);\n        tsk_treeseq_free(ts);\n        free(ts);\n    }\n\n    fseek(f, 0, SEEK_SET);\n    for (j = 0; j < num_examples; j++) {\n        ret = tsk_table_collection_loadf(&out_tables, f, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_TRUE(tsk_table_collection_equals(&in_tables[j], &out_tables, 0));\n        tsk_table_collection_free(&out_tables);\n    }\n\n    /* Can do the same with the same set of previously init'd tables. */\n    ret = tsk_table_collection_init(&out_tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    fseek(f, 0, SEEK_SET);\n    for (j = 0; j < num_examples; j++) {\n        ret = tsk_table_collection_loadf(&out_tables, f, TSK_NO_INIT);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_TRUE(tsk_table_collection_equals(&in_tables[j], &out_tables, 0));\n    }\n    tsk_table_collection_free(&out_tables);\n\n    /* Can also read until EOF to do the same thing */\n    ret = tsk_table_collection_init(&out_tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    fseek(f, 0, SEEK_SET);\n    j = 0;\n    while (true) {\n        ret = tsk_table_collection_loadf(&out_tables, f, TSK_NO_INIT);\n        if (ret == TSK_ERR_EOF) {\n            break;\n        }\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_TRUE(tsk_table_collection_equals(&in_tables[j], &out_tables, 0));\n        j++;\n    }\n    tsk_table_collection_free(&out_tables);\n    CU_ASSERT_EQUAL_FATAL(j, num_examples);\n\n    for (j = 0; j < num_examples; j++) {\n        tsk_table_collection_free(&in_tables[j]);\n    }\n    fclose(f);\n}\n\nstatic void\ntest_copy_store_drop_columns(void)\n{\n    int ret;\n    tsk_treeseq_t *ts = caterpillar_tree(5, 3, 3);\n    tsk_table_collection_t t1, t2;\n\n    ret = tsk_treeseq_copy_tables(ts, &t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    /* Dropping no columns should have no effect on the data */\n    copy_store_drop_columns(ts, 0, NULL, _tmp_file_name);\n    ret = tsk_table_collection_load(&t2, 
_tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n\n    tsk_table_collection_free(&t1);\n    tsk_table_collection_free(&t2);\n    tsk_treeseq_free(ts);\n    free(ts);\n}\n\nstatic void\ntest_skip_tables(void)\n{\n    int ret;\n    tsk_treeseq_t *ts1 = caterpillar_tree(5, 3, 3);\n    tsk_treeseq_t ts2;\n    tsk_table_collection_t t1, t2;\n    FILE *f;\n\n    ret = tsk_treeseq_dump(ts1, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&t1, _tmp_file_name, TSK_LOAD_SKIP_TABLES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, ts1->tables, TSK_CMP_IGNORE_TABLES));\n    CU_ASSERT_EQUAL(t1.individuals.num_rows, 0);\n    CU_ASSERT_EQUAL(t1.nodes.num_rows, 0);\n    CU_ASSERT_EQUAL(t1.edges.num_rows, 0);\n    CU_ASSERT_EQUAL(t1.migrations.num_rows, 0);\n    CU_ASSERT_EQUAL(t1.sites.num_rows, 0);\n    CU_ASSERT_EQUAL(t1.mutations.num_rows, 0);\n    CU_ASSERT_EQUAL(t1.provenances.num_rows, 0);\n\n    /* Test _loadf code path as well */\n    f = fopen(_tmp_file_name, \"r+\");\n    ret = tsk_table_collection_loadf(&t2, f, TSK_LOAD_SKIP_TABLES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n    fclose(f);\n    tsk_table_collection_free(&t2);\n\n    /* Without TSK_LOAD_SKIP_TABLES we reach end of file */\n    f = fopen(_tmp_file_name, \"r+\");\n    ret = tsk_table_collection_loadf(&t2, f, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(fgetc(f), EOF);\n    fclose(f);\n    tsk_table_collection_free(&t2);\n\n    /* Setting TSK_LOAD_SKIP_TABLES only reads part of the file */\n    f = fopen(_tmp_file_name, \"r+\");\n    ret = tsk_table_collection_loadf(&t2, f, TSK_LOAD_SKIP_TABLES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_NOT_EQUAL(fgetc(f), EOF);\n    fclose(f);\n    tsk_table_collection_free(&t2);\n\n    /* We should be able to make a tree sequence 
*/\n    ret = tsk_treeseq_init(&ts2, &t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_treeseq_free(&ts2);\n\n    /* Do the same thing with treeseq API */\n    ret = tsk_treeseq_load(&ts2, _tmp_file_name, TSK_LOAD_SKIP_TABLES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, ts2.tables, 0));\n    tsk_treeseq_free(&ts2);\n\n    f = fopen(_tmp_file_name, \"r+\");\n    ret = tsk_treeseq_loadf(&ts2, f, TSK_LOAD_SKIP_TABLES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, ts2.tables, 0));\n    fclose(f);\n    tsk_treeseq_free(&ts2);\n\n    tsk_table_collection_free(&t1);\n    tsk_treeseq_free(ts1);\n    free(ts1);\n}\n\nstatic void\ntest_skip_reference_sequence(void)\n{\n    int ret;\n    tsk_treeseq_t *ts1 = caterpillar_tree(5, 3, 3);\n    tsk_treeseq_t ts2;\n    tsk_table_collection_t t1, t2;\n    FILE *f;\n\n    CU_ASSERT_TRUE(tsk_treeseq_has_reference_sequence(ts1));\n\n    ret = tsk_treeseq_dump(ts1, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(\n        &t1, _tmp_file_name, TSK_LOAD_SKIP_REFERENCE_SEQUENCE);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_FALSE(tsk_table_collection_equals(&t1, ts1->tables, 0));\n    CU_ASSERT_TRUE(tsk_table_collection_equals(\n        &t1, ts1->tables, TSK_CMP_IGNORE_REFERENCE_SEQUENCE));\n    CU_ASSERT_FALSE(tsk_table_collection_has_reference_sequence(&t1));\n\n    /* Test _loadf code path as well */\n    f = fopen(_tmp_file_name, \"r+\");\n    ret = tsk_table_collection_loadf(&t2, f, TSK_LOAD_SKIP_REFERENCE_SEQUENCE);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n    fclose(f);\n    tsk_table_collection_free(&t2);\n\n    /* Setting TSK_LOAD_SKIP_REFERENCE_SEQUENCE only reads part of the file */\n    f = fopen(_tmp_file_name, \"r+\");\n    ret = tsk_table_collection_loadf(&t2, f, TSK_LOAD_SKIP_REFERENCE_SEQUENCE);\n    
CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_NOT_EQUAL(fgetc(f), EOF);\n    fclose(f);\n    tsk_table_collection_free(&t2);\n\n    /* We should be able to make a tree sequence */\n    ret = tsk_treeseq_init(&ts2, &t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_treeseq_free(&ts2);\n\n    /* Do the same thing with treeseq API */\n    ret = tsk_treeseq_load(&ts2, _tmp_file_name, TSK_LOAD_SKIP_REFERENCE_SEQUENCE);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, ts2.tables, 0));\n    tsk_treeseq_free(&ts2);\n\n    f = fopen(_tmp_file_name, \"r+\");\n    ret = tsk_treeseq_loadf(&ts2, f, TSK_LOAD_SKIP_REFERENCE_SEQUENCE);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, ts2.tables, 0));\n    fclose(f);\n    tsk_treeseq_free(&ts2);\n\n    tsk_table_collection_free(&t1);\n    tsk_treeseq_free(ts1);\n    free(ts1);\n}\n\nint\nmain(int argc, char **argv)\n{\n    CU_TestInfo tests[] = {\n        { \"test_format_data_load_errors\", test_format_data_load_errors },\n        { \"test_missing_indexes\", test_missing_indexes },\n        { \"test_malformed_indexes\", test_malformed_indexes },\n        { \"test_missing_reference_sequence\", test_missing_reference_sequence },\n        { \"test_bad_column_types\", test_bad_column_types },\n        { \"test_missing_required_columns\", test_missing_required_columns },\n        { \"test_missing_optional_column_pairs\", test_missing_optional_column_pairs },\n        { \"test_missing_required_column_pairs\", test_missing_required_column_pairs },\n        { \"test_bad_offset_columns\", test_bad_offset_columns },\n        { \"test_force_offset_64\", test_force_offset_64 },\n        { \"test_metadata_schemas_optional\", test_metadata_schemas_optional },\n        { \"test_load_node_table_errors\", test_load_node_table_errors },\n        { \"test_load_bad_file_formats\", test_load_bad_file_formats },\n        { \"test_load_errors\", test_load_errors },\n       
 { \"test_load_eof\", test_load_eof },\n        { \"test_dump_errors\", test_dump_errors },\n        { \"test_example_round_trip\", test_example_round_trip },\n        { \"test_multiple_round_trip\", test_multiple_round_trip },\n        { \"test_copy_store_drop_columns\", test_copy_store_drop_columns },\n        { \"test_skip_tables\", test_skip_tables },\n        { \"test_skip_reference_sequence\", test_skip_reference_sequence },\n        { NULL, NULL },\n    };\n\n    return test_main(tests, argc, argv);\n}\n"
  },
  {
    "path": "c/tests/test_genotypes.c",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2022 Tskit Developers\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n#include \"testlib.h\"\n#include <tskit/genotypes.h>\n\n#include <unistd.h>\n#include <stdlib.h>\n#include <string.h>\n\nstatic void\ntest_simplest_missing_data(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\";\n    const char *sites = \"0.0    A\\n\";\n    tsk_treeseq_t ts;\n    tsk_vargen_t vargen;\n    tsk_variant_t *var;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, \"\", NULL, sites, NULL, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 1);\n\n    ret = tsk_vargen_init(&vargen, &ts, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->site.position, 0.0);\n  
  CU_ASSERT_TRUE(var->has_missing_data);\n    CU_ASSERT_EQUAL(var->genotypes[0], TSK_MISSING_DATA);\n    CU_ASSERT_EQUAL(var->genotypes[1], TSK_MISSING_DATA);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_vargen_free(&vargen);\n\n    ret = tsk_vargen_init(&vargen, &ts, NULL, 0, NULL, TSK_ISOLATED_NOT_MISSING);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->site.position, 0.0);\n    CU_ASSERT_FALSE(var->has_missing_data);\n    CU_ASSERT_EQUAL(var->genotypes[0], 0);\n    CU_ASSERT_EQUAL(var->genotypes[1], 0);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_vargen_free(&vargen);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_missing_data_user_alleles(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\";\n    const char *sites = \"0.0    A\\n\";\n    tsk_treeseq_t ts;\n    tsk_vargen_t vargen;\n    tsk_variant_t *var;\n    const char *alleles[] = { \"A\", NULL };\n    int ret;\n    tsk_id_t samples[] = { 0 };\n\n    tsk_treeseq_from_text(&ts, 1, nodes, \"\", NULL, sites, NULL, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 1);\n\n    ret = tsk_vargen_init(&vargen, &ts, NULL, 0, alleles, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->site.position, 0.0);\n    CU_ASSERT_TRUE(var->has_missing_data);\n    CU_ASSERT_EQUAL(var->genotypes[0], TSK_MISSING_DATA);\n    CU_ASSERT_EQUAL(var->genotypes[1], TSK_MISSING_DATA);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_vargen_free(&vargen);\n\n    ret = tsk_vargen_init(&vargen, &ts, samples, 1, alleles, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_vargen_next(&vargen, &var);\n   
 CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->site.position, 0.0);\n    CU_ASSERT_TRUE(var->has_missing_data);\n    CU_ASSERT_EQUAL(var->genotypes[0], TSK_MISSING_DATA);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_vargen_free(&vargen);\n\n    ret = tsk_vargen_init(&vargen, &ts, NULL, 0, NULL, TSK_ISOLATED_NOT_MISSING);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->site.position, 0.0);\n    CU_ASSERT_FALSE(var->has_missing_data);\n    CU_ASSERT_EQUAL(var->genotypes[0], 0);\n    CU_ASSERT_EQUAL(var->genotypes[1], 0);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_vargen_free(&vargen);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_missing_data_mutations(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\";\n    const char *sites = \"0.0    A\\n\";\n    const char *mutations = \"0    0     T   -1\\n\";\n    tsk_treeseq_t ts;\n    tsk_vargen_t vargen;\n    tsk_variant_t *var;\n    const char *alleles[] = { \"A\", \"T\", NULL };\n    int ret;\n    tsk_id_t samples[] = { 0 };\n\n    tsk_treeseq_from_text(&ts, 1, nodes, \"\", NULL, sites, mutations, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 1);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 1);\n\n    ret = tsk_vargen_init(&vargen, &ts, NULL, 0, alleles, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->site.position, 0.0);\n    CU_ASSERT_TRUE(var->has_missing_data);\n    CU_ASSERT_EQUAL(var->genotypes[0], 1);\n    CU_ASSERT_EQUAL(var->genotypes[1], TSK_MISSING_DATA);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_vargen_free(&vargen);\n\n    ret = 
tsk_vargen_init(&vargen, &ts, samples, 1, alleles, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_vargen_print_state(&vargen, _devnull);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->site.position, 0.0);\n    CU_ASSERT_FALSE(var->has_missing_data);\n    CU_ASSERT_EQUAL(var->genotypes[0], 1);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_vargen_free(&vargen);\n\n    ret = tsk_vargen_init(&vargen, &ts, NULL, 0, NULL, TSK_ISOLATED_NOT_MISSING);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->site.position, 0.0);\n    CU_ASSERT_FALSE(var->has_missing_data);\n    CU_ASSERT_EQUAL(var->genotypes[0], 1);\n    CU_ASSERT_EQUAL(var->genotypes[1], 0);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_vargen_free(&vargen);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_missing_data_mutations_all_samples(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\";\n    const char *sites = \"0.0    A\\n\";\n    const char *mutations = \"0    0     T   -1\\n\"\n                            \"0    1     T   -1\\n\";\n    tsk_treeseq_t ts;\n    tsk_vargen_t vargen;\n    tsk_variant_t *var;\n    const char *alleles[] = { \"A\", \"T\", NULL };\n    int ret;\n    tsk_id_t samples[] = { 0, 1 };\n\n    tsk_treeseq_from_text(&ts, 1, nodes, \"\", NULL, sites, mutations, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 1);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 2);\n\n    ret = tsk_vargen_init(&vargen, &ts, NULL, 0, alleles, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->site.position, 0.0);\n    
CU_ASSERT_FALSE(var->has_missing_data);\n    CU_ASSERT_EQUAL(var->genotypes[0], 1);\n    CU_ASSERT_EQUAL(var->genotypes[1], 1);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_vargen_free(&vargen);\n\n    ret = tsk_vargen_init(&vargen, &ts, samples, 2, alleles, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_vargen_print_state(&vargen, _devnull);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->site.position, 0.0);\n    CU_ASSERT_FALSE(var->has_missing_data);\n    CU_ASSERT_EQUAL(var->genotypes[0], 1);\n    CU_ASSERT_EQUAL(var->genotypes[1], 1);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_vargen_free(&vargen);\n\n    ret = tsk_vargen_init(&vargen, &ts, NULL, 0, NULL, TSK_ISOLATED_NOT_MISSING);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->site.position, 0.0);\n    CU_ASSERT_FALSE(var->has_missing_data);\n    CU_ASSERT_EQUAL(var->genotypes[0], 1);\n    CU_ASSERT_EQUAL(var->genotypes[1], 1);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_vargen_free(&vargen);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_user_alleles(void)\n{\n    int ret = 0;\n    const char *sites = \"0.0    G\\n\"\n                        \"0.125  A\\n\"\n                        \"0.25   C\\n\"\n                        \"0.5    A\\n\";\n    const char *mutations\n        = \"0    0     T   -1\\n\"\n          \"1    1     C   -1\\n\"\n          \"2    0     G   -1\\n\"\n          \"2    1     A   -1\\n\"\n          \"2    2     T   -1\\n\" // A bunch of different sample mutations\n          \"3    4     T   -1\\n\"\n          \"3    0     A   5\\n\"; // A back mutation from T -> A\n    tsk_treeseq_t ts;\n    tsk_vargen_t vargen;\n    tsk_variant_t *var;\n    const char *alleles[] = { \"A\", \"C\", \"G\", 
\"T\", NULL };\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        sites, mutations, NULL, NULL, 0);\n    ret = tsk_vargen_init(&vargen, &ts, NULL, 0, alleles, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_vargen_print_state(&vargen, _devnull);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->site.position, 0.0);\n    CU_ASSERT_EQUAL_FATAL(var->num_alleles, 4);\n    CU_ASSERT_EQUAL(var->allele_lengths[0], 1);\n    CU_ASSERT_EQUAL(var->allele_lengths[1], 1);\n    CU_ASSERT_EQUAL(var->allele_lengths[2], 1);\n    CU_ASSERT_EQUAL(var->allele_lengths[3], 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"A\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"C\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[2], \"G\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[3], \"T\", 1);\n    CU_ASSERT_FALSE(var->has_missing_data);\n    CU_ASSERT_EQUAL(var->genotypes[0], 3);\n    CU_ASSERT_EQUAL(var->genotypes[1], 2);\n    CU_ASSERT_EQUAL(var->genotypes[2], 2);\n    CU_ASSERT_EQUAL(var->genotypes[3], 2);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->site.position, 0.125);\n    CU_ASSERT_EQUAL(var->num_alleles, 4);\n    CU_ASSERT_EQUAL(var->allele_lengths[0], 1);\n    CU_ASSERT_EQUAL(var->allele_lengths[1], 1);\n    CU_ASSERT_EQUAL(var->allele_lengths[2], 1);\n    CU_ASSERT_EQUAL(var->allele_lengths[3], 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"A\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"C\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[2], \"G\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[3], \"T\", 1);\n    CU_ASSERT_FALSE(var->has_missing_data);\n    CU_ASSERT_EQUAL(var->genotypes[0], 0);\n    CU_ASSERT_EQUAL(var->genotypes[1], 1);\n    CU_ASSERT_EQUAL(var->genotypes[2], 0);\n    CU_ASSERT_EQUAL(var->genotypes[3], 0);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    
CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->site.position, 0.25);\n    CU_ASSERT_EQUAL(var->num_alleles, 4);\n    CU_ASSERT_EQUAL(var->allele_lengths[0], 1);\n    CU_ASSERT_EQUAL(var->allele_lengths[1], 1);\n    CU_ASSERT_EQUAL(var->allele_lengths[2], 1);\n    CU_ASSERT_EQUAL(var->allele_lengths[3], 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"A\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"C\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[2], \"G\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[3], \"T\", 1);\n    CU_ASSERT_FALSE(var->has_missing_data);\n    CU_ASSERT_EQUAL(var->genotypes[0], 2);\n    CU_ASSERT_EQUAL(var->genotypes[1], 0);\n    CU_ASSERT_EQUAL(var->genotypes[2], 3);\n    CU_ASSERT_EQUAL(var->genotypes[3], 1);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->site.position, 0.5);\n    CU_ASSERT_EQUAL(var->num_alleles, 4);\n    CU_ASSERT_EQUAL(var->allele_lengths[0], 1);\n    CU_ASSERT_EQUAL(var->allele_lengths[1], 1);\n    CU_ASSERT_EQUAL(var->allele_lengths[2], 1);\n    CU_ASSERT_EQUAL(var->allele_lengths[3], 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"A\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"C\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[2], \"G\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[3], \"T\", 1);\n    CU_ASSERT_FALSE(var->has_missing_data);\n    CU_ASSERT_EQUAL(var->genotypes[0], 0);\n    CU_ASSERT_EQUAL(var->genotypes[1], 3);\n    CU_ASSERT_EQUAL(var->genotypes[2], 0);\n    CU_ASSERT_EQUAL(var->genotypes[3], 0);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_vargen_free(&vargen);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_char_alphabet(void)\n{\n    int ret = 0;\n    const char *sites = \"0.0    A\\n\"\n                        \"0.125  A\\n\"\n                        \"0.25   C\\n\"\n                        \"0.5    A\\n\";\n    const char *mutations\n     
   = \"0    0     T   -1\\n\"\n          \"1    1     TTTAAGGG   -1\\n\"\n          \"2    0     G   -1\\n\"\n          \"2    1     AT  -1\\n\"\n          \"2    2     T   -1\\n\" // A bunch of different sample mutations\n          \"3    4     T   -1\\n\"\n          \"3    0     A   5\\n\"; // A back mutation from T -> A\n    tsk_treeseq_t ts;\n    tsk_vargen_t vargen;\n    tsk_variant_t *var;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        sites, mutations, NULL, NULL, 0);\n    ret = tsk_vargen_init(&vargen, &ts, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->site.position, 0.0);\n    CU_ASSERT_EQUAL(var->num_alleles, 2);\n    CU_ASSERT_EQUAL(var->allele_lengths[0], 1);\n    CU_ASSERT_EQUAL(var->allele_lengths[1], 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"A\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"T\", 1);\n    CU_ASSERT_FALSE(var->has_missing_data);\n    CU_ASSERT_EQUAL(var->genotypes[0], 1);\n    CU_ASSERT_EQUAL(var->genotypes[1], 0);\n    CU_ASSERT_EQUAL(var->genotypes[2], 0);\n    CU_ASSERT_EQUAL(var->genotypes[3], 0);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->site.position, 0.125);\n    CU_ASSERT_EQUAL(var->num_alleles, 2);\n    CU_ASSERT_EQUAL(var->allele_lengths[0], 1);\n    CU_ASSERT_EQUAL(var->allele_lengths[1], 8);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"A\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"TTTAAGGG\", 8);\n    CU_ASSERT_FALSE(var->has_missing_data);\n    CU_ASSERT_EQUAL(var->genotypes[0], 0);\n    CU_ASSERT_EQUAL(var->genotypes[1], 1);\n    CU_ASSERT_EQUAL(var->genotypes[2], 0);\n    CU_ASSERT_EQUAL(var->genotypes[3], 0);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->site.position, 0.25);\n    
CU_ASSERT_EQUAL(var->num_alleles, 4);\n    CU_ASSERT_EQUAL(var->allele_lengths[0], 1);\n    CU_ASSERT_EQUAL(var->allele_lengths[1], 1);\n    CU_ASSERT_EQUAL(var->allele_lengths[2], 2);\n    CU_ASSERT_EQUAL(var->allele_lengths[3], 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"C\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"G\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[2], \"AT\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[3], \"T\", 1);\n    CU_ASSERT_FALSE(var->has_missing_data);\n    CU_ASSERT_EQUAL(var->genotypes[0], 1);\n    CU_ASSERT_EQUAL(var->genotypes[1], 2);\n    CU_ASSERT_EQUAL(var->genotypes[2], 3);\n    CU_ASSERT_EQUAL(var->genotypes[3], 0);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->site.position, 0.5);\n    CU_ASSERT_EQUAL(var->num_alleles, 2);\n    CU_ASSERT_EQUAL(var->allele_lengths[0], 1);\n    CU_ASSERT_EQUAL(var->allele_lengths[1], 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"A\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"T\", 1);\n    CU_ASSERT_FALSE(var->has_missing_data);\n    CU_ASSERT_EQUAL(var->genotypes[0], 0);\n    CU_ASSERT_EQUAL(var->genotypes[1], 1);\n    CU_ASSERT_EQUAL(var->genotypes[2], 0);\n    CU_ASSERT_EQUAL(var->genotypes[3], 0);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_vargen_free(&vargen);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_binary_alphabet(void)\n{\n    int ret = 0;\n    tsk_treeseq_t ts;\n    tsk_vargen_t vargen;\n    tsk_variant_t *var;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);\n    ret = tsk_vargen_init(&vargen, &ts, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_vargen_print_state(&vargen, _devnull);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    
CU_ASSERT_EQUAL(var->genotypes[0], 0);\n    CU_ASSERT_EQUAL(var->genotypes[1], 0);\n    CU_ASSERT_EQUAL(var->genotypes[2], 1);\n    CU_ASSERT_EQUAL(var->genotypes[3], 0);\n    CU_ASSERT_EQUAL(var->num_alleles, 2);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"0\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"1\", 1);\n    CU_ASSERT_EQUAL(var->site.id, 0);\n    CU_ASSERT_EQUAL(var->site.mutations_length, 1);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->genotypes[0], 0);\n    CU_ASSERT_EQUAL(var->genotypes[1], 1);\n    CU_ASSERT_EQUAL(var->genotypes[2], 0);\n    CU_ASSERT_EQUAL(var->genotypes[3], 0);\n    CU_ASSERT_EQUAL(var->num_alleles, 2);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"0\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"1\", 1);\n    CU_ASSERT_EQUAL(var->site.id, 1);\n    CU_ASSERT_EQUAL(var->site.mutations_length, 2);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->genotypes[0], 1);\n    CU_ASSERT_EQUAL(var->genotypes[1], 1);\n    CU_ASSERT_EQUAL(var->genotypes[2], 1);\n    CU_ASSERT_EQUAL(var->genotypes[3], 1);\n    CU_ASSERT_EQUAL(var->num_alleles, 2);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"0\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"1\", 1);\n    CU_ASSERT_EQUAL(var->site.id, 2);\n    CU_ASSERT_EQUAL(var->site.mutations_length, 4);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_vargen_free(&vargen);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_non_samples(void)\n{\n    int ret = 0;\n    tsk_treeseq_t ts;\n    tsk_vargen_t vargen;\n    tsk_variant_t *var;\n    /* Non sample internal nodes we want to generate genotypes for */\n    tsk_id_t samples[] = { 4, 5 };\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        single_tree_ex_sites, 
single_tree_ex_mutations, NULL, NULL, 0);\n    ret = tsk_vargen_init(&vargen, &ts, samples, 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_vargen_print_state(&vargen, _devnull);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->genotypes[0], 0);\n    CU_ASSERT_EQUAL(var->genotypes[1], 0);\n    CU_ASSERT_EQUAL(var->num_alleles, 2);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"0\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"1\", 1);\n    CU_ASSERT_EQUAL(var->site.id, 0);\n    CU_ASSERT_EQUAL(var->site.mutations_length, 1);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->genotypes[1], 0);\n    CU_ASSERT_EQUAL(var->genotypes[0], 1);\n    CU_ASSERT_EQUAL(var->num_alleles, 2);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"0\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"1\", 1);\n    CU_ASSERT_EQUAL(var->site.id, 1);\n    CU_ASSERT_EQUAL(var->site.mutations_length, 2);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->genotypes[0], 0);\n    CU_ASSERT_EQUAL(var->genotypes[1], 0);\n    CU_ASSERT_EQUAL(var->num_alleles, 2);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"0\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"1\", 1);\n    CU_ASSERT_EQUAL(var->site.id, 2);\n    CU_ASSERT_EQUAL(var->site.mutations_length, 4);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_vargen_free(&vargen);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_vargen_init(&vargen, &ts, samples, 2, NULL, TSK_ISOLATED_NOT_MISSING);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_vargen_print_state(&vargen, _devnull);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->genotypes[0], 0);\n    CU_ASSERT_EQUAL(var->genotypes[1], 0);\n    CU_ASSERT_EQUAL(var->num_alleles, 2);\n    
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"0\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"1\", 1);\n    CU_ASSERT_EQUAL(var->site.id, 0);\n    CU_ASSERT_EQUAL(var->site.mutations_length, 1);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->genotypes[1], 0);\n    CU_ASSERT_EQUAL(var->genotypes[0], 1);\n    CU_ASSERT_EQUAL(var->num_alleles, 2);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"0\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"1\", 1);\n    CU_ASSERT_EQUAL(var->site.id, 1);\n    CU_ASSERT_EQUAL(var->site.mutations_length, 2);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->genotypes[0], 0);\n    CU_ASSERT_EQUAL(var->genotypes[1], 0);\n    CU_ASSERT_EQUAL(var->num_alleles, 2);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"0\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"1\", 1);\n    CU_ASSERT_EQUAL(var->site.id, 2);\n    CU_ASSERT_EQUAL(var->site.mutations_length, 4);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_vargen_free(&vargen);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_isolated_internal_node(void)\n{\n    int ret = 0;\n    tsk_treeseq_t ts;\n    tsk_vargen_t vargen;\n    tsk_variant_t *var;\n    /* Two sample nodes (0,1), plus an internal non-sample node u=2 with no edges */\n    const char *nodes = \"1  0   -1   -1\\n\"\n                        \"1  0   -1   -1\\n\"\n                        \"0  1   -1   -1\\n\";\n    const char *sites = \"2.0    A\\n\"\n                        \"9.0    T\\n\";\n    tsk_id_t samples[] = { 2 };\n\n    tsk_treeseq_from_text(&ts, 10, nodes, \"\", NULL, sites, NULL, NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_get_num_nodes(&ts), 3);\n    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_get_num_samples(&ts), 2);\n    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_get_num_sites(&ts), 2);\n\n    /* 
Default options (isolated_as_missing=True): internal node is isolated everywhere\n     */\n    ret = tsk_vargen_init(&vargen, &ts, samples, 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_TRUE(var->has_missing_data);\n    CU_ASSERT_EQUAL(var->genotypes[0], TSK_MISSING_DATA);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_TRUE(var->has_missing_data);\n    CU_ASSERT_EQUAL(var->genotypes[0], TSK_MISSING_DATA);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_vargen_free(&vargen);\n\n    /* Impute missing (isolated_as_missing=False): genotypes should be ancestral (0) */\n    ret = tsk_vargen_init(&vargen, &ts, samples, 1, NULL, TSK_ISOLATED_NOT_MISSING);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_FALSE(var->has_missing_data);\n    CU_ASSERT_EQUAL(var->genotypes[0], 0);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_FALSE(var->has_missing_data);\n    CU_ASSERT_EQUAL(var->genotypes[0], 0);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_vargen_free(&vargen);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_errors(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_vargen_t vargen;\n    tsk_id_t samples[] = { 0, 3 };\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);\n    ret = tsk_vargen_init(&vargen, &ts, samples, 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_vargen_free(&vargen);\n\n    samples[0] = -1;\n    ret = tsk_vargen_init(&vargen, &ts, samples, 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    tsk_vargen_free(&vargen);\n\n    samples[0] = 7;\n    
ret = tsk_vargen_init(&vargen, &ts, samples, 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    tsk_vargen_free(&vargen);\n\n    samples[0] = 3;\n    ret = tsk_vargen_init(&vargen, &ts, samples, 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);\n    tsk_vargen_free(&vargen);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_user_alleles_errors(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_vargen_t vargen;\n    tsk_variant_t *var;\n\n    /* The maximium number of alleles is 127. We need space for one more plus the\n     * sentinel */\n    const char *acct_alleles[] = { \"A\", \"C\", \"G\", \"T\", NULL };\n    const char *zero_allele[] = { \"0\", NULL };\n    const char *no_alleles[] = { NULL };\n    tsk_id_t samples[] = { 0, 3 };\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);\n\n    /* these are 0/1 alleles */\n    ret = tsk_vargen_init(&vargen, &ts, samples, 2, acct_alleles, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_ALLELE_NOT_FOUND);\n    tsk_vargen_free(&vargen);\n\n    /* pass just the 0 allele alleles at all */\n    ret = tsk_vargen_init(&vargen, &ts, samples, 2, zero_allele, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_ALLELE_NOT_FOUND);\n    tsk_vargen_free(&vargen);\n\n    /* Empty allele list is an error */\n    ret = tsk_vargen_init(&vargen, &ts, samples, 2, no_alleles, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_ZERO_ALLELES);\n    tsk_vargen_free(&vargen);\n\n    // for (j = 0; j < max_alleles; j++) {\n    //     many_alleles[j] = \"0\";\n    // }\n    // many_alleles[128] = NULL;\n    // ret = tsk_vargen_init(&vargen, &ts, samples, 2, many_alleles, 0);\n    // CU_ASSERT_EQUAL_FATAL(ret, 
TSK_ERR_TOO_MANY_ALLELES);\n    // tsk_vargen_free(&vargen);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_subsample(void)\n{\n    int ret = 0;\n    tsk_treeseq_t ts;\n    tsk_vargen_t vargen;\n    tsk_variant_t *var;\n    tsk_id_t samples[] = { 0, 3 };\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);\n    ret = tsk_vargen_init(&vargen, &ts, samples, 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_vargen_print_state(&vargen, _devnull);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->genotypes[0], 0);\n    CU_ASSERT_EQUAL(var->genotypes[1], 0);\n    CU_ASSERT_EQUAL(var->num_alleles, 2);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"0\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"1\", 1);\n    CU_ASSERT_EQUAL(var->site.id, 0);\n    CU_ASSERT_EQUAL(var->site.mutations_length, 1);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->genotypes[0], 0);\n    CU_ASSERT_EQUAL(var->genotypes[1], 0);\n    CU_ASSERT_EQUAL(var->num_alleles, 2);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"0\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"1\", 1);\n    CU_ASSERT_EQUAL(var->site.id, 1);\n    CU_ASSERT_EQUAL(var->site.mutations_length, 2);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->genotypes[0], 1);\n    CU_ASSERT_EQUAL(var->genotypes[1], 1);\n    CU_ASSERT_EQUAL(var->num_alleles, 2);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"0\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"1\", 1);\n    CU_ASSERT_EQUAL(var->site.id, 2);\n    CU_ASSERT_EQUAL(var->site.mutations_length, 4);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_vargen_free(&vargen);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    
/* Zero samples */\n    ret = tsk_vargen_init(&vargen, &ts, samples, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_vargen_print_state(&vargen, _devnull);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->num_alleles, 2);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"0\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"1\", 1);\n    CU_ASSERT_EQUAL(var->site.id, 0);\n    CU_ASSERT_EQUAL(var->site.mutations_length, 1);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->num_alleles, 2);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"0\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"1\", 1);\n    CU_ASSERT_EQUAL(var->site.id, 1);\n    CU_ASSERT_EQUAL(var->site.mutations_length, 2);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->num_alleles, 2);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"0\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"1\", 1);\n    CU_ASSERT_EQUAL(var->site.id, 2);\n    CU_ASSERT_EQUAL(var->site.mutations_length, 4);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_vargen_free(&vargen);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_many_alleles(void)\n{\n    int ret = 0;\n    tsk_id_t ret_id;\n    tsk_treeseq_t ts;\n    tsk_vargen_t vargen;\n    tsk_variant_t *var;\n    tsk_size_t num_alleles = 257;\n    tsk_id_t j, k;\n    char alleles[num_alleles];\n    tsk_table_collection_t tables;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);\n    CU_ASSERT_FATAL(ret == 0);\n    tsk_treeseq_free(&ts);\n    tsk_memset(alleles, 'X', (size_t) num_alleles);\n    ret_id = tsk_site_table_add_row(&tables.sites, 0, \"Y\", 1, NULL, 
0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    /* Add j mutations over a single node. */\n    for (j = 0; j < (tsk_id_t) num_alleles; j++) {\n        /* When j = 0 we get a parent of -1, which is the NULL_NODE */\n        ret_id = tsk_mutation_table_add_row(&tables.mutations, 0, 0, j - 1,\n            TSK_UNKNOWN_TIME, alleles, (tsk_size_t) j, NULL, 0);\n        CU_ASSERT_FATAL(ret_id >= 0);\n        ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n        ret = tsk_vargen_init(&vargen, &ts, NULL, 0, NULL, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        tsk_vargen_print_state(&vargen, _devnull);\n        ret = tsk_vargen_next(&vargen, &var);\n        /* We have j + 2 alleles. So, if j >= 126, we should fail with 8bit\n         * genotypes */\n        // if (j >= 126) {\n        //     CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TOO_MANY_ALLELES);\n        // } else {\n        CU_ASSERT_EQUAL_FATAL(ret, 1);\n        CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"Y\", 1);\n        for (k = 1; k < (tsk_id_t) var->num_alleles; k++) {\n            CU_ASSERT_EQUAL(k - 1, (tsk_id_t) var->allele_lengths[k]);\n            CU_ASSERT_NSTRING_EQUAL(var->alleles[k], alleles, var->allele_lengths[k]);\n        }\n        CU_ASSERT_EQUAL(var->num_alleles, (tsk_size_t) j + 2);\n        // }\n        ret = tsk_vargen_free(&vargen);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n        tsk_treeseq_free(&ts);\n    }\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_single_tree_silent_mutations(void)\n{\n    int ret = 0;\n    tsk_treeseq_t ts;\n    tsk_vargen_t vargen;\n    tsk_variant_t *var;\n\n    /* Add some silent mutations */\n    const char *silent_ex_sites = \"0.125  0\\n\"\n                                  \"0.25   0\\n\"\n                                  \"0.5    0\\n\"\n                                  \"0.75    0\\n\";\n    /* site, node, derived_state, [parent, time] */\n    const char *silent_ex_mutations\n  
      = \"0    5     0   -1\\n\" /* Silent mutation over mutation 1 */\n          \"0    2     1   0\\n\"\n          \"1    4     1   -1\\n\"\n          \"1    0     0   2\\n\"  /* Back mutation over 0 */\n          \"1    0     0   3\\n\"  /* Silent mutation under back mutation */\n          \"2    0     1   -1\\n\" /* recurrent mutations over samples */\n          \"2    1     1   -1\\n\"\n          \"2    2     1   -1\\n\"\n          \"2    3     1   -1\\n\"\n          \"3    0     0   -1\\n\" /* Single silent mutation at a site */\n        ;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        silent_ex_sites, silent_ex_mutations, NULL, NULL, 0);\n\n    ret = tsk_vargen_init(&vargen, &ts, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_vargen_print_state(&vargen, _devnull);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->genotypes[0], 0);\n    CU_ASSERT_EQUAL(var->genotypes[1], 0);\n    CU_ASSERT_EQUAL(var->genotypes[2], 1);\n    CU_ASSERT_EQUAL(var->genotypes[3], 0);\n    CU_ASSERT_EQUAL(var->num_alleles, 2);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"0\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"1\", 1);\n    CU_ASSERT_EQUAL(var->site.id, 0);\n    CU_ASSERT_EQUAL(var->site.mutations_length, 2);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->genotypes[0], 0);\n    CU_ASSERT_EQUAL(var->genotypes[1], 1);\n    CU_ASSERT_EQUAL(var->genotypes[2], 0);\n    CU_ASSERT_EQUAL(var->genotypes[3], 0);\n    CU_ASSERT_EQUAL(var->num_alleles, 2);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"0\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"1\", 1);\n    CU_ASSERT_EQUAL(var->site.id, 1);\n    CU_ASSERT_EQUAL(var->site.mutations_length, 3);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->genotypes[0], 1);\n    
CU_ASSERT_EQUAL(var->genotypes[1], 1);\n    CU_ASSERT_EQUAL(var->genotypes[2], 1);\n    CU_ASSERT_EQUAL(var->genotypes[3], 1);\n    CU_ASSERT_EQUAL(var->num_alleles, 2);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"0\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"1\", 1);\n    CU_ASSERT_EQUAL(var->site.id, 2);\n    CU_ASSERT_EQUAL(var->site.mutations_length, 4);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->genotypes[0], 0);\n    CU_ASSERT_EQUAL(var->genotypes[1], 0);\n    CU_ASSERT_EQUAL(var->genotypes[2], 0);\n    CU_ASSERT_EQUAL(var->genotypes[3], 0);\n    CU_ASSERT_EQUAL(var->num_alleles, 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"0\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"1\", 1);\n    CU_ASSERT_EQUAL(var->site.id, 3);\n    CU_ASSERT_EQUAL(var->site.mutations_length, 1);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_vargen_free(&vargen);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_multiple_variant_decode(void)\n{\n    int ret = 0;\n    tsk_size_t k;\n    tsk_id_t s;\n    tsk_treeseq_t ts;\n    tsk_variant_t var;\n    tsk_variant_t var_subset;\n    tsk_id_t samples[] = { 0, 1, 3 };\n    int32_t genos[12];\n    int32_t genos_expected[] = { 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1 };\n    int32_t genos_subset[9];\n    int32_t genos_expected_subset[] = { 0, 0, 0, 1, 0, 0, 0, 1, 1 };\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n\n    /* Sample subset, no sample lists */\n    ret = tsk_variant_init(&var_subset, &ts, samples, 3, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    for (s = 0; (tsk_size_t) s < tsk_treeseq_get_num_sites(&ts); s++) {\n        ret = tsk_variant_decode(&var_subset, s, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        for (k = 0; k < 3; ++k) {\n      
      genos_subset[k + ((tsk_size_t) s * 3)] = var_subset.genotypes[k];\n        }\n    }\n    CU_ASSERT_EQUAL(\n        0, memcmp(genos_subset, genos_expected_subset, sizeof(genos_expected_subset)));\n    memset(genos_subset, 0, sizeof(genos_subset));\n\n    /* All samples with TSK_SAMPLE_LISTS, at the same time as a subset */\n    s = 0;\n    ret = tsk_variant_init(&var, &ts, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    for (s = 0; (tsk_size_t) s < tsk_treeseq_get_num_sites(&ts); s++) {\n        ret = tsk_variant_decode(&var, s, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        for (k = 0; k < 4; ++k) {\n            genos[k + ((tsk_size_t) s * 4)] = var.genotypes[k];\n        }\n        ret = tsk_variant_decode(&var_subset, s, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        for (k = 0; k < 3; ++k) {\n            genos_subset[k + ((tsk_size_t) s * 3)] = var_subset.genotypes[k];\n        }\n    }\n    CU_ASSERT_EQUAL(\n        0, memcmp(genos_subset, genos_expected_subset, sizeof(genos_expected_subset)));\n    CU_ASSERT_EQUAL(0, memcmp(genos, genos_expected, sizeof(genos_expected)));\n    tsk_variant_free(&var);\n    tsk_variant_free(&var_subset);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_variant_decode_errors(void)\n{\n    int ret = 0;\n    tsk_treeseq_t ts;\n    tsk_variant_t var;\n    tsk_id_t bad_samples[] = { 0, 1, 32 };\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n\n    /* Bad samples */\n    ret = tsk_variant_init(&var, &ts, bad_samples, 3, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    tsk_variant_free(&var);\n\n    /* Site out of bounds */\n    ret = tsk_variant_init(&var, &ts, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_variant_decode(&var, 42, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);\n    tsk_variant_free(&var);\n\n    
tsk_treeseq_free(&ts);\n}\n\n/* Checks that the data represented by the specified pair of variants exposed\n * by the public API is equal. */\nstatic void\nassert_variants_equal(const tsk_variant_t *v1, const tsk_variant_t *v2)\n{\n    tsk_size_t j;\n\n    CU_ASSERT_EQUAL(v1->num_samples, v2->num_samples);\n    CU_ASSERT_EQUAL(v1->num_alleles, v2->num_alleles);\n    for (j = 0; j < v1->num_alleles; j++) {\n        CU_ASSERT_EQUAL(v1->allele_lengths[j], v2->allele_lengths[j]);\n        CU_ASSERT_EQUAL(\n            0, memcmp(v1->alleles[j], v2->alleles[j], (size_t) v1->allele_lengths[j]));\n    }\n    CU_ASSERT_EQUAL(v1->has_missing_data, v2->has_missing_data);\n    CU_ASSERT_EQUAL(v1->num_samples, v2->num_samples);\n    for (j = 0; j < v1->num_samples; j++) {\n        CU_ASSERT_EQUAL(v1->samples[j], v2->samples[j]);\n        CU_ASSERT_EQUAL(v1->genotypes[j], v2->genotypes[j]);\n    }\n    CU_ASSERT_EQUAL(v1->site.id, v2->site.id);\n    CU_ASSERT_EQUAL(v1->site.position, v2->site.position);\n    CU_ASSERT_EQUAL(v1->site.ancestral_state_length, v2->site.ancestral_state_length);\n    CU_ASSERT_EQUAL(0, memcmp(v1->site.ancestral_state, v2->site.ancestral_state,\n                           (size_t) v1->site.ancestral_state_length));\n    CU_ASSERT_EQUAL(v1->site.mutations_length, v2->site.mutations_length);\n    /* We're pointing back to the same memory for embedded pointers */\n    CU_ASSERT_EQUAL(v1->site.mutations, v2->site.mutations);\n    CU_ASSERT_EQUAL(v1->site.metadata, v2->site.metadata);\n}\n\nstatic void\ntest_variant_copy(void)\n{\n    int ret = 0;\n    tsk_size_t j;\n    tsk_treeseq_t ts;\n    tsk_variant_t var, var_copy;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n\n    ret = tsk_variant_init(&var, &ts, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    for (j = 0; j < tsk_treeseq_get_num_sites(&ts); j++) {\n        ret = 
tsk_variant_decode(&var, (tsk_id_t) j, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_variant_restricted_copy(&var, &var_copy);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_variant_decode(&var_copy, 0, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_VARIANT_CANT_DECODE_COPY);\n\n        assert_variants_equal(&var, &var_copy);\n        CU_ASSERT_EQUAL(\n            0, memcmp(var.tree_sequence, var.tree_sequence, sizeof(*var.tree_sequence)));\n        CU_ASSERT_EQUAL(0, memcmp(&var.tree, &var_copy.tree, sizeof(tsk_tree_t)));\n        CU_ASSERT_EQUAL(0, memcmp(&var.site, &var_copy.site, sizeof(tsk_site_t)));\n        CU_ASSERT_EQUAL(var_copy.traversal_stack, NULL);\n        CU_ASSERT_EQUAL(var_copy.sample_index_map, NULL);\n        CU_ASSERT_EQUAL(var_copy.alt_samples, NULL);\n        CU_ASSERT_EQUAL(var_copy.alt_sample_index_map, NULL);\n        tsk_variant_free(&var_copy);\n    }\n\n    tsk_variant_free(&var);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_variant_copy_long_alleles(void)\n{\n    int ret = 0;\n    const char *sites = \"0.0    GGGG\\n\"\n                        \"0.125  AAAAA\\n\"\n                        \"0.25   CCCCCC\\n\"\n                        \"0.5    AAAAAAA\\n\";\n    const char *mutations = \"0    0     TTT       -1\\n\"\n                            \"1    1     CCCCCCC   -1\\n\"\n                            \"2    0     GGGGGGG   -1\\n\"\n                            \"2    1     AG        -1\\n\"\n                            \"2    2     TTTTTTT   -1\\n\"\n                            \"3    4     TGGGGGG   -1\\n\"\n                            \"3    0     AAA       5\\n\";\n    tsk_treeseq_t ts;\n    tsk_variant_t var, copy, copy_of_copy;\n    tsk_size_t j;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        sites, mutations, NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_variant_init(&var, &ts, NULL, 0, NULL, 0);\n    
CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (j = 0; j < tsk_treeseq_get_num_sites(&ts); j++) {\n        ret = tsk_variant_decode(&var, (tsk_id_t) j, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_variant_restricted_copy(&var, &copy);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        assert_variants_equal(&var, &copy);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_variant_restricted_copy(&copy, &copy_of_copy);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        assert_variants_equal(&var, &copy_of_copy);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        tsk_variant_free(&copy_of_copy);\n        tsk_variant_free(&copy);\n    }\n    tsk_variant_free(&var);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_variant_copy_memory_management(void)\n{\n    int ret = 0;\n    tsk_size_t j;\n    tsk_treeseq_t ts;\n    tsk_variant_t *var;\n    tsk_variant_t copy, copy_of_copy;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n\n    for (j = 0; j < tsk_treeseq_get_num_sites(&ts); j++) {\n        var = tsk_malloc(sizeof(*var));\n        CU_ASSERT_FATAL(var != NULL);\n        ret = tsk_variant_init(var, &ts, NULL, 0, NULL, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_variant_decode(var, (tsk_id_t) j, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_variant_restricted_copy(var, &copy);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        assert_variants_equal(var, &copy);\n        /* Free var to make sure we're not pointing to any of the original memory. 
*/\n        tsk_variant_free(var);\n        free(var);\n        ret = tsk_variant_restricted_copy(&copy, &copy_of_copy);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        assert_variants_equal(&copy, &copy_of_copy);\n        ret = tsk_variant_decode(&copy, 0, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_VARIANT_CANT_DECODE_COPY);\n        ret = tsk_variant_decode(&copy_of_copy, 0, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_VARIANT_CANT_DECODE_COPY);\n\n        tsk_variant_free(&copy);\n        tsk_variant_free(&copy_of_copy);\n    }\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\nbuild_balanced_three_example_align(tsk_treeseq_t *ts)\n{\n    const char *nodes = \"1  0   0  -1\\n\"\n                        \"1  0   0  -1\\n\"\n                        \"1  0   0  -1\\n\"\n                        \"0  1   0  -1\\n\"\n                        \"0  2   0  -1\\n\";\n    const char *edges = \"0  10  3  1,2\\n\"\n                        \"0  10  4  0,3\\n\";\n    const char *sites = \"2  A\\n\"\n                        \"9  T\\n\";\n    const char *mutations = \"0  0  G\\n\"\n                            \"1  3  C\\n\";\n    tsk_treeseq_from_text(ts, 10, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);\n}\n\nstatic void\ntest_alignments_basic_default(void)\n{\n    int ret = 0;\n    tsk_treeseq_t ts;\n    const char *ref = \"NNNNNNNNNN\";\n    const tsk_id_t *samples;\n    tsk_size_t n, L;\n    char *buf;\n\n    build_balanced_three_example_align(&ts);\n    samples = tsk_treeseq_get_samples(&ts);\n    n = tsk_treeseq_get_num_samples(&ts);\n    L = 10;\n    buf = tsk_malloc(n * L);\n    CU_ASSERT_PTR_NOT_NULL_FATAL(buf);\n\n    ret = tsk_treeseq_decode_alignments(\n        &ts, ref, (tsk_size_t) strlen(ref), samples, n, 0, 10, 'N', buf, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_NSTRING_EQUAL(buf + 0 * L, \"NNGNNNNNNT\", L);\n    CU_ASSERT_NSTRING_EQUAL(buf + 1 * L, \"NNANNNNNNC\", L);\n    CU_ASSERT_NSTRING_EQUAL(buf + 2 * L, \"NNANNNNNNC\", L);\n\n    
tsk_safe_free(buf);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_alignments_reference_sequence(void)\n{\n    int ret = 0;\n    tsk_treeseq_t ts;\n    const char *ref = \"0123456789\";\n    const tsk_id_t *samples;\n    tsk_size_t n, L = 10;\n    char *buf = NULL;\n\n    build_balanced_three_example_align(&ts);\n    samples = tsk_treeseq_get_samples(&ts);\n    n = tsk_treeseq_get_num_samples(&ts);\n    buf = tsk_malloc(n * L);\n    CU_ASSERT_PTR_NOT_NULL_FATAL(buf);\n\n    ret = tsk_treeseq_decode_alignments(\n        &ts, ref, (tsk_size_t) strlen(ref), samples, n, 0, 10, 'N', buf, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_NSTRING_EQUAL(buf + 0 * L, \"01G345678T\", L);\n    CU_ASSERT_NSTRING_EQUAL(buf + 1 * L, \"01A345678C\", L);\n    CU_ASSERT_NSTRING_EQUAL(buf + 2 * L, \"01A345678C\", L);\n\n    tsk_safe_free(buf);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_alignments_partial_isolation(void)\n{\n    int ret = 0;\n    const char *nodes = \"0  1  0  -1\\n\"  /* parent */\n                        \"1  0  0  -1\\n\"; /* child sample */\n    const char *edges = \"3  7  0  1\\n\";\n    const char *sites = \"5  A\\n\";\n    const char *mutations = \"0  1  G\\n\";\n    tsk_treeseq_t ts;\n    const char *ref = \"0123456789\";\n    tsk_id_t node = 1;\n    char buf[10];\n\n    tsk_treeseq_from_text(&ts, 10, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);\n    ret = tsk_treeseq_decode_alignments(&ts, ref, 10, &node, 1, 0, 10, 'N', buf, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_NSTRING_EQUAL(buf, \"NNN34G6NNN\", 10);\n\n    ret = tsk_treeseq_decode_alignments(&ts, ref, 10, &node, 1, 2, 8, 'N', buf, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_NSTRING_EQUAL(buf, \"N34G6N\", 6);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_alignments_return_code_truncated_interval(void)\n{\n    int ret = 0;\n    const char *nodes = \"1  0  0  -1\\n\"\n                        \"1  0  0  -1\\n\"\n                        \"0  1  0  
-1\\n\";\n    /* Tree over [0,5): samples 0 and 1 under root 2.\n     * Tree over [5,10): only sample 1 under root 2 (sample 0 isolated). */\n    const char *edges = \"0  5  2  0\\n\"\n                        \"0 10  2  1\\n\";\n    tsk_treeseq_t ts;\n    const tsk_id_t *samples;\n    tsk_size_t n;\n    char buf[10];\n    const char *ref = \"NNNNNNNNNN\";\n\n    tsk_treeseq_from_text(&ts, 10, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    samples = tsk_treeseq_get_samples(&ts);\n    n = tsk_treeseq_get_num_samples(&ts);\n\n    ret = tsk_treeseq_decode_alignments(&ts, ref, 10, samples, n, 0, 5, 'N', buf, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_NSTRING_EQUAL(buf + 0 * 5, \"NNNNN\", 5);\n    CU_ASSERT_NSTRING_EQUAL(buf + 1 * 5, \"NNNNN\", 5);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_alignments_invalid_allele_length(void)\n{\n    int ret = 0;\n    const char *nodes = \"1  0  0  -1\\n\";\n    const char *edges = \"\";\n    const char *sites = \"2  AC\\n\";\n    tsk_treeseq_t ts;\n    tsk_id_t node = 0;\n    char buf[5];\n    const char *ref = \"NNNNN\";\n\n    tsk_treeseq_from_text(&ts, 5, nodes, edges, NULL, sites, NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_decode_alignments(&ts, ref, 5, &node, 1, 0, 5, 'N', buf, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_ALLELE_LENGTH);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_alignments_bad_reference_length(void)\n{\n    int ret = 0;\n    const char *nodes = \"1  0  0  -1\\n\";\n    const char *edges = \"\";\n    tsk_treeseq_t ts;\n    tsk_id_t node = 0;\n    char buf[5];\n    const char *ref = \"NNNNN\";\n\n    tsk_treeseq_from_text(&ts, 5, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_decode_alignments(&ts, ref, 4, &node, 1, 0, 5, 'N', buf, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_alignments_non_integer_bounds(void)\n{\n    int ret = 0;\n    const char *nodes = \"1  0  0  -1\\n\";\n   
 const char *edges = \"\";\n    tsk_treeseq_t ts;\n    tsk_id_t node = 0;\n    char buf[5];\n    const char *ref = \"NNNNN\";\n\n    tsk_treeseq_from_text(&ts, 5, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_decode_alignments(&ts, ref, 5, &node, 1, 0.5, 5, 'N', buf, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_alignments_discrete_genome_required(void)\n{\n    int ret = 0;\n    const char *nodes = \"1  0  0  -1\\n\";\n    const char *edges = \"\";\n    const char *sites = \"0.5  A\\n\";\n    tsk_treeseq_t ts;\n    tsk_id_t node = 0;\n    char buf[5];\n    const char *ref = \"NNNNN\";\n\n    tsk_treeseq_from_text(&ts, 5, nodes, edges, NULL, sites, NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_decode_alignments(&ts, ref, 5, &node, 1, 0, 5, 'N', buf, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_alignments_null_reference(void)\n{\n    int ret = 0;\n    tsk_treeseq_t ts;\n    const tsk_id_t *samples;\n    tsk_size_t n;\n    char buf[10];\n\n    build_balanced_three_example_align(&ts);\n    samples = tsk_treeseq_get_samples(&ts);\n    n = tsk_treeseq_get_num_samples(&ts);\n\n    ret = tsk_treeseq_decode_alignments(&ts, NULL, 10, samples, n, 0, 10, 'N', buf, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_alignments_null_nodes_or_buf(void)\n{\n    int ret = 0;\n    tsk_treeseq_t ts;\n    const char *ref = \"NNNNNNNNNN\";\n    const tsk_id_t *samples;\n    tsk_size_t n;\n    char buf[30];\n\n    build_balanced_three_example_align(&ts);\n    samples = tsk_treeseq_get_samples(&ts);\n    n = tsk_treeseq_get_num_samples(&ts);\n\n    ret = tsk_treeseq_decode_alignments(&ts, ref, 10, NULL, n, 0, 10, 'N', buf, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n\n    ret = tsk_treeseq_decode_alignments(&ts, ref, 10, samples, n, 0, 10, 'N', NULL, 
0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_alignments_node_out_of_bounds(void)\n{\n    int ret = 0;\n    tsk_treeseq_t ts;\n    const char *ref = \"NNNNNNNNNN\";\n    tsk_id_t bad_node;\n    char buf[10];\n\n    build_balanced_three_example_align(&ts);\n    bad_node = (tsk_id_t) tsk_treeseq_get_num_nodes(&ts);\n\n    ret = tsk_treeseq_decode_alignments(&ts, ref, 10, &bad_node, 1, 0, 10, 'N', buf, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_alignments_isolated_as_not_missing(void)\n{\n    int ret = 0;\n    const char *nodes = \"0  1  0  -1\\n\"  /* parent */\n                        \"1  0  0  -1\\n\"; /* child sample */\n    const char *edges = \"3  7  0  1\\n\";\n    const char *sites = \"5  A\\n\";\n    const char *mutations = \"0  1  G\\n\";\n    tsk_treeseq_t ts;\n    const char *ref = \"0123456789\";\n    tsk_id_t node = 1;\n    char buf[10];\n\n    tsk_treeseq_from_text(&ts, 10, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);\n    ret = tsk_treeseq_decode_alignments(\n        &ts, ref, 10, &node, 1, 0, 10, 'N', buf, TSK_ISOLATED_NOT_MISSING);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_NSTRING_EQUAL(buf, \"01234G6789\", 10);\n\n    ret = tsk_treeseq_decode_alignments(\n        &ts, ref, 10, &node, 1, 2, 8, 'N', buf, TSK_ISOLATED_NOT_MISSING);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_NSTRING_EQUAL(buf, \"234G67\", 6);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_alignments_internal_node_non_sample(void)\n{\n    int ret = 0;\n    tsk_treeseq_t ts;\n    const char *ref = \"NNNNNNNNNN\";\n    tsk_id_t node = 3; /* internal node */\n    char buf[10];\n\n    build_balanced_three_example_align(&ts);\n    ret = tsk_treeseq_decode_alignments(&ts, ref, 10, &node, 1, 0, 10, 'N', buf, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_NSTRING_EQUAL(buf, \"NNANNNNNNC\", 10);\n    
tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_alignments_missing_char_collision(void)\n{\n    int ret = 0;\n    const char *nodes = \"1  0  0  -1\\n\";\n    const char *edges = \"\";\n    const char *sites = \"2  A\\n\";\n    const char *mutations = \"0  0  Q\\n\"; /* allele equals missing char */\n    tsk_treeseq_t ts;\n    tsk_id_t node = 0;\n    char buf[5];\n    const char *ref = \"NNNNN\";\n\n    tsk_treeseq_from_text(&ts, 5, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);\n    ret = tsk_treeseq_decode_alignments(&ts, ref, 5, &node, 1, 0, 5, 'Q', buf, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MISSING_CHAR_COLLISION);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_alignments_zero_nodes_ok(void)\n{\n    int ret = 0;\n    tsk_treeseq_t ts;\n    const char *ref = \"NNNNNNNNNN\";\n    build_balanced_three_example_align(&ts);\n    ret = tsk_treeseq_decode_alignments(&ts, ref, 10, NULL, 0, 0, 10, 'N', NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_alignments_bad_bounds_cases(void)\n{\n    int ret = 0;\n    tsk_treeseq_t ts;\n    const char *ref = \"NNNNNNNNNN\";\n    tsk_id_t node = 0;\n    char buf[1];\n    build_balanced_three_example_align(&ts);\n    /* left == right invalid */\n    ret = tsk_treeseq_decode_alignments(&ts, ref, 10, &node, 1, 5, 5, 'N', buf, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    /* left negative */\n    ret = tsk_treeseq_decode_alignments(&ts, ref, 10, &node, 1, -1, 5, 'N', buf, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_alignments_order_preserved(void)\n{\n    int ret = 0;\n    tsk_treeseq_t ts;\n    const char *ref = \"NNNNNNNNNN\";\n    tsk_id_t nodes_arr[3];\n    char buf[30];\n    tsk_size_t L = 10;\n\n    build_balanced_three_example_align(&ts);\n    nodes_arr[0] = 2;\n    nodes_arr[1] = 0;\n    nodes_arr[2] = 1;\n    ret = tsk_treeseq_decode_alignments(&ts, ref, 10, nodes_arr, 3, 
0, 10, 'N', buf, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_NSTRING_EQUAL(buf + 0 * L, \"NNANNNNNNC\", L);\n    CU_ASSERT_NSTRING_EQUAL(buf + 1 * L, \"NNGNNNNNNT\", L);\n    CU_ASSERT_NSTRING_EQUAL(buf + 2 * L, \"NNANNNNNNC\", L);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_alignments_missing_char_custom(void)\n{\n    int ret = 0;\n    const char *nodes = \"0  1  0  -1\\n\"  /* parent */\n                        \"1  0  0  -1\\n\"; /* child sample */\n    const char *edges = \"3  7  0  1\\n\";\n    const char *sites = \"5  A\\n\";\n    const char *mutations = \"0  1  G\\n\";\n    tsk_treeseq_t ts;\n    const char *ref = \"0123456789\";\n    tsk_id_t node = 1;\n    char buf[10];\n\n    tsk_treeseq_from_text(&ts, 10, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);\n    ret = tsk_treeseq_decode_alignments(&ts, ref, 10, &node, 1, 0, 10, 'Q', buf, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_NSTRING_EQUAL(buf, \"QQQ34G6QQQ\", 10);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_alignments_embedded_null_reference(void)\n{\n    int ret = 0;\n    tsk_treeseq_t ts;\n    char ref[10] = { '0', '1', '2', '3', '\\0', '5', '6', '7', '8', '9' };\n    const tsk_id_t *samples;\n    tsk_size_t n, L = 10;\n    char *buf = NULL;\n    char exp0[10] = { '0', '1', 'G', '3', '\\0', '5', '6', '7', '8', 'T' };\n    char exp1[10] = { '0', '1', 'A', '3', '\\0', '5', '6', '7', '8', 'C' };\n    char exp2[10] = { '0', '1', 'A', '3', '\\0', '5', '6', '7', '8', 'C' };\n\n    build_balanced_three_example_align(&ts);\n    samples = tsk_treeseq_get_samples(&ts);\n    n = tsk_treeseq_get_num_samples(&ts);\n    buf = tsk_malloc(n * L);\n    CU_ASSERT_PTR_NOT_NULL_FATAL(buf);\n\n    ret = tsk_treeseq_decode_alignments(&ts, ref, 10, samples, n, 0, 10, 'N', buf, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(0, memcmp(buf + 0 * L, exp0, (size_t) L));\n    CU_ASSERT_EQUAL(0, memcmp(buf + 1 * L, exp1, (size_t) L));\n    CU_ASSERT_EQUAL(0, memcmp(buf + 2 
* L, exp2, (size_t) L));\n    tsk_safe_free(buf);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_alignments_growing_allele_buffer(void)\n{\n    /* Verify we handle sites with increasing allele counts without per-site realloc\n     * churn. */\n    int ret = 0;\n    /* Two samples (0,1) with root 2 over [0,3). */\n    const char *nodes = \"1  0  0  -1\\n\"\n                        \"1  0  0  -1\\n\"\n                        \"0  1  0  -1\\n\";\n    const char *edges = \"0  3  2  0\\n\"\n                        \"0  3  2  1\\n\";\n    /* Sites: pos 1 ancestral A; pos 2 ancestral A. */\n    const char *sites = \"1  A\\n\"\n                        \"2  A\\n\";\n    /* Mutations: at site 0 (pos 1) node 0 -> G (2 alleles total).\n     * at site 1 (pos 2) node 0 -> C and node 1 -> T (3 alleles total). */\n    const char *mutations = \"0  0  G\\n\"\n                            \"1  0  C\\n\"\n                            \"1  1  T\\n\";\n\n    tsk_treeseq_t ts;\n    const char *ref = \"NNN\";\n    const tsk_id_t *samples;\n    tsk_size_t n, L = 3;\n    char *buf = NULL;\n\n    tsk_treeseq_from_text(&ts, 3, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);\n    samples = tsk_treeseq_get_samples(&ts);\n    n = tsk_treeseq_get_num_samples(&ts);\n    buf = tsk_malloc(n * L);\n    CU_ASSERT_PTR_NOT_NULL_FATAL(buf);\n\n    ret = tsk_treeseq_decode_alignments(&ts, ref, 3, samples, n, 0, 3, 'N', buf, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    /* Expected: sample 0 -> NGC; sample 1 -> NAT */\n    CU_ASSERT_NSTRING_EQUAL(buf + 0 * L, \"NGC\", L);\n    CU_ASSERT_NSTRING_EQUAL(buf + 1 * L, \"NAT\", L);\n\n    tsk_safe_free(buf);\n    tsk_treeseq_free(&ts);\n}\nint\nmain(int argc, char **argv)\n{\n    CU_TestInfo tests[] = {\n        { \"test_simplest_missing_data\", test_simplest_missing_data },\n        { \"test_simplest_missing_data_user_alleles\",\n            test_simplest_missing_data_user_alleles },\n        { \"test_simplest_missing_data_mutations\", 
test_simplest_missing_data_mutations },\n        { \"test_simplest_missing_data_mutations_all_samples\",\n            test_simplest_missing_data_mutations_all_samples },\n        { \"test_single_tree_user_alleles\", test_single_tree_user_alleles },\n        { \"test_single_tree_char_alphabet\", test_single_tree_char_alphabet },\n        { \"test_single_tree_binary_alphabet\", test_single_tree_binary_alphabet },\n        { \"test_single_tree_non_samples\", test_single_tree_non_samples },\n        { \"test_isolated_internal_node\", test_isolated_internal_node },\n        { \"test_single_tree_errors\", test_single_tree_errors },\n        { \"test_single_tree_user_alleles_errors\", test_single_tree_user_alleles_errors },\n        { \"test_single_tree_subsample\", test_single_tree_subsample },\n        { \"test_single_tree_many_alleles\", test_single_tree_many_alleles },\n        { \"test_single_tree_silent_mutations\", test_single_tree_silent_mutations },\n        { \"test_multiple_variant_decode\", test_multiple_variant_decode },\n        { \"test_variant_decode_errors\", test_variant_decode_errors },\n        { \"test_variant_copy\", test_variant_copy },\n        { \"test_variant_copy_long_alleles\", test_variant_copy_long_alleles },\n        { \"test_variant_copy_memory_management\", test_variant_copy_memory_management },\n        { \"test_alignments_basic_default\", test_alignments_basic_default },\n        { \"test_alignments_reference_sequence\", test_alignments_reference_sequence },\n        { \"test_alignments_partial_isolation\", test_alignments_partial_isolation },\n        { \"test_alignments_return_code_truncated_interval\",\n            test_alignments_return_code_truncated_interval },\n        { \"test_alignments_isolated_as_not_missing\",\n            test_alignments_isolated_as_not_missing },\n        { \"test_alignments_internal_node_non_sample\",\n            test_alignments_internal_node_non_sample },\n        { 
\"test_alignments_invalid_allele_length\",\n            test_alignments_invalid_allele_length },\n        { \"test_alignments_bad_reference_length\", test_alignments_bad_reference_length },\n        { \"test_alignments_non_integer_bounds\", test_alignments_non_integer_bounds },\n        { \"test_alignments_discrete_genome_required\",\n            test_alignments_discrete_genome_required },\n        { \"test_alignments_null_reference\", test_alignments_null_reference },\n        { \"test_alignments_null_nodes_or_buf\", test_alignments_null_nodes_or_buf },\n        { \"test_alignments_node_out_of_bounds\", test_alignments_node_out_of_bounds },\n        { \"test_alignments_missing_char_collision\",\n            test_alignments_missing_char_collision },\n        { \"test_alignments_zero_nodes_ok\", test_alignments_zero_nodes_ok },\n        { \"test_alignments_bad_bounds_cases\", test_alignments_bad_bounds_cases },\n        { \"test_alignments_order_preserved\", test_alignments_order_preserved },\n        { \"test_alignments_missing_char_custom\", test_alignments_missing_char_custom },\n        { \"test_alignments_embedded_null_reference\",\n            test_alignments_embedded_null_reference },\n        { \"test_alignments_growing_allele_buffer\",\n            test_alignments_growing_allele_buffer },\n        { NULL, NULL },\n    };\n\n    return test_main(tests, argc, argv);\n}\n"
  },
  {
    "path": "c/tests/test_haplotype_matching.c",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2023 Tskit Developers\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n#include \"testlib.h\"\n#include <tskit/haplotype_matching.h>\n\n#include <unistd.h>\n#include <stdlib.h>\n\nstatic void\ntest_single_tree_missing_alleles(void)\n{\n    int ret = 0;\n    tsk_treeseq_t ts;\n    tsk_ls_hmm_t ls_hmm;\n    tsk_compressed_matrix_t forward;\n    tsk_viterbi_matrix_t viterbi;\n\n    double rho[] = { 0, 0.25, 0.25 };\n    double mu[] = { 0.125, 0.125, 0.125 };\n    int32_t h[] = { 0, 0, 0, 0 };\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);\n\n    ret = tsk_ls_hmm_init(&ls_hmm, &ts, rho, mu, TSK_ALLELES_ACGT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_ls_hmm_forward(&ls_hmm, h, &forward, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_ALLELE_NOT_FOUND);\n    
ret = tsk_ls_hmm_viterbi(&ls_hmm, h, &viterbi, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_ALLELE_NOT_FOUND);\n\n    tsk_ls_hmm_free(&ls_hmm);\n    tsk_compressed_matrix_free(&forward);\n    tsk_viterbi_matrix_free(&viterbi);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_exact_match(void)\n{\n    int ret = 0;\n    tsk_treeseq_t ts;\n    tsk_ls_hmm_t ls_hmm;\n    tsk_compressed_matrix_t forward;\n    tsk_viterbi_matrix_t viterbi;\n\n    double rho[] = { 0.0, 0.25, 0.25 };\n    double mu[] = { 0, 0, 0 };\n    int32_t h[] = { 1, 1, 1 };\n    tsk_id_t path[3];\n    double decoded_compressed_matrix[12];\n    unsigned int precision;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);\n\n    ret = tsk_ls_hmm_init(&ls_hmm, &ts, rho, mu, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_ls_hmm_forward(&ls_hmm, h, &forward, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_compressed_matrix_print_state(&forward, _devnull);\n    tsk_ls_hmm_print_state(&ls_hmm, _devnull);\n    ret = tsk_compressed_matrix_decode(&forward, decoded_compressed_matrix);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_ls_hmm_viterbi(&ls_hmm, h, &viterbi, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_viterbi_matrix_print_state(&viterbi, _devnull);\n    tsk_ls_hmm_print_state(&ls_hmm, _devnull);\n    ret = tsk_viterbi_matrix_traceback(&viterbi, path, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(path[0], 2);\n    CU_ASSERT_EQUAL(path[1], 1);\n    CU_ASSERT_EQUAL(path[2], 1);\n\n    /* Should get the same answer at lower precision */\n    for (precision = 1; precision < 24; precision++) {\n        ret = tsk_ls_hmm_set_precision(&ls_hmm, precision);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_ls_hmm_viterbi(&ls_hmm, h, &viterbi, TSK_NO_INIT);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        tsk_viterbi_matrix_print_state(&viterbi, 
_devnull);\n        tsk_ls_hmm_print_state(&ls_hmm, _devnull);\n        ret = tsk_viterbi_matrix_traceback(&viterbi, path, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(path[0], 2);\n        CU_ASSERT_EQUAL(path[1], 1);\n        CU_ASSERT_EQUAL(path[2], 1);\n    }\n\n    tsk_ls_hmm_free(&ls_hmm);\n    tsk_compressed_matrix_free(&forward);\n    tsk_viterbi_matrix_free(&viterbi);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_missing_haplotype_data(void)\n{\n    int ret = 0;\n    tsk_treeseq_t ts;\n    tsk_ls_hmm_t ls_hmm;\n    tsk_compressed_matrix_t forward;\n    tsk_viterbi_matrix_t viterbi;\n\n    double rho[] = { 0.0, 0.25, 0.25 };\n    double mu[] = { 0, 0, 0 };\n    int32_t h[] = { 1, TSK_MISSING_DATA, 1 };\n    tsk_id_t path[3];\n    double decoded_compressed_matrix[12];\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);\n\n    ret = tsk_ls_hmm_init(&ls_hmm, &ts, rho, mu, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_ls_hmm_forward(&ls_hmm, h, &forward, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_compressed_matrix_print_state(&forward, _devnull);\n    tsk_ls_hmm_print_state(&ls_hmm, _devnull);\n    ret = tsk_compressed_matrix_decode(&forward, decoded_compressed_matrix);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_ls_hmm_viterbi(&ls_hmm, h, &viterbi, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_viterbi_matrix_print_state(&viterbi, _devnull);\n    tsk_ls_hmm_print_state(&ls_hmm, _devnull);\n    ret = tsk_viterbi_matrix_traceback(&viterbi, path, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(path[0], 2);\n    CU_ASSERT_EQUAL(path[1], 2);\n    CU_ASSERT_EQUAL(path[2], 2);\n\n    tsk_ls_hmm_free(&ls_hmm);\n    tsk_compressed_matrix_free(&forward);\n    tsk_viterbi_matrix_free(&viterbi);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_match_impossible(void)\n{\n  
  int ret = 0;\n    tsk_treeseq_t ts;\n    tsk_ls_hmm_t ls_hmm;\n    tsk_compressed_matrix_t forward;\n    tsk_compressed_matrix_t backward;\n    tsk_viterbi_matrix_t viterbi;\n\n    double rho[] = { 0.0, 0.25, 0.25 };\n    double mu[] = { 0, 0, 0 };\n    /* This haplotype can't happen with a mutation rate of 0 */\n    int32_t h[] = { 0, 0, 0 };\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);\n\n    ret = tsk_ls_hmm_init(&ls_hmm, &ts, rho, mu, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_ls_hmm_forward(&ls_hmm, h, &forward, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MATCH_IMPOSSIBLE);\n    tsk_compressed_matrix_print_state(&forward, _devnull);\n    tsk_ls_hmm_print_state(&ls_hmm, _devnull);\n\n    ret = tsk_ls_hmm_viterbi(&ls_hmm, h, &viterbi, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MATCH_IMPOSSIBLE);\n    tsk_viterbi_matrix_print_state(&viterbi, _devnull);\n    tsk_ls_hmm_print_state(&ls_hmm, _devnull);\n\n    ret = tsk_ls_hmm_backward(&ls_hmm, h, forward.normalisation_factor, &backward, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MATCH_IMPOSSIBLE);\n    tsk_compressed_matrix_print_state(&backward, _devnull);\n    /* tsk_compressed_matrix_print_state(&forward, stdout); */\n    /* tsk_compressed_matrix_print_state(&backward, stdout); */\n    tsk_ls_hmm_print_state(&ls_hmm, _devnull);\n\n    tsk_ls_hmm_free(&ls_hmm);\n    tsk_compressed_matrix_free(&forward);\n    tsk_compressed_matrix_free(&backward);\n    tsk_viterbi_matrix_free(&viterbi);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_errors(void)\n{\n    int ret = 0;\n    tsk_treeseq_t ts;\n    tsk_ls_hmm_t ls_hmm;\n    tsk_compressed_matrix_t forward;\n    tsk_viterbi_matrix_t viterbi;\n    tsk_value_transition_t T[1];\n    double decoded[3][4];\n\n    double rho[] = { 0.0, 0.25, 0.25 };\n    double mu[] = { 0, 0, 0 };\n    int32_t h[] = { 0, 0, 0 };\n\n    
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);\n\n    ret = tsk_viterbi_matrix_init(&viterbi, &ts, 0, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_compressed_matrix_init(&forward, &ts, 0, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_ls_hmm_init(&ls_hmm, &ts, rho, mu, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    viterbi.matrix.tree_sequence = NULL;\n    ret = tsk_ls_hmm_viterbi(&ls_hmm, h, &viterbi, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    viterbi.matrix.tree_sequence = &ts;\n\n    forward.tree_sequence = NULL;\n    ret = tsk_ls_hmm_forward(&ls_hmm, h, &forward, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    forward.tree_sequence = &ts;\n\n    ret = tsk_compressed_matrix_store_site(&forward, 3, 0, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);\n    ret = tsk_compressed_matrix_store_site(&forward, 4, 0, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);\n\n    /* FIXME disabling this test for now because we filter out negative\n     * nodes when storing now, to accommodate some oddness in the initial\n     * conditions of the backward matrix. 
*/\n    /* T[0].tree_node = -1; */\n    /* T[0].value = 0; */\n    /* ret = tsk_compressed_matrix_store_site(&forward, 0, 1, 1, T); */\n    /* CU_ASSERT_EQUAL_FATAL(ret, 0); */\n    /* ret = tsk_compressed_matrix_decode(&forward, (double *) decoded); */\n    /* CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS); */\n\n    T[0].tree_node = 7;\n    T[0].value = 0;\n    ret = tsk_compressed_matrix_store_site(&forward, 0, 1, 1, T);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_compressed_matrix_decode(&forward, (double *) decoded);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    tsk_ls_hmm_free(&ls_hmm);\n    tsk_compressed_matrix_free(&forward);\n    tsk_viterbi_matrix_free(&viterbi);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_compressed_matrix(void)\n{\n    int ret = 0;\n    tsk_treeseq_t ts;\n    tsk_compressed_matrix_t matrix;\n    tsk_ls_hmm_t ls_hmm;\n    tsk_size_t max_transitions = 1024;\n    tsk_value_transition_t T[max_transitions];\n    double decoded[3][4];\n    int j;\n\n    double rho[] = { 0.0, 0.25, 0.25 };\n    double mu[] = { 0.1, 0.1, 0.1 };\n    int32_t h[] = { 0, 0, 0 };\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);\n\n    ret = tsk_compressed_matrix_init(&matrix, &ts, 0, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_compressed_matrix_print_state(&matrix, _devnull);\n\n    T[0].tree_node = 6;\n    T[0].value = 0;\n    for (j = 0; j < 3; j++) {\n        T[1].tree_node = j;\n        T[1].value = 1;\n        ret = tsk_compressed_matrix_store_site(&matrix, j, 1.0, 2, T);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n    }\n    tsk_compressed_matrix_print_state(&matrix, _devnull);\n\n    ret = tsk_compressed_matrix_decode(&matrix, (double *) decoded);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(decoded[0][0], 1.0);\n    CU_ASSERT_EQUAL(decoded[0][1], 0.0);\n    
CU_ASSERT_EQUAL(decoded[0][2], 0.0);\n    CU_ASSERT_EQUAL(decoded[1][0], 0.0);\n    CU_ASSERT_EQUAL(decoded[1][1], 1.0);\n    CU_ASSERT_EQUAL(decoded[1][2], 0.0);\n    CU_ASSERT_EQUAL(decoded[2][0], 0.0);\n    CU_ASSERT_EQUAL(decoded[2][1], 0.0);\n    CU_ASSERT_EQUAL(decoded[2][2], 1.0);\n\n    /* Cleared matrix should be zero everywhere */\n    tsk_compressed_matrix_clear(&matrix);\n    ret = tsk_compressed_matrix_decode(&matrix, (double *) decoded);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (j = 0; j < 3; j++) {\n        CU_ASSERT_EQUAL(decoded[j][0], 0.0);\n        CU_ASSERT_EQUAL(decoded[j][1], 0.0);\n        CU_ASSERT_EQUAL(decoded[j][2], 0.0);\n    }\n\n    ret = tsk_ls_hmm_init(&ls_hmm, &ts, rho, mu, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_ls_hmm_forward(&ls_hmm, h, &matrix, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_compressed_matrix_print_state(&matrix, _devnull);\n    ret = tsk_compressed_matrix_decode(&matrix, (double *) decoded);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_compressed_matrix_free(&matrix);\n    tsk_ls_hmm_free(&ls_hmm);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_viterbi_matrix(void)\n{\n    int ret = 0;\n    tsk_treeseq_t ts;\n    tsk_viterbi_matrix_t viterbi;\n    tsk_ls_hmm_t ls_hmm;\n    double rho[] = { 0.0, 0.25, 0.25 };\n    double mu[] = { 0, 0, 0 };\n    int32_t h[] = { 1, 1, 1 };\n    tsk_id_t path[3];\n    tsk_value_transition_t T[2];\n    int j;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);\n\n    ret = tsk_viterbi_matrix_init(&viterbi, &ts, 0, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_viterbi_matrix_print_state(&viterbi, _devnull);\n    ret = tsk_viterbi_matrix_traceback(&viterbi, path, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NULL_VITERBI_MATRIX);\n\n    T[0].tree_node = 6;\n    T[0].value = 0;\n    T[1].tree_node = 1;\n    T[1].value = 1;\n  
  for (j = 0; j < 3; j++) {\n        ret = tsk_compressed_matrix_store_site(&viterbi.matrix, j, 1.0, 2, T);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        /* We need to have one record per site, so we put in a record\n         * at the root saying we don't need to recombine */\n        ret = tsk_viterbi_matrix_add_recombination_required(&viterbi, j, 6, false);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n    }\n    ret = tsk_viterbi_matrix_traceback(&viterbi, path, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(path[0], 1);\n    CU_ASSERT_EQUAL_FATAL(path[1], 1);\n    CU_ASSERT_EQUAL_FATAL(path[2], 1);\n\n    ret = tsk_ls_hmm_init(&ls_hmm, &ts, rho, mu, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_ls_hmm_viterbi(&ls_hmm, h, &viterbi, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_viterbi_matrix_print_state(&viterbi, _devnull);\n    ret = tsk_viterbi_matrix_traceback(&viterbi, path, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_viterbi_matrix_clear(&viterbi);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_viterbi_matrix_traceback(&viterbi, path, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NULL_VITERBI_MATRIX);\n\n    tsk_viterbi_matrix_free(&viterbi);\n\n    ret = tsk_viterbi_matrix_init(&viterbi, &ts, 1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    /* Make sure we hit the realloc case for recombination records */\n    for (j = 0; j < 100; j++) {\n        ret = tsk_viterbi_matrix_add_recombination_required(&viterbi, 0, 6, false);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n    }\n    tsk_viterbi_matrix_print_state(&viterbi, _devnull);\n\n    tsk_viterbi_matrix_free(&viterbi);\n    tsk_ls_hmm_free(&ls_hmm);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_multi_tree_exact_match(void)\n{\n    int ret = 0;\n    tsk_treeseq_t ts;\n    tsk_ls_hmm_t ls_hmm;\n    tsk_compressed_matrix_t forward, backward;\n    tsk_viterbi_matrix_t viterbi;\n\n    double rho[] = { 0.0, 0.25, 0.25 };\n    double mu[] = { 0, 0, 0 };\n   
 int32_t h[] = { 1, 1, 1 };\n    tsk_id_t path[3];\n    double decoded_compressed_matrix[12];\n    unsigned int precision;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n\n    ret = tsk_ls_hmm_init(&ls_hmm, &ts, rho, mu, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_ls_hmm_forward(&ls_hmm, h, &forward, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_ls_hmm_print_state(&ls_hmm, _devnull);\n    tsk_compressed_matrix_print_state(&forward, _devnull);\n    ret = tsk_compressed_matrix_decode(&forward, decoded_compressed_matrix);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_ls_hmm_backward(&ls_hmm, h, forward.normalisation_factor, &backward, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_ls_hmm_print_state(&ls_hmm, _devnull);\n    tsk_compressed_matrix_print_state(&backward, _devnull);\n    ret = tsk_compressed_matrix_decode(&backward, decoded_compressed_matrix);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_ls_hmm_viterbi(&ls_hmm, h, &viterbi, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_viterbi_matrix_print_state(&viterbi, _devnull);\n    tsk_ls_hmm_print_state(&ls_hmm, _devnull);\n    ret = tsk_viterbi_matrix_traceback(&viterbi, path, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(path[0], 2);\n    CU_ASSERT_EQUAL(path[1], 0);\n    CU_ASSERT_EQUAL(path[2], 1);\n\n    /* Should get the same answer at lower precision */\n    for (precision = 4; precision < 24; precision++) {\n        ret = tsk_ls_hmm_set_precision(&ls_hmm, precision);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_ls_hmm_viterbi(&ls_hmm, h, &viterbi, TSK_NO_INIT);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        tsk_viterbi_matrix_print_state(&viterbi, _devnull);\n        tsk_ls_hmm_print_state(&ls_hmm, _devnull);\n        ret = tsk_viterbi_matrix_traceback(&viterbi, path, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        
CU_ASSERT_EQUAL(path[0], 2);\n        CU_ASSERT_EQUAL(path[1], 0);\n        CU_ASSERT_EQUAL(path[2], 1);\n    }\n\n    tsk_ls_hmm_free(&ls_hmm);\n    tsk_compressed_matrix_free(&forward);\n    tsk_compressed_matrix_free(&backward);\n    tsk_viterbi_matrix_free(&viterbi);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_multi_tree_errors(void)\n{\n    int ret = 0;\n    tsk_treeseq_t ts;\n    tsk_compressed_matrix_t forward;\n    tsk_value_transition_t T[1];\n    double decoded[3][4];\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n\n    ret = tsk_compressed_matrix_init(&forward, &ts, 0, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* We want a tree node that is not in the first tree */\n    T[0].tree_node = 7;\n    T[0].value = 0;\n    ret = tsk_compressed_matrix_store_site(&forward, 0, 1, 1, T);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_compressed_matrix_decode(&forward, (double *) decoded);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_COMPRESSED_MATRIX_NODE);\n\n    tsk_compressed_matrix_free(&forward);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_caterpillar_tree_many_values(void)\n{\n    int ret = 0;\n    tsk_ls_hmm_t ls_hmm;\n    tsk_compressed_matrix_t matrix;\n    double rho[] = { 0.1, 0.1, 0.1, 0.1, 0.1 };\n    double mu[] = { 0.0, 0.0, 0.0, 0.0, 0.0 };\n    int32_t h[] = { 0, 0, 0, 0, 0 };\n    tsk_size_t n[] = {\n        8,\n        16,\n        32,\n        64,\n    };\n    tsk_treeseq_t *ts;\n    tsk_size_t j;\n\n    for (j = 0; j < sizeof(n) / sizeof(*n); j++) {\n        ts = caterpillar_tree(n[j], 5, n[j] - 2);\n        ret = tsk_ls_hmm_init(&ls_hmm, ts, rho, mu, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_compressed_matrix_init(&matrix, ts, 1 << 10, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_ls_hmm_forward(&ls_hmm, h, &matrix, TSK_NO_INIT);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        
tsk_compressed_matrix_print_state(&matrix, _devnull);\n        tsk_ls_hmm_print_state(&ls_hmm, _devnull);\n\n        tsk_ls_hmm_free(&ls_hmm);\n        tsk_compressed_matrix_free(&matrix);\n        tsk_treeseq_free(ts);\n        free(ts);\n    }\n\n    j = 40;\n    ts = caterpillar_tree(j, 5, j - 2);\n    ret = tsk_ls_hmm_init(&ls_hmm, ts, rho, mu, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_compressed_matrix_init(&matrix, ts, 1 << 20, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    /* Short circuit this value so we can run the test */\n    ls_hmm.max_parsimony_words = 0;\n    ret = tsk_ls_hmm_forward(&ls_hmm, h, &matrix, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TOO_MANY_VALUES);\n\n    tsk_ls_hmm_free(&ls_hmm);\n    tsk_compressed_matrix_free(&matrix);\n    tsk_treeseq_free(ts);\n    free(ts);\n}\n\nint\nmain(int argc, char **argv)\n{\n    CU_TestInfo tests[] = {\n        { \"test_single_tree_missing_alleles\", test_single_tree_missing_alleles },\n        { \"test_single_tree_exact_match\", test_single_tree_exact_match },\n        { \"test_single_tree_missing_haplotype_data\",\n            test_single_tree_missing_haplotype_data },\n        { \"test_single_tree_match_impossible\", test_single_tree_match_impossible },\n        { \"test_single_tree_errors\", test_single_tree_errors },\n        { \"test_single_tree_compressed_matrix\", test_single_tree_compressed_matrix },\n        { \"test_single_tree_viterbi_matrix\", test_single_tree_viterbi_matrix },\n\n        { \"test_multi_tree_exact_match\", test_multi_tree_exact_match },\n        { \"test_multi_tree_errors\", test_multi_tree_errors },\n\n        { \"test_caterpillar_tree_many_values\", test_caterpillar_tree_many_values },\n        { NULL, NULL },\n    };\n\n    return test_main(tests, argc, argv);\n}\n"
  },
  {
    "path": "c/tests/test_minimal_cpp.cpp",
    "content": "/* * MIT License\n *\n * Copyright (c) 2019-2024 Tskit Developers\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n/* Minimal tests to make sure that tskit at least compiles and links\n * in a simple C++ program */\n\n#include <iostream>\n#include <cassert>\n#include <sstream>\n#include <vector>\n#include <algorithm>\n#include <cstring>\n\n#include <tskit.h>\n\nusing namespace std;\n\nvoid\ntest_kas_strerror()\n{\n    std::cout << \"test_kas_strerror\" << endl;\n    std::ostringstream o;\n    o << kas_strerror(KAS_ERR_NO_MEMORY);\n    assert(std::string(\"Out of memory\").compare(o.str()) == 0);\n}\n\nvoid\ntest_strerror()\n{\n    std::cout << \"test_strerror\" << endl;\n    std::ostringstream o;\n    o << tsk_strerror(TSK_ERR_NO_MEMORY);\n    assert(std::string(\"Out of memory. 
(TSK_ERR_NO_MEMORY)\").compare(o.str()) == 0);\n}\n\nvoid\ntest_load_error()\n{\n    std::cout << \"test_open_error\" << endl;\n    tsk_treeseq_t ts;\n    int ret = tsk_treeseq_load(&ts, \"no such file\", 0);\n    assert(ret == TSK_ERR_IO);\n    tsk_treeseq_free(&ts);\n}\n\nvoid\ntest_table_basics()\n{\n    std::cout << \"test_table_basics\" << endl;\n    tsk_table_collection_t tables;\n    int ret = tsk_table_collection_init(&tables, 0);\n    assert(ret == 0);\n\n    ret = tsk_node_table_add_row(&tables.nodes, 0, 1.0, TSK_NULL, TSK_NULL, NULL, 0);\n    assert(ret == 0);\n    ret = tsk_node_table_add_row(&tables.nodes, 0, 2.0, TSK_NULL, TSK_NULL, NULL, 0);\n    assert(ret == 1);\n    assert(tables.nodes.num_rows == 2);\n\n    tsk_table_collection_free(&tables);\n}\n\n/* A definition of sort_edges that uses C++ std::sort and inlining of the\n * comparison function to achieve significantly better performance than\n * the builtin method in tskit.\n */\nint\ncpp_sort_edges(tsk_table_sorter_t *sorter, tsk_size_t start)\n{\n    struct _edge {\n        double left, right;\n        tsk_id_t parent, child;\n\n        _edge(double l, double r, tsk_id_t p, tsk_id_t c)\n            : left{ l }, right{ r }, parent{ p }, child{ c }\n        {\n        }\n    };\n    tsk_edge_table_t *edges = &sorter->tables->edges;\n    const double *node_time = sorter->tables->nodes.time;\n    std::vector<_edge> sorted_edges;\n    size_t num_edges = edges->num_rows;\n    size_t j;\n\n    /* This is the comparison function.  We cannot define an\n     * operator < for _edge because we need to bind the node times\n     * so we have to use a functional method. This is a copy of the cmp\n     * from fwdpp.  Only difference is the final time comparison\n     * (fwdpp table times go forwards). 
*/\n    const auto cmp = [&node_time](const _edge &lhs, const _edge &rhs) {\n        auto tl = node_time[lhs.parent];\n        auto tr = node_time[rhs.parent];\n        if (tl == tr) {\n            if (lhs.parent == rhs.parent) {\n                if (lhs.child == rhs.child) {\n                    return lhs.left < rhs.left;\n                }\n                return lhs.child < rhs.child;\n            }\n            return lhs.parent < rhs.parent;\n        }\n        return tl < tr;\n    };\n\n    assert(start == 0);\n    /* Let's not bother with metadata */\n    assert(edges->metadata_length == 0);\n\n    sorted_edges.reserve(num_edges);\n    for (j = 0; j < num_edges; j++) {\n        sorted_edges.emplace_back(\n            edges->left[j], edges->right[j], edges->parent[j], edges->child[j]);\n    }\n\n    std::sort(begin(sorted_edges), end(sorted_edges), cmp);\n\n    for (j = 0; j < num_edges; j++) {\n        edges->left[j] = sorted_edges[j].left;\n        edges->right[j] = sorted_edges[j].right;\n        edges->parent[j] = sorted_edges[j].parent;\n        edges->child[j] = sorted_edges[j].child;\n    }\n    return 0;\n}\n\nvoid\ntest_edge_sorting()\n{\n    std::cout << \"test_edge_sorting\" << endl;\n    tsk_table_collection_t tables;\n    tsk_id_t n = 10;\n    tsk_id_t j;\n    int ret = tsk_table_collection_init(&tables, 0);\n    assert(ret == 0);\n\n    tables.sequence_length = 1.0;\n    /* Make a stick tree */\n    /* Add nodes and edges */\n    for (j = 0; j < n; j++) {\n        ret = tsk_node_table_add_row(\n            &tables.nodes, TSK_NODE_IS_SAMPLE, j + 1, TSK_NULL, TSK_NULL, NULL, 0);\n        assert(ret == j);\n    }\n    for (j = n - 1; j > 0; j--) {\n        tsk_edge_table_add_row(&tables.edges, 0, 1, j, j - 1, NULL, 0);\n    }\n    assert(tables.nodes.num_rows == (tsk_size_t) n);\n    assert(tables.edges.num_rows == (tsk_size_t) n - 1);\n\n    /* Make sure the edges are unsorted */\n    /* Not calling TSK_CHECK_TREES so casting is safe */\n    ret 
= (int) tsk_table_collection_check_integrity(&tables, TSK_CHECK_EDGE_ORDERING);\n    assert(ret == TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME);\n\n    /* Sort the tables */\n    tsk_table_sorter_t sorter;\n    ret = tsk_table_sorter_init(&sorter, &tables, 0);\n    assert(ret == 0);\n    /* Set the sort_edges to our local C++ version. We could also set some\n     * persistent state in sorter.params if we wanted to. */\n    sorter.sort_edges = cpp_sort_edges;\n    ret = tsk_table_sorter_run(&sorter, NULL);\n    assert(ret == 0);\n    tsk_table_sorter_free(&sorter);\n\n    /* Make sure the edges are now sorted */\n    ret = (int) tsk_table_collection_check_integrity(&tables, TSK_CHECK_EDGE_ORDERING);\n    assert(ret == 0);\n\n    tsk_table_collection_free(&tables);\n}\n\nint\nsort_edges_raises_exception(tsk_table_sorter_t *sorter, tsk_size_t start)\n{\n    throw std::exception();\n    return 0;\n}\n\nint\nsort_edges_raises_non_exception(tsk_table_sorter_t *sorter, tsk_size_t start)\n{\n    throw 42;\n    return 0;\n}\n\nint\nsafe_sort_edges(tsk_table_sorter_t *sorter, tsk_size_t start)\n{\n    int ret = 0;\n    if (sorter->user_data == NULL) {\n        try {\n            ret = sort_edges_raises_exception(sorter, start);\n        } catch (...) {\n            ret = -12345;\n        }\n    } else {\n        try {\n            ret = sort_edges_raises_non_exception(sorter, start);\n        } catch (...) {\n            ret = -12346;\n        }\n    }\n    return ret;\n}\n\nvoid\ntest_edge_sorting_errors()\n{\n    /* Some inexplicable error happened here on 32 bit Windows where the\n     * exceptions were not being caught as expected. 
This seems much\n     * more likely to be a platform quirk than a real bug in our code,\n     * so just disabling the test there.\n     *\n     * https://github.com/tskit-dev/tskit/issues/1790\n     * https://github.com/tskit-dev/tskit/pull/1791\n     */\n#if !defined(_WIN32)\n    std::cout << \"test_edge_sorting_errors\" << endl;\n    tsk_table_collection_t tables;\n    tsk_table_sorter_t sorter;\n    tsk_id_t ret = tsk_table_collection_init(&tables, 0);\n\n    assert(ret == 0);\n    tables.sequence_length = 1.0;\n\n    ret = tsk_table_sorter_init(&sorter, &tables, 0);\n    assert(ret == 0);\n    sorter.sort_edges = safe_sort_edges;\n    ret = tsk_table_sorter_run(&sorter, NULL);\n    assert(ret == -12345);\n\n    /* Use the user_data as a way to communicate with the sorter\n     * function. Here, we want to try out two different types\n     * of exception that get thrown. */\n    sorter.user_data = &tables;\n    ret = tsk_table_sorter_run(&sorter, NULL);\n    assert(ret == -12346);\n\n    tsk_table_sorter_free(&sorter);\n    tsk_table_collection_free(&tables);\n#endif\n}\n\nint\nmain()\n{\n    test_kas_strerror();\n    test_strerror();\n    test_load_error();\n    test_table_basics();\n    test_edge_sorting();\n    test_edge_sorting_errors();\n    return 0;\n}\n"
  },
  {
    "path": "c/tests/test_stats.c",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2024 Tskit Developers\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n#include \"testlib.h\"\n#include <math.h>\n#include <tskit/stats.h>\n\n#include <unistd.h>\n#include <stdlib.h>\n#include <float.h>\n\nstatic bool\nmulti_mutations_exist(tsk_treeseq_t *ts, tsk_id_t start, tsk_id_t end)\n{\n    int ret;\n    tsk_id_t j;\n    tsk_site_t site;\n\n    for (j = start; j < TSK_MIN((tsk_id_t) tsk_treeseq_get_num_sites(ts), end); j++) {\n        ret = tsk_treeseq_get_site(ts, j, &site);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        if (site.mutations_length > 1) {\n            return true;\n        }\n    }\n    return false;\n}\n\nstatic void\nverify_ld(tsk_treeseq_t *ts)\n{\n    int ret;\n    tsk_size_t num_sites = tsk_treeseq_get_num_sites(ts);\n    tsk_site_t *sites = tsk_malloc(num_sites * sizeof(tsk_site_t));\n    int *num_site_mutations = tsk_malloc(num_sites * 
sizeof(int));\n    tsk_ld_calc_t ld_calc;\n    double *r2, *r2_prime, x;\n    tsk_id_t j;\n    tsk_size_t num_r2_values;\n    double eps = 1e-6;\n\n    r2 = tsk_calloc(num_sites, sizeof(double));\n    r2_prime = tsk_calloc(num_sites, sizeof(double));\n    CU_ASSERT_FATAL(r2 != NULL);\n    CU_ASSERT_FATAL(r2_prime != NULL);\n    CU_ASSERT_FATAL(sites != NULL);\n    CU_ASSERT_FATAL(num_site_mutations != NULL);\n\n    ret = tsk_ld_calc_init(&ld_calc, ts);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_ld_calc_print_state(&ld_calc, _devnull);\n\n    for (j = 0; j < (tsk_id_t) num_sites; j++) {\n        ret = tsk_treeseq_get_site(ts, j, sites + j);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        num_site_mutations[j] = (int) sites[j].mutations_length;\n        ret = tsk_ld_calc_get_r2(&ld_calc, j, j, &x);\n        if (num_site_mutations[j] <= 1) {\n            CU_ASSERT_EQUAL_FATAL(ret, 0);\n            CU_ASSERT_DOUBLE_EQUAL_FATAL(x, 1.0, eps);\n        } else {\n            CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_ONLY_INFINITE_SITES);\n        }\n    }\n\n    if (num_sites > 0) {\n        /* Some checks in the forward direction */\n        ret = tsk_ld_calc_get_r2_array(\n            &ld_calc, 0, TSK_DIR_FORWARD, num_sites, DBL_MAX, r2, &num_r2_values);\n        if (multi_mutations_exist(ts, 0, (tsk_id_t) num_sites)) {\n            CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_ONLY_INFINITE_SITES);\n        } else {\n            CU_ASSERT_EQUAL_FATAL(ret, 0);\n            CU_ASSERT_EQUAL_FATAL(num_r2_values, num_sites - 1);\n        }\n        tsk_ld_calc_print_state(&ld_calc, _devnull);\n\n        ret = tsk_ld_calc_get_r2_array(&ld_calc, (tsk_id_t) num_sites - 2,\n            TSK_DIR_FORWARD, num_sites, DBL_MAX, r2_prime, &num_r2_values);\n        if (multi_mutations_exist(ts, (tsk_id_t) num_sites - 2, (tsk_id_t) num_sites)) {\n            CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_ONLY_INFINITE_SITES);\n        } else {\n            CU_ASSERT_EQUAL_FATAL(ret, 0);\n            
CU_ASSERT_EQUAL_FATAL(num_r2_values, 1);\n        }\n        tsk_ld_calc_print_state(&ld_calc, _devnull);\n\n        ret = tsk_ld_calc_get_r2_array(\n            &ld_calc, 0, TSK_DIR_FORWARD, num_sites, DBL_MAX, r2_prime, &num_r2_values);\n        if (multi_mutations_exist(ts, 0, (tsk_id_t) num_sites)) {\n            CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_ONLY_INFINITE_SITES);\n        } else {\n            CU_ASSERT_EQUAL_FATAL(ret, 0);\n            CU_ASSERT_EQUAL_FATAL(num_r2_values, num_sites - 1);\n            for (j = 0; j < (tsk_id_t) num_r2_values; j++) {\n                CU_ASSERT_EQUAL_FATAL(r2[j], r2_prime[j]);\n                ret = tsk_ld_calc_get_r2(&ld_calc, 0, j + 1, &x);\n                CU_ASSERT_EQUAL_FATAL(ret, 0);\n                CU_ASSERT_DOUBLE_EQUAL_FATAL(r2[j], x, eps);\n            }\n        }\n\n        /* Some checks in the reverse direction */\n        ret = tsk_ld_calc_get_r2_array(&ld_calc, (tsk_id_t) num_sites - 1,\n            TSK_DIR_REVERSE, num_sites, DBL_MAX, r2, &num_r2_values);\n        if (multi_mutations_exist(ts, 0, (tsk_id_t) num_sites)) {\n            CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_ONLY_INFINITE_SITES);\n        } else {\n            CU_ASSERT_EQUAL_FATAL(ret, 0);\n            CU_ASSERT_EQUAL_FATAL(num_r2_values, num_sites - 1);\n        }\n        tsk_ld_calc_print_state(&ld_calc, _devnull);\n\n        ret = tsk_ld_calc_get_r2_array(\n            &ld_calc, 1, TSK_DIR_REVERSE, num_sites, DBL_MAX, r2_prime, &num_r2_values);\n        if (multi_mutations_exist(ts, 0, 2)) {\n            CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_ONLY_INFINITE_SITES);\n        } else {\n            CU_ASSERT_EQUAL_FATAL(ret, 0);\n            CU_ASSERT_EQUAL_FATAL(num_r2_values, 1);\n        }\n\n        ret = tsk_ld_calc_get_r2_array(&ld_calc, (tsk_id_t) num_sites - 1,\n            TSK_DIR_REVERSE, num_sites, DBL_MAX, r2_prime, &num_r2_values);\n        if (multi_mutations_exist(ts, 0, (tsk_id_t) num_sites)) {\n            CU_ASSERT_EQUAL_FATAL(ret, 
TSK_ERR_ONLY_INFINITE_SITES);\n        } else {\n            CU_ASSERT_EQUAL_FATAL(ret, 0);\n            CU_ASSERT_EQUAL_FATAL(num_r2_values, num_sites - 1);\n            tsk_ld_calc_print_state(&ld_calc, _devnull);\n\n            for (j = 0; j < (tsk_id_t) num_r2_values; j++) {\n                CU_ASSERT_EQUAL_FATAL(r2[j], r2_prime[j]);\n                ret = tsk_ld_calc_get_r2(&ld_calc, (tsk_id_t) num_sites - 1,\n                    (tsk_id_t) num_sites - j - 2, &x);\n                CU_ASSERT_EQUAL_FATAL(ret, 0);\n                CU_ASSERT_DOUBLE_EQUAL_FATAL(r2[j], x, eps);\n            }\n        }\n\n        /* Check some error conditions */\n        ret = tsk_ld_calc_get_r2_array(\n            &ld_calc, 0, 0, num_sites, DBL_MAX, r2, &num_r2_values);\n        CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    }\n\n    /* Check some error conditions */\n    for (j = (tsk_id_t) num_sites; j < (tsk_id_t) num_sites + 2; j++) {\n        ret = tsk_ld_calc_get_r2_array(\n            &ld_calc, j, TSK_DIR_FORWARD, num_sites, DBL_MAX, r2, &num_r2_values);\n        CU_ASSERT_EQUAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);\n        ret = tsk_ld_calc_get_r2(&ld_calc, j, 0, r2);\n        CU_ASSERT_EQUAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);\n        ret = tsk_ld_calc_get_r2(&ld_calc, 0, j, r2);\n        CU_ASSERT_EQUAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);\n    }\n\n    tsk_ld_calc_free(&ld_calc);\n    free(r2);\n    free(r2_prime);\n    free(sites);\n    free(num_site_mutations);\n}\n\n/* FIXME: this test is weak and should check the return value somehow.\n * We should also have simplest and single tree tests along with separate\n * tests for the error conditions. 
This should be done as part of the general\n * stats framework.\n */\nstatic void\nverify_genealogical_nearest_neighbours(tsk_treeseq_t *ts)\n{\n    int ret;\n    const tsk_id_t *samples;\n    const tsk_id_t *sample_sets[2];\n    tsk_size_t sample_set_size[2];\n    tsk_size_t num_samples = tsk_treeseq_get_num_samples(ts);\n    double *A = tsk_malloc(2 * num_samples * sizeof(double));\n    CU_ASSERT_FATAL(A != NULL);\n\n    samples = tsk_treeseq_get_samples(ts);\n\n    sample_sets[0] = samples;\n    sample_set_size[0] = num_samples / 2;\n    sample_sets[1] = samples + sample_set_size[0];\n    sample_set_size[1] = num_samples - sample_set_size[0];\n\n    ret = tsk_treeseq_genealogical_nearest_neighbours(\n        ts, samples, num_samples, sample_sets, sample_set_size, 2, 0, A);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    sample_sets[0] = samples;\n    sample_set_size[0] = 1;\n    sample_sets[1] = samples + 1;\n    sample_set_size[1] = 1;\n\n    ret = tsk_treeseq_genealogical_nearest_neighbours(\n        ts, samples, num_samples, sample_sets, sample_set_size, 2, 0, A);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    free(A);\n}\n\n/* FIXME: this test is weak and should check the return value somehow.\n * We should also have simplest and single tree tests along with separate\n * tests for the error conditions. 
This should be done as part of the general\n * stats framework.\n */\nstatic void\nverify_mean_descendants(tsk_treeseq_t *ts)\n{\n    int ret;\n    tsk_id_t *samples;\n    const tsk_id_t *sample_sets[2];\n    tsk_size_t sample_set_size[2];\n    tsk_size_t num_samples = tsk_treeseq_get_num_samples(ts);\n    double *C = tsk_malloc(2 * tsk_treeseq_get_num_nodes(ts) * sizeof(double));\n    CU_ASSERT_FATAL(C != NULL);\n\n    samples = tsk_malloc(num_samples * sizeof(*samples));\n    tsk_memcpy(samples, tsk_treeseq_get_samples(ts), num_samples * sizeof(*samples));\n\n    sample_sets[0] = samples;\n    sample_set_size[0] = num_samples / 2;\n    sample_sets[1] = samples + sample_set_size[0];\n    sample_set_size[1] = num_samples - sample_set_size[0];\n\n    ret = tsk_treeseq_mean_descendants(ts, sample_sets, sample_set_size, 2, 0, C);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Check some error conditions */\n    ret = tsk_treeseq_mean_descendants(ts, sample_sets, sample_set_size, 0, 0, C);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    samples[0] = -1;\n    ret = tsk_treeseq_mean_descendants(ts, sample_sets, sample_set_size, 2, 0, C);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    samples[0] = (tsk_id_t) tsk_treeseq_get_num_nodes(ts) + 1;\n    ret = tsk_treeseq_mean_descendants(ts, sample_sets, sample_set_size, 2, 0, C);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    free(samples);\n    free(C);\n}\n\n/* Check the divergence matrix by running against the stats API equivalent\n * code.\n */\nstatic void\nverify_divergence_matrix(tsk_treeseq_t *ts, tsk_flags_t options)\n{\n    int ret;\n    const tsk_size_t n = tsk_treeseq_get_num_samples(ts);\n    const tsk_id_t *samples = tsk_treeseq_get_samples(ts);\n    tsk_size_t sample_set_sizes[n];\n    tsk_id_t index_tuples[2 * n * n];\n    double D1[n * n], D2[n * n];\n    tsk_size_t i, j, k;\n\n    for (j = 0; j < n; j++) {\n        sample_set_sizes[j] = 1;\n        for (k 
= 0; k < n; k++) {\n            index_tuples[2 * (j * n + k)] = (tsk_id_t) j;\n            index_tuples[2 * (j * n + k) + 1] = (tsk_id_t) k;\n        }\n    }\n    ret = tsk_treeseq_divergence(\n        ts, n, sample_set_sizes, samples, n * n, index_tuples, 0, NULL, options, D1);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_treeseq_divergence_matrix(\n        ts, n, sample_set_sizes, samples, 0, NULL, options, D2);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    for (j = 0; j < n; j++) {\n        for (k = 0; k < n; k++) {\n            i = j * n + k;\n            /* printf(\"%d\\t%d\\t%f\\t%f\\n\", (int) j, (int) k, D1[i], D2[i]); */\n            if (j == k) {\n                CU_ASSERT_EQUAL(D2[i], 0);\n            } else {\n                CU_ASSERT_DOUBLE_EQUAL(D1[i], D2[i], 1E-6);\n            }\n        }\n    }\n}\n\n/* Check coalescence counts */\nstatic void\nverify_pair_coalescence_counts(tsk_treeseq_t *ts, tsk_flags_t options)\n{\n    int ret;\n    const tsk_size_t n = tsk_treeseq_get_num_samples(ts);\n    const tsk_size_t N = tsk_treeseq_get_num_nodes(ts);\n    const tsk_size_t T = tsk_treeseq_get_num_trees(ts);\n    const tsk_id_t *samples = tsk_treeseq_get_samples(ts);\n    const double *breakpoints = tsk_treeseq_get_breakpoints(ts);\n    const tsk_size_t P = 2;\n    const tsk_size_t I = P * (P + 1) / 2;\n    const tsk_size_t B = 8;\n    tsk_id_t sample_sets[n];\n    tsk_size_t sample_set_sizes[P];\n    tsk_id_t index_tuples[2 * I];\n    tsk_id_t node_bin_map[N];\n    tsk_size_t dim = T * N * I;\n    double C[dim];\n    double C_B[T * B * I];\n    double C_Nh[T * (N / 2) * I];\n    tsk_size_t i, j, k;\n\n    for (i = 0; i < n; i++) {\n        sample_sets[i] = samples[i];\n    }\n\n    for (i = 0; i < P; i++) {\n        sample_set_sizes[i] = 0;\n    }\n    for (j = 0; j < n; j++) {\n        i = j / ((n + P - 1) / P);\n        sample_set_sizes[i]++;\n    }\n\n    for (j = 0, i = 0; j < P; j++) {\n        for (k = j; k < P; k++) {\n            
index_tuples[i++] = (tsk_id_t) j;\n            index_tuples[i++] = (tsk_id_t) k;\n        }\n    }\n\n    /* test various bin assignments */\n    for (i = 0; i < N; i++) {\n        node_bin_map[i] = ((tsk_id_t) (i % B));\n    }\n    ret = tsk_treeseq_pair_coalescence_counts(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, B, node_bin_map, options, C_B);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    for (i = 0; i < N; i++) {\n        node_bin_map[i] = i < N / 2 ? ((tsk_id_t) i) : TSK_NULL;\n    }\n    ret = tsk_treeseq_pair_coalescence_counts(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, N / 2, node_bin_map, options, C_Nh);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    for (i = 0; i < N; i++) {\n        node_bin_map[i] = (tsk_id_t) i;\n    }\n    ret = tsk_treeseq_pair_coalescence_counts(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, N, node_bin_map, options, C);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* cover errors */\n    double bad_breakpoints[2] = { breakpoints[1], 0.0 };\n    ret = tsk_treeseq_pair_coalescence_counts(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, 1, bad_breakpoints, N, node_bin_map, options, C);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);\n\n    index_tuples[0] = (tsk_id_t) P;\n    ret = tsk_treeseq_pair_coalescence_counts(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, N, node_bin_map, options, C);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLE_SET_INDEX);\n    index_tuples[0] = 0;\n\n    tsk_size_t tmp = sample_set_sizes[0];\n    sample_set_sizes[0] = 0;\n    ret = tsk_treeseq_pair_coalescence_counts(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, N, node_bin_map, options, C);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EMPTY_SAMPLE_SET);\n    sample_set_sizes[0] = tmp;\n\n    sample_sets[1] = 0;\n    ret = tsk_treeseq_pair_coalescence_counts(ts, P, 
sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, N, node_bin_map, options, C);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);\n    sample_sets[1] = 1;\n\n    ret = tsk_treeseq_pair_coalescence_counts(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, N - 1, node_bin_map, options, C);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NODE_BIN_MAP_DIM);\n\n    ret = tsk_treeseq_pair_coalescence_counts(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, 0, node_bin_map, options, C);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NODE_BIN_MAP_DIM);\n\n    node_bin_map[0] = -2;\n    ret = tsk_treeseq_pair_coalescence_counts(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, N, node_bin_map, options, C);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NODE_BIN_MAP);\n    node_bin_map[0] = 0;\n}\n\n/* Check coalescence quantiles */\nstatic void\nverify_pair_coalescence_quantiles(tsk_treeseq_t *ts)\n{\n    int ret;\n    const tsk_size_t n = tsk_treeseq_get_num_samples(ts);\n    const tsk_size_t N = tsk_treeseq_get_num_nodes(ts);\n    const tsk_size_t T = tsk_treeseq_get_num_trees(ts);\n    const tsk_id_t *samples = tsk_treeseq_get_samples(ts);\n    const double *breakpoints = tsk_treeseq_get_breakpoints(ts);\n    const double *nodes_time = ts->tables->nodes.time;\n    const double max_time = ts->max_time;\n    const tsk_size_t P = 2;\n    const tsk_size_t Q = 5;\n    const tsk_size_t B = 4;\n    const tsk_size_t I = P * (P + 1) / 2;\n    double quantiles[] = { 0.0, 0.25, 0.5, 0.75, 1.0 };\n    double epochs[] = { 0.0, max_time / 4, max_time / 2, max_time, INFINITY };\n    tsk_id_t sample_sets[n];\n    tsk_size_t sample_set_sizes[P];\n    tsk_id_t index_tuples[2 * I];\n    tsk_id_t node_bin_map[N];\n    tsk_id_t node_bin_map_empty[N];\n    tsk_id_t node_bin_map_shuff[N];\n    tsk_size_t dim = T * Q * I;\n    double C[dim];\n    tsk_size_t i, j, k;\n\n    for (i = 0; 
i < N; i++) {\n        node_bin_map_empty[i] = TSK_NULL;\n        node_bin_map_shuff[i] = (tsk_id_t) (i % B);\n        for (j = 0; j < B; j++) {\n            if (nodes_time[i] >= epochs[j] && nodes_time[i] < epochs[j + 1]) {\n                node_bin_map[i] = (tsk_id_t) j;\n            }\n        }\n    }\n\n    for (i = 0; i < n; i++) {\n        sample_sets[i] = samples[i];\n    }\n\n    for (i = 0; i < P; i++) {\n        sample_set_sizes[i] = 0;\n    }\n    for (j = 0; j < n; j++) {\n        i = j / (n / P);\n        sample_set_sizes[i]++;\n    }\n\n    for (j = 0, i = 0; j < P; j++) {\n        for (k = j; k < P; k++) {\n            index_tuples[i++] = (tsk_id_t) j;\n            index_tuples[i++] = (tsk_id_t) k;\n        }\n    }\n\n    ret = tsk_treeseq_pair_coalescence_quantiles(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, B, node_bin_map, Q, quantiles, 0, C);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    quantiles[Q - 1] = 0.9;\n    ret = tsk_treeseq_pair_coalescence_quantiles(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, B, node_bin_map, Q, quantiles, 0, C);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    quantiles[Q - 1] = 1.0;\n\n    ret = tsk_treeseq_pair_coalescence_quantiles(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, B, node_bin_map_empty, Q, quantiles, 0, C);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* cover errors */\n    quantiles[0] = -1.0;\n    ret = tsk_treeseq_pair_coalescence_quantiles(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, B, node_bin_map, Q, quantiles, 0, C);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_QUANTILES);\n    quantiles[0] = 0.0;\n\n    quantiles[Q - 1] = 2.0;\n    ret = tsk_treeseq_pair_coalescence_quantiles(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, B, node_bin_map, Q, quantiles, 0, C);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_QUANTILES);\n    quantiles[Q - 
1] = 1.0;\n\n    quantiles[1] = 0.0;\n    quantiles[0] = 0.25;\n    ret = tsk_treeseq_pair_coalescence_quantiles(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, B, node_bin_map, Q, quantiles, 0, C);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_QUANTILES);\n    quantiles[0] = 0.0;\n    quantiles[1] = 0.25;\n\n    ts->tables->nodes.time[N - 1] = -1.0;\n    ret = tsk_treeseq_pair_coalescence_quantiles(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, B, node_bin_map_shuff, Q, quantiles, 0, C);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_TIMES);\n    ts->tables->nodes.time[N - 1] = max_time;\n\n    node_bin_map[0] = (tsk_id_t) B;\n    ret = tsk_treeseq_pair_coalescence_quantiles(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, B, node_bin_map, Q, quantiles, 0, C);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NODE_BIN_MAP_DIM);\n    node_bin_map[0] = 0;\n}\n\n/* Check coalescence rates */\nstatic void\nverify_pair_coalescence_rates(tsk_treeseq_t *ts)\n{\n    int ret;\n    const tsk_size_t n = tsk_treeseq_get_num_samples(ts);\n    const tsk_size_t N = tsk_treeseq_get_num_nodes(ts);\n    const tsk_size_t T = tsk_treeseq_get_num_trees(ts);\n    const tsk_id_t *samples = tsk_treeseq_get_samples(ts);\n    const double *breakpoints = tsk_treeseq_get_breakpoints(ts);\n    const double *nodes_time = ts->tables->nodes.time;\n    const double max_time = ts->max_time;\n    const tsk_size_t P = 2;\n    const tsk_size_t B = 5;\n    const tsk_size_t I = P * (P + 1) / 2;\n    double epochs[]\n        = { 0.0, max_time / 4, max_time / 2, max_time, max_time * 2, INFINITY };\n    tsk_id_t sample_sets[n];\n    tsk_size_t sample_set_sizes[P];\n    tsk_id_t index_tuples[2 * I];\n    tsk_id_t node_bin_map[N];\n    tsk_id_t empty_node_bin_map[N];\n    tsk_size_t dim = T * B * I;\n    double C[dim];\n    tsk_size_t i, j, k;\n\n    for (i = 0; i < N; i++) {\n        node_bin_map[i] = TSK_NULL;\n      
  for (j = 0; j < B; j++) {\n            if (nodes_time[i] >= epochs[j] && nodes_time[i] < epochs[j + 1]) {\n                node_bin_map[i] = (tsk_id_t) j;\n            }\n        }\n        empty_node_bin_map[i] = TSK_NULL;\n    }\n\n    for (i = 0; i < n; i++) {\n        sample_sets[i] = samples[i];\n    }\n\n    for (i = 0; i < P; i++) {\n        sample_set_sizes[i] = 0;\n    }\n    for (j = 0; j < n; j++) {\n        i = j / (n / P);\n        sample_set_sizes[i]++;\n    }\n\n    for (j = 0, i = 0; j < P; j++) {\n        for (k = j; k < P; k++) {\n            index_tuples[i++] = (tsk_id_t) j;\n            index_tuples[i++] = (tsk_id_t) k;\n        }\n    }\n\n    ret = tsk_treeseq_pair_coalescence_rates(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, B, node_bin_map, epochs, 0, C);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    node_bin_map[0] = TSK_NULL;\n    ret = tsk_treeseq_pair_coalescence_rates(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, B, node_bin_map, epochs, 0, C);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    node_bin_map[0] = 0;\n\n    ret = tsk_treeseq_pair_coalescence_rates(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, B, empty_node_bin_map, epochs, 0, C);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* cover errors */\n    ret = tsk_treeseq_pair_coalescence_rates(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, 0, node_bin_map, epochs, 0, C);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TIME_WINDOWS_DIM);\n\n    epochs[0] = epochs[1] / 2;\n    ret = tsk_treeseq_pair_coalescence_rates(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, B, node_bin_map, epochs, 0, C);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLE_PAIR_TIMES);\n    epochs[0] = 0.0;\n\n    epochs[2] = epochs[1];\n    ret = tsk_treeseq_pair_coalescence_rates(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, 
breakpoints, B, node_bin_map, epochs, 0, C);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TIME_WINDOWS);\n    epochs[2] = max_time / 2;\n\n    epochs[B] = DBL_MAX;\n    ret = tsk_treeseq_pair_coalescence_rates(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, B, node_bin_map, epochs, 0, C);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TIME_WINDOWS_END);\n    epochs[B] = INFINITY;\n\n    node_bin_map[0] = (tsk_id_t) B;\n    ret = tsk_treeseq_pair_coalescence_rates(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, B, node_bin_map, epochs, 0, C);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NODE_BIN_MAP_DIM);\n    node_bin_map[0] = 0;\n\n    node_bin_map[0] = (tsk_id_t) (B - 1);\n    ret = tsk_treeseq_pair_coalescence_rates(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, B, node_bin_map, epochs, 0, C);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NODE_TIME_WINDOW);\n    node_bin_map[0] = 0;\n\n    node_bin_map[N - 1] = 0;\n    ret = tsk_treeseq_pair_coalescence_rates(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, B, node_bin_map, epochs, 0, C);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NODE_TIME_WINDOW);\n    node_bin_map[N - 1] = 3;\n\n    tsk_size_t tmp = sample_set_sizes[0];\n    sample_set_sizes[0] = 0;\n    ret = tsk_treeseq_pair_coalescence_rates(ts, P, sample_set_sizes, sample_sets, I,\n        index_tuples, T, breakpoints, B, node_bin_map, epochs, 0, C);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EMPTY_SAMPLE_SET);\n    sample_set_sizes[0] = tmp;\n}\n\ntypedef struct {\n    int call_count;\n    int error_on;\n    int error_code;\n} general_stat_error_params_t;\n\nstatic int\ngeneral_stat_error(tsk_size_t TSK_UNUSED(K), const double *TSK_UNUSED(X), tsk_size_t M,\n    double *Y, void *params)\n{\n    int ret = 0;\n    CU_ASSERT_FATAL(M == 1);\n    Y[0] = 0;\n    general_stat_error_params_t *the_params = (general_stat_error_params_t *) params;\n 
   if (the_params->call_count == the_params->error_on) {\n        ret = the_params->error_code;\n    }\n    the_params->call_count++;\n    return ret;\n}\n\nstatic void\nverify_window_errors(tsk_treeseq_t *ts, tsk_flags_t mode)\n{\n    int ret;\n    tsk_size_t num_samples = tsk_treeseq_get_num_samples(ts);\n    double *W = tsk_calloc(num_samples, sizeof(double));\n    /* node mode requires this much space at least */\n    double *sigma = tsk_calloc(tsk_treeseq_get_num_nodes(ts), sizeof(double));\n    double windows[] = { 0, 0, 0 };\n    tsk_flags_t options = mode;\n\n    /* Window errors */\n    ret = tsk_treeseq_general_stat(\n        ts, 1, W, 1, general_stat_error, NULL, 0, windows, options, sigma);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NUM_WINDOWS);\n\n    ret = tsk_treeseq_general_stat(\n        ts, 1, W, 1, general_stat_error, NULL, 2, windows, options, sigma);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);\n\n    windows[0] = -1;\n    ret = tsk_treeseq_general_stat(\n        ts, 1, W, 1, general_stat_error, NULL, 2, windows, options, sigma);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);\n\n    windows[1] = -1;\n    ret = tsk_treeseq_general_stat(\n        ts, 1, W, 1, general_stat_error, NULL, 1, windows, options, sigma);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);\n\n    windows[0] = 10;\n    ret = tsk_treeseq_general_stat(\n        ts, 1, W, 1, general_stat_error, NULL, 2, windows, options, sigma);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);\n\n    windows[0] = 0;\n    windows[2] = tsk_treeseq_get_sequence_length(ts) + 1;\n    ret = tsk_treeseq_general_stat(\n        ts, 1, W, 1, general_stat_error, NULL, 2, windows, options, sigma);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);\n\n    windows[0] = 0;\n    windows[1] = -1;\n    windows[2] = tsk_treeseq_get_sequence_length(ts);\n    ret = tsk_treeseq_general_stat(\n        ts, 1, W, 1, general_stat_error, NULL, 2, windows, options, sigma);\n    
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);\n\n    free(W);\n    free(sigma);\n}\n\nstatic void\nverify_summary_func_errors(tsk_treeseq_t *ts, tsk_flags_t mode)\n{\n    int ret;\n    tsk_size_t num_samples = tsk_treeseq_get_num_samples(ts);\n    double *W = tsk_calloc(num_samples, sizeof(double));\n    /* We need this much space for NODE mode */\n    double *sigma = tsk_calloc(tsk_treeseq_get_num_nodes(ts), sizeof(double));\n    int j;\n    general_stat_error_params_t params;\n    CU_ASSERT_FATAL(W != NULL);\n\n    /* Errors in the summary function */\n    j = 1;\n    while (true) {\n        params.call_count = 0;\n        params.error_on = j;\n        params.error_code = -j;\n        ret = tsk_treeseq_general_stat(ts, 1, W, 1, general_stat_error, &params, 0, NULL,\n            TSK_STAT_POLARISED | mode, sigma);\n        if (ret == 0) {\n            break;\n        }\n        CU_ASSERT_EQUAL_FATAL(ret, params.error_code);\n        j++;\n    }\n    CU_ASSERT_FATAL(j > 1);\n\n    j = 1;\n    while (true) {\n        params.call_count = 0;\n        params.error_on = j;\n        params.error_code = -j;\n        ret = tsk_treeseq_general_stat(\n            ts, 1, W, 1, general_stat_error, &params, 0, NULL, mode, sigma);\n        if (ret == 0) {\n            break;\n        }\n        CU_ASSERT_EQUAL_FATAL(ret, params.error_code);\n        j++;\n    }\n    CU_ASSERT_FATAL(j > 1);\n\n    free(W);\n    free(sigma);\n}\n\nstatic void\nverify_branch_general_stat_errors(tsk_treeseq_t *ts)\n{\n    verify_summary_func_errors(ts, TSK_STAT_BRANCH);\n    verify_window_errors(ts, TSK_STAT_BRANCH);\n}\n\nstatic void\nverify_site_general_stat_errors(tsk_treeseq_t *ts)\n{\n    verify_window_errors(ts, TSK_STAT_SITE);\n    verify_summary_func_errors(ts, TSK_STAT_SITE);\n}\n\nstatic void\nverify_node_general_stat_errors(tsk_treeseq_t *ts)\n{\n    verify_window_errors(ts, TSK_STAT_NODE);\n    verify_summary_func_errors(ts, TSK_STAT_NODE);\n}\n\nstatic 
void\nverify_one_way_weighted_func_errors(tsk_treeseq_t *ts, one_way_weighted_method *method)\n{\n    int ret;\n    tsk_size_t num_samples = tsk_treeseq_get_num_samples(ts);\n    double *weights = tsk_malloc(num_samples * sizeof(double));\n    double bad_windows[] = { 0, -1 };\n    double result;\n    tsk_size_t j;\n\n    for (j = 0; j < num_samples; j++) {\n        weights[j] = 1.0;\n    }\n\n    ret = method(ts, 0, weights, 0, NULL, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_WEIGHTS);\n\n    ret = method(ts, 1, weights, 1, bad_windows, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);\n\n    free(weights);\n}\n\nstatic void\nverify_one_way_weighted_covariate_func_errors(\n    tsk_treeseq_t *ts, one_way_covariates_method *method)\n{\n    int ret;\n    tsk_size_t num_samples = tsk_treeseq_get_num_samples(ts);\n    double *weights = tsk_malloc(num_samples * sizeof(double));\n    double *covariates = NULL;\n    double bad_windows[] = { 0, -1 };\n    double result;\n    tsk_size_t j;\n\n    for (j = 0; j < num_samples; j++) {\n        weights[j] = 1.0;\n    }\n\n    ret = method(ts, 0, weights, 0, covariates, 0, NULL, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_WEIGHTS);\n\n    ret = method(ts, 1, weights, 0, covariates, 1, bad_windows, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);\n\n    free(weights);\n}\n\nstatic void\nverify_one_way_stat_func_errors(tsk_treeseq_t *ts, one_way_sample_stat_method *method)\n{\n    int ret;\n    tsk_id_t num_nodes = (tsk_id_t) tsk_treeseq_get_num_nodes(ts);\n    tsk_id_t samples[] = { 0, 1, 2, 3 };\n    tsk_size_t sample_set_sizes = 4;\n    double windows[] = { 0, 0, 0 };\n    double result;\n\n    ret = method(ts, 0, &sample_set_sizes, samples, 0, NULL, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);\n\n    samples[0] = TSK_NULL;\n    ret = method(ts, 1, &sample_set_sizes, samples, 0, NULL, 0, &result);\n    
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    samples[0] = -10;\n    ret = method(ts, 1, &sample_set_sizes, samples, 0, NULL, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    samples[0] = num_nodes;\n    ret = method(ts, 1, &sample_set_sizes, samples, 0, NULL, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    samples[0] = num_nodes + 1;\n    ret = method(ts, 1, &sample_set_sizes, samples, 0, NULL, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    samples[0] = num_nodes - 1;\n    ret = method(ts, 1, &sample_set_sizes, samples, 0, NULL, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLES);\n\n    samples[0] = 1;\n    ret = method(ts, 1, &sample_set_sizes, samples, 0, NULL, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);\n\n    samples[0] = 0;\n    sample_set_sizes = 0;\n    ret = method(ts, 1, &sample_set_sizes, samples, 0, NULL, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EMPTY_SAMPLE_SET);\n\n    sample_set_sizes = 4;\n    /* Window errors */\n    ret = method(ts, 1, &sample_set_sizes, samples, 0, windows, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NUM_WINDOWS);\n\n    ret = method(ts, 1, &sample_set_sizes, samples, 2, windows, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);\n}\n\n// Temporary definition for time_windows in tsk_treeseq_allele_frequency_spectrum\ntypedef int one_way_sample_stat_method_tw(const tsk_treeseq_t *self,\n    tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,\n    const tsk_id_t *sample_sets, tsk_size_t num_windows, const double *windows,\n    tsk_size_t num_time_windows, const double *time_windows, tsk_flags_t options,\n    double *result);\n\n// Temporary duplicate for time-windows-having methods\nstatic void\nverify_one_way_stat_func_errors_tw(\n    tsk_treeseq_t *ts, one_way_sample_stat_method_tw *method)\n{\n    int ret;\n    tsk_id_t 
num_nodes = (tsk_id_t) tsk_treeseq_get_num_nodes(ts);\n    tsk_id_t samples[] = { 0, 1, 2, 3 };\n    tsk_size_t sample_set_sizes = 4;\n    double windows[] = { 0, 0, 0 };\n    double time_windows[] = { -1, 0.5, INFINITY };\n    double result;\n\n    ret = method(ts, 0, &sample_set_sizes, samples, 0, NULL, 0, NULL, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);\n\n    samples[0] = TSK_NULL;\n    ret = method(ts, 1, &sample_set_sizes, samples, 0, NULL, 0, NULL, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    samples[0] = -10;\n    ret = method(ts, 1, &sample_set_sizes, samples, 0, NULL, 0, NULL, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    samples[0] = num_nodes;\n    ret = method(ts, 1, &sample_set_sizes, samples, 0, NULL, 0, NULL, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    samples[0] = num_nodes + 1;\n    ret = method(ts, 1, &sample_set_sizes, samples, 0, NULL, 0, NULL, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    samples[0] = num_nodes - 1;\n    ret = method(ts, 1, &sample_set_sizes, samples, 0, NULL, 0, NULL, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLES);\n\n    samples[0] = 1;\n    ret = method(ts, 1, &sample_set_sizes, samples, 0, NULL, 0, NULL, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);\n\n    samples[0] = 0;\n    sample_set_sizes = 0;\n    ret = method(ts, 1, &sample_set_sizes, samples, 0, NULL, 0, NULL, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EMPTY_SAMPLE_SET);\n\n    sample_set_sizes = 4;\n    /* Window errors */\n    ret = method(ts, 1, &sample_set_sizes, samples, 0, windows, 0, NULL, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NUM_WINDOWS);\n\n    ret = method(ts, 1, &sample_set_sizes, samples, 2, windows, 0, NULL, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);\n\n    /* Time window errors */\n  
  ret = method(\n        ts, 1, &sample_set_sizes, samples, 0, NULL, 0, time_windows, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TIME_WINDOWS_DIM);\n\n    ret = method(\n        ts, 1, &sample_set_sizes, samples, 0, NULL, 2, time_windows, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TIME_WINDOWS);\n\n    time_windows[0] = 0.1;\n    ret = method(\n        ts, 1, &sample_set_sizes, samples, 0, NULL, 2, time_windows, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TIME_WINDOWS);\n\n    time_windows[0] = 0;\n    time_windows[1] = 0;\n    ret = method(\n        ts, 1, &sample_set_sizes, samples, 0, NULL, 2, time_windows, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TIME_WINDOWS);\n}\n\nstatic void\nverify_two_way_stat_func_errors(\n    tsk_treeseq_t *ts, general_sample_stat_method *method, tsk_flags_t options)\n{\n    int ret;\n    tsk_id_t samples[] = { 0, 1, 2, 3 };\n    tsk_size_t sample_set_sizes[] = { 2, 2 };\n    tsk_id_t set_indexes[] = { 0, 1 };\n    double result;\n\n    ret = method(ts, 0, sample_set_sizes, samples, 1, set_indexes, 0, NULL,\n        options | TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);\n\n    ret = method(ts, 2, sample_set_sizes, samples, 0, set_indexes, 0, NULL,\n        options | TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_INDEX_TUPLES);\n\n    set_indexes[0] = -1;\n    ret = method(ts, 2, sample_set_sizes, samples, 1, set_indexes, 0, NULL,\n        options | TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLE_SET_INDEX);\n    set_indexes[0] = 0;\n    set_indexes[1] = 2;\n    ret = method(ts, 2, sample_set_sizes, samples, 1, set_indexes, 0, NULL,\n        options | TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLE_SET_INDEX);\n}\n\nstatic void\nverify_two_way_weighted_stat_func_errors(\n    tsk_treeseq_t *ts, two_way_weighted_method *method, tsk_flags_t 
options)\n{\n    int ret;\n    tsk_id_t indexes[] = { 0, 0, 0, 1 };\n    double bad_windows[] = { -1, -1 };\n    double weights[10];\n    double result[10];\n\n    memset(weights, 0, sizeof(weights));\n\n    ret = method(ts, 2, weights, 2, indexes, 0, NULL, result, options);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = method(ts, 2, weights, 2, indexes, 0, NULL, result,\n        options | TSK_STAT_SITE | TSK_STAT_NODE);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MULTIPLE_STAT_MODES);\n\n    ret = method(ts, 0, weights, 2, indexes, 0, NULL, result, options);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_WEIGHTS);\n\n    ret = method(ts, 2, weights, 2, indexes, 1, bad_windows, result, options);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);\n}\n\nstatic void\nverify_three_way_stat_func_errors(tsk_treeseq_t *ts, general_sample_stat_method *method)\n{\n    int ret;\n    tsk_id_t samples[] = { 0, 1, 2, 3 };\n    tsk_size_t sample_set_sizes[] = { 1, 1, 2 };\n    tsk_id_t set_indexes[] = { 0, 1, 2 };\n    double result;\n\n    ret = method(ts, 0, sample_set_sizes, samples, 1, set_indexes, 0, NULL,\n        TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);\n\n    ret = method(ts, 3, sample_set_sizes, samples, 0, set_indexes, 0, NULL,\n        TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_INDEX_TUPLES);\n\n    set_indexes[0] = -1;\n    ret = method(ts, 3, sample_set_sizes, samples, 1, set_indexes, 0, NULL,\n        TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLE_SET_INDEX);\n    set_indexes[0] = 0;\n    set_indexes[1] = 3;\n    ret = method(ts, 3, sample_set_sizes, samples, 1, set_indexes, 0, NULL,\n        TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLE_SET_INDEX);\n}\n\nstatic void\nverify_four_way_stat_func_errors(tsk_treeseq_t *ts, general_sample_stat_method *method)\n{\n    int ret;\n    tsk_id_t samples[] = { 0, 1, 2, 3 };\n  
  tsk_size_t sample_set_sizes[] = { 1, 1, 1, 1 };\n    tsk_id_t set_indexes[] = { 0, 1, 2, 3 };\n    double result;\n\n    ret = method(ts, 0, sample_set_sizes, samples, 1, set_indexes, 0, NULL,\n        TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);\n\n    ret = method(ts, 4, sample_set_sizes, samples, 0, set_indexes, 0, NULL,\n        TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_INDEX_TUPLES);\n\n    set_indexes[0] = -1;\n    ret = method(ts, 4, sample_set_sizes, samples, 1, set_indexes, 0, NULL,\n        TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLE_SET_INDEX);\n    set_indexes[0] = 0;\n    set_indexes[1] = 4;\n    ret = method(ts, 4, sample_set_sizes, samples, 1, set_indexes, 0, NULL,\n        TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLE_SET_INDEX);\n}\n\nstatic int\ngeneral_stat_identity(\n    tsk_size_t K, const double *restrict X, tsk_size_t M, double *Y, void *params)\n{\n    tsk_size_t k;\n    CU_ASSERT_FATAL(M == K);\n    CU_ASSERT_FATAL(params == NULL);\n\n    for (k = 0; k < K; k++) {\n        Y[k] = X[k];\n    }\n    return 0;\n}\n\nstatic void\nverify_branch_general_stat_identity(tsk_treeseq_t *ts)\n{\n    CU_ASSERT_FATAL(ts != NULL);\n\n    int ret;\n    tsk_size_t num_samples = tsk_treeseq_get_num_samples(ts);\n    double *W = tsk_malloc(num_samples * sizeof(double));\n    tsk_id_t *nodes = tsk_malloc(tsk_treeseq_get_num_nodes(ts) * sizeof(*nodes));\n    tsk_id_t u;\n    tsk_size_t num_nodes;\n    double s, branch_length;\n    double *sigma = tsk_malloc(tsk_treeseq_get_num_trees(ts) * sizeof(*sigma));\n    tsk_tree_t tree;\n    tsk_size_t j;\n    CU_ASSERT_FATAL(W != NULL);\n    CU_ASSERT_FATAL(nodes != NULL);\n\n    for (j = 0; j < num_samples; j++) {\n        W[j] = 1;\n    }\n\n    ret = tsk_treeseq_general_stat(ts, 1, W, 1, general_stat_identity, NULL,\n        tsk_treeseq_get_num_trees(ts), 
tsk_treeseq_get_breakpoints(ts),\n        TSK_STAT_BRANCH | TSK_STAT_POLARISED | TSK_STAT_SPAN_NORMALISE, sigma);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_tree_init(&tree, ts, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n\n    for (ret = tsk_tree_first(&tree); ret == TSK_TREE_OK; ret = tsk_tree_next(&tree)) {\n        ret = tsk_tree_preorder(&tree, nodes, &num_nodes);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n        s = 0;\n        for (j = 0; j < num_nodes; j++) {\n            u = nodes[j];\n            ret = tsk_tree_get_branch_length(&tree, u, &branch_length);\n            CU_ASSERT_EQUAL_FATAL(ret, 0);\n            s += branch_length * (double) tree.num_samples[u];\n        }\n        CU_ASSERT_DOUBLE_EQUAL_FATAL(sigma[tree.index], s, 1e-6);\n    }\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    free(nodes);\n    tsk_tree_free(&tree);\n    free(W);\n    free(sigma);\n}\n\nstatic int\ngeneral_stat_sum(\n    tsk_size_t K, const double *restrict X, tsk_size_t M, double *Y, void *params)\n{\n    tsk_size_t k, m;\n    double s = 0;\n    CU_ASSERT_FATAL(params == NULL);\n\n    s = 0;\n    for (k = 0; k < K; k++) {\n        s += X[k];\n    }\n    for (m = 0; m < M; m++) {\n        Y[m] = s;\n    }\n    return 0;\n}\n\nstatic void\nverify_general_stat_dims(\n    tsk_treeseq_t *ts, tsk_size_t K, tsk_size_t M, tsk_flags_t options)\n{\n    int ret;\n    tsk_size_t num_samples = tsk_treeseq_get_num_samples(ts);\n    double *W = tsk_malloc(K * num_samples * sizeof(double));\n    /* We need this much space for NODE mode; no harm for other modes. 
*/\n    double *sigma = tsk_calloc(tsk_treeseq_get_num_nodes(ts) * M, sizeof(double));\n    tsk_size_t j, k;\n    CU_ASSERT_FATAL(W != NULL);\n\n    for (j = 0; j < num_samples; j++) {\n        for (k = 0; k < K; k++) {\n            W[j * K + k] = 1;\n        }\n    }\n    ret = tsk_treeseq_general_stat(\n        ts, K, W, M, general_stat_sum, NULL, 0, NULL, options, sigma);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    free(W);\n    free(sigma);\n}\n\nstatic void\nverify_general_stat_windows(\n    tsk_treeseq_t *ts, tsk_size_t num_windows, tsk_flags_t options)\n{\n    int ret;\n    tsk_size_t num_samples = tsk_treeseq_get_num_samples(ts);\n    double *W = tsk_malloc(num_samples * sizeof(double));\n    tsk_size_t M = 5;\n    /* We need this much space for NODE mode; no harm for other modes. */\n    double *sigma\n        = tsk_calloc(M * tsk_treeseq_get_num_nodes(ts) * num_windows, sizeof(double));\n    double *windows = tsk_malloc((num_windows + 1) * sizeof(*windows));\n    double L = tsk_treeseq_get_sequence_length(ts);\n    tsk_size_t j;\n    CU_ASSERT_FATAL(W != NULL);\n    CU_ASSERT_FATAL(sigma != NULL);\n    CU_ASSERT_FATAL(windows != NULL);\n\n    for (j = 0; j < num_samples; j++) {\n        W[j] = 1;\n    }\n    windows[0] = 0;\n    windows[num_windows] = L;\n    for (j = 1; j < num_windows; j++) {\n        windows[j] = ((double) j) * L / (double) num_windows;\n    }\n    ret = tsk_treeseq_general_stat(\n        ts, 1, W, M, general_stat_sum, NULL, num_windows, windows, options, sigma);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    free(W);\n    free(sigma);\n    free(windows);\n}\n\nstatic void\nverify_default_general_stat(tsk_treeseq_t *ts)\n{\n    int ret;\n    tsk_size_t K = 2;\n    tsk_size_t M = 1;\n    tsk_size_t num_samples = tsk_treeseq_get_num_samples(ts);\n    double *W = tsk_malloc(K * num_samples * sizeof(double));\n    double sigma1, sigma2;\n    tsk_size_t j, k;\n    CU_ASSERT_FATAL(W != NULL);\n\n    for (j = 0; j < num_samples; j++) {\n        
for (k = 0; k < K; k++) {\n            W[j * K + k] = 1;\n        }\n    }\n    ret = tsk_treeseq_general_stat(\n        ts, K, W, M, general_stat_sum, NULL, 0, NULL, TSK_STAT_SITE, &sigma1);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_treeseq_general_stat(\n        ts, K, W, M, general_stat_sum, NULL, 0, NULL, 0, &sigma2);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(sigma1, sigma2);\n    free(W);\n}\n\nstatic void\nverify_general_stat(tsk_treeseq_t *ts, tsk_flags_t mode)\n{\n    CU_ASSERT_FATAL(ts != NULL);\n    verify_general_stat_dims(ts, 4, 2, mode);\n    verify_general_stat_dims(ts, 4, 2, mode | TSK_STAT_POLARISED);\n    verify_general_stat_dims(ts, 1, 20, mode);\n    verify_general_stat_dims(ts, 1, 20, mode | TSK_STAT_POLARISED);\n    verify_general_stat_dims(ts, 100, 1, mode);\n    verify_general_stat_dims(ts, 100, 1, mode | TSK_STAT_POLARISED);\n    verify_general_stat_dims(ts, 10, 12, mode);\n    verify_general_stat_dims(ts, 10, 12, mode | TSK_STAT_POLARISED);\n    verify_general_stat_windows(ts, 1, mode);\n    verify_general_stat_windows(ts, 1, mode | TSK_STAT_SPAN_NORMALISE);\n    verify_general_stat_windows(ts, 2, mode);\n    verify_general_stat_windows(ts, 2, mode | TSK_STAT_SPAN_NORMALISE);\n    verify_general_stat_windows(ts, 3, mode);\n    verify_general_stat_windows(ts, 3, mode | TSK_STAT_SPAN_NORMALISE);\n    verify_general_stat_windows(ts, 10, mode);\n    verify_general_stat_windows(ts, 10, mode | TSK_STAT_SPAN_NORMALISE);\n    verify_general_stat_windows(ts, 100, mode);\n    verify_general_stat_windows(ts, 100, mode | TSK_STAT_SPAN_NORMALISE);\n}\n\nstatic void\nverify_afs(tsk_treeseq_t *ts)\n{\n    int ret;\n    tsk_size_t n = tsk_treeseq_get_num_samples(ts);\n    tsk_size_t sample_set_sizes[2];\n    double time_windows[] = { 0, 1 };\n    const tsk_id_t *samples = tsk_treeseq_get_samples(ts);\n    double *result = tsk_malloc(n * n * sizeof(*result));\n\n    CU_ASSERT_FATAL(sample_set_sizes != NULL);\n\n    
sample_set_sizes[0] = n - 2;\n    sample_set_sizes[1] = 2;\n    ret = tsk_treeseq_allele_frequency_spectrum(\n        ts, 2, sample_set_sizes, samples, 0, NULL, 0, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_treeseq_allele_frequency_spectrum(\n        ts, 2, sample_set_sizes, samples, 0, NULL, 0, NULL, TSK_STAT_POLARISED, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_treeseq_allele_frequency_spectrum(ts, 2, sample_set_sizes, samples, 0,\n        NULL, 0, NULL, TSK_STAT_POLARISED | TSK_STAT_SPAN_NORMALISE, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_treeseq_allele_frequency_spectrum(ts, 2, sample_set_sizes, samples, 0,\n        NULL, 0, NULL, TSK_STAT_BRANCH | TSK_STAT_POLARISED | TSK_STAT_SPAN_NORMALISE,\n        result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_treeseq_allele_frequency_spectrum(ts, 2, sample_set_sizes, samples, 0,\n        NULL, 0, NULL, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_treeseq_allele_frequency_spectrum(ts, 2, sample_set_sizes, samples, 0,\n        NULL, 1, time_windows, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    free(result);\n}\n\nstatic void\ntest_general_stat_input_errors(void)\n{\n    tsk_treeseq_t ts;\n    double result;\n    double W;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);\n\n    /* Bad input dimensions */\n    ret = tsk_treeseq_general_stat(\n        &ts, 0, &W, 1, general_stat_sum, NULL, 0, NULL, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_STATE_DIMS);\n\n    ret = tsk_treeseq_general_stat(\n        &ts, 1, &W, 0, general_stat_sum, NULL, 0, NULL, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_RESULT_DIMS);\n\n    /* Multiple stats*/\n    ret = tsk_treeseq_general_stat(&ts, 1, &W, 1, 
general_stat_sum, NULL, 0, NULL,\n        TSK_STAT_SITE | TSK_STAT_BRANCH, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MULTIPLE_STAT_MODES);\n    ret = tsk_treeseq_general_stat(&ts, 1, &W, 1, general_stat_sum, NULL, 0, NULL,\n        TSK_STAT_SITE | TSK_STAT_NODE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MULTIPLE_STAT_MODES);\n    ret = tsk_treeseq_general_stat(&ts, 1, &W, 1, general_stat_sum, NULL, 0, NULL,\n        TSK_STAT_BRANCH | TSK_STAT_NODE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MULTIPLE_STAT_MODES);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_empty_ts_ld(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(\n        &ts, 1, single_tree_ex_nodes, \"\", NULL, NULL, NULL, NULL, NULL, 0);\n\n    verify_ld(&ts);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_empty_ts_mean_descendants(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(\n        &ts, 1, single_tree_ex_nodes, \"\", NULL, NULL, NULL, NULL, NULL, 0);\n    verify_mean_descendants(&ts);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_empty_ts_genealogical_nearest_neighbours(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(\n        &ts, 1, single_tree_ex_nodes, \"\", NULL, NULL, NULL, NULL, NULL, 0);\n    verify_genealogical_nearest_neighbours(&ts);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_empty_ts_general_stat(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(\n        &ts, 1, single_tree_ex_nodes, \"\", NULL, NULL, NULL, NULL, NULL, 0);\n    verify_branch_general_stat_identity(&ts);\n    verify_default_general_stat(&ts);\n    verify_general_stat(&ts, TSK_STAT_BRANCH);\n    verify_general_stat(&ts, TSK_STAT_SITE);\n    verify_general_stat(&ts, TSK_STAT_NODE);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_empty_ts_afs(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(\n        &ts, 1, single_tree_ex_nodes, \"\", NULL, NULL, NULL, NULL, NULL, 0);\n    verify_afs(&ts);\n    
tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_ld(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);\n    verify_ld(&ts);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_mean_descendants(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);\n    verify_mean_descendants(&ts);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_genealogical_nearest_neighbours(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);\n    verify_genealogical_nearest_neighbours(&ts);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_general_stat(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);\n    verify_branch_general_stat_identity(&ts);\n    verify_default_general_stat(&ts);\n    verify_general_stat(&ts, TSK_STAT_BRANCH);\n    verify_general_stat(&ts, TSK_STAT_SITE);\n    verify_general_stat(&ts, TSK_STAT_NODE);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_general_stat_errors(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);\n    verify_branch_general_stat_errors(&ts);\n    verify_site_general_stat_errors(&ts);\n    verify_node_general_stat_errors(&ts);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_divergence_matrix(void)\n{\n    tsk_treeseq_t ts;\n    int ret;\n    double result[16];\n    double D_branch[16] 
= { 0, 2, 6, 6, 2, 0, 6, 6, 6, 6, 0, 4, 6, 6, 4, 0 };\n    double D_site[16] = { 0, 1, 1, 0, 1, 0, 2, 1, 1, 2, 0, 1, 0, 1, 1, 0 };\n\n    tsk_size_t sample_set_sizes[] = { 2, 2 };\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);\n\n    ret = tsk_treeseq_divergence_matrix(\n        &ts, 0, NULL, NULL, 0, NULL, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(16, result, D_branch);\n\n    ret = tsk_treeseq_divergence_matrix(\n        &ts, 0, NULL, NULL, 0, NULL, TSK_STAT_SITE, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(16, result, D_site);\n\n    ret = tsk_treeseq_divergence_matrix(\n        &ts, 2, sample_set_sizes, NULL, 0, NULL, TSK_STAT_SITE, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_treeseq_divergence_matrix(\n        &ts, 2, sample_set_sizes, NULL, 0, NULL, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    sample_set_sizes[0] = 3;\n    sample_set_sizes[1] = 1;\n    ret = tsk_treeseq_divergence_matrix(\n        &ts, 2, sample_set_sizes, NULL, 0, NULL, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_treeseq_divergence_matrix(\n        &ts, 2, sample_set_sizes, NULL, 0, NULL, TSK_STAT_SITE, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* assert_arrays_almost_equal(4, result, D_site); */\n\n    verify_divergence_matrix(&ts, TSK_STAT_BRANCH);\n    verify_divergence_matrix(&ts, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE);\n    verify_divergence_matrix(&ts, TSK_STAT_SITE);\n    verify_divergence_matrix(&ts, TSK_STAT_SITE | TSK_STAT_SPAN_NORMALISE);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_divergence_matrix_internal_samples(void)\n{\n    tsk_treeseq_t ts;\n    int ret;\n    double *result = malloc(16 * sizeof(double));\n    double D[16] = { 0, 2, 4, 3, 2, 0, 4, 3, 4, 4, 0, 1, 3, 3, 1, 
0 };\n\n    const char *nodes = \"1  0   -1   -1\\n\" /* 2.00┊    6    ┊ */\n                        \"1  0   -1   -1\\n\" /*     ┊  ┏━┻━┓  ┊ */\n                        \"1  0   -1   -1\\n\" /* 1.00┊  4   5* ┊ */\n                        \"0  0   -1   -1\\n\" /*     ┊ ┏┻┓ ┏┻┓ ┊ */\n                        \"0  1   -1   -1\\n\" /* 0.00┊ 0 1 2 3 ┊ */\n                        \"1  1   -1   -1\\n\" /*     0 * * *   1 */\n                        \"0  2   -1   -1\\n\";\n    const char *edges = \"0  1   4   0,1\\n\"\n                        \"0  1   5   2,3\\n\"\n                        \"0  1   6   4,5\\n\";\n    /* One mutation per branch so we get the same as the branch length value */\n    const char *sites = \"0.1  A\\n\"\n                        \"0.2  A\\n\"\n                        \"0.3  A\\n\"\n                        \"0.4  A\\n\"\n                        \"0.5  A\\n\"\n                        \"0.6  A\\n\";\n    const char *mutations = \"0  0  T  -1\\n\"\n                            \"1  1  T  -1\\n\"\n                            \"2  2  T  -1\\n\"\n                            \"3  3  T  -1\\n\"\n                            \"4  4  T  -1\\n\"\n                            \"5  5  T  -1\\n\";\n    tsk_id_t samples[] = { 0, 1, 2, 5 };\n    tsk_size_t sizes[] = { 1, 1, 1, 1 };\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);\n\n    ret = tsk_treeseq_divergence_matrix(\n        &ts, 0, NULL, NULL, 0, NULL, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(16, result, D);\n    ret = tsk_treeseq_divergence_matrix(\n        &ts, 0, NULL, NULL, 0, NULL, TSK_STAT_SITE, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(16, result, D);\n\n    ret = tsk_treeseq_divergence_matrix(\n        &ts, 4, sizes, samples, 0, NULL, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(16, result, D);\n    ret = 
tsk_treeseq_divergence_matrix(\n        &ts, 4, sizes, samples, 0, NULL, TSK_STAT_SITE, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(16, result, D);\n\n    ret = tsk_treeseq_divergence_matrix(\n        &ts, 4, NULL, samples, 0, NULL, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(16, result, D);\n    ret = tsk_treeseq_divergence_matrix(\n        &ts, 4, NULL, samples, 0, NULL, TSK_STAT_SITE, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(16, result, D);\n\n    verify_divergence_matrix(&ts, TSK_STAT_BRANCH);\n    verify_divergence_matrix(&ts, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE);\n    verify_divergence_matrix(&ts, TSK_STAT_SITE);\n    verify_divergence_matrix(&ts, TSK_STAT_SITE | TSK_STAT_SPAN_NORMALISE);\n\n    tsk_treeseq_free(&ts);\n    free(result);\n}\n\nstatic void\ntest_single_tree_divergence_matrix_multi_root(void)\n{\n    tsk_treeseq_t ts;\n    int ret;\n    double result[16];\n    double D_branch[16] = { 0, 2, 3, 3, 2, 0, 3, 3, 3, 3, 0, 4, 3, 3, 4, 0 };\n\n    const char *nodes = \"1  0   -1   -1\\n\"\n                        \"1  0   -1   -1\\n\"  /* 2.00┊      5  ┊ */\n                        \"1  0   -1   -1\\n\"  /* 1.00┊  4      ┊ */\n                        \"1  0   -1   -1\\n\"  /*     ┊ ┏┻┓ ┏┻┓ ┊ */\n                        \"0  1   -1   -1\\n\"  /* 0.00┊ 0 1 2 3 ┊ */\n                        \"0  2   -1   -1\\n\"; /*     0 * * * * 1 */\n    const char *edges = \"0  1   4   0,1\\n\"\n                        \"0  1   5   2,3\\n\";\n    /* Two mutations per branch */\n    const char *sites = \"0.1  A\\n\"\n                        \"0.2  A\\n\"\n                        \"0.3  A\\n\"\n                        \"0.4  A\\n\";\n    const char *mutations = \"0  0  B  -1\\n\"\n                            \"0  0  C  0\\n\"\n                            \"1  1  B  -1\\n\"\n                            \"1  1  C  2\\n\"\n                            
\"2  2  B  -1\\n\"\n                            \"2  2  C  4\\n\"\n                            \"2  2  D  5\\n\"\n                            \"2  2  E  6\\n\"\n                            \"3  3  B  -1\\n\"\n                            \"3  3  C  8\\n\"\n                            \"3  3  D  9\\n\"\n                            \"3  3  E  10\\n\";\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);\n\n    ret = tsk_treeseq_divergence_matrix(\n        &ts, 0, NULL, NULL, 0, NULL, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(16, result, D_branch);\n\n    verify_divergence_matrix(&ts, TSK_STAT_BRANCH);\n    verify_divergence_matrix(&ts, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE);\n    verify_divergence_matrix(&ts, TSK_STAT_SITE);\n    verify_divergence_matrix(&ts, TSK_STAT_SITE | TSK_STAT_SPAN_NORMALISE);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_ld(void)\n{\n    tsk_treeseq_t ts;\n    tsk_ld_calc_t ld_calc;\n    double r2[3];\n    tsk_size_t num_r2_values;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    verify_ld(&ts);\n\n    /* Check early exit corner cases */\n    ret = tsk_ld_calc_init(&ld_calc, &ts);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_ld_calc_get_r2_array(\n        &ld_calc, 0, TSK_DIR_FORWARD, 1, DBL_MAX, r2, &num_r2_values);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(num_r2_values, 1);\n\n    ret = tsk_ld_calc_get_r2_array(\n        &ld_calc, 2, TSK_DIR_REVERSE, 1, DBL_MAX, r2, &num_r2_values);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(num_r2_values, 1);\n\n    tsk_ld_calc_free(&ld_calc);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_mean_descendants(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, 
paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    verify_mean_descendants(&ts);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_genealogical_nearest_neighbours(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    verify_genealogical_nearest_neighbours(&ts);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_general_stat(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    verify_branch_general_stat_identity(&ts);\n    verify_default_general_stat(&ts);\n    verify_general_stat(&ts, TSK_STAT_BRANCH);\n    verify_general_stat(&ts, TSK_STAT_SITE);\n    verify_general_stat(&ts, TSK_STAT_NODE);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_general_stat_errors(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    verify_branch_general_stat_errors(&ts);\n    verify_site_general_stat_errors(&ts);\n    verify_node_general_stat_errors(&ts);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_diversity_errors(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    verify_one_way_stat_func_errors(&ts, tsk_treeseq_diversity);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_diversity(void)\n{\n    tsk_treeseq_t ts;\n    tsk_id_t samples[] = { 0, 1, 2, 3 };\n    tsk_size_t sample_set_sizes = 4;\n    double pi;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, 
paper_ex_individuals, NULL, 0);\n\n    ret = tsk_treeseq_diversity(\n        &ts, 1, &sample_set_sizes, samples, 0, NULL, TSK_STAT_SITE, &pi);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_DOUBLE_EQUAL_FATAL(pi, 1.5, 1e-6);\n\n    /* A sample set size of 1 leads to NaN */\n    sample_set_sizes = 1;\n    ret = tsk_treeseq_diversity(\n        &ts, 1, &sample_set_sizes, samples, 0, NULL, TSK_STAT_SITE, &pi);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT(tsk_isnan(pi));\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_trait_covariance_errors(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    verify_one_way_weighted_func_errors(&ts, tsk_treeseq_trait_covariance);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_trait_covariance(void)\n{\n    tsk_treeseq_t ts;\n    double result;\n    double *weights;\n    tsk_size_t j;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    weights = tsk_malloc(4 * sizeof(double));\n    weights[0] = weights[1] = 0.0;\n    weights[2] = weights[3] = 1.0;\n\n    ret = tsk_treeseq_trait_covariance(&ts, 1, weights, 0, NULL, TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_DOUBLE_EQUAL_FATAL(result, 1.0 / 12.0, 1e-6);\n\n    /* weights of 0 leads to 0 */\n    for (j = 0; j < 4; j++) {\n        weights[j] = 0.0;\n    }\n    ret = tsk_treeseq_trait_covariance(&ts, 1, weights, 0, NULL, TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_DOUBLE_EQUAL_FATAL(result, 0.0, 1e-6);\n\n    tsk_treeseq_free(&ts);\n    free(weights);\n}\n\nstatic void\ntest_paper_ex_trait_correlation_errors(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n       
 paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    verify_one_way_weighted_func_errors(&ts, tsk_treeseq_trait_correlation);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_trait_correlation(void)\n{\n    tsk_treeseq_t ts;\n    double result;\n    double *weights;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    weights = tsk_malloc(4 * sizeof(double));\n    weights[0] = weights[1] = 0.0;\n    weights[2] = weights[3] = 1.0;\n\n    ret = tsk_treeseq_trait_correlation(\n        &ts, 1, weights, 0, NULL, TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_DOUBLE_EQUAL_FATAL(result, 1.0, 1e-6);\n\n    tsk_treeseq_free(&ts);\n    free(weights);\n}\n\nstatic void\ntest_paper_ex_trait_linear_model_errors(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    verify_one_way_weighted_covariate_func_errors(&ts, tsk_treeseq_trait_linear_model);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_trait_linear_model(void)\n{\n    tsk_treeseq_t ts;\n    double result;\n    double *weights;\n    double *covariates;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    weights = tsk_malloc(4 * sizeof(double));\n    covariates = tsk_malloc(8 * sizeof(double));\n    weights[0] = weights[1] = 0.0;\n    weights[2] = weights[3] = 1.0;\n    covariates[0] = covariates[1] = 0.0;\n    covariates[2] = covariates[3] = 1.0;\n    covariates[4] = covariates[6] = 0.0;\n    covariates[5] = covariates[7] = 1.0;\n\n    ret = tsk_treeseq_trait_linear_model(\n        &ts, 1, weights, 2, covariates, 0, NULL, TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    
CU_ASSERT_DOUBLE_EQUAL_FATAL(result, 0.0, 1e-6);\n\n    tsk_treeseq_free(&ts);\n    free(weights);\n    free(covariates);\n}\n\nstatic void\ntest_paper_ex_segregating_sites_errors(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    verify_one_way_stat_func_errors(&ts, tsk_treeseq_segregating_sites);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_segregating_sites(void)\n{\n    tsk_treeseq_t ts;\n    tsk_id_t samples[] = { 0, 1, 2, 3 };\n    tsk_size_t sample_set_sizes = 4;\n    double segsites;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n\n    ret = tsk_treeseq_segregating_sites(\n        &ts, 1, &sample_set_sizes, samples, 0, NULL, TSK_STAT_SITE, &segsites);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_DOUBLE_EQUAL_FATAL(segsites, 3.0, 1e-6);\n\n    /* A sample set size of 1 leads to 0 */\n    sample_set_sizes = 1;\n    ret = tsk_treeseq_segregating_sites(\n        &ts, 1, &sample_set_sizes, samples, 0, NULL, TSK_STAT_SITE, &segsites);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_DOUBLE_EQUAL_FATAL(segsites, 0.0, 1e-6);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_Y1_errors(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    verify_one_way_stat_func_errors(&ts, tsk_treeseq_Y1);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_Y1(void)\n{\n    tsk_treeseq_t ts;\n    tsk_id_t samples[] = { 0, 1, 2, 3 };\n    tsk_size_t sample_set_sizes = 4;\n    double result;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, 
NULL, 0);\n\n    ret = tsk_treeseq_Y1(&ts, 1, &sample_set_sizes, samples, 0, NULL, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* A sample set size of < 2 leads to NaN */\n    sample_set_sizes = 1;\n    ret = tsk_treeseq_Y1(&ts, 1, &sample_set_sizes, samples, 0, NULL, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT(tsk_isnan(result));\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_divergence_errors(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    verify_two_way_stat_func_errors(&ts, tsk_treeseq_divergence, 0);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_divergence(void)\n{\n    tsk_treeseq_t ts;\n    tsk_id_t samples[] = { 0, 1, 2, 3 };\n    tsk_size_t sample_set_sizes[] = { 2, 2 };\n    tsk_id_t set_indexes[] = { 0, 1 };\n    double result;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n\n    ret = tsk_treeseq_divergence(&ts, 2, sample_set_sizes, samples, 1, set_indexes, 0,\n        NULL, TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* sample_set[0] size = 1 with indexes = (0, 0) leads to NaN */\n    sample_set_sizes[0] = 1;\n    set_indexes[1] = 0;\n    ret = tsk_treeseq_divergence(&ts, 2, sample_set_sizes, samples, 1, set_indexes, 0,\n        NULL, TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT(tsk_isnan(result));\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_genetic_relatedness(void)\n{\n    tsk_treeseq_t ts;\n    tsk_id_t samples[] = { 0, 1, 2, 3 };\n    tsk_size_t sample_set_sizes[] = { 2, 2 };\n    tsk_id_t set_indexes[] = { 0, 0 };\n    double result;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        
paper_ex_mutations, paper_ex_individuals, NULL, 0);\n\n    ret = tsk_treeseq_genetic_relatedness(&ts, 2, sample_set_sizes, samples, 1,\n        set_indexes, 0, NULL, TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_treeseq_genetic_relatedness(&ts, 2, sample_set_sizes, samples, 1,\n        set_indexes, 0, NULL, TSK_STAT_SITE | TSK_STAT_NONCENTRED, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_genetic_relatedness_errors(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    verify_two_way_stat_func_errors(&ts, tsk_treeseq_genetic_relatedness, 0);\n    verify_two_way_stat_func_errors(\n        &ts, tsk_treeseq_genetic_relatedness, TSK_STAT_NONCENTRED);\n    verify_two_way_stat_func_errors(\n        &ts, tsk_treeseq_genetic_relatedness, TSK_STAT_POLARISED);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_genetic_relatedness_weighted(void)\n{\n    tsk_treeseq_t ts;\n    double weights[] = { 1.2, 0.1, 0.0, 0.0, 3.4, 5.0, 1.0, -1.0 };\n    tsk_id_t indexes[] = { 0, 0, 0, 1 };\n    double result[100];\n    tsk_size_t num_weights;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n\n    for (num_weights = 1; num_weights < 3; num_weights++) {\n        ret = tsk_treeseq_genetic_relatedness_weighted(\n            &ts, num_weights, weights, 2, indexes, 0, NULL, result, TSK_STAT_SITE);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_treeseq_genetic_relatedness_weighted(\n            &ts, num_weights, weights, 2, indexes, 0, NULL, result, TSK_STAT_BRANCH);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_treeseq_genetic_relatedness_weighted(\n            &ts, num_weights, weights, 2, indexes, 0, NULL, result, 
TSK_STAT_NODE);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_treeseq_genetic_relatedness_weighted(&ts, num_weights, weights, 2,\n            indexes, 0, NULL, result, TSK_STAT_SITE | TSK_STAT_NONCENTRED);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_treeseq_genetic_relatedness_weighted(&ts, num_weights, weights, 2,\n            indexes, 0, NULL, result, TSK_STAT_BRANCH | TSK_STAT_NONCENTRED);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_treeseq_genetic_relatedness_weighted(&ts, num_weights, weights, 2,\n            indexes, 0, NULL, result, TSK_STAT_NODE | TSK_STAT_NONCENTRED);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n    }\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_genetic_relatedness_weighted_errors(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    verify_two_way_weighted_stat_func_errors(\n        &ts, tsk_treeseq_genetic_relatedness_weighted, 0);\n    verify_two_way_weighted_stat_func_errors(\n        &ts, tsk_treeseq_genetic_relatedness_weighted, TSK_STAT_NONCENTRED);\n    verify_two_way_weighted_stat_func_errors(\n        &ts, tsk_treeseq_genetic_relatedness_weighted, TSK_STAT_POLARISED);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_empty_genetic_relatedness_vector(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_size_t num_samples;\n    double *weights, *result, *result2;\n    tsk_size_t j;\n    tsk_size_t num_weights = 2;\n    double windows[] = { 0, 0 };\n\n    tsk_treeseq_from_text(\n        &ts, 1, single_tree_ex_nodes, \"\", NULL, NULL, NULL, NULL, NULL, 0);\n    num_samples = tsk_treeseq_get_num_samples(&ts);\n    windows[1] = tsk_treeseq_get_sequence_length(&ts);\n    weights = tsk_malloc(num_weights * num_samples * sizeof(double));\n    result = tsk_malloc(num_weights * num_samples * sizeof(double));\n    result2 = tsk_malloc(num_weights * 
num_samples * sizeof(double));\n    for (j = 0; j < num_samples; j++) {\n        weights[j] = 1.0;\n    }\n    for (j = 0; j < num_samples; j++) {\n        weights[j + num_samples] = (float) j;\n    }\n\n    ret = tsk_treeseq_genetic_relatedness_vector(\n        &ts, num_weights, weights, 1, windows, num_samples, ts.samples, result, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_treeseq_genetic_relatedness_vector(&ts, num_weights, weights, 1, windows,\n        num_samples, ts.samples, result, TSK_STAT_NONCENTRED);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    windows[0] = 0.5 * tsk_treeseq_get_sequence_length(&ts);\n    windows[1] = 0.75 * tsk_treeseq_get_sequence_length(&ts);\n    ret = tsk_treeseq_genetic_relatedness_vector(\n        &ts, num_weights, weights, 1, windows, num_samples, ts.samples, result, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_treeseq_genetic_relatedness_vector(&ts, num_weights, weights, 1, windows,\n        num_samples, ts.samples, result2, TSK_STAT_SPAN_NORMALISE);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (j = 0; j < num_samples * num_weights; j++) {\n        CU_ASSERT_EQUAL_FATAL(result[j] / (windows[1] - windows[0]), result2[j]);\n    }\n\n    tsk_treeseq_free(&ts);\n    free(weights);\n    free(result);\n    free(result2);\n}\n\nstatic void\nverify_genetic_relatedness_vector(\n    tsk_treeseq_t *ts, tsk_size_t num_weights, tsk_size_t num_windows)\n{\n    int ret;\n    tsk_size_t num_samples;\n    double *weights, *result;\n    tsk_size_t j, k;\n    double *windows = tsk_malloc((num_windows + 1) * sizeof(*windows));\n    double L = tsk_treeseq_get_sequence_length(ts);\n\n    windows[0] = 0;\n    windows[num_windows] = L;\n    for (j = 1; j < num_windows; j++) {\n        windows[j] = ((double) j) * L / (double) num_windows;\n    }\n    num_samples = tsk_treeseq_get_num_samples(ts);\n\n    weights = tsk_malloc(num_weights * num_samples * sizeof(*weights));\n    result = tsk_malloc(num_windows * num_weights * 
num_samples * sizeof(*result));\n    for (j = 0; j < num_samples; j++) {\n        for (k = 0; k < num_weights; k++) {\n            weights[j + k * num_samples] = 1.0 + (double) k;\n        }\n    }\n\n    ret = tsk_treeseq_genetic_relatedness_vector(ts, num_weights, weights, num_windows,\n        windows, num_samples, ts->samples, result, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    windows[0] = windows[1] / 2;\n    if (num_windows > 1) {\n        windows[num_windows - 1]\n            = windows[num_windows - 2] + (L / (double) (2 * num_windows));\n    }\n    ret = tsk_treeseq_genetic_relatedness_vector(ts, num_weights, weights, num_windows,\n        windows, num_samples, ts->samples, result, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_treeseq_genetic_relatedness_vector(ts, num_weights, weights, num_windows,\n        windows, num_samples, ts->samples, result, TSK_STAT_NONCENTRED);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_set_debug_stream(_devnull);\n    ret = tsk_treeseq_genetic_relatedness_vector(ts, num_weights, weights, num_windows,\n        windows, num_samples, ts->samples, result, TSK_DEBUG);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_set_debug_stream(stdout);\n\n    free(windows);\n    free(weights);\n    free(result);\n}\n\nstatic void\ntest_paper_ex_genetic_relatedness_vector(void)\n{\n    tsk_treeseq_t ts;\n    double gap;\n\n    for (gap = 0.0; gap < 2.0; gap += 1.0) {\n        tsk_treeseq_from_text(&ts, 10 + gap, paper_ex_nodes, paper_ex_edges, NULL,\n            paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);\n\n        tsk_size_t j, k;\n        for (j = 1; j < 3; j++) {\n            for (k = 1; k < 3; k++) {\n                verify_genetic_relatedness_vector(&ts, j, k);\n            }\n        }\n        tsk_treeseq_free(&ts);\n    }\n}\n\nstatic void\ntest_paper_ex_genetic_relatedness_vector_errors(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_size_t num_samples;\n    double *weights, *result;\n    
tsk_size_t j;\n    tsk_size_t num_windows = 2;\n    tsk_size_t num_weights = 2;\n    double windows[] = { 0, 0, 0 };\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    num_samples = tsk_treeseq_get_num_samples(&ts);\n\n    weights = tsk_malloc(num_weights * num_samples * sizeof(double));\n    result = tsk_malloc(num_windows * num_weights * num_samples * sizeof(double));\n    for (j = 0; j < num_samples; j++) {\n        weights[j] = 1.0;\n    }\n    for (j = 0; j < num_samples; j++) {\n        weights[j + num_samples] = (float) j;\n    }\n\n    /* Window errors */\n    ret = tsk_treeseq_genetic_relatedness_vector(\n        &ts, 1, weights, 0, windows, num_samples, ts.samples, result, TSK_STAT_BRANCH);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NUM_WINDOWS);\n    ret = tsk_treeseq_genetic_relatedness_vector(\n        &ts, 1, weights, 0, NULL, num_samples, ts.samples, result, TSK_STAT_BRANCH);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NUM_WINDOWS);\n\n    ret = tsk_treeseq_genetic_relatedness_vector(\n        &ts, 1, weights, 2, windows, num_samples, ts.samples, result, TSK_STAT_BRANCH);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);\n\n    windows[0] = -1;\n    ret = tsk_treeseq_genetic_relatedness_vector(\n        &ts, 1, weights, 2, windows, num_samples, ts.samples, result, TSK_STAT_BRANCH);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);\n\n    windows[0] = 12;\n    ret = tsk_treeseq_genetic_relatedness_vector(\n        &ts, 1, weights, 2, windows, num_samples, ts.samples, result, TSK_STAT_BRANCH);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);\n\n    windows[0] = 0;\n    windows[2] = 12;\n    ret = tsk_treeseq_genetic_relatedness_vector(\n        &ts, 1, weights, 2, windows, num_samples, ts.samples, result, TSK_STAT_BRANCH);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);\n\n    /* unsupported mode errors */\n    windows[0] = 
0.0;\n    windows[1] = 5.0;\n    windows[2] = 10.0;\n    ret = tsk_treeseq_genetic_relatedness_vector(&ts, num_weights, weights, 2, windows,\n        num_samples, ts.samples, result, TSK_STAT_SITE);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);\n    ret = tsk_treeseq_genetic_relatedness_vector(&ts, num_weights, weights, 2, windows,\n        num_samples, ts.samples, result, TSK_STAT_NODE);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);\n\n    tsk_treeseq_free(&ts);\n    free(weights);\n    free(result);\n}\n\nstatic void\ntest_paper_ex_genetic_relatedness_vector_node_errors(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_size_t num_samples;\n    double *weights, *result;\n    tsk_size_t j;\n    tsk_size_t num_weights = 2;\n    tsk_size_t num_windows = 2;\n    double windows[] = { 1, 1.5, 2 };\n    tsk_size_t num_nodes = 3;\n    const tsk_id_t good_nodes[] = { 1, 0, 2 };\n    const tsk_id_t bad_nodes1[] = { 1, -1, 2 };\n    const tsk_id_t bad_nodes2[] = { 1, 100, 2 };\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    num_samples = tsk_treeseq_get_num_samples(&ts);\n\n    weights = tsk_malloc(num_weights * num_samples * sizeof(double));\n    result = tsk_malloc(num_windows * num_weights * num_nodes * sizeof(double));\n    for (j = 0; j < num_samples; j++) {\n        weights[j] = 1.0;\n    }\n    for (j = 0; j < num_samples; j++) {\n        weights[j + num_samples] = (float) j;\n    }\n\n    /* node errors */\n    ret = tsk_treeseq_genetic_relatedness_vector(&ts, num_weights, weights, 2, windows,\n        num_nodes, good_nodes, result, TSK_STAT_BRANCH);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_treeseq_genetic_relatedness_vector(&ts, num_weights, weights, 2, windows,\n        num_nodes, bad_nodes1, result, TSK_STAT_BRANCH);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    ret = 
tsk_treeseq_genetic_relatedness_vector(&ts, num_weights, weights, 2, windows,\n        num_nodes, bad_nodes2, result, TSK_STAT_BRANCH);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    tsk_treeseq_free(&ts);\n    free(weights);\n    free(result);\n}\n\nstatic void\ntest_paper_ex_Y2_errors(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    verify_two_way_stat_func_errors(&ts, tsk_treeseq_Y2, 0);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_Y2(void)\n{\n    tsk_treeseq_t ts;\n    tsk_id_t samples[] = { 0, 1, 2, 3 };\n    tsk_size_t sample_set_sizes[] = { 2, 2 };\n    tsk_id_t set_indexes[] = { 0, 1 };\n    double result;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n\n    ret = tsk_treeseq_Y2(&ts, 2, sample_set_sizes, samples, 1, set_indexes, 0, NULL,\n        TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* sample_set_size of 1 leads to NaN */\n    sample_set_sizes[1] = 1;\n    ret = tsk_treeseq_Y2(&ts, 2, sample_set_sizes, samples, 1, set_indexes, 0, NULL,\n        TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT(tsk_isnan(result));\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_f2_errors(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    verify_two_way_stat_func_errors(&ts, tsk_treeseq_f2, 0);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_f2(void)\n{\n    tsk_treeseq_t ts;\n    tsk_id_t samples[] = { 0, 1, 2, 3 };\n    tsk_size_t sample_set_sizes[] = { 2, 2 };\n    tsk_id_t set_indexes[] = { 0, 1 };\n    double result;\n    int ret;\n\n    
tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n\n    ret = tsk_treeseq_f2(&ts, 2, sample_set_sizes, samples, 1, set_indexes, 0, NULL,\n        TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* sample_set_size of 1 leads to NaN */\n    sample_set_sizes[0] = 1;\n    ret = tsk_treeseq_f2(&ts, 2, sample_set_sizes, samples, 1, set_indexes, 0, NULL,\n        TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT(tsk_isnan(result));\n\n    /* sample_set_size of 1 leads to NaN */\n    sample_set_sizes[0] = 2;\n    sample_set_sizes[1] = 1;\n    ret = tsk_treeseq_f2(&ts, 2, sample_set_sizes, samples, 1, set_indexes, 0, NULL,\n        TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT(tsk_isnan(result));\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_Y3_errors(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    verify_three_way_stat_func_errors(&ts, tsk_treeseq_Y3);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_Y3(void)\n{\n    tsk_treeseq_t ts;\n    tsk_id_t samples[] = { 0, 1, 2, 3 };\n    tsk_size_t sample_set_sizes[] = { 2, 1, 1 };\n    tsk_id_t set_indexes[] = { 0, 1, 2 };\n    double result;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n\n    ret = tsk_treeseq_Y3(&ts, 3, sample_set_sizes, samples, 1, set_indexes, 0, NULL,\n        TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_f3_errors(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, 
paper_ex_individuals, NULL, 0);\n    verify_three_way_stat_func_errors(&ts, tsk_treeseq_f3);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_f3(void)\n{\n    tsk_treeseq_t ts;\n    tsk_id_t samples[] = { 0, 1, 2, 3 };\n    tsk_size_t sample_set_sizes[] = { 2, 1, 1 };\n    tsk_id_t set_indexes[] = { 0, 1, 2 };\n    double result;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n\n    ret = tsk_treeseq_f3(&ts, 3, sample_set_sizes, samples, 1, set_indexes, 0, NULL,\n        TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* sample_set_size of 1 leads to NaN */\n    sample_set_sizes[0] = 1;\n    ret = tsk_treeseq_f3(&ts, 3, sample_set_sizes, samples, 1, set_indexes, 0, NULL,\n        TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT(tsk_isnan(result));\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_f4_errors(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    verify_four_way_stat_func_errors(&ts, tsk_treeseq_f4);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_f4(void)\n{\n    tsk_treeseq_t ts;\n    tsk_id_t samples[] = { 0, 1, 2, 3 };\n    tsk_size_t sample_set_sizes[] = { 1, 1, 1, 1 };\n    tsk_id_t set_indexes[] = { 0, 1, 2, 3 };\n    double result;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n\n    ret = tsk_treeseq_f4(&ts, 4, sample_set_sizes, samples, 1, set_indexes, 0, NULL,\n        TSK_STAT_SITE, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_afs_errors(void)\n{\n    tsk_treeseq_t ts;\n    tsk_size_t sample_set_sizes[] = { 2, 2 };\n    
tsk_id_t samples[] = { 0, 1, 2, 3 };\n    double result[10]; /* not thinking too hard about the actual value needed */\n    double time_windows[] = { 0, 1 };\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n\n    verify_one_way_stat_func_errors_tw(&ts, tsk_treeseq_allele_frequency_spectrum);\n\n    ret = tsk_treeseq_allele_frequency_spectrum(\n        &ts, 2, sample_set_sizes, samples, 0, NULL, 0, NULL, TSK_STAT_NODE, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);\n\n    ret = tsk_treeseq_allele_frequency_spectrum(&ts, 2, sample_set_sizes, samples, 0,\n        NULL, 0, NULL, TSK_STAT_BRANCH | TSK_STAT_SITE, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MULTIPLE_STAT_MODES);\n\n    ret = tsk_treeseq_allele_frequency_spectrum(&ts, 2, sample_set_sizes, samples, 0,\n        NULL, 1, time_windows, TSK_STAT_SITE, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_afs(void)\n{\n    tsk_treeseq_t ts;\n    tsk_id_t samples[] = { 0, 1, 2, 3 };\n    tsk_size_t sample_set_sizes[] = { 4, 0 };\n    double result[25];\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    /* we have two singletons and one tripleton */\n\n    ret = tsk_treeseq_allele_frequency_spectrum(\n        &ts, 1, sample_set_sizes, samples, 0, NULL, 0, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(result[0], 0);\n    CU_ASSERT_EQUAL_FATAL(result[1], 3.0);\n    CU_ASSERT_EQUAL_FATAL(result[2], 0);\n\n    ret = tsk_treeseq_allele_frequency_spectrum(\n        &ts, 1, sample_set_sizes, samples, 0, NULL, 0, NULL, TSK_STAT_POLARISED, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(result[0], 0);\n    
CU_ASSERT_EQUAL_FATAL(result[1], 2.0);\n    CU_ASSERT_EQUAL_FATAL(result[2], 0);\n    CU_ASSERT_EQUAL_FATAL(result[3], 1.0);\n    CU_ASSERT_EQUAL_FATAL(result[4], 0);\n\n    verify_afs(&ts);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_divergence_matrix(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n\n    verify_divergence_matrix(&ts, TSK_STAT_BRANCH);\n    verify_divergence_matrix(&ts, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE);\n    verify_divergence_matrix(&ts, TSK_STAT_SITE);\n    verify_divergence_matrix(&ts, TSK_STAT_SITE | TSK_STAT_SPAN_NORMALISE);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_unary_ex_afs(void)\n{\n    tsk_treeseq_t ts;\n    tsk_id_t samples[] = { 0, 2, 3 };\n    tsk_size_t sample_set_sizes[] = { 3, 0 };\n    double result[25];\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 100, unary_ex_nodes, unary_ex_edges, NULL, unary_ex_sites,\n        unary_ex_mutations, NULL, NULL, 0);\n    /* we have a singleton and a doubleton */\n\n    ret = tsk_treeseq_allele_frequency_spectrum(\n        &ts, 1, sample_set_sizes, samples, 0, NULL, 0, NULL, TSK_STAT_POLARISED, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(result[0], 0);\n    CU_ASSERT_EQUAL_FATAL(result[1], 1.0);\n    CU_ASSERT_EQUAL_FATAL(result[2], 1.0);\n    CU_ASSERT_EQUAL_FATAL(result[3], 0.0);\n\n    ret = tsk_treeseq_allele_frequency_spectrum(&ts, 1, sample_set_sizes, samples, 0,\n        NULL, 0, NULL, TSK_STAT_BRANCH | TSK_STAT_POLARISED, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE_FATAL(result[0] > 0);\n    CU_ASSERT_TRUE_FATAL(result[1] > 0);\n    CU_ASSERT_TRUE_FATAL(result[2] > 0);\n    CU_ASSERT_EQUAL_FATAL(result[3], 0.0);\n\n    verify_afs(&ts);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_nonbinary_ex_ld(void)\n{\n    tsk_treeseq_t ts;\n\n    
tsk_treeseq_from_text(&ts, 100, nonbinary_ex_nodes, nonbinary_ex_edges, NULL,\n        nonbinary_ex_sites, nonbinary_ex_mutations, NULL, NULL, 0);\n    verify_ld(&ts);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_nonbinary_ex_mean_descendants(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 100, nonbinary_ex_nodes, nonbinary_ex_edges, NULL,\n        nonbinary_ex_sites, nonbinary_ex_mutations, NULL, NULL, 0);\n    verify_mean_descendants(&ts);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_nonbinary_ex_genealogical_nearest_neighbours(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 100, nonbinary_ex_nodes, nonbinary_ex_edges, NULL,\n        nonbinary_ex_sites, nonbinary_ex_mutations, NULL, NULL, 0);\n    verify_genealogical_nearest_neighbours(&ts);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_nonbinary_ex_general_stat(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 100, nonbinary_ex_nodes, nonbinary_ex_edges, NULL,\n        nonbinary_ex_sites, nonbinary_ex_mutations, NULL, NULL, 0);\n    verify_branch_general_stat_identity(&ts);\n    verify_default_general_stat(&ts);\n    verify_general_stat(&ts, TSK_STAT_BRANCH);\n    verify_general_stat(&ts, TSK_STAT_SITE);\n    verify_general_stat(&ts, TSK_STAT_NODE);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_nonbinary_ex_general_stat_errors(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 100, nonbinary_ex_nodes, nonbinary_ex_edges, NULL,\n        nonbinary_ex_sites, nonbinary_ex_mutations, NULL, NULL, 0);\n    verify_branch_general_stat_errors(&ts);\n    verify_site_general_stat_errors(&ts);\n    verify_node_general_stat_errors(&ts);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_caterpillar_tree_ld(void)\n{\n    tsk_treeseq_t *ts = caterpillar_tree(50, 20, 1);\n    tsk_ld_calc_t ld_calc;\n    double r2[20];\n    tsk_size_t num_r2_values;\n    int ret = tsk_ld_calc_init(&ld_calc, ts);\n\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n  
  verify_ld(ts);\n\n    ret = tsk_ld_calc_get_r2_array(\n        &ld_calc, 0, TSK_DIR_FORWARD, 5, DBL_MAX, r2, &num_r2_values);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(num_r2_values, 5);\n\n    ret = tsk_ld_calc_get_r2_array(\n        &ld_calc, 10, TSK_DIR_REVERSE, 5, DBL_MAX, r2, &num_r2_values);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(num_r2_values, 5);\n\n    tsk_ld_calc_free(&ld_calc);\n    tsk_treeseq_free(ts);\n    free(ts);\n}\n\nstatic void\ntest_ld_multi_mutations(void)\n{\n    tsk_treeseq_t *ts = caterpillar_tree(4, 2, 2);\n    tsk_ld_calc_t ld_calc;\n    double r2;\n    int ret = tsk_ld_calc_init(&ld_calc, ts);\n\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_ld_calc_get_r2(&ld_calc, 0, 1, &r2);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_ONLY_INFINITE_SITES);\n\n    tsk_ld_calc_free(&ld_calc);\n    tsk_treeseq_free(ts);\n    free(ts);\n}\n\nstatic void\ntest_ld_silent_mutations(void)\n{\n    tsk_treeseq_t *base_ts = caterpillar_tree(4, 2, 1);\n    tsk_table_collection_t tables;\n    tsk_treeseq_t ts;\n    tsk_ld_calc_t ld_calc;\n    double r2;\n    int ret = tsk_table_collection_copy(base_ts->tables, &tables, 0);\n\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.mutations.derived_state[1] = '0';\n\n    ret = tsk_treeseq_init(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_ld_calc_init(&ld_calc, &ts);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_ld_calc_get_r2(&ld_calc, 0, 1, &r2);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SILENT_MUTATIONS_NOT_SUPPORTED);\n    tsk_ld_calc_free(&ld_calc);\n    tsk_treeseq_free(&ts);\n\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(base_ts);\n    free(base_ts);\n}\n\nstatic void\ntest_paper_ex_two_site(void)\n{\n    tsk_treeseq_t ts;\n    double result[27];\n    tsk_size_t s, result_size, num_sample_sets;\n    int ret;\n\n    double truth_one_set[9] = { 1, 0.1111111111111111, 0.1111111111111111,\n        0.1111111111111111, 1, 1, 
0.1111111111111111, 1, 1 };\n    double truth_two_sets[18] = { 1, 1, 0.1111111111111111, 0.1111111111111111,\n        0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111,\n        1, 1, 1, 1, 0.1111111111111111, 0.1111111111111111, 1, 1, 1, 1 };\n    double truth_three_sets[27] = { 1, 1, NAN, 0.1111111111111111, 0.1111111111111111,\n        NAN, 0.1111111111111111, 0.1111111111111111, NAN, 0.1111111111111111,\n        0.1111111111111111, NAN, 1, 1, 1, 1, 1, 1, 0.1111111111111111,\n        0.1111111111111111, NAN, 1, 1, 1, 1, 1, 1 };\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    double truth_three_index_tuples[27] = { 1, 1, NAN, 0.1111111111111111,\n        0.1111111111111111, NAN, 0.1111111111111111, 0.1111111111111111, NAN,\n        0.1111111111111111, 0.1111111111111111, NAN, 1, 1, 1, 1, 1, 1,\n        0.1111111111111111, 0.1111111111111111, NAN, 1, 1, 1, 1, 1, 1 };\n\n    tsk_size_t sample_set_sizes[3], num_index_tuples;\n    tsk_id_t sample_sets[ts.num_samples * 3], index_tuples[2 * 3] = { 0, 1, 0, 0, 0, 2 };\n    tsk_size_t num_sites = ts.tables->sites.num_rows;\n    tsk_id_t *sites = tsk_malloc(num_sites * sizeof(*sites));\n\n    // First sample set contains all of the samples\n    sample_set_sizes[0] = ts.num_samples;\n    num_sample_sets = 1;\n    for (s = 0; s < ts.num_samples; s++) {\n        sample_sets[s] = (tsk_id_t) s;\n    }\n    for (s = 0; s < num_sites; s++) {\n        sites[s] = (tsk_id_t) s;\n    }\n\n    result_size = num_sites * num_sites;\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);\n    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,\n        sites, NULL, num_sites, sites, NULL, 0, result);\n\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size * num_sample_sets, result, truth_one_set);\n\n    // Second sample 
set contains all of the samples\n    sample_set_sizes[1] = ts.num_samples;\n    num_sample_sets = 2;\n    for (s = ts.num_samples; s < ts.num_samples * 2; s++) {\n        sample_sets[s] = (tsk_id_t) s - (tsk_id_t) ts.num_samples;\n    }\n\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);\n    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,\n        sites, NULL, num_sites, sites, NULL, 0, result);\n\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size * num_sample_sets, result, truth_two_sets);\n\n    // Third sample set contains the first two samples\n    sample_set_sizes[2] = 2;\n    num_sample_sets = 3;\n    for (s = ts.num_samples * 2; s < (ts.num_samples * 3) - 2; s++) {\n        sample_sets[s] = (tsk_id_t) s - (tsk_id_t) ts.num_samples * 2;\n    }\n\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);\n    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,\n        sites, NULL, num_sites, sites, NULL, 0, result);\n\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal_nan(\n        result_size * num_sample_sets, result, truth_three_sets);\n\n    // Two-way stats: we'll reuse all sample sets from the first 3 tests\n    num_sample_sets = 3;\n\n    num_index_tuples = 1;\n    // We'll compute r2 between sample set 0 and sample set 1\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_index_tuples);\n    ret = tsk_treeseq_r2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets,\n        num_index_tuples, index_tuples, num_sites, sites, NULL, num_sites, sites, NULL,\n        0, result);\n\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size * num_index_tuples, result, truth_one_set);\n\n    // Compare sample sets [(0, 1), (0, 0)]\n    num_index_tuples = 2;\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_index_tuples);\n    ret = tsk_treeseq_r2_ij(&ts, 
num_sample_sets, sample_set_sizes, sample_sets,\n        num_index_tuples, index_tuples, num_sites, sites, NULL, num_sites, sites, NULL,\n        0, result);\n\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size * num_index_tuples, result, truth_two_sets);\n\n    // Compare sample sets [(0, 1), (0, 0), (0, 2)]\n    num_index_tuples = 3;\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_index_tuples);\n    ret = tsk_treeseq_r2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets,\n        num_index_tuples, index_tuples, num_sites, sites, NULL, num_sites, sites, NULL,\n        0, result);\n\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal_nan(\n        result_size * num_index_tuples, result, truth_three_index_tuples);\n\n    tsk_treeseq_free(&ts);\n    tsk_safe_free(sites);\n}\n\nstatic void\ntest_paper_ex_two_branch(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    double result[27];\n    tsk_size_t i, result_size, num_sample_sets;\n    tsk_flags_t options = 0;\n    double truth_one_set[9] = { 0.008890640625, 0.004624203125, 0.005215703125,\n        0.004624203125, 0.003737578125, 0.004377078125, 0.005215703125,\n        0.004377078124999999, 0.005160578124999998 };\n    double truth_two_sets[18] = { 0.008890640625, 0.008890640625, 0.004624203125,\n        0.004624203125, 0.005215703125, 0.005215703125, 0.004624203125, 0.004624203125,\n        0.003737578125, 0.003737578125, 0.004377078125, 0.004377078125, 0.005215703125,\n        0.005215703125, 0.004377078124999999, 0.004377078124999999, 0.005160578124999998,\n        0.005160578124999998 };\n    double truth_three_sets[27]\n        = { 0.008890640625, 0.008890640625, 0.007225, 0.004624203125000001,\n              0.004624203125, 0.007225, 0.005215703125000002, 0.005215703125, 0.008585,\n              0.004624203125, 0.004624203125, 0.007225, 0.003737578125, 0.003737578125,\n              0.007225, 0.004377078125, 0.004377078125, 0.008585, 
0.005215703125,\n              0.005215703125, 0.008585, 0.004377078124999999, 0.004377078124999999,\n              0.008585, 0.005160578124999998, 0.005160578124999998, 0.010201 };\n    double truth_positions_subset_1[12] = { 0.008890640625, 0.008890640625, 0.007225,\n        0.008890640625, 0.008890640625, 0.007225, 0.008890640625, 0.008890640625,\n        0.007225, 0.008890640625, 0.008890640625, 0.007225 };\n    double truth_positions_subset_2[12] = { 0.003737578125, 0.003737578125, 0.007225,\n        0.003737578125, 0.003737578125, 0.007225, 0.003737578125, 0.003737578125,\n        0.007225, 0.003737578125, 0.003737578125, 0.007225 };\n    double truth_positions_subset_3[12] = { 0.005160578125, 0.005160578125, 0.010201,\n        0.005160578125, 0.005160578125, 0.010201, 0.005160578125, 0.005160578125,\n        0.010201, 0.005160578125, 0.005160578125, 0.010201 };\n    double truth_three_index_tuples[27] = { 0.008890640625, 0.008890640625, 0.0039125,\n        0.004624203125, 0.004624203125, 0.0038125, 0.005215703125, 0.005215703125,\n        0.0045725, 0.004624203125, 0.004624203125, 0.0038125, 0.003737578125,\n        0.003737578125, 0.0040125, 0.004377078125, 0.004377078125, 0.0048525,\n        0.005215703125, 0.005215703125, 0.0045725, 0.004377078125, 0.004377078125,\n        0.0048525, 0.005160578125, 0.005160578125, 0.0058845 };\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n\n    tsk_size_t sample_set_sizes[3], num_index_tuples;\n    tsk_id_t sample_sets[ts.num_samples * 3], index_tuples[2 * 3] = { 0, 1, 0, 0, 0, 2 };\n    tsk_size_t num_trees = ts.num_trees;\n    double *positions = tsk_malloc(num_trees * sizeof(*positions));\n    double positions_subset_1[2] = { 0., 0.1 };\n    double positions_subset_2[2] = { 2., 6. 
};\n    double positions_subset_3[2] = { 9., 9.999 };\n\n    // First sample set contains all of the samples\n    sample_set_sizes[0] = ts.num_samples;\n    num_sample_sets = 1;\n    for (i = 0; i < ts.num_samples; i++) {\n        sample_sets[i] = (tsk_id_t) i;\n    }\n    for (i = 0; i < num_trees; i++) {\n        positions[i] = ts.breakpoints[i];\n    }\n\n    options |= TSK_STAT_BRANCH;\n\n    result_size = num_trees * num_trees * num_sample_sets;\n    tsk_memset(result, 0, sizeof(*result) * result_size);\n    ret = tsk_treeseq_D2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_trees,\n        NULL, positions, num_trees, NULL, positions, options, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_one_set);\n\n    // Second sample set contains all of the samples\n    sample_set_sizes[1] = ts.num_samples;\n    num_sample_sets = 2;\n    for (i = ts.num_samples; i < ts.num_samples * 2; i++) {\n        sample_sets[i] = (tsk_id_t) i - (tsk_id_t) ts.num_samples;\n    }\n\n    result_size = num_trees * num_trees * num_sample_sets;\n    tsk_memset(result, 0, sizeof(*result) * result_size);\n    ret = tsk_treeseq_D2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_trees,\n        NULL, positions, num_trees, NULL, positions, options, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_two_sets);\n\n    // Third sample set contains the first two samples\n    sample_set_sizes[2] = 2;\n    num_sample_sets = 3;\n    for (i = ts.num_samples * 2; i < (ts.num_samples * 3) - 2; i++) {\n        sample_sets[i] = (tsk_id_t) i - (tsk_id_t) ts.num_samples * 2;\n    }\n\n    result_size = num_trees * num_trees * num_sample_sets;\n    tsk_memset(result, 0, sizeof(*result) * result_size);\n    ret = tsk_treeseq_D2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_trees,\n        NULL, positions, num_trees, NULL, positions, options, result);\n    
CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal_nan(result_size, result, truth_three_sets);\n\n    result_size = 4 * num_sample_sets;\n    tsk_memset(result, 0, sizeof(*result) * result_size);\n    ret = tsk_treeseq_D2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 2, NULL,\n        positions_subset_1, 2, NULL, positions_subset_1, options, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal_nan(result_size, result, truth_positions_subset_1);\n\n    result_size = 4 * num_sample_sets;\n    tsk_memset(result, 0, sizeof(*result) * result_size);\n    ret = tsk_treeseq_D2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 2, NULL,\n        positions_subset_2, 2, NULL, positions_subset_2, options, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal_nan(result_size, result, truth_positions_subset_2);\n\n    result_size = 4 * num_sample_sets;\n    tsk_memset(result, 0, sizeof(*result) * result_size);\n    ret = tsk_treeseq_D2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 2, NULL,\n        positions_subset_3, 2, NULL, positions_subset_3, options, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal_nan(result_size, result, truth_positions_subset_3);\n\n    // Two-way stats: we'll reuse all sample sets from the first 3 tests\n    num_sample_sets = 3;\n    result_size = num_trees * num_trees;\n\n    num_index_tuples = 1;\n    // We'll compute D2 between sample set 0 and sample set 1\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_index_tuples);\n    ret = tsk_treeseq_D2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets,\n        num_index_tuples, index_tuples, num_trees, NULL, positions, num_trees, NULL,\n        positions, options, result);\n\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size * num_index_tuples, result, truth_one_set);\n\n    // Compare sample sets [(0, 1), (0, 0)]\n    num_index_tuples = 2;\n    
tsk_memset(result, 0, sizeof(*result) * result_size * num_index_tuples);\n    ret = tsk_treeseq_D2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets,\n        num_index_tuples, index_tuples, num_trees, NULL, positions, num_trees, NULL,\n        positions, options, result);\n\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size * num_index_tuples, result, truth_two_sets);\n\n    // Compare sample sets [(0, 1), (0, 0), (0, 2)]\n    num_index_tuples = 3;\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_index_tuples);\n    ret = tsk_treeseq_D2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets,\n        num_index_tuples, index_tuples, num_trees, NULL, positions, num_trees, NULL,\n        positions, options, result);\n\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal_nan(\n        result_size * num_index_tuples, result, truth_three_index_tuples);\n\n    tsk_treeseq_free(&ts);\n    tsk_safe_free(positions);\n}\n\nstatic void\ntest_two_site_correlated_multiallelic(void)\n{\n    const char *nodes = \"1   0   -1\\n\"\n                        \"1   0   -1\\n\"\n                        \"1   0   -1\\n\"\n                        \"1   0   -1\\n\"\n                        \"1   0   -1\\n\"\n                        \"1   0   -1\\n\"\n                        \"1   0   -1\\n\"\n                        \"1   0   -1\\n\"\n                        \"1   0   -1\\n\"\n                        \"0   2   -1\\n\"\n                        \"0   4   -1\\n\"\n                        \"0   6   -1\\n\"\n                        \"0   8   -1\\n\"\n                        \"0   10  -1\\n\"\n                        \"0   12  -1\\n\"\n                        \"0   14  -1\\n\"\n                        \"0   16  -1\\n\";\n    const char *edges = \"0   20   9    0,1\\n\"\n                        \"0   20   10   2,9\\n\"\n                        \"0   20   11   4,5\\n\"\n                        \"0   20   12   6,11\\n\"\n                
        \"0   20   13   7,8\\n\"\n                        \"0   20   14   3,10\\n\"\n                        \"0   10   15   12\\n\"\n                        \"10  20   15   13\\n\"\n                        \"0   10   15   14\\n\"\n                        \"10  20   15   14\\n\"\n                        \"10  20   16   12\\n\"\n                        \"0   10   16   13\\n\"\n                        \"0   10   16   15\\n\"\n                        \"10  20   16   15\\n\";\n    const char *tree_sites = \"7   A\\n\"\n                             \"13  G\\n\";\n    const char *mutations = \"0   15  T  -1\\n\"\n                            \"0   14  G   0\\n\"\n                            \"1   15  T  -1\\n\"\n                            \"1   13  C   2\\n\";\n\n    int ret;\n\n    tsk_treeseq_t ts;\n    tsk_size_t s, result_size;\n\n    double truth_D[4] = { 0.043209876543209874, -0.018518518518518517,\n        -0.018518518518518517, 0.05555555555555555 };\n    double truth_D2[4] = { 0.023844603634269844, 0.02384460363426984,\n        0.02384460363426984, 0.02384460363426984 };\n    double truth_r2[4] = { 1, 1, 1, 1 };\n    double truth_D_prime[4] = { 0, -0.5, -0.5, 0 };\n    double truth_r[4] = { 0.18377223398316206, -0.12212786219416509,\n        -0.12212786219416509, 0.2609542781331212 };\n    double truth_Dz[4] = { 0.0033870175616860566, 0.003387017561686057,\n        0.003387017561686057, 0.003387017561686057 };\n    double truth_pi2[4] = { 0.04579247743399549, 0.04579247743399549,\n        0.04579247743399549, 0.0457924774339955 };\n    double truth_D2_unbiased[4] = { 0.026455026455026454, 0.026455026455026454,\n        0.026455026455026454, 0.026455026455026454 };\n    double truth_Dz_unbiased[4] = { -0.008818342151675485, -0.008818342151675485,\n        -0.008818342151675485, -0.008818342151675485 };\n    double truth_pi2_unbiased[4] = { 0.0582010582010582, 0.0582010582010582,\n        0.0582010582010582, 0.0582010582010582 };\n    double 
truth_D2_unbiased_disjoint[4] = { 0.007407407407407407, 0.007407407407407407,\n        0.007407407407407407, 0.007407407407407407 };\n\n    tsk_treeseq_from_text(\n        &ts, 20, nodes, edges, NULL, tree_sites, mutations, NULL, NULL, 0);\n\n    tsk_size_t num_sample_sets = 1;\n    tsk_size_t sample_set_sizes[2] = { ts.num_samples, ts.num_samples };\n    tsk_id_t sample_sets[ts.num_samples * 2];\n    tsk_size_t num_sites = ts.tables->sites.num_rows;\n    tsk_id_t *sites = tsk_malloc(num_sites * sizeof(*sites));\n    result_size = num_sites * num_sites;\n    double result[result_size];\n\n    // Two sample sets for multipop at the bottom, only presenting one to single pop\n    // results\n    for (s = 0; s < ts.num_samples; s++) {\n        sample_sets[s] = (tsk_id_t) s;\n        sample_sets[s + ts.num_samples] = (tsk_id_t) s;\n    }\n    for (s = 0; s < num_sites; s++) {\n        sites[s] = (tsk_id_t) s;\n    }\n\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);\n    ret = tsk_treeseq_D(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,\n        sites, NULL, num_sites, sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_D);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);\n    ret = tsk_treeseq_D2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,\n        sites, NULL, num_sites, sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_D2);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);\n    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,\n        sites, NULL, num_sites, sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_r2);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);\n    ret = 
tsk_treeseq_D_prime(&ts, num_sample_sets, sample_set_sizes, sample_sets,\n        num_sites, sites, NULL, num_sites, sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_D_prime);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);\n    ret = tsk_treeseq_r(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,\n        sites, NULL, num_sites, sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_r);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);\n    ret = tsk_treeseq_Dz(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,\n        sites, NULL, num_sites, sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_Dz);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);\n    ret = tsk_treeseq_pi2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,\n        sites, NULL, num_sites, sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_pi2);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);\n    ret = tsk_treeseq_D2_unbiased(&ts, num_sample_sets, sample_set_sizes, sample_sets,\n        num_sites, sites, NULL, num_sites, sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_D2_unbiased);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);\n    ret = tsk_treeseq_Dz_unbiased(&ts, num_sample_sets, sample_set_sizes, sample_sets,\n        num_sites, sites, NULL, num_sites, sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_Dz_unbiased);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);\n  
  ret = tsk_treeseq_pi2_unbiased(&ts, num_sample_sets, sample_set_sizes, sample_sets,\n        num_sites, sites, NULL, num_sites, sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_pi2_unbiased);\n\n    // We'll compute r2 and D2 for index tuple (0, 0): sample set 0 against itself\n    num_sample_sets = 2;\n    tsk_memset(result, 0, sizeof(*result) * result_size);\n    ret = tsk_treeseq_r2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets, 1,\n        (tsk_id_t[2]) { 0, 0 }, num_sites, sites, NULL, num_sites, sites, NULL, 0,\n        result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_r2);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size);\n    ret = tsk_treeseq_D2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets, 1,\n        (tsk_id_t[2]) { 0, 0 }, num_sites, sites, NULL, num_sites, sites, NULL, 0,\n        result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_D2);\n\n    // perfectly overlapping sample sets will produce a result equal to the single\n    // population case\n    tsk_memset(result, 0, sizeof(*result) * result_size);\n    ret = tsk_treeseq_D2_ij_unbiased(&ts, num_sample_sets, sample_set_sizes, sample_sets,\n        1, (tsk_id_t[2]) { 0, 0 }, num_sites, sites, NULL, num_sites, sites, NULL, 0,\n        result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_D2_unbiased);\n\n    // two disjoint sample sets with 5 and 4 samples {0,1,2,3,4}{5,6,7,8}\n    sample_set_sizes[0] = 5;\n    sample_set_sizes[1] = 4;\n    tsk_memset(result, 0, sizeof(*result) * result_size);\n    ret = tsk_treeseq_D2_ij_unbiased(&ts, num_sample_sets, sample_set_sizes, sample_sets,\n        1, (tsk_id_t[2]) { 0, 1 }, num_sites, sites, NULL, num_sites, sites, NULL, 0,\n        result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, 
result, truth_D2_unbiased_disjoint);\n\n    tsk_treeseq_free(&ts);\n    tsk_safe_free(sites);\n}\n\nstatic void\ntest_two_site_uncorrelated_multiallelic(void)\n{\n    const char *nodes = \"1   0  -1\\n\"\n                        \"1   0  -1\\n\"\n                        \"1   0  -1\\n\"\n                        \"1   0  -1\\n\"\n                        \"1   0  -1\\n\"\n                        \"1   0  -1\\n\"\n                        \"1   0  -1\\n\"\n                        \"1   0  -1\\n\"\n                        \"1   0  -1\\n\"\n                        \"0   2  -1\\n\"\n                        \"0   4  -1\\n\"\n                        \"0   6  -1\\n\"\n                        \"0   8  -1\\n\"\n                        \"0   10 -1\\n\"\n                        \"0   12 -1\\n\"\n                        \"0   14 -1\\n\"\n                        \"0   16 -1\\n\"\n                        \"0   2  -1\\n\"\n                        \"0   4  -1\\n\"\n                        \"0   6  -1\\n\"\n                        \"0   8  -1\\n\"\n                        \"0   10 -1\\n\"\n                        \"0   12 -1\\n\"\n                        \"0   14 -1\\n\"\n                        \"0   16 -1\\n\";\n    const char *edges = \"0     10    9      0,1\\n\"\n                        \"10    20    17     0,3\\n\"\n                        \"0     10    10     2,9\\n\"\n                        \"10    20    18     6,17\\n\"\n                        \"0     10    11     3,4\\n\"\n                        \"10    20    19     1,4\\n\"\n                        \"0     10    12     5,11\\n\"\n                        \"10    20    20     7,19\\n\"\n                        \"0     10    13     6,7\\n\"\n                        \"10    20    21     2,5\\n\"\n                        \"0     10    14     8,13\\n\"\n                        \"10    20    22     8,21\\n\"\n                        \"0     10    15     10,12\\n\"\n                        \"10    20    23     18,20\\n\"\n       
                 \"0     10    16     14,15\\n\"\n                        \"10    20    24     22,23\\n\";\n    const char *tree_sites = \"7   A\\n\"\n                             \"13  G\\n\";\n    const char *mutations = \"0   15  T  -1\\n\"\n                            \"0   12  G   0\\n\"\n                            \"1   23  T  -1\\n\"\n                            \"1   20  A   2\\n\";\n\n    tsk_treeseq_t ts;\n\n    int ret;\n\n    double truth_D[4] = { 0.05555555555555555, 0.0, 0.0, 0.05555555555555555 };\n    double truth_D2[4] = { 0.024691358024691357, 0.0, 0.0, 0.024691358024691357 };\n    double truth_r2[4] = { 1, 0, 0, 1 };\n    double truth_D_prime[4] = { 0.0, 0.0, 0.0, 0.0 };\n    double truth_r[4] = { 0.25, 0.0, 0.0, 0.25 };\n    double truth_Dz[4] = { 0.0, 0.0, 0.0, 0.0 };\n    double truth_pi2[4] = { 0.04938271604938272, 0.04938271604938272,\n        0.04938271604938272, 0.04938271604938272 };\n    double truth_D2_unbiased[4] = { 0.027777777777777776, -0.009259259259259259,\n        -0.009259259259259259, 0.027777777777777776 };\n    double truth_Dz_unbiased[4] = { -0.015873015873015872, 0.005291005291005289,\n        0.005291005291005289, -0.015873015873015872 };\n    double truth_pi2_unbiased[4] = { 0.06349206349206349, 0.06216931216931215,\n        0.06216931216931215, 0.06349206349206349 };\n    double truth_D2_unbiased_disjoint[4] = { 0.008333333333333333,\n        -0.0027777777777777775, -0.0027777777777777775, 0.03518518518518518 };\n\n    tsk_treeseq_from_text(\n        &ts, 20, nodes, edges, NULL, tree_sites, mutations, NULL, NULL, 0);\n\n    tsk_size_t s;\n    tsk_size_t num_sample_sets = 1;\n    tsk_size_t num_sites = ts.tables->sites.num_rows;\n    tsk_id_t *sites = tsk_malloc(num_sites * sizeof(*sites));\n    tsk_size_t sample_set_sizes[2] = { ts.num_samples, ts.num_samples };\n    tsk_id_t sample_sets[ts.num_samples * 2];\n    tsk_size_t result_size = num_sites * num_sites;\n    double result[result_size];\n\n    // Two sample sets 
for multipop at the bottom, only presenting one to single pop\n    // results\n    for (s = 0; s < ts.num_samples; s++) {\n        sample_sets[s] = (tsk_id_t) s;\n        sample_sets[s + ts.num_samples] = (tsk_id_t) s;\n    }\n    for (s = 0; s < num_sites; s++) {\n        sites[s] = (tsk_id_t) s;\n    }\n\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);\n    ret = tsk_treeseq_D(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,\n        sites, NULL, num_sites, sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_D);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);\n    ret = tsk_treeseq_D2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,\n        sites, NULL, num_sites, sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_D2);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);\n    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,\n        sites, NULL, num_sites, sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_r2);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);\n    ret = tsk_treeseq_D_prime(&ts, num_sample_sets, sample_set_sizes, sample_sets,\n        num_sites, sites, NULL, num_sites, sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_D_prime);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);\n    ret = tsk_treeseq_r(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,\n        sites, NULL, num_sites, sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_r);\n\n    tsk_memset(result, 0, sizeof(*result) * 
result_size * num_sample_sets);\n    ret = tsk_treeseq_Dz(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,\n        sites, NULL, num_sites, sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_Dz);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);\n    ret = tsk_treeseq_pi2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,\n        sites, NULL, num_sites, sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_pi2);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);\n    ret = tsk_treeseq_D2_unbiased(&ts, num_sample_sets, sample_set_sizes, sample_sets,\n        num_sites, sites, NULL, num_sites, sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_D2_unbiased);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);\n    ret = tsk_treeseq_Dz_unbiased(&ts, num_sample_sets, sample_set_sizes, sample_sets,\n        num_sites, sites, NULL, num_sites, sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_Dz_unbiased);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);\n    ret = tsk_treeseq_pi2_unbiased(&ts, num_sample_sets, sample_set_sizes, sample_sets,\n        num_sites, sites, NULL, num_sites, sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_pi2_unbiased);\n\n    // We'll compute r2 and D2 for index tuple (0, 0): sample set 0 against itself\n    num_sample_sets = 2;\n    tsk_memset(result, 0, sizeof(*result) * result_size);\n    ret = tsk_treeseq_r2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets, 1,\n        (tsk_id_t[2]) { 0, 0 }, num_sites, sites, NULL, num_sites, sites, NULL, 0,\n        result);\n    
CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_r2);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size);\n    ret = tsk_treeseq_D2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets, 1,\n        (tsk_id_t[2]) { 0, 0 }, num_sites, sites, NULL, num_sites, sites, NULL, 0,\n        result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_D2);\n\n    // perfectly overlapping sample sets will produce a result equal to the single\n    // population case\n    tsk_memset(result, 0, sizeof(*result) * result_size);\n    ret = tsk_treeseq_D2_ij_unbiased(&ts, num_sample_sets, sample_set_sizes, sample_sets,\n        1, (tsk_id_t[2]) { 0, 0 }, num_sites, sites, NULL, num_sites, sites, NULL, 0,\n        result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_D2_unbiased);\n\n    // two disjoint sample sets with 5 and 4 samples {0,1,2,3,4}{5,6,7,8}\n    sample_set_sizes[0] = 5;\n    sample_set_sizes[1] = 4;\n    tsk_memset(result, 0, sizeof(*result) * result_size);\n    ret = tsk_treeseq_D2_ij_unbiased(&ts, num_sample_sets, sample_set_sizes, sample_sets,\n        1, (tsk_id_t[2]) { 0, 1 }, num_sites, sites, NULL, num_sites, sites, NULL, 0,\n        result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_D2_unbiased_disjoint);\n\n    tsk_treeseq_free(&ts);\n    tsk_safe_free(sites);\n}\n\nstatic void\ntest_two_site_backmutation(void)\n{\n    const char *nodes\n        = \"1 0  -1\\n1 0  -1\\n1 0  -1\\n1 0  -1\\n1 0  -1\\n1 0  -1\\n1 0  -1\\n1 0  -1\\n\"\n          \"1 0  -1\\n1 0  -1\\n1 0  -1\\n1 0  -1\\n1 0  -1\\n1 0  -1\\n1 0  -1\\n1 0  -1\\n\"\n          \"1 0  -1\\n1 0  -1\\n1 0  -1\\n1 0  -1\\n1 0  -1\\n1 0  -1\\n1 0  -1\\n1 0  -1\\n\"\n          \"1 0  -1\\n1 0  -1\\n1 0  -1\\n1 0  -1\\n1 0  -1\\n1 0  -1\\n1 0  -1\\n1 0  -1\\n\"\n          \"1 0  -1\\n1 0  -1\\n1 0  -1\\n0 2  
-1\\n0 4  -1\\n0 6  -1\\n0 8  -1\\n0 10 -1\\n\"\n          \"0 12 -1\\n0 14 -1\\n0 16 -1\\n0 18 -1\\n0 20 -1\\n0 22 -1\\n0 24 -1\\n0 26 -1\\n\"\n          \"0 28 -1\\n0 30 -1\\n0 32 -1\\n0 34 -1\\n0 36 -1\\n0 38 -1\\n0 40 -1\\n0 42 -1\\n\"\n          \"0 44 -1\\n0 46 -1\\n0 48 -1\\n0 50 -1\\n0 52 -1\\n0 54 -1\\n0 56 -1\\n0 58 -1\\n\"\n          \"0 60 -1\\n0 62 -1\\n0 64 -1\\n0 66 -1\\n0 68 -1\\n\";\n\n    const char *edges\n        = \"0 10 35 0,1\\n0 10 36 2,35\\n0 10 37 3,36\\n0 10 38 4,37\\n0 10 39 5,38\\n\"\n          \"0 10 40 6,39\\n0 10 41 7,40\\n0 10 42 8,41\\n0 10 43 9,42\\n0 10 44 10,43\\n\"\n          \"0 10 45 11,44\\n0 10 46 12,45\\n0 10 47 13,46\\n0 10 48 14,47\\n0 10 49 15,48\\n\"\n          \"0 10 50 16,49\\n0 10 51 17,50\\n0 10 52 18,51\\n0 10 53 19,52\\n0 10 54 20,53\\n\"\n          \"0 10 55 21,54\\n0 10 56 22,55\\n0 10 57 23,56\\n0 10 58 24,57\\n0 10 59 25,58\\n\"\n          \"0 10 60 26,59\\n0 10 61 27,60\\n0 10 62 28,61\\n0 10 63 29,62\\n0 10 64 30,63\\n\"\n          \"0 10 65 31,64\\n0 10 66 32,65\\n0 10 67 33,66\\n0 10 68 34,67\\n\";\n\n    const char *sites = \"1    A\\n\"\n                        \"4.5  T\\n\";\n\n    const char *mutations = \"0  50  T  -1\\n\"\n                            \"0  48  G   0\\n\"\n                            \"0  46  A   1\\n\"\n                            \"1  62  G  -1\\n\"\n                            \"1  60  T   3\\n\"\n                            \"1  58  A   4\\n\";\n\n    int ret;\n\n    tsk_treeseq_t ts;\n    tsk_treeseq_from_text(&ts, 10, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);\n\n    tsk_size_t num_sample_sets = 1;\n    tsk_size_t num_sites = ts.tables->sites.num_rows;\n    tsk_id_t *row_sites = tsk_malloc(num_sites * sizeof(*row_sites));\n    tsk_id_t *col_sites = tsk_malloc(num_sites * sizeof(*col_sites));\n    tsk_size_t sample_set_sizes[1] = { ts.num_samples };\n    tsk_id_t sample_sets[ts.num_samples];\n    tsk_size_t result_size = num_sites * num_sites;\n    double 
result[result_size];\n    tsk_size_t s;\n\n    double truth_r2[4] = { 0.999999999999999, 0.042923862278701, 0.042923862278701, 1. };\n\n    for (s = 0; s < ts.num_samples; s++) {\n        sample_sets[s] = (tsk_id_t) s;\n    }\n    for (s = 0; s < num_sites; s++) {\n        row_sites[s] = (tsk_id_t) s;\n        col_sites[s] = (tsk_id_t) s;\n    }\n\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);\n    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,\n        row_sites, NULL, num_sites, col_sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_r2);\n\n    tsk_treeseq_free(&ts);\n    tsk_safe_free(row_sites);\n    tsk_safe_free(col_sites);\n}\n\nstatic void\ntest_two_locus_branch_all_stats(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    double result[16];\n    tsk_size_t result_size = 16;\n    tsk_id_t sample_sets[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };\n    tsk_size_t sample_set_sizes[1] = { 10 };\n    double positions[4] = { 0.0, 2.0, 5.0, 6.0 };\n\n    const char *nodes\n        = \"1 0 -1\\n1 0 -1\\n1 0 -1\\n1 0 -1\\n1 0 -1\\n1 0 -1\\n1 0 -1\\n1 0 -1\\n\"\n          \"1 0 -1\\n1 0 -1\\n0 0.02 -1\\n0 0.06 -1\\n0 0.08 -1\\n0 0.09 -1\\n0 0.21 -1\\n\"\n          \"0 0.35 -1\\n0 0.44 -1\\n0 0.69 -1\\n0 0.79 -1\\n0 0.80 -1\\n0 0.84 -1\\n\"\n          \"0 1.26 -1\\n\";\n    const char *edges\n        = \"0 10 10 0,8\\n0 10 11 4,7\\n0 10 12 3,9\\n0 10 13 6,11\\n0 10 14 1,2\\n\"\n          \"5 10 15 5,10\\n0 5 16 5,10\\n6 10 17 12,14\\n2 6 18 14\\n5 10 18 15\\n\"\n          \"2 5 18 16\\n6 10 18 17\\n0 6 19 12\\n0 2 19 14\\n2 6 19 18\\n\"\n          \"0 2 20 13\\n0 2 20 16\\n2 10 21 13\\n6 10 21 18\\n0 6 21 19\\n\"\n          \"0 2 21 20\\n\";\n\n    double truth_D[16] = { 0 };\n    double truth_D2[16] = { 0.21949755999999998, 0.1867003599999999, 0.18798699999999988,\n        0.18941379999999983, 0.18670035999999995, 0.21159555999999993,\n    
    0.21257979999999996, 0.21222580000000005, 0.187987, 0.21257979999999996,\n        0.21380379999999996, 0.2134714, 0.18941379999999994, 0.21222579999999996,\n        0.21347139999999992, 0.21377299999999996 };\n    double truth_r2[16] = { 6.286870108969513, 5.742220038107836, 5.7080225607835695,\n        5.623290389581752, 5.742220038107832, 6.3274209876543175, 6.291288603867465,\n        6.195658345930953, 5.708022560783573, 6.291288603867472, 6.266256220080618,\n        6.170677280171318, 5.623290389581758, 6.195658345930966, 6.170677280171324,\n        6.094109054547737 };\n    double truth_D_prime[16] = { -9.6552, -9.44459999999999, -9.136799999999988,\n        -8.680999999999989, -9.444599999999998, -9.240699999999984, -8.937399999999977,\n        -8.488499999999984, -9.136799999999996, -8.93739999999999, -8.658399999999984,\n        -8.219399999999993, -8.68099999999999, -8.488499999999991, -8.21939999999999,\n        -7.814699999999995 };\n    double truth_r[16] = { 0.023193673439522472, 0.023272634599981495,\n        0.021243465874728862, 0.01919099466703808, 0.023272634599981454,\n        0.023358527073393587, 0.021370047752011, 0.019268461077492888,\n        0.021243465874728862, 0.021370047752011012, 0.020359977803327087,\n        0.01793842604857987, 0.019190994667037817, 0.019268461077492804,\n        0.017938426048579773, 0.0160605735196305 };\n    double truth_Dz[16] = { 0.01958895999999996, -0.007941440000000037,\n        -0.007572800000000046, -0.010558400000000029, -0.007941440000000022,\n        0.01385535999999997, 0.014569599999999966, 0.015529599999999963,\n        -0.007572800000000024, 0.01456959999999996, 0.015426399999999951,\n        0.016271199999999948, -0.010558400000000011, 0.01552959999999999,\n        0.016271199999999986, 0.017607999999999985 };\n    double truth_pi2[16] = { 0.7201219600000001, 0.6895723600000001, 0.6865174000000006,\n        0.6780314000000008, 0.6895723600000002, 0.6603187600000002, 0.6573934000000002,\n       
 0.6492674000000002, 0.6865174000000002, 0.6573934000000003, 0.6544810000000003,\n        0.6463910000000003, 0.6780314000000002, 0.6492674000000004, 0.6463910000000005,\n        0.6384010000000007 };\n    double truth_Dz_unbiased[16] = { -0.06387380952380949, -0.09312571428571428,\n        -0.09361428571428566, -0.10075682539682536, -0.09312571428571428,\n        -0.0734419047619048, -0.0730733333333334, -0.07171301587301597,\n        -0.0936142857142857, -0.07307333333333343, -0.07261476190476202,\n        -0.07147730158730167, -0.10075682539682543, -0.07171301587301596,\n        -0.07147730158730159, -0.06988666666666674 };\n    double truth_D2_unbiased[16] = { 0.19576484126984134, 0.1586769841269842,\n        0.16093412698412704, 0.16485253968253985, 0.15867698412698414,\n        0.1949926984126984, 0.19673555555555555, 0.19734825396825403,\n        0.16093412698412699, 0.1967355555555555, 0.19879341269841264,\n        0.19945182539682532, 0.16485253968253968, 0.19734825396825395,\n        0.1994518253968253, 0.20091222222222213 };\n    double truth_pi2_unbiased[16] = { 0.8910765079365083, 0.8571103174603181,\n        0.853337460317461, 0.8434880952380959, 0.8571103174603178, 0.8182193650793657,\n        0.8145322222222225, 0.8043504761904768, 0.8533374603174609, 0.8145322222222225,\n        0.8108450793650795, 0.800729047619048, 0.8434880952380955, 0.8043504761904766,\n        0.8007290476190477, 0.7906733333333332 };\n\n    tsk_treeseq_from_text(&ts, 10, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size);\n    ret = tsk_treeseq_D(&ts, 1, sample_set_sizes, sample_sets, 4, NULL, positions, 4,\n        NULL, positions, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_D);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size);\n    ret = tsk_treeseq_D2(&ts, 1, sample_set_sizes, sample_sets, 4, NULL, positions, 4,\n        NULL, 
positions, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_D2);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size);\n    ret = tsk_treeseq_r2(&ts, 1, sample_set_sizes, sample_sets, 4, NULL, positions, 4,\n        NULL, positions, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_r2);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size);\n    ret = tsk_treeseq_D_prime(&ts, 1, sample_set_sizes, sample_sets, 4, NULL, positions,\n        4, NULL, positions, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_D_prime);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size);\n    ret = tsk_treeseq_r(&ts, 1, sample_set_sizes, sample_sets, 4, NULL, positions, 4,\n        NULL, positions, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_r);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size);\n    ret = tsk_treeseq_Dz(&ts, 1, sample_set_sizes, sample_sets, 4, NULL, positions, 4,\n        NULL, positions, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_Dz);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size);\n    ret = tsk_treeseq_pi2(&ts, 1, sample_set_sizes, sample_sets, 4, NULL, positions, 4,\n        NULL, positions, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_pi2);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size);\n    ret = tsk_treeseq_Dz_unbiased(&ts, 1, sample_set_sizes, sample_sets, 4, NULL,\n        positions, 4, NULL, positions, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_Dz_unbiased);\n\n    tsk_memset(result, 0, 
sizeof(*result) * result_size);\n    ret = tsk_treeseq_D2_unbiased(&ts, 1, sample_set_sizes, sample_sets, 4, NULL,\n        positions, 4, NULL, positions, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_D2_unbiased);\n\n    tsk_memset(result, 0, sizeof(*result) * result_size);\n    ret = tsk_treeseq_pi2_unbiased(&ts, 1, sample_set_sizes, sample_sets, 4, NULL,\n        positions, 4, NULL, positions, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size, result, truth_pi2_unbiased);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_paper_ex_two_site_subset(void)\n{\n    tsk_treeseq_t ts;\n    double result[4];\n    int ret;\n    tsk_size_t s, result_size;\n    tsk_size_t sample_set_sizes[1];\n    tsk_size_t num_sample_sets;\n    tsk_id_t row_sites[2] = { 0, 1 };\n    tsk_id_t col_sites[2] = { 1, 2 };\n    double result_truth_1[4] = { 0.1111111111111111, 0.1111111111111111, 1, 1 };\n    double result_truth_2[1] = { 0.1111111111111111 };\n    double result_truth_3[4] = { 0.1111111111111111, 1, 0.1111111111111111, 1 };\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n\n    tsk_id_t sample_sets[ts.num_samples];\n\n    sample_set_sizes[0] = ts.num_samples;\n    num_sample_sets = 1;\n    for (s = 0; s < ts.num_samples; s++) {\n        sample_sets[s] = (tsk_id_t) s;\n    }\n\n    result_size = 2 * 2;\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);\n    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 2,\n        row_sites, NULL, 2, col_sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size * num_sample_sets, result, result_truth_1);\n\n    result_size = 1 * 1;\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);\n    
col_sites[0] = 2;\n    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 1,\n        row_sites, NULL, 1, col_sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size * num_sample_sets, result, result_truth_2);\n\n    result_size = 2 * 2;\n    tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);\n    row_sites[0] = 1;\n    row_sites[1] = 2;\n    col_sites[0] = 0;\n    col_sites[1] = 1;\n    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 2,\n        row_sites, NULL, 2, col_sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(result_size * num_sample_sets, result, result_truth_3);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_two_locus_stat_input_errors(void)\n{\n    tsk_treeseq_t ts;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);\n\n    tsk_size_t num_sites = ts.tables->sites.num_rows;\n    tsk_id_t *row_sites = tsk_malloc(num_sites * sizeof(*row_sites));\n    tsk_id_t *col_sites = tsk_malloc(num_sites * sizeof(*col_sites));\n    tsk_size_t sample_set_sizes[2] = { ts.num_samples, ts.num_samples };\n    tsk_size_t num_sample_sets = 1;\n    tsk_id_t index_tuples[2] = { 0 };\n    tsk_size_t num_index_tuples = 1;\n    tsk_id_t sample_sets[ts.num_samples * 2]; // need 2 sample sets for multipop\n    double positions[10] = { 0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9 };\n    double bad_col_positions[2] = { 0., 0. 
}; // used in 1 test to cover column check\n    double result[100];\n    tsk_size_t s;\n\n    for (s = 0; s < ts.num_samples; s++) {\n        sample_sets[s] = (tsk_id_t) s;\n        sample_sets[s + ts.num_samples] = (tsk_id_t) s;\n    }\n    for (s = 0; s < num_sites; s++) {\n        row_sites[s] = (tsk_id_t) s;\n        col_sites[s] = (tsk_id_t) s;\n    }\n    // begin with the happy path\n    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,\n        row_sites, NULL, num_sites, col_sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_treeseq_two_locus_count_stat(&ts, num_sample_sets, sample_set_sizes,\n        sample_sets, 0, NULL, NULL, NULL, num_sites, row_sites, NULL, num_sites,\n        col_sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_RESULT_DIMS);\n\n    ret = tsk_treeseq_r2(&ts, 1, sample_set_sizes, sample_sets, num_sites, row_sites,\n        NULL, num_sites, col_sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    sample_sets[1] = 0;\n    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,\n        row_sites, NULL, num_sites, col_sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);\n    sample_sets[1] = 1;\n\n    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,\n        row_sites, NULL, num_sites, col_sites, NULL, TSK_STAT_SITE | TSK_STAT_BRANCH,\n        result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MULTIPLE_STAT_MODES);\n\n    ret = tsk_treeseq_r2(&ts, 0, sample_set_sizes, sample_sets, num_sites, row_sites,\n        NULL, num_sites, col_sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);\n\n    sample_set_sizes[0] = 0;\n    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,\n        row_sites, NULL, num_sites, col_sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 
TSK_ERR_EMPTY_SAMPLE_SET);\n    sample_set_sizes[0] = ts.num_samples;\n\n    sample_sets[1] = 10;\n    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,\n        row_sites, NULL, num_sites, col_sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    sample_sets[1] = 1;\n\n    row_sites[0] = 1000;\n    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,\n        row_sites, NULL, num_sites, col_sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);\n    row_sites[0] = 0;\n\n    col_sites[num_sites - 1] = (tsk_id_t) num_sites;\n    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,\n        row_sites, NULL, num_sites, col_sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);\n    col_sites[num_sites - 1] = (tsk_id_t) num_sites - 1;\n\n    row_sites[0] = 1;\n    row_sites[1] = 0;\n    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,\n        row_sites, NULL, num_sites, col_sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_STAT_UNSORTED_SITES);\n    row_sites[0] = 0;\n    row_sites[1] = 1;\n\n    row_sites[0] = 1;\n    row_sites[1] = 1;\n    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,\n        row_sites, NULL, num_sites, col_sites, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_STAT_DUPLICATE_SITES);\n    row_sites[0] = 0;\n    row_sites[1] = 1;\n\n    // Not an error condition, but we want to record this behavior. 
The method is robust\n    // to zero-length site/position inputs.\n    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 0, NULL,\n        NULL, 0, NULL, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 0, NULL,\n        NULL, 0, NULL, NULL, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    positions[9] = 1;\n    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 10, NULL,\n        positions, 10, NULL, positions, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POSITION_OUT_OF_BOUNDS);\n    positions[9] = 0.9;\n\n    positions[0] = -0.1;\n    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 10, NULL,\n        positions, 10, NULL, positions, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POSITION_OUT_OF_BOUNDS);\n    positions[0] = 0;\n\n    positions[0] = 0.1;\n    positions[1] = 0;\n    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 10, NULL,\n        positions, 10, NULL, positions, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_STAT_UNSORTED_POSITIONS);\n    positions[0] = 0;\n    positions[1] = 0.1;\n\n    // rows always fail first, check columns\n    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 10, NULL,\n        positions, 2, NULL, bad_col_positions, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_STAT_DUPLICATE_POSITIONS);\n\n    positions[0] = 0;\n    positions[1] = 0;\n    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 10, NULL,\n        positions, 10, NULL, positions, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_STAT_DUPLICATE_POSITIONS);\n    positions[0] = 0;\n    positions[1] = 0.1;\n\n    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 10, NULL,\n        positions, 10, NULL, 
positions, TSK_STAT_NODE, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);\n\n    num_sample_sets = 2;\n    num_index_tuples = 0;\n    ret = tsk_treeseq_r2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets,\n        num_index_tuples, index_tuples, num_sites, row_sites, NULL, num_sites, col_sites,\n        NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_INDEX_TUPLES);\n\n    num_sample_sets = 0;\n    num_index_tuples = 1;\n    ret = tsk_treeseq_D2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets,\n        num_index_tuples, index_tuples, num_sites, row_sites, NULL, num_sites, col_sites,\n        NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);\n\n    num_sample_sets = 2;\n    index_tuples[0] = 2;\n    ret = tsk_treeseq_D2_ij_unbiased(&ts, num_sample_sets, sample_set_sizes, sample_sets,\n        num_index_tuples, index_tuples, num_sites, row_sites, NULL, num_sites, col_sites,\n        NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLE_SET_INDEX);\n\n    tsk_treeseq_free(&ts);\n    tsk_safe_free(row_sites);\n    tsk_safe_free(col_sites);\n}\n\nstatic void\ntest_simplest_divergence_matrix(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\";\n    const char *edges = \"0  1   2   0,1\\n\";\n    const char *sites = \"0.1  A\\n\"\n                        \"0.6  A\\n\";\n    const char *mutations = \"0  0  B  -1\\n\"\n                            \"1  0  B  -1\\n\";\n    tsk_treeseq_t ts;\n    tsk_id_t sample_ids[] = { 0, 1 };\n    double D_branch[4] = { 0, 2, 2, 0 };\n    double D_site[4] = { 0, 2, 2, 0 };\n    double result[4];\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);\n\n    ret = tsk_treeseq_divergence_matrix(\n        &ts, 2, NULL, sample_ids, 0, NULL, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    
assert_arrays_almost_equal(4, D_branch, result);\n\n    ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, sample_ids, 0, NULL,\n        TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(4, D_branch, result);\n\n    ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, sample_ids, 0, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(4, D_site, result);\n\n    ret = tsk_treeseq_divergence_matrix(\n        &ts, 2, NULL, sample_ids, 0, NULL, TSK_STAT_SPAN_NORMALISE, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(4, D_site, result);\n\n    ret = tsk_treeseq_divergence_matrix(\n        &ts, 2, NULL, sample_ids, 0, NULL, TSK_STAT_SITE, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(4, D_site, result);\n\n    ret = tsk_treeseq_divergence_matrix(\n        &ts, 0, NULL, NULL, 0, NULL, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(4, D_branch, result);\n\n    ret = tsk_treeseq_divergence_matrix(\n        &ts, 0, NULL, NULL, 0, NULL, TSK_STAT_SITE, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(4, D_site, result);\n\n    ret = tsk_treeseq_divergence_matrix(\n        &ts, 0, NULL, NULL, 0, NULL, TSK_STAT_NODE, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);\n\n    ret = tsk_treeseq_divergence_matrix(\n        &ts, 0, NULL, NULL, 0, NULL, TSK_STAT_POLARISED, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_STAT_POLARISED_UNSUPPORTED);\n\n    ret = tsk_treeseq_divergence_matrix(\n        &ts, 0, NULL, NULL, 0, NULL, TSK_STAT_SITE | TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MULTIPLE_STAT_MODES);\n\n    sample_ids[0] = -1;\n    ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, sample_ids, 0, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    sample_ids[0] = 3;\n   
 ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, sample_ids, 0, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    sample_ids[0] = 1;\n    ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, sample_ids, 0, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);\n    ret = tsk_treeseq_divergence_matrix(\n        &ts, 2, NULL, sample_ids, 0, NULL, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);\n\n    sample_ids[0] = 2;\n    ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, sample_ids, 0, NULL, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLES);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_divergence_matrix_windows(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\";\n    const char *edges = \"0  1   2   0,1\\n\";\n    const char *sites = \"0.1  A\\n\"\n                        \"0.6  A\\n\";\n    const char *mutations = \"0  0  B  -1\\n\"\n                            \"1  0  B  -1\\n\";\n    tsk_treeseq_t ts;\n    tsk_id_t sample_ids[] = { 0, 1 };\n    double D_branch[8] = { 0, 1, 1, 0, 0, 1, 1, 0 };\n    double D_site[8] = { 0, 1, 1, 0, 0, 1, 1, 0 };\n    double result[8];\n    double windows[] = { 0, 0.5, 1 };\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);\n\n    ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, sample_ids, 2, windows, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(8, D_site, result);\n    ret = tsk_treeseq_divergence_matrix(\n        &ts, 2, NULL, sample_ids, 2, windows, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(8, D_branch, result);\n\n    /* Windows for the second half */\n    ret = tsk_treeseq_divergence_matrix(\n        &ts, 2, NULL, sample_ids, 1, windows + 1, TSK_STAT_SITE, result);\n    
CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(4, D_site, result);\n    ret = tsk_treeseq_divergence_matrix(\n        &ts, 2, NULL, sample_ids, 1, windows + 1, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(4, D_branch, result);\n\n    ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, sample_ids, 0, windows, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NUM_WINDOWS);\n\n    windows[0] = -1;\n    ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, sample_ids, 2, windows, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);\n\n    windows[0] = 0.45;\n    windows[2] = 1.5;\n    ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, sample_ids, 2, windows, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);\n\n    windows[0] = 0.55;\n    windows[2] = 1.0;\n    ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, sample_ids, 2, windows, 0, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_divergence_matrix_internal_sample(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  1   0\\n\";\n    const char *edges = \"0  1   2   0,1\\n\";\n    tsk_treeseq_t ts;\n    tsk_id_t sample_ids[] = { 0, 1, 2 };\n    double result[9];\n    double D_branch[9] = { 0, 2, 1, 2, 0, 1, 1, 1, 0 };\n    double D_site[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n\n    ret = tsk_treeseq_divergence_matrix(\n        &ts, 3, NULL, sample_ids, 0, NULL, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(9, D_branch, result);\n\n    ret = tsk_treeseq_divergence_matrix(\n        &ts, 3, NULL, sample_ids, 0, NULL, TSK_STAT_SITE, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_arrays_almost_equal(9, D_site, result);\n\n    
tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_multiroot_divergence_matrix(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 10, multiroot_ex_nodes, multiroot_ex_edges, NULL,\n        multiroot_ex_sites, multiroot_ex_mutations, NULL, NULL, 0);\n\n    verify_divergence_matrix(&ts, TSK_STAT_BRANCH);\n    verify_divergence_matrix(&ts, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE);\n    verify_divergence_matrix(&ts, TSK_STAT_SITE);\n    verify_divergence_matrix(&ts, TSK_STAT_SITE | TSK_STAT_SPAN_NORMALISE);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_pair_coalescence_counts(void)\n{\n    tsk_treeseq_t ts;\n    tsk_treeseq_from_text(&ts, 100, nonbinary_ex_nodes, nonbinary_ex_edges, NULL,\n        nonbinary_ex_sites, nonbinary_ex_mutations, NULL, NULL, 0);\n    verify_pair_coalescence_counts(&ts, 0);\n    verify_pair_coalescence_counts(&ts, TSK_STAT_SPAN_NORMALISE);\n    verify_pair_coalescence_counts(&ts, TSK_STAT_PAIR_NORMALISE);\n    verify_pair_coalescence_counts(\n        &ts, TSK_STAT_SPAN_NORMALISE | TSK_STAT_PAIR_NORMALISE);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_pair_coalescence_counts_missing(void)\n{\n    tsk_treeseq_t ts;\n    tsk_treeseq_from_text(\n        &ts, 5, missing_ex_nodes, missing_ex_edges, NULL, NULL, NULL, NULL, NULL, 0);\n    verify_pair_coalescence_counts(&ts, 0);\n    verify_pair_coalescence_counts(&ts, TSK_STAT_SPAN_NORMALISE);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_pair_coalescence_quantiles(void)\n{\n    tsk_treeseq_t ts;\n    tsk_treeseq_from_text(&ts, 100, nonbinary_ex_nodes, nonbinary_ex_edges, NULL,\n        nonbinary_ex_sites, nonbinary_ex_mutations, NULL, NULL, 0);\n    verify_pair_coalescence_quantiles(&ts);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_pair_coalescence_rates(void)\n{\n    tsk_treeseq_t ts;\n    tsk_treeseq_from_text(&ts, 100, nonbinary_ex_nodes, nonbinary_ex_edges, NULL,\n        nonbinary_ex_sites, nonbinary_ex_mutations, NULL, NULL, 0);\n    
verify_pair_coalescence_rates(&ts);\n    tsk_treeseq_free(&ts);\n}\n\nint\nmain(int argc, char **argv)\n{\n    CU_TestInfo tests[] = {\n        { \"test_general_stat_input_errors\", test_general_stat_input_errors },\n\n        { \"test_empty_ts_ld\", test_empty_ts_ld },\n        { \"test_empty_ts_mean_descendants\", test_empty_ts_mean_descendants },\n        { \"test_empty_ts_genealogical_nearest_neighbours\",\n            test_empty_ts_genealogical_nearest_neighbours },\n        { \"test_empty_ts_general_stat\", test_empty_ts_general_stat },\n        { \"test_empty_ts_afs\", test_empty_ts_afs },\n\n        { \"test_single_tree_ld\", test_single_tree_ld },\n        { \"test_single_tree_mean_descendants\", test_single_tree_mean_descendants },\n        { \"test_single_tree_genealogical_nearest_neighbours\",\n            test_single_tree_genealogical_nearest_neighbours },\n        { \"test_single_tree_general_stat\", test_single_tree_general_stat },\n        { \"test_single_tree_general_stat_errors\", test_single_tree_general_stat_errors },\n        { \"test_single_tree_divergence_matrix\", test_single_tree_divergence_matrix },\n        { \"test_single_tree_divergence_matrix_internal_samples\",\n            test_single_tree_divergence_matrix_internal_samples },\n        { \"test_single_tree_divergence_matrix_multi_root\",\n            test_single_tree_divergence_matrix_multi_root },\n\n        { \"test_paper_ex_ld\", test_paper_ex_ld },\n        { \"test_paper_ex_mean_descendants\", test_paper_ex_mean_descendants },\n        { \"test_paper_ex_genealogical_nearest_neighbours\",\n            test_paper_ex_genealogical_nearest_neighbours },\n        { \"test_paper_ex_general_stat_errors\", test_paper_ex_general_stat_errors },\n        { \"test_paper_ex_general_stat\", test_paper_ex_general_stat },\n        { \"test_paper_ex_trait_covariance_errors\",\n            test_paper_ex_trait_covariance_errors },\n        { \"test_paper_ex_trait_covariance\", 
test_paper_ex_trait_covariance },\n        { \"test_paper_ex_trait_correlation_errors\",\n            test_paper_ex_trait_correlation_errors },\n        { \"test_paper_ex_trait_correlation\", test_paper_ex_trait_correlation },\n        { \"test_paper_ex_trait_linear_model_errors\",\n            test_paper_ex_trait_linear_model_errors },\n        { \"test_paper_ex_trait_linear_model\", test_paper_ex_trait_linear_model },\n        { \"test_paper_ex_diversity_errors\", test_paper_ex_diversity_errors },\n        { \"test_paper_ex_diversity\", test_paper_ex_diversity },\n        { \"test_paper_ex_segregating_sites_errors\",\n            test_paper_ex_segregating_sites_errors },\n        { \"test_paper_ex_segregating_sites\", test_paper_ex_segregating_sites },\n        { \"test_paper_ex_Y1_errors\", test_paper_ex_Y1_errors },\n        { \"test_paper_ex_Y1\", test_paper_ex_Y1 },\n        { \"test_paper_ex_divergence_errors\", test_paper_ex_divergence_errors },\n        { \"test_paper_ex_divergence\", test_paper_ex_divergence },\n        { \"test_paper_ex_genetic_relatedness_errors\",\n            test_paper_ex_genetic_relatedness_errors },\n        { \"test_paper_ex_genetic_relatedness\", test_paper_ex_genetic_relatedness },\n        { \"test_paper_ex_genetic_relatedness_weighted\",\n            test_paper_ex_genetic_relatedness_weighted },\n        { \"test_paper_ex_genetic_relatedness_weighted_errors\",\n            test_paper_ex_genetic_relatedness_weighted_errors },\n        { \"test_empty_genetic_relatedness_vector\",\n            test_empty_genetic_relatedness_vector },\n        { \"test_paper_ex_genetic_relatedness_vector\",\n            test_paper_ex_genetic_relatedness_vector },\n        { \"test_paper_ex_genetic_relatedness_vector_errors\",\n            test_paper_ex_genetic_relatedness_vector_errors },\n        { \"test_paper_ex_genetic_relatedness_vector_node_errors\",\n            test_paper_ex_genetic_relatedness_vector_node_errors },\n        { 
\"test_paper_ex_Y2_errors\", test_paper_ex_Y2_errors },\n        { \"test_paper_ex_Y2\", test_paper_ex_Y2 },\n        { \"test_paper_ex_f2_errors\", test_paper_ex_f2_errors },\n        { \"test_paper_ex_f2\", test_paper_ex_f2 },\n        { \"test_paper_ex_Y3_errors\", test_paper_ex_Y3_errors },\n        { \"test_paper_ex_Y3\", test_paper_ex_Y3 },\n        { \"test_paper_ex_f3_errors\", test_paper_ex_f3_errors },\n        { \"test_paper_ex_f3\", test_paper_ex_f3 },\n        { \"test_paper_ex_f4_errors\", test_paper_ex_f4_errors },\n        { \"test_paper_ex_f4\", test_paper_ex_f4 },\n        { \"test_paper_ex_afs_errors\", test_paper_ex_afs_errors },\n        { \"test_paper_ex_afs\", test_paper_ex_afs },\n        { \"test_paper_ex_divergence_matrix\", test_paper_ex_divergence_matrix },\n\n        { \"test_unary_ex_afs\", test_unary_ex_afs },\n        { \"test_nonbinary_ex_ld\", test_nonbinary_ex_ld },\n        { \"test_nonbinary_ex_mean_descendants\", test_nonbinary_ex_mean_descendants },\n        { \"test_nonbinary_ex_genealogical_nearest_neighbours\",\n            test_nonbinary_ex_genealogical_nearest_neighbours },\n        { \"test_nonbinary_ex_general_stat\", test_nonbinary_ex_general_stat },\n        { \"test_nonbinary_ex_general_stat_errors\",\n            test_nonbinary_ex_general_stat_errors },\n\n        { \"test_caterpillar_tree_ld\", test_caterpillar_tree_ld },\n        { \"test_ld_multi_mutations\", test_ld_multi_mutations },\n        { \"test_ld_silent_mutations\", test_ld_silent_mutations },\n\n        { \"test_paper_ex_two_site\", test_paper_ex_two_site },\n        { \"test_paper_ex_two_branch\", test_paper_ex_two_branch },\n        { \"test_two_site_correlated_multiallelic\",\n            test_two_site_correlated_multiallelic },\n        { \"test_two_site_uncorrelated_multiallelic\",\n            test_two_site_uncorrelated_multiallelic },\n        { \"test_two_site_backmutation\", test_two_site_backmutation },\n        { 
\"test_two_locus_branch_all_stats\", test_two_locus_branch_all_stats },\n        { \"test_paper_ex_two_site_subset\", test_paper_ex_two_site_subset },\n        { \"test_two_locus_stat_input_errors\", test_two_locus_stat_input_errors },\n\n        { \"test_simplest_divergence_matrix\", test_simplest_divergence_matrix },\n        { \"test_simplest_divergence_matrix_windows\",\n            test_simplest_divergence_matrix_windows },\n        { \"test_simplest_divergence_matrix_internal_sample\",\n            test_simplest_divergence_matrix_internal_sample },\n        { \"test_multiroot_divergence_matrix\", test_multiroot_divergence_matrix },\n\n        { \"test_pair_coalescence_counts\", test_pair_coalescence_counts },\n        { \"test_pair_coalescence_counts_missing\", test_pair_coalescence_counts_missing },\n        { \"test_pair_coalescence_quantiles\", test_pair_coalescence_quantiles },\n        { \"test_pair_coalescence_rates\", test_pair_coalescence_rates },\n\n        { NULL, NULL },\n    };\n    return test_main(tests, argc, argv);\n}\n"
  },
  {
    "path": "c/tests/test_tables.c",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2023 Tskit Developers\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n#include \"testlib.h\"\n#include \"tskit/core.h\"\n#include <tskit/tables.h>\n\n#include <float.h>\n#include <unistd.h>\n#include <stdlib.h>\n\nstatic void\nreverse_migrations(tsk_table_collection_t *tables)\n{\n    int ret;\n    tsk_migration_table_t migrations;\n    tsk_migration_t migration;\n    tsk_id_t j, ret_id;\n\n    /* Easy way to copy the metadata schema */\n    ret = tsk_migration_table_copy(&tables->migrations, &migrations, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_migration_table_clear(&migrations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    for (j = (tsk_id_t) tables->migrations.num_rows - 1; j >= 0; j--) {\n        ret = tsk_migration_table_get_row(&tables->migrations, j, &migration);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret_id = 
tsk_migration_table_add_row(&migrations, migration.left,\n            migration.right, migration.node, migration.source, migration.dest,\n            migration.time, migration.metadata, migration.metadata_length);\n        CU_ASSERT_FATAL(ret_id >= 0);\n    }\n\n    ret = tsk_migration_table_copy(&migrations, &tables->migrations, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_migration_table_free(&migrations);\n}\n\nstatic void\nreverse_edges(tsk_table_collection_t *tables)\n{\n    int ret;\n    tsk_edge_table_t edges;\n    tsk_edge_t edge;\n    tsk_id_t j, ret_id;\n\n    /* Easy way to copy the metadata schema */\n    ret = tsk_edge_table_copy(&tables->edges, &edges, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_edge_table_clear(&edges);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    for (j = (tsk_id_t) tables->edges.num_rows - 1; j >= 0; j--) {\n        ret = tsk_edge_table_get_row(&tables->edges, j, &edge);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret_id = tsk_edge_table_add_row(&edges, edge.left, edge.right, edge.parent,\n            edge.child, edge.metadata, edge.metadata_length);\n        CU_ASSERT_FATAL(ret_id >= 0);\n    }\n\n    ret = tsk_edge_table_copy(&edges, &tables->edges, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_edge_table_free(&edges);\n}\n\nstatic void\nreverse_mutations(tsk_table_collection_t *tables)\n{\n    int ret;\n    tsk_mutation_table_t mutations;\n    tsk_mutation_t mutation;\n    tsk_id_t j, ret_id;\n    tsk_id_t new_parent;\n    tsk_id_t n = (tsk_id_t) tables->mutations.num_rows;\n\n    ret = tsk_mutation_table_init(&mutations, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    for (j = n - 1; j >= 0; j--) {\n        ret = tsk_mutation_table_get_row(&tables->mutations, j, &mutation);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        new_parent = (mutation.parent == TSK_NULL) ? 
TSK_NULL : n - mutation.parent - 1;\n        ret_id = tsk_mutation_table_add_row(&mutations, mutation.site, mutation.node,\n            new_parent, mutation.time, mutation.derived_state,\n            mutation.derived_state_length, mutation.metadata, mutation.metadata_length);\n        CU_ASSERT_FATAL(ret_id >= 0);\n    }\n\n    ret = tsk_mutation_table_copy(&mutations, &tables->mutations, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_mutation_table_free(&mutations);\n}\n\nstatic void\ninsert_edge_metadata(tsk_table_collection_t *tables)\n{\n    int ret;\n    tsk_edge_table_t edges;\n    tsk_edge_t edge;\n    tsk_id_t j, ret_id;\n    char metadata[100];\n\n    ret = tsk_edge_table_init(&edges, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (j = 0; j < (tsk_id_t) tables->edges.num_rows; j++) {\n        ret = tsk_edge_table_get_row(&tables->edges, j, &edge);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        snprintf(metadata, sizeof(metadata), \"md_%lld\\n\", (long long) j);\n        ret_id = tsk_edge_table_add_row(&edges, edge.left, edge.right, edge.parent,\n            edge.child, metadata, (tsk_size_t) strlen(metadata));\n        CU_ASSERT_FATAL(ret_id >= 0);\n    }\n    ret = tsk_edge_table_copy(&edges, &tables->edges, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_edge_table_free(&edges);\n}\n\nstatic void\ntest_table_collection_equals_options(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_table_collection_t tc1, tc2;\n\n    char example_time_units[100] = \"An example of time units with unicode ⏰\";\n    char example_metadata[100] = \"An example of metadata with unicode 🎄🌳🌴🌲🎋\";\n    char example_metadata_schema[100]\n        = \"An example of metadata schema with unicode 🎄🌳🌴🌲🎋\";\n    tsk_size_t example_time_units_length = (tsk_size_t) strlen(example_time_units);\n    tsk_size_t example_metadata_length = (tsk_size_t) strlen(example_metadata);\n    tsk_size_t example_metadata_schema_length\n        = (tsk_size_t) 
strlen(example_metadata_schema);\n\n    // Test equality empty tables\n    ret = tsk_table_collection_init(&tc1, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_table_collection_init(&tc2, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_table_collection_equals(&tc1, &tc2, 0);\n    CU_ASSERT_TRUE(ret);\n\n    // Adding some meat to the tables\n    ret_id = tsk_node_table_add_row(&tc1.nodes, TSK_NODE_IS_SAMPLE, 0.0, 0, 0, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(&tc1.nodes, TSK_NODE_IS_SAMPLE, 1.0, 0, 0, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id\n        = tsk_individual_table_add_row(&tc1.individuals, 0, NULL, 0, NULL, 0, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_population_table_add_row(&tc1.populations, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&tc1.edges, 0.0, 1.0, 1, 0, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_site_table_add_row(&tc1.sites, 0.2, \"A\", 1, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tc1.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n\n    // Equality of empty vs non-empty\n    ret = tsk_table_collection_equals(&tc1, &tc2, 0);\n    CU_ASSERT_FALSE(ret);\n    ret = tsk_table_collection_copy(&tc1, &tc2, TSK_NO_INIT);\n    CU_ASSERT_EQUAL(ret, 0);\n\n    // Equivalent except for time_units\n    ret = tsk_table_collection_set_time_units(\n        &tc1, example_time_units, example_time_units_length);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_table_collection_equals(&tc1, &tc2, 0);\n    CU_ASSERT_FALSE(ret);\n    ret = tsk_table_collection_set_time_units(\n        &tc2, example_time_units, example_time_units_length);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_table_collection_equals(&tc1, &tc2, 0);\n    CU_ASSERT_TRUE(ret);\n\n    // Equivalent except for metadata\n    ret = tsk_table_collection_set_metadata(\n        &tc1, example_metadata, example_metadata_length);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_TS_METADATA);\n    CU_ASSERT_TRUE(ret);\n    /* TSK_CMP_IGNORE_METADATA implies TSK_CMP_IGNORE_TS_METADATA */\n    ret = tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_METADATA);\n  
  CU_ASSERT_TRUE(ret);\n    ret = tsk_table_collection_equals(&tc1, &tc2, 0);\n    CU_ASSERT_FALSE(ret);\n    ret = tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_PROVENANCE);\n    CU_ASSERT_FALSE(ret);\n    ret = tsk_table_collection_set_metadata(\n        &tc2, example_metadata, example_metadata_length);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_table_collection_equals(&tc1, &tc2, 0);\n    CU_ASSERT_TRUE(ret);\n    ret = tsk_table_collection_set_metadata_schema(\n        &tc1, example_metadata_schema, example_metadata_schema_length);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_TS_METADATA);\n    CU_ASSERT_TRUE(ret);\n    ret = tsk_table_collection_equals(&tc1, &tc2, 0);\n    CU_ASSERT_FALSE(ret);\n    ret = tsk_table_collection_set_metadata_schema(\n        &tc2, example_metadata_schema, example_metadata_schema_length);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_table_collection_equals(&tc1, &tc2, 0);\n    CU_ASSERT_TRUE(ret);\n\n    // Ignore provenance\n    ret_id = tsk_provenance_table_add_row(&tc1.provenances, \"time\", 4, \"record\", 6);\n    CU_ASSERT_EQUAL(ret_id, 0);\n    ret = tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_PROVENANCE);\n    CU_ASSERT_TRUE(ret);\n    ret = tsk_table_collection_equals(&tc1, &tc2, 0);\n    CU_ASSERT_FALSE(ret);\n    ret = tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_TS_METADATA);\n    CU_ASSERT_FALSE(ret);\n    ret_id = tsk_provenance_table_add_row(&tc2.provenances, \"time\", 4, \"record\", 6);\n    CU_ASSERT_EQUAL(ret_id, 0);\n    ret = tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_PROVENANCE);\n    CU_ASSERT_TRUE(ret);\n    ret = tsk_table_collection_equals(&tc1, &tc2, 0);\n    CU_ASSERT_TRUE(ret);\n\n    // Ignore provenance timestamp\n    ret_id = tsk_provenance_table_add_row(&tc1.provenances, \"time\", 4, \"record\", 6);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_provenance_table_add_row(&tc2.provenances, \"other\", 5, 
\"record\", 6);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0));\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_PROVENANCE));\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_TIMESTAMPS));\n\n    // Ignore provenance and top-level metadata.\n    ret = tsk_provenance_table_clear(&tc1.provenances);\n    CU_ASSERT_EQUAL(ret, 0);\n    example_metadata[0] = 'J';\n    ret = tsk_table_collection_set_metadata(\n        &tc1, example_metadata, example_metadata_length);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_table_collection_equals(&tc1, &tc2, 0);\n    CU_ASSERT_FALSE(ret);\n    ret = tsk_table_collection_equals(\n        &tc1, &tc2, TSK_CMP_IGNORE_TS_METADATA | TSK_CMP_IGNORE_PROVENANCE);\n    CU_ASSERT_TRUE(ret);\n\n    tsk_table_collection_free(&tc1);\n    tsk_table_collection_free(&tc2);\n\n    // Check what happens when one of the tables just differs by metadata.\n    ret = tsk_table_collection_init(&tc1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_init(&tc2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_population_table_add_row(&tc1.populations, \"metadata\", 8);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_population_table_add_row(&tc2.populations, \"\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0));\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_METADATA));\n\n    tsk_table_collection_free(&tc1);\n    tsk_table_collection_free(&tc2);\n\n    // Ignore tables\n    ret = tsk_table_collection_init(&tc1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_init(&tc2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_set_metadata(\n        &tc1, example_metadata, example_metadata_length);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_table_collection_set_metadata(\n        &tc2, 
example_metadata, example_metadata_length);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));\n    // Add one row for each table we're ignoring\n    ret_id\n        = tsk_individual_table_add_row(&tc1.individuals, 0, NULL, 0, NULL, 0, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(&tc1.nodes, TSK_NODE_IS_SAMPLE, 0.0, 0, 0, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&tc1.edges, 0.0, 1.0, 1, 0, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_migration_table_add_row(&tc1.migrations, 0, 0, 0, 0, 0, 0, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_site_table_add_row(&tc1.sites, 0.2, \"A\", 1, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tc1.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_population_table_add_row(&tc1.populations, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0));\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_TABLES));\n\n    tsk_table_collection_free(&tc1);\n    tsk_table_collection_free(&tc2);\n\n    // Ignore reference sequence\n    ret = tsk_table_collection_init(&tc1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_init(&tc2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_set_metadata(\n        &tc1, example_metadata, example_metadata_length);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_table_collection_set_metadata(\n        &tc2, example_metadata, example_metadata_length);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));\n    ret = tsk_reference_sequence_set_data(&tc1.reference_sequence, \"A\", 1);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0));\n    CU_ASSERT_TRUE(\n        
tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_REFERENCE_SEQUENCE));\n\n    tsk_table_collection_free(&tc1);\n    tsk_table_collection_free(&tc2);\n}\n\nstatic void\ntest_table_collection_simplify_errors(void)\n{\n    int ret;\n    tsk_table_collection_t tables;\n    tsk_id_t samples[] = { 0, 1 };\n    tsk_id_t ret_id;\n    const char *individuals = \"1      0.25     -2\\n\";\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1;\n\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    /* Bad samples */\n    samples[0] = -1;\n    ret = tsk_table_collection_simplify(&tables, samples, 2, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    samples[0] = 10;\n    ret = tsk_table_collection_simplify(&tables, samples, 2, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    samples[0] = 0;\n\n    /* Duplicate samples */\n    samples[0] = 0;\n    samples[1] = 0;\n    ret = tsk_table_collection_simplify(&tables, samples, 2, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);\n    samples[0] = 0;\n\n    ret_id = tsk_site_table_add_row(&tables.sites, 0, \"A\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_site_table_add_row(&tables.sites, 0, \"A\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = tsk_table_collection_simplify(&tables, samples, 0, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SITE_POSITION);\n\n    /* Out of order positions */\n    tables.sites.position[0] = 0.5;\n    ret = tsk_table_collection_simplify(&tables, samples, 0, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_SITES);\n\n    /* Position out of bounds */\n    tables.sites.position[0] = 1.5;\n    ret = 
tsk_table_collection_simplify(&tables, samples, 0, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SITE_POSITION);\n    tsk_site_table_truncate(&tables.sites, 0);\n    tables.sites.position[0] = 0;\n\n    /* Individual out of bounds */\n    parse_individuals(individuals, &tables.individuals);\n    CU_ASSERT_EQUAL_FATAL(tables.individuals.num_rows, 1);\n    ret = tsk_table_collection_simplify(&tables, samples, 0, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n\n    /* TODO More tests for this: see\n     * https://github.com/tskit-dev/msprime/issues/517 */\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_reference_sequence_state_machine(void)\n{\n\n    tsk_reference_sequence_t r1;\n\n    tsk_reference_sequence_init(&r1, 0);\n    CU_ASSERT_EQUAL(r1.data, NULL);\n    CU_ASSERT_EQUAL(r1.url, NULL);\n    CU_ASSERT_EQUAL(r1.metadata, NULL);\n    CU_ASSERT_EQUAL(r1.metadata_schema, NULL);\n    CU_ASSERT_TRUE(tsk_reference_sequence_is_null(&r1));\n\n    CU_ASSERT_EQUAL(tsk_reference_sequence_set_data(&r1, \"x\", 1), 0);\n    CU_ASSERT_FALSE(tsk_reference_sequence_is_null(&r1));\n    /* Setting the value back to NULL makes the whole reference object NULL */\n    CU_ASSERT_EQUAL(tsk_reference_sequence_set_data(&r1, NULL, 0), 0);\n    CU_ASSERT_TRUE(tsk_reference_sequence_is_null(&r1));\n    tsk_reference_sequence_free(&r1);\n    CU_ASSERT_TRUE(tsk_reference_sequence_is_null(&r1));\n\n    /* Any empty string is the same thing. 
*/\n    tsk_reference_sequence_init(&r1, 0);\n    CU_ASSERT_EQUAL(tsk_reference_sequence_set_data(&r1, \"\", 0), 0);\n    CU_ASSERT_TRUE(tsk_reference_sequence_is_null(&r1));\n    tsk_reference_sequence_free(&r1);\n\n    tsk_reference_sequence_init(&r1, 0);\n    CU_ASSERT_EQUAL(tsk_reference_sequence_set_url(&r1, \"x\", 1), 0);\n    CU_ASSERT_FALSE(tsk_reference_sequence_is_null(&r1));\n    tsk_reference_sequence_free(&r1);\n\n    tsk_reference_sequence_init(&r1, 0);\n    CU_ASSERT_EQUAL(tsk_reference_sequence_set_metadata(&r1, \"x\", 1), 0);\n    CU_ASSERT_FALSE(tsk_reference_sequence_is_null(&r1));\n    tsk_reference_sequence_free(&r1);\n\n    tsk_reference_sequence_init(&r1, 0);\n    CU_ASSERT_EQUAL(tsk_reference_sequence_set_metadata_schema(&r1, \"x\", 1), 0);\n    CU_ASSERT_FALSE(tsk_reference_sequence_is_null(&r1));\n    tsk_reference_sequence_free(&r1);\n\n    tsk_reference_sequence_init(&r1, 0);\n    CU_ASSERT_EQUAL(tsk_reference_sequence_set_metadata(&r1, \"x\", 1), 0);\n    CU_ASSERT_FALSE(tsk_reference_sequence_is_null(&r1));\n    CU_ASSERT_EQUAL(tsk_reference_sequence_set_metadata_schema(&r1, \"x\", 1), 0);\n    CU_ASSERT_FALSE(tsk_reference_sequence_is_null(&r1));\n    CU_ASSERT_EQUAL(tsk_reference_sequence_set_url(&r1, \"x\", 1), 0);\n    CU_ASSERT_FALSE(tsk_reference_sequence_is_null(&r1));\n    CU_ASSERT_EQUAL(tsk_reference_sequence_set_data(&r1, \"x\", 1), 0);\n    CU_ASSERT_FALSE(tsk_reference_sequence_is_null(&r1));\n\n    CU_ASSERT_EQUAL(tsk_reference_sequence_set_metadata(&r1, \"\", 0), 0);\n    CU_ASSERT_FALSE(tsk_reference_sequence_is_null(&r1));\n    CU_ASSERT_EQUAL(tsk_reference_sequence_set_metadata_schema(&r1, \"\", 0), 0);\n    CU_ASSERT_FALSE(tsk_reference_sequence_is_null(&r1));\n    CU_ASSERT_EQUAL(tsk_reference_sequence_set_url(&r1, \"\", 0), 0);\n    CU_ASSERT_FALSE(tsk_reference_sequence_is_null(&r1));\n    CU_ASSERT_EQUAL(tsk_reference_sequence_set_data(&r1, \"\", 0), 0);\n    
CU_ASSERT_TRUE(tsk_reference_sequence_is_null(&r1));\n\n    tsk_reference_sequence_free(&r1);\n}\n\nstatic void\ntest_reference_sequence_take(void)\n{\n    int ret;\n    tsk_reference_sequence_t r1;\n    tsk_reference_sequence_t r2;\n    const char *const_data = \"data\";\n    const char *const_metadata = \"metadata\";\n    char *takeset_data = strdup(const_data);\n    char *takeset_metadata = strdup(const_metadata);\n\n    CU_ASSERT_FATAL(takeset_data != NULL);\n    CU_ASSERT_FATAL(takeset_metadata != NULL);\n    ret = tsk_reference_sequence_init(&r1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_reference_sequence_set_data(&r1, const_data, strlen(const_data));\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_reference_sequence_set_metadata(\n        &r1, const_metadata, strlen(const_metadata));\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_reference_sequence_init(&r2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_reference_sequence_equals(&r1, &r2, 0));\n    ret = tsk_reference_sequence_takeset_data(&r2, takeset_data, strlen(takeset_data));\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_reference_sequence_equals(&r1, &r2, 0));\n    ret = tsk_reference_sequence_takeset_metadata(\n        &r2, takeset_metadata, strlen(takeset_metadata));\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));\n\n    /* Writing over these with copies doesn't lose memory */\n    ret = tsk_reference_sequence_set_data(&r2, const_data, strlen(const_data));\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_reference_sequence_set_metadata(\n        &r2, const_metadata, strlen(const_metadata));\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));\n\n    /* The original copies are gone, make some new ones */\n    takeset_data = strdup(const_data);\n    takeset_metadata = strdup(const_metadata);\n\n    CU_ASSERT_FATAL(takeset_data != NULL);\n    CU_ASSERT_FATAL(takeset_metadata != NULL);\n    ret = tsk_reference_sequence_takeset_data(&r1, takeset_data, strlen(takeset_data));\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = 
tsk_reference_sequence_takeset_metadata(\n        &r1, takeset_metadata, strlen(takeset_metadata));\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));\n\n    tsk_reference_sequence_free(&r1);\n    tsk_reference_sequence_free(&r2);\n}\n\nstatic void\ntest_reference_sequence(void)\n{\n    int ret;\n    tsk_reference_sequence_t r1;\n    tsk_reference_sequence_t r2;\n\n    const char example_data[100] = \"An example string with unicode 🎄🌳🌴🌲🎋\";\n    tsk_size_t example_data_length = (tsk_size_t) strlen(example_data);\n    const char example_url[100] = \"An example url with unicode 🎄🌳🌴🌲🎋\";\n    tsk_size_t example_url_length = (tsk_size_t) strlen(example_url);\n    const char example_metadata[100] = \"An example metadata with unicode 🎄🌳🌴🌲🎋\";\n    tsk_size_t example_metadata_length = (tsk_size_t) strlen(example_metadata);\n    const char example_schema[100] = \"An example schema with unicode 🎄🌳🌴🌲🎋\";\n    tsk_size_t example_schema_length = (tsk_size_t) strlen(example_schema);\n\n    tsk_reference_sequence_init(&r1, 0);\n    tsk_reference_sequence_init(&r2, 0);\n\n    /* NULL sequences are initially equal */\n    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));\n\n    ret = tsk_reference_sequence_set_data(&r1, example_data, example_data_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_reference_sequence_equals(&r1, &r2, 0));\n\n    ret = tsk_reference_sequence_set_data(&r1, \"\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));\n\n    ret = tsk_reference_sequence_set_data(&r2, \"\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));\n\n    ret = tsk_reference_sequence_set_data(&r1, example_data, example_data_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_reference_sequence_equals(&r1, &r2, 0));\n\n    ret = tsk_reference_sequence_set_data(&r2, example_data, 
example_data_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));\n\n    ret = tsk_reference_sequence_set_url(&r1, example_url, example_url_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_reference_sequence_equals(&r1, &r2, 0));\n    ret = tsk_reference_sequence_set_url(&r2, example_url, example_url_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));\n\n    ret = tsk_reference_sequence_set_metadata(\n        &r1, example_metadata, example_metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_reference_sequence_equals(&r1, &r2, 0));\n    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, TSK_CMP_IGNORE_METADATA));\n    ret = tsk_reference_sequence_set_metadata(\n        &r2, example_metadata, example_metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));\n    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, TSK_CMP_IGNORE_METADATA));\n\n    ret = tsk_reference_sequence_set_metadata_schema(\n        &r1, example_schema, example_schema_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_reference_sequence_equals(&r1, &r2, 0));\n    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, TSK_CMP_IGNORE_METADATA));\n    ret = tsk_reference_sequence_set_metadata_schema(\n        &r2, example_schema, example_schema_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));\n    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, TSK_CMP_IGNORE_METADATA));\n\n    // Test copy\n    tsk_reference_sequence_free(&r1);\n    tsk_reference_sequence_free(&r2);\n\n    tsk_reference_sequence_init(&r1, 0);\n    ret = tsk_reference_sequence_set_data(&r1, example_data, example_data_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_reference_sequence_copy(&r1, &r2, 0);\n    
CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));\n\n    ret = tsk_reference_sequence_set_url(&r1, example_url, example_url_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_reference_sequence_copy(&r1, &r2, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));\n\n    ret = tsk_reference_sequence_set_metadata(\n        &r1, example_metadata, example_metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_reference_sequence_copy(&r1, &r2, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));\n\n    ret = tsk_reference_sequence_set_metadata_schema(\n        &r1, example_schema, example_schema_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_reference_sequence_copy(&r1, &r2, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));\n\n    tsk_reference_sequence_free(&r1);\n    tsk_reference_sequence_free(&r2);\n}\n\nstatic void\ntest_table_collection_reference_sequence(void)\n{\n    int ret;\n    tsk_table_collection_t tc1, tc2;\n\n    char example_data[100] = \"An example string with unicode 🎄🌳🌴🌲🎋\";\n    tsk_size_t example_data_length = (tsk_size_t) strlen(example_data);\n    char example_url[100] = \"An example url with unicode 🎄🌳🌴🌲🎋\";\n    tsk_size_t example_url_length = (tsk_size_t) strlen(example_url);\n    char example_metadata[100] = \"An example metadata with unicode 🎄🌳🌴🌲🎋\";\n    tsk_size_t example_metadata_length = (tsk_size_t) strlen(example_metadata);\n    char example_schema[100] = \"An example schema with unicode 🎄🌳🌴🌲🎋\";\n    tsk_size_t example_schema_length = (tsk_size_t) strlen(example_schema);\n\n    // Test equality\n    ret = tsk_table_collection_init(&tc1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_init(&tc2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    
CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));\n\n    ret = tsk_reference_sequence_set_data(\n        &tc1.reference_sequence, example_data, example_data_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0));\n\n    ret = tsk_reference_sequence_set_data(\n        &tc2.reference_sequence, example_data, example_data_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));\n\n    ret = tsk_reference_sequence_set_url(\n        &tc1.reference_sequence, example_url, example_url_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0));\n    ret = tsk_reference_sequence_set_url(\n        &tc2.reference_sequence, example_url, example_url_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));\n\n    ret = tsk_reference_sequence_set_metadata(\n        &tc1.reference_sequence, example_metadata, example_metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0));\n    ret = tsk_reference_sequence_set_metadata(\n        &tc2.reference_sequence, example_metadata, example_metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));\n\n    ret = tsk_reference_sequence_set_metadata_schema(\n        &tc1.reference_sequence, example_schema, example_schema_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0));\n    ret = tsk_reference_sequence_set_metadata_schema(\n        &tc2.reference_sequence, example_schema, example_schema_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));\n\n    // Test copy\n    tsk_table_collection_free(&tc1);\n    tsk_table_collection_free(&tc2);\n    ret = tsk_table_collection_init(&tc1, 0);\n    
CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_reference_sequence_set_data(\n        &tc1.reference_sequence, example_data, example_data_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_copy(&tc1, &tc2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));\n\n    ret = tsk_reference_sequence_set_url(\n        &tc1.reference_sequence, example_url, example_url_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_copy(&tc1, &tc2, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));\n\n    ret = tsk_reference_sequence_set_metadata(\n        &tc1.reference_sequence, example_metadata, example_metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_copy(&tc1, &tc2, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));\n\n    ret = tsk_reference_sequence_set_metadata_schema(\n        &tc1.reference_sequence, example_schema, example_schema_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_copy(&tc1, &tc2, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));\n    tsk_table_collection_free(&tc1);\n    tsk_table_collection_free(&tc2);\n\n    // Test dump and load\n    ret = tsk_table_collection_init(&tc1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tc1.sequence_length = 1.0;\n    ret = tsk_reference_sequence_set_data(\n        &tc1.reference_sequence, example_data, example_data_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_reference_sequence_set_url(\n        &tc1.reference_sequence, example_url, example_url_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_reference_sequence_set_metadata(\n        &tc1.reference_sequence, example_metadata, example_metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = 
tsk_reference_sequence_set_metadata_schema(\n        &tc1.reference_sequence, example_schema, example_schema_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_dump(&tc1, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tc2, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));\n    tsk_table_collection_free(&tc1);\n    tsk_table_collection_free(&tc2);\n}\n\nstatic void\ntest_table_collection_has_reference_sequence(void)\n{\n    int ret;\n    tsk_table_collection_t tc;\n\n    ret = tsk_table_collection_init(&tc, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tc.sequence_length = 1.0;\n\n    CU_ASSERT_FALSE(tsk_table_collection_has_reference_sequence(&tc));\n    ret = tsk_reference_sequence_set_data(&tc.reference_sequence, \"A\", 1);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_has_reference_sequence(&tc));\n    /* Goes back to NULL by setting an empty string. See\n     * test_reference_sequence_state_machine for detailed tests. 
*/\n    ret = tsk_reference_sequence_set_data(&tc.reference_sequence, \"\", 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_table_collection_has_reference_sequence(&tc));\n\n    tsk_table_collection_free(&tc);\n}\n\nstatic void\ntest_table_collection_metadata(void)\n{\n    int ret;\n    tsk_table_collection_t tc1, tc2;\n\n    char example_metadata[100] = \"An example of metadata with unicode 🎄🌳🌴🌲🎋\";\n    char *takeset_metadata;\n    char example_metadata_schema[100]\n        = \"An example of metadata schema with unicode 🎄🌳🌴🌲🎋\";\n    tsk_size_t example_metadata_length = (tsk_size_t) strlen(example_metadata);\n    tsk_size_t example_metadata_schema_length\n        = (tsk_size_t) strlen(example_metadata_schema);\n\n    // Test equality\n    ret = tsk_table_collection_init(&tc1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_init(&tc2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));\n    ret = tsk_table_collection_set_metadata(\n        &tc1, example_metadata, example_metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0));\n    ret = tsk_table_collection_set_metadata(\n        &tc2, example_metadata, example_metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));\n    ret = tsk_table_collection_set_metadata_schema(\n        &tc1, example_metadata_schema, example_metadata_schema_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0));\n    ret = tsk_table_collection_set_metadata_schema(\n        &tc2, example_metadata_schema, example_metadata_schema_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));\n\n    // Test copy\n    tsk_table_collection_free(&tc1);\n    tsk_table_collection_free(&tc2);\n    ret = 
tsk_table_collection_init(&tc1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_set_metadata(\n        &tc1, example_metadata, example_metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_copy(&tc1, &tc2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));\n\n    ret = tsk_table_collection_set_metadata_schema(\n        &tc1, example_metadata_schema, example_metadata_schema_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_table_collection_free(&tc2);\n    ret = tsk_table_collection_copy(&tc1, &tc2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));\n\n    // Test dump and load with empty metadata and schema\n    tsk_table_collection_free(&tc1);\n    tsk_table_collection_free(&tc2);\n    ret = tsk_table_collection_init(&tc1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tc1.sequence_length = 1.0;\n    ret = tsk_table_collection_dump(&tc1, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tc2, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));\n\n    // Test dump and load with set metadata and schema\n    tsk_table_collection_free(&tc1);\n    tsk_table_collection_free(&tc2);\n    ret = tsk_table_collection_init(&tc1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tc1.sequence_length = 1.0;\n    ret = tsk_table_collection_set_metadata(\n        &tc1, example_metadata, example_metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_set_metadata_schema(\n        &tc1, example_metadata_schema, example_metadata_schema_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_dump(&tc1, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tc2, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    
CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));\n    tsk_table_collection_free(&tc1);\n    tsk_table_collection_free(&tc2);\n\n    takeset_metadata = tsk_malloc(example_metadata_length * sizeof(char));\n    CU_ASSERT_FATAL(takeset_metadata != NULL);\n    memcpy(takeset_metadata, &example_metadata,\n        (size_t) (example_metadata_length * sizeof(char)));\n\n    ret = tsk_table_collection_init(&tc1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_takeset_metadata(\n        &tc1, takeset_metadata, example_metadata_length);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(tc1.metadata, &example_metadata, example_metadata_length), 0);\n    tsk_table_collection_free(&tc1);\n}\n\nstatic void\ntest_table_collection_time_units(void)\n{\n    int ret;\n    tsk_table_collection_t tc1, tc2;\n\n    char example_time_units[100] = \"An example of time units with unicode ⏰\";\n    tsk_size_t example_time_units_length = (tsk_size_t) strlen(example_time_units);\n\n    // Test equality\n    ret = tsk_table_collection_init(&tc1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_init(&tc2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));\n    ret = tsk_table_collection_set_time_units(\n        &tc1, example_time_units, example_time_units_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0));\n    ret = tsk_table_collection_set_time_units(\n        &tc2, example_time_units, example_time_units_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));\n\n    // Test copy\n    tsk_table_collection_free(&tc1);\n    tsk_table_collection_free(&tc2);\n    ret = tsk_table_collection_init(&tc1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_set_time_units(\n        &tc1, example_time_units, example_time_units_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n   
 ret = tsk_table_collection_copy(&tc1, &tc2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));\n\n    // Test dump and load with default time_units\n    tsk_table_collection_free(&tc1);\n    tsk_table_collection_free(&tc2);\n    ret = tsk_table_collection_init(&tc1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, strncmp(tc1.time_units, TSK_TIME_UNITS_UNKNOWN, 7));\n    tc1.sequence_length = 1.0;\n    ret = tsk_table_collection_dump(&tc1, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tc2, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));\n\n    // Test dump and load with set time_units and schema\n    tsk_table_collection_free(&tc1);\n    tsk_table_collection_free(&tc2);\n    ret = tsk_table_collection_init(&tc1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tc1.sequence_length = 1.0;\n    ret = tsk_table_collection_set_time_units(\n        &tc1, example_time_units, example_time_units_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_dump(&tc1, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tc2, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));\n    tsk_table_collection_free(&tc1);\n    tsk_table_collection_free(&tc2);\n}\n\nstatic void\ntest_node_table(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_node_table_t table, table2;\n    tsk_node_t node, node2;\n    tsk_size_t num_rows = 100;\n    tsk_id_t j;\n    tsk_flags_t *flags;\n    tsk_id_t *population;\n    double *time;\n    tsk_id_t *individual;\n    char *metadata;\n    tsk_size_t *metadata_offset;\n    const char *test_metadata = \"test\";\n    tsk_size_t test_metadata_length = 4;\n    char metadata_copy[test_metadata_length + 1];\n    tsk_id_t row_subset[6] = { 1, 
9, 1, 0, 2, 2 };\n    tsk_size_t num_row_subset = 6;\n\n    metadata_copy[test_metadata_length] = '\\0';\n    ret = tsk_node_table_init(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_node_table_set_max_rows_increment(&table, 1);\n    tsk_node_table_set_max_metadata_length_increment(&table, 1);\n    tsk_node_table_print_state(&table, _devnull);\n    ret = tsk_node_table_dump_text(&table, _devnull);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    for (j = 0; j < (tsk_id_t) num_rows; j++) {\n        ret_id = tsk_node_table_add_row(&table, (tsk_flags_t) j, (double) j, j, j,\n            test_metadata, test_metadata_length);\n        CU_ASSERT_EQUAL_FATAL(ret_id, j);\n        CU_ASSERT_EQUAL(table.flags[j], (tsk_flags_t) j);\n        CU_ASSERT_EQUAL(table.time[j], j);\n        CU_ASSERT_EQUAL(table.population[j], j);\n        CU_ASSERT_EQUAL(table.individual[j], j);\n        CU_ASSERT_EQUAL(table.num_rows, (tsk_size_t) j + 1);\n        CU_ASSERT_EQUAL(\n            table.metadata_length, (tsk_size_t) (j + 1) * test_metadata_length);\n        CU_ASSERT_EQUAL(table.metadata_offset[j + 1], table.metadata_length);\n        /* check the metadata */\n        tsk_memcpy(metadata_copy, table.metadata + table.metadata_offset[j],\n            test_metadata_length);\n        CU_ASSERT_NSTRING_EQUAL(metadata_copy, test_metadata, test_metadata_length);\n        ret = tsk_node_table_get_row(&table, (tsk_id_t) j, &node);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(node.id, j);\n        CU_ASSERT_EQUAL(node.flags, (tsk_size_t) j);\n        CU_ASSERT_EQUAL(node.time, j);\n        CU_ASSERT_EQUAL(node.population, j);\n        CU_ASSERT_EQUAL(node.individual, j);\n        CU_ASSERT_EQUAL(node.metadata_length, test_metadata_length);\n        CU_ASSERT_NSTRING_EQUAL(node.metadata, test_metadata, test_metadata_length);\n    }\n\n    /* Test equality with and without metadata */\n    tsk_node_table_copy(&table, &table2, 0);\n    
CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n    /* Change the metadata values */\n    table2.metadata[0] = 0;\n    CU_ASSERT_FALSE(tsk_node_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n    /* Change the last metadata entry */\n    table2.metadata_offset[table2.num_rows]\n        = table2.metadata_offset[table2.num_rows - 1];\n    CU_ASSERT_FALSE(tsk_node_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n    /* Delete all metadata */\n    tsk_memset(table2.metadata_offset, 0,\n        (table2.num_rows + 1) * sizeof(*table2.metadata_offset));\n    CU_ASSERT_FALSE(tsk_node_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n    tsk_node_table_free(&table2);\n\n    CU_ASSERT_EQUAL(tsk_node_table_get_row(&table, (tsk_id_t) num_rows, &node),\n        TSK_ERR_NODE_OUT_OF_BOUNDS);\n    tsk_node_table_print_state(&table, _devnull);\n    ret = tsk_node_table_dump_text(&table, _devnull);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_node_table_clear(&table);\n    CU_ASSERT_EQUAL(table.num_rows, 0);\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n\n    num_rows *= 2;\n    flags = tsk_malloc(num_rows * sizeof(tsk_flags_t));\n    CU_ASSERT_FATAL(flags != NULL);\n    tsk_memset(flags, 1, num_rows * sizeof(tsk_flags_t));\n    population = tsk_malloc(num_rows * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(population != NULL);\n    tsk_memset(population, 2, num_rows * sizeof(tsk_id_t));\n    time = tsk_malloc(num_rows * sizeof(double));\n    CU_ASSERT_FATAL(time != NULL);\n    tsk_memset(time, 0, num_rows * sizeof(double));\n    individual = tsk_malloc(num_rows * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(individual != NULL);\n    tsk_memset(individual, 3, num_rows * 
sizeof(tsk_id_t));\n    metadata = tsk_malloc(num_rows * sizeof(char));\n    CU_ASSERT_FATAL(metadata != NULL);\n    tsk_memset(metadata, 'a', num_rows * sizeof(char));\n    metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n    CU_ASSERT_FATAL(metadata_offset != NULL);\n    for (j = 0; j < (tsk_id_t) num_rows + 1; j++) {\n        metadata_offset[j] = (tsk_size_t) j;\n    }\n    ret = tsk_node_table_set_columns(&table, num_rows, flags, time, population,\n        individual, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.flags, flags, num_rows * sizeof(tsk_flags_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.population, population, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.individual, individual, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,\n                        (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n    CU_ASSERT_EQUAL(table.metadata_length, num_rows);\n    tsk_node_table_print_state(&table, _devnull);\n    ret = tsk_node_table_dump_text(&table, _devnull);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Append another num_rows onto the end */\n    ret = tsk_node_table_append_columns(&table, num_rows, flags, time, population,\n        individual, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.flags, flags, num_rows * sizeof(tsk_flags_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.flags + num_rows, flags, num_rows * sizeof(tsk_flags_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.population, population, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.population + 
num_rows, population, num_rows * sizeof(tsk_id_t)),\n        0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.time + num_rows, time, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.individual, individual, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.individual + num_rows, individual, num_rows * sizeof(tsk_id_t)),\n        0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.metadata + num_rows, metadata, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);\n    CU_ASSERT_EQUAL(table.metadata_length, 2 * num_rows);\n    tsk_node_table_print_state(&table, _devnull);\n    ret = tsk_node_table_dump_text(&table, _devnull);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Truncate back to the original number of rows. */\n    ret = tsk_node_table_truncate(&table, num_rows);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.flags, flags, num_rows * sizeof(tsk_flags_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.population, population, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.individual, individual, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,\n                        (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n    CU_ASSERT_EQUAL(table.metadata_length, num_rows);\n\n    ret = tsk_node_table_truncate(&table, num_rows + 1);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TABLE_POSITION);\n\n    /* If population is NULL it should be set to -1. 
If metadata is NULL all metadatas\n     * should be set to the empty string. If individual is NULL it should be set to -1.\n     */\n    num_rows = 10;\n    tsk_memset(population, 0xff, num_rows * sizeof(tsk_id_t));\n    tsk_memset(individual, 0xff, num_rows * sizeof(tsk_id_t));\n    ret = tsk_node_table_set_columns(\n        &table, num_rows, flags, time, NULL, NULL, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.flags, flags, num_rows * sizeof(tsk_flags_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.population, population, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.individual, individual, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,\n                        num_rows * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n    CU_ASSERT_EQUAL(table.metadata_length, num_rows);\n\n    /* flags and time cannot be NULL */\n    ret = tsk_node_table_set_columns(\n        &table, num_rows, NULL, time, population, individual, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_node_table_set_columns(&table, num_rows, flags, NULL, population,\n        individual, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_node_table_set_columns(\n        &table, num_rows, flags, time, population, individual, NULL, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_node_table_set_columns(\n        &table, num_rows, flags, time, population, individual, metadata, NULL);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n\n    /* if metadata and metadata_offset are both null, all metadatas are zero length */\n    
num_rows = 10;\n    tsk_memset(metadata_offset, 0, (num_rows + 1) * sizeof(tsk_size_t));\n    ret = tsk_node_table_set_columns(\n        &table, num_rows, flags, time, NULL, NULL, NULL, NULL);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.flags, flags, num_rows * sizeof(tsk_flags_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,\n                        (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n    ret = tsk_node_table_append_columns(\n        &table, num_rows, flags, time, NULL, NULL, NULL, NULL);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.flags, flags, num_rows * sizeof(tsk_flags_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.flags + num_rows, flags, num_rows * sizeof(tsk_flags_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.time + num_rows, time, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,\n                        (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset + num_rows, metadata_offset,\n                        num_rows * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n    tsk_node_table_print_state(&table, _devnull);\n    ret = tsk_node_table_dump_text(&table, _devnull);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Test extend method */\n    ret = tsk_node_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_node_table_init(&table2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Can't extend from self */\n    ret = tsk_node_table_extend(&table, &table, 0, NULL, 0);\n    
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_CANNOT_EXTEND_FROM_SELF);\n\n    /* Two empty tables */\n    CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, 0));\n    ret = tsk_node_table_extend(&table, &table2, table2.num_rows, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, 0));\n\n    /* Row out of bounds */\n    ret = tsk_node_table_extend(&table, &table2, num_row_subset, row_subset, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    /* Num rows out of bounds */\n    ret = tsk_node_table_extend(&table, &table2, num_rows * 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    /* Copy rows in order if index NULL */\n    ret = tsk_node_table_set_columns(&table2, num_rows, flags, time, population,\n        individual, metadata, metadata_offset);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_node_table_equals(&table, &table2, 0));\n    ret = tsk_node_table_extend(&table, &table2, table2.num_rows, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, 0));\n\n    /* Copy nothing if index not NULL but length zero */\n    ret = tsk_node_table_extend(&table, &table2, 0, row_subset, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, 0));\n\n    /* Copy first N rows in order if index NULL */\n    ret = tsk_node_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_node_table_extend(&table, &table2, num_rows / 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_node_table_truncate(&table2, num_rows / 2);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, 0));\n    ret = tsk_node_table_set_columns(&table2, num_rows, flags, time, population,\n        individual, metadata, metadata_offset);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    /* Copy a subset */\n    ret = 
tsk_node_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_node_table_equals(&table, &table2, 0));\n    ret = tsk_node_table_extend(&table, &table2, num_row_subset, row_subset, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (j = 0; j < (tsk_id_t) num_row_subset; j++) {\n        ret = tsk_node_table_get_row(&table, j, &node);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_node_table_get_row(&table2, row_subset[j], &node2);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(node.flags, node2.flags);\n        CU_ASSERT_EQUAL(node.time, node2.time);\n        CU_ASSERT_EQUAL(node.population, node2.population);\n        CU_ASSERT_EQUAL(node.individual, node2.individual);\n        CU_ASSERT_EQUAL(node.metadata_length, node2.metadata_length);\n        CU_ASSERT_EQUAL(tsk_memcmp(node.metadata, node2.metadata,\n                            node.metadata_length * sizeof(*node.metadata)),\n            0);\n    }\n\n    ret = tsk_node_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(table.metadata_schema_length, 0);\n    CU_ASSERT_EQUAL(table.metadata_schema, NULL);\n    const char *example = \"An example of metadata schema with unicode 🎄🌳🌴🌲🎋\";\n    tsk_size_t example_length = (tsk_size_t) strlen(example);\n    const char *example2 = \"A different example 🎄🌳🌴🌲🎋\";\n    tsk_size_t example2_length = (tsk_size_t) strlen(example2);\n    tsk_node_table_set_metadata_schema(&table, example, example_length);\n    CU_ASSERT_EQUAL(table.metadata_schema_length, example_length);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_schema, example, example_length), 0);\n\n    tsk_node_table_copy(&table, &table2, TSK_NO_INIT);\n    CU_ASSERT_EQUAL(table.metadata_schema_length, table2.metadata_schema_length);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.metadata_schema, table2.metadata_schema, example_length), 0);\n    tsk_node_table_set_metadata_schema(&table2, example, example_length);\n    
CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, 0));\n    tsk_node_table_set_metadata_schema(&table2, example2, example2_length);\n    CU_ASSERT_FALSE(tsk_node_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n\n    ret = tsk_node_table_clear(&table);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(table.num_rows, 0);\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n\n    ret = tsk_node_table_free(&table);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_node_table_free(&table2);\n    CU_ASSERT_EQUAL(ret, 0);\n    free(flags);\n    free(population);\n    free(time);\n    free(metadata);\n    free(metadata_offset);\n    free(individual);\n}\n\nstatic void\ntest_node_table_takeset(void)\n{\n    int ret = 0;\n    tsk_id_t ret_id;\n    tsk_node_table_t source_table, table;\n    tsk_size_t num_rows = 100;\n    tsk_id_t j;\n    tsk_flags_t *flags;\n    double *time;\n    tsk_id_t *population;\n    tsk_id_t *individual;\n    char *metadata;\n    tsk_size_t *metadata_offset;\n    const char *test_metadata = \"test\";\n    tsk_size_t test_metadata_length = 4;\n    tsk_size_t zeros[num_rows + 1];\n    tsk_id_t neg_ones[num_rows];\n\n    tsk_memset(zeros, 0, (num_rows + 1) * sizeof(tsk_size_t));\n    tsk_memset(neg_ones, 0xff, num_rows * sizeof(tsk_id_t));\n    /* Make a table to copy from */\n    ret = tsk_node_table_init(&source_table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (j = 0; j < (tsk_id_t) num_rows; j++) {\n        ret_id = tsk_node_table_add_row(&source_table, (tsk_flags_t) j, (double) j + 1,\n            j + 2, j + 3, test_metadata, test_metadata_length);\n        CU_ASSERT_EQUAL_FATAL(ret_id, j);\n    }\n\n    /* Prepare arrays to be taken */\n    flags = tsk_malloc(num_rows * sizeof(tsk_flags_t));\n    CU_ASSERT_FATAL(flags != NULL);\n    tsk_memcpy(flags, source_table.flags, num_rows * sizeof(tsk_flags_t));\n    time = tsk_malloc(num_rows * sizeof(double));\n    CU_ASSERT_FATAL(time != NULL);\n 
   tsk_memcpy(time, source_table.time, num_rows * sizeof(double));\n    population = tsk_malloc(num_rows * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(population != NULL);\n    tsk_memcpy(population, source_table.population, num_rows * sizeof(tsk_id_t));\n    individual = tsk_malloc(num_rows * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(individual != NULL);\n    tsk_memcpy(individual, source_table.individual, num_rows * sizeof(tsk_id_t));\n    metadata = tsk_malloc(num_rows * test_metadata_length * sizeof(char));\n    CU_ASSERT_FATAL(metadata != NULL);\n    tsk_memcpy(\n        metadata, source_table.metadata, num_rows * test_metadata_length * sizeof(char));\n    metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n    CU_ASSERT_FATAL(metadata_offset != NULL);\n    tsk_memcpy(metadata_offset, source_table.metadata_offset,\n        (num_rows + 1) * sizeof(tsk_size_t));\n\n    ret = tsk_node_table_init(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Add one row so that we can check takeset frees it */\n    ret_id = tsk_node_table_add_row(\n        &table, (tsk_flags_t) 1, 2, 3, 4, test_metadata, test_metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    ret = tsk_node_table_takeset_columns(&table, num_rows, flags, time, population,\n        individual, metadata, metadata_offset);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_node_table_equals(&source_table, &table, 0));\n\n    /* Test error states, all of these must not take the array, or free existing */\n    /* metadata and metadata offset must be simultaneously NULL or not */\n    ret = tsk_node_table_takeset_columns(\n        &table, num_rows, NULL, time, population, individual, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_node_table_takeset_columns(&table, num_rows, flags, NULL, population,\n        individual, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = 
tsk_node_table_takeset_columns(\n        &table, num_rows, flags, time, population, individual, NULL, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_node_table_takeset_columns(\n        &table, num_rows, flags, time, population, individual, metadata, NULL);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n\n    /* Truncation after takeset keeps memory and max_rows */\n    ret = tsk_node_table_clear(&table);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(table.max_rows, num_rows);\n\n    flags = tsk_malloc(num_rows * sizeof(tsk_flags_t));\n    CU_ASSERT_FATAL(flags != NULL);\n    tsk_memcpy(flags, source_table.flags, num_rows * sizeof(tsk_flags_t));\n    time = tsk_malloc(num_rows * sizeof(double));\n    CU_ASSERT_FATAL(time != NULL);\n    tsk_memcpy(time, source_table.time, num_rows * sizeof(double));\n    /* if metadata and offset are both null, all entries are zero length,\n       individual and population default to -1 */\n    num_rows = 10;\n    ret = tsk_node_table_takeset_columns(\n        &table, num_rows, flags, time, NULL, NULL, NULL, NULL);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.population, neg_ones, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.individual, neg_ones, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.metadata_offset, zeros, (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n\n    ret = tsk_node_table_free(&table);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_node_table_free(&source_table);\n    CU_ASSERT_EQUAL(ret, 0);\n}\n\nstatic void\ntest_node_table_update_row(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_node_table_t table;\n    tsk_node_t row;\n    const char *metadata = \"ABC\";\n\n    ret = tsk_node_table_init(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    
ret_id = tsk_node_table_add_row(&table, 0, 1.0, 2, 3, metadata, 1);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(&table, 1, 2.0, 3, 4, metadata, 2);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(&table, 2, 3.0, 4, 5, metadata, 3);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_node_table_update_row(&table, 0, 1, 2.0, 3, 4, &metadata[1], 1);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_node_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.flags, 1);\n    CU_ASSERT_EQUAL_FATAL(row.time, 2.0);\n    CU_ASSERT_EQUAL_FATAL(row.population, 3);\n    CU_ASSERT_EQUAL_FATAL(row.individual, 4);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');\n\n    ret = tsk_node_table_update_row(&table, 0, row.flags + 1, row.time + 1,\n        row.population + 1, row.individual + 1, row.metadata, row.metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_node_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.flags, 2);\n    CU_ASSERT_EQUAL_FATAL(row.time, 3.0);\n    CU_ASSERT_EQUAL_FATAL(row.population, 4);\n    CU_ASSERT_EQUAL_FATAL(row.individual, 5);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');\n\n    ret = tsk_node_table_update_row(&table, 0, 0, 0, 0, 0, metadata, 3);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_node_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.flags, 0);\n    CU_ASSERT_EQUAL_FATAL(row.time, 0);\n    CU_ASSERT_EQUAL_FATAL(row.population, 0);\n    CU_ASSERT_EQUAL_FATAL(row.individual, 0);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 3);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[2], 'C');\n\n    ret = 
tsk_node_table_update_row(&table, 1, 0, 0, 0, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_node_table_get_row(&table, 1, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.flags, 0);\n    CU_ASSERT_EQUAL_FATAL(row.time, 0);\n    CU_ASSERT_EQUAL_FATAL(row.population, 0);\n    CU_ASSERT_EQUAL_FATAL(row.individual, 0);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 0);\n\n    ret = tsk_node_table_get_row(&table, 2, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.flags, 2);\n    CU_ASSERT_EQUAL_FATAL(row.time, 3.0);\n    CU_ASSERT_EQUAL_FATAL(row.population, 4);\n    CU_ASSERT_EQUAL_FATAL(row.individual, 5);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 3);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[2], 'C');\n\n    ret = tsk_node_table_update_row(&table, 3, 0, 0, 0, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    tsk_node_table_free(&table);\n}\n\nstatic void\ntest_node_table_keep_rows(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_size_t j;\n    tsk_node_table_t source, t1, t2;\n    tsk_node_t row;\n    tsk_bool_t keep[3] = { 1, 1, 1 };\n    tsk_id_t id_map[3];\n    const char *metadata = \"ABC\";\n    tsk_id_t indexes[] = { 0, 1, 2 };\n\n    ret = tsk_node_table_init(&source, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret_id = tsk_node_table_add_row(&source, 0, 1.0, 2, 3, metadata, 1);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(&source, 1, 2.0, 3, 4, metadata, 2);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(&source, 2, 3.0, 4, 5, metadata, 3);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_node_table_copy(&source, &t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_node_table_keep_rows(&t1, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    
CU_ASSERT_TRUE(tsk_node_table_equals(&t1, &source, 0));\n\n    ret = tsk_node_table_keep_rows(&t1, keep, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_node_table_equals(&t1, &source, 0));\n    CU_ASSERT_EQUAL_FATAL(id_map[0], 0);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);\n\n    keep[0] = 0;\n    keep[1] = 0;\n    keep[2] = 0;\n    ret = tsk_node_table_keep_rows(&t1, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 0);\n    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);\n\n    ret = tsk_node_table_copy(&source, &t1, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    keep[0] = 0;\n    keep[1] = 1;\n    keep[2] = 0;\n    ret = tsk_node_table_keep_rows(&t1, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);\n    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);\n\n    ret = tsk_node_table_get_row(&t1, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(row.flags, 1);\n    CU_ASSERT_EQUAL_FATAL(row.time, 2.0);\n    CU_ASSERT_EQUAL_FATAL(row.population, 3);\n    CU_ASSERT_EQUAL_FATAL(row.individual, 4);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 2);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');\n\n    tsk_node_table_free(&t1);\n\n    keep[0] = 0;\n    keep[1] = 0;\n    keep[2] = 0;\n    /* Keeping first n rows equivalent to truncate */\n    for (j = 0; j < source.num_rows; j++) {\n        ret = tsk_node_table_copy(&source, &t2, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_node_table_copy(&source, &t1, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_node_table_truncate(&t1, j + 1);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        keep[j] = 1;\n        ret = tsk_node_table_keep_rows(&t2, 
keep, 0, NULL);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_TRUE(tsk_node_table_equals(&t1, &t2, 0));\n\n        /* Adding the remaining rows back on to the table gives the original\n         * table */\n        ret = tsk_node_table_extend(\n            &t2, &source, source.num_rows - j - 1, indexes + j + 1, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_TRUE(tsk_node_table_equals(&source, &t2, 0));\n\n        tsk_node_table_free(&t1);\n        tsk_node_table_free(&t2);\n    }\n\n    tsk_node_table_free(&source);\n}\n\nstatic void\ntest_edge_table_with_options(tsk_flags_t options)\n{\n    int ret;\n    tsk_edge_table_t table, table2;\n    tsk_size_t num_rows = 100;\n    tsk_id_t j, ret_id;\n    tsk_edge_t edge, edge2;\n    tsk_id_t *parent, *child;\n    double *left, *right;\n    char *metadata;\n    tsk_size_t *metadata_offset;\n    const char *test_metadata = \"test\";\n    tsk_size_t test_metadata_length = 4;\n    char metadata_copy[test_metadata_length + 1];\n    tsk_id_t row_subset[6] = { 1, 9, 1, 0, 2, 2 };\n    tsk_size_t num_row_subset = 6;\n\n    metadata_copy[test_metadata_length] = '\\0';\n    ret = tsk_edge_table_init(&table, options);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_edge_table_set_max_rows_increment(&table, 1);\n    tsk_edge_table_set_max_metadata_length_increment(&table, 1);\n    tsk_edge_table_print_state(&table, _devnull);\n    ret = tsk_edge_table_dump_text(&table, _devnull);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    for (j = 0; j < (tsk_id_t) num_rows; j++) {\n        if (options & TSK_TABLE_NO_METADATA) {\n            ret_id = tsk_edge_table_add_row(&table, (double) j, (double) j, j, j,\n                test_metadata, test_metadata_length);\n            CU_ASSERT_EQUAL(ret_id, TSK_ERR_METADATA_DISABLED);\n            ret_id\n                = tsk_edge_table_add_row(&table, (double) j, (double) j, j, j, NULL, 0);\n        } else {\n            ret_id = tsk_edge_table_add_row(&table, (double) j, (double) 
j, j, j,\n                test_metadata, test_metadata_length);\n        }\n        CU_ASSERT_EQUAL_FATAL(ret_id, j);\n        CU_ASSERT_EQUAL(table.left[j], j);\n        CU_ASSERT_EQUAL(table.right[j], j);\n        CU_ASSERT_EQUAL(table.parent[j], j);\n        CU_ASSERT_EQUAL(table.child[j], j);\n        CU_ASSERT_EQUAL(table.num_rows, (tsk_size_t) j + 1);\n        if (options & TSK_TABLE_NO_METADATA) {\n            CU_ASSERT_EQUAL(table.metadata_length, 0);\n            CU_ASSERT_EQUAL(table.metadata, NULL);\n            CU_ASSERT_EQUAL(table.metadata_offset, NULL);\n        } else {\n            CU_ASSERT_EQUAL(\n                table.metadata_length, (tsk_size_t) (j + 1) * test_metadata_length);\n            CU_ASSERT_EQUAL(table.metadata_offset[j + 1], table.metadata_length);\n            /* check the metadata */\n            tsk_memcpy(metadata_copy, table.metadata + table.metadata_offset[j],\n                test_metadata_length);\n            CU_ASSERT_NSTRING_EQUAL(metadata_copy, test_metadata, test_metadata_length);\n        }\n\n        ret = tsk_edge_table_get_row(&table, (tsk_id_t) j, &edge);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(edge.id, j);\n        CU_ASSERT_EQUAL(edge.left, j);\n        CU_ASSERT_EQUAL(edge.right, j);\n        CU_ASSERT_EQUAL(edge.parent, j);\n        CU_ASSERT_EQUAL(edge.child, j);\n        if (options & TSK_TABLE_NO_METADATA) {\n            CU_ASSERT_EQUAL(edge.metadata_length, 0);\n            CU_ASSERT_EQUAL(edge.metadata, NULL);\n        } else {\n            CU_ASSERT_EQUAL(edge.metadata_length, test_metadata_length);\n            CU_ASSERT_NSTRING_EQUAL(edge.metadata, test_metadata, test_metadata_length);\n        }\n    }\n    ret = tsk_edge_table_get_row(&table, (tsk_id_t) num_rows, &edge);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);\n    tsk_edge_table_print_state(&table, _devnull);\n    ret = tsk_edge_table_dump_text(&table, _devnull);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    
num_rows *= 2;\n    left = tsk_malloc(num_rows * sizeof(double));\n    CU_ASSERT_FATAL(left != NULL);\n    tsk_memset(left, 0, num_rows * sizeof(double));\n    right = tsk_malloc(num_rows * sizeof(double));\n    CU_ASSERT_FATAL(right != NULL);\n    tsk_memset(right, 0, num_rows * sizeof(double));\n    parent = tsk_malloc(num_rows * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(parent != NULL);\n    tsk_memset(parent, 1, num_rows * sizeof(tsk_id_t));\n    child = tsk_malloc(num_rows * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(child != NULL);\n    tsk_memset(child, 1, num_rows * sizeof(tsk_id_t));\n    metadata = tsk_malloc(num_rows * sizeof(char));\n    CU_ASSERT_FATAL(metadata != NULL);\n    tsk_memset(metadata, 'a', num_rows * sizeof(char));\n    metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n    CU_ASSERT_FATAL(metadata_offset != NULL);\n    for (j = 0; j < (tsk_id_t) num_rows + 1; j++) {\n        metadata_offset[j] = (tsk_size_t) j;\n    }\n    if (options & TSK_TABLE_NO_METADATA) {\n        ret = tsk_edge_table_set_columns(\n            &table, num_rows, left, right, parent, child, metadata, metadata_offset);\n        CU_ASSERT_EQUAL(ret, TSK_ERR_METADATA_DISABLED);\n        ret = tsk_edge_table_set_columns(\n            &table, num_rows, left, right, parent, child, NULL, NULL);\n    } else {\n        ret = tsk_edge_table_set_columns(\n            &table, num_rows, left, right, parent, child, metadata, metadata_offset);\n    }\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.left, left, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.right, right, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.parent, parent, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.child, child, num_rows * sizeof(tsk_id_t)), 0);\n    if (options & TSK_TABLE_NO_METADATA) {\n        CU_ASSERT_EQUAL(table.metadata, NULL);\n        CU_ASSERT_EQUAL(table.metadata_offset, NULL);\n        
CU_ASSERT_EQUAL(table.metadata_length, 0);\n    } else {\n        CU_ASSERT_EQUAL(\n            tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);\n        CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,\n                            (num_rows + 1) * sizeof(tsk_size_t)),\n            0);\n        CU_ASSERT_EQUAL(table.metadata_length, num_rows);\n    }\n\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n\n    /* Append another num_rows to the end. */\n    if (options & TSK_TABLE_NO_METADATA) {\n        ret = tsk_edge_table_append_columns(\n            &table, num_rows, left, right, parent, child, metadata, metadata_offset);\n        CU_ASSERT_EQUAL(ret, TSK_ERR_METADATA_DISABLED);\n        ret = tsk_edge_table_append_columns(\n            &table, num_rows, left, right, parent, child, NULL, NULL);\n    } else {\n        ret = tsk_edge_table_append_columns(\n            &table, num_rows, left, right, parent, child, metadata, metadata_offset);\n    }\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.left, left, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.left + num_rows, left, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.right, right, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.right + num_rows, right, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.parent, parent, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.parent + num_rows, parent, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.child, child, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.child + num_rows, child, num_rows * sizeof(tsk_id_t)), 0);\n    if (options & TSK_TABLE_NO_METADATA) {\n        CU_ASSERT_EQUAL(table.metadata, NULL);\n        CU_ASSERT_EQUAL(table.metadata_offset, NULL);\n        CU_ASSERT_EQUAL(table.metadata_length, 0);\n    } else 
{\n        CU_ASSERT_EQUAL(\n            tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);\n        CU_ASSERT_EQUAL(\n            tsk_memcmp(table.metadata + num_rows, metadata, num_rows * sizeof(char)), 0);\n        CU_ASSERT_EQUAL(table.metadata_length, 2 * num_rows);\n    }\n\n    CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);\n\n    /* Truncate back to num_rows */\n    ret = tsk_edge_table_truncate(&table, num_rows);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.left, left, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.right, right, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.parent, parent, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.child, child, num_rows * sizeof(tsk_id_t)), 0);\n    if (options & TSK_TABLE_NO_METADATA) {\n        CU_ASSERT_EQUAL(table.metadata, NULL);\n        CU_ASSERT_EQUAL(table.metadata_offset, NULL);\n        CU_ASSERT_EQUAL(table.metadata_length, 0);\n    } else {\n        CU_ASSERT_EQUAL(\n            tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);\n        CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,\n                            (num_rows + 1) * sizeof(tsk_size_t)),\n            0);\n        CU_ASSERT_EQUAL(table.metadata_length, num_rows);\n    }\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n\n    ret = tsk_edge_table_truncate(&table, num_rows + 1);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TABLE_POSITION);\n\n    /* Test equality with and without metadata */\n    tsk_edge_table_copy(&table, &table2, 0);\n    CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n    if (!(options & TSK_TABLE_NO_METADATA)) {\n        /* Change the metadata values */\n        table2.metadata[0] = 0;\n        CU_ASSERT_FALSE(tsk_edge_table_equals(&table, &table2, 0));\n        
CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n        /* Change the last metadata entry */\n        table2.metadata_offset[table2.num_rows]\n            = table2.metadata_offset[table2.num_rows - 1];\n        CU_ASSERT_FALSE(tsk_edge_table_equals(&table, &table2, 0));\n        CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n        /* Delete all metadata */\n        tsk_memset(table2.metadata_offset, 0,\n            (table2.num_rows + 1) * sizeof(*table2.metadata_offset));\n        CU_ASSERT_FALSE(tsk_edge_table_equals(&table, &table2, 0));\n        CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n    }\n    tsk_edge_table_free(&table2);\n\n    /* Inputs cannot be NULL */\n    ret = tsk_edge_table_set_columns(\n        &table, num_rows, NULL, right, parent, child, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_edge_table_set_columns(\n        &table, num_rows, left, NULL, parent, child, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_edge_table_set_columns(\n        &table, num_rows, left, right, NULL, child, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_edge_table_set_columns(\n        &table, num_rows, left, right, parent, NULL, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_edge_table_set_columns(\n        &table, num_rows, left, right, parent, child, NULL, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_edge_table_set_columns(\n        &table, num_rows, left, right, parent, child, metadata, NULL);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n\n    /* if metadata and metadata_offset are both null, all metadatas are zero length */\n    num_rows = 10;\n    tsk_memset(metadata_offset, 0, (num_rows + 1) * sizeof(tsk_size_t));\n    
ret = tsk_edge_table_set_columns(\n        &table, num_rows, left, right, parent, child, NULL, NULL);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.left, left, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.right, right, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.parent, parent, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.child, child, num_rows * sizeof(tsk_id_t)), 0);\n    if (options & TSK_TABLE_NO_METADATA) {\n        CU_ASSERT_EQUAL(table.metadata, NULL);\n        CU_ASSERT_EQUAL(table.metadata_offset, NULL);\n    } else {\n        CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,\n                            (num_rows + 1) * sizeof(tsk_size_t)),\n            0);\n    }\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n    ret = tsk_edge_table_append_columns(\n        &table, num_rows, left, right, parent, child, NULL, NULL);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.left, left, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.left + num_rows, left, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.right, right, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.right + num_rows, right, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.parent, parent, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.parent + num_rows, parent, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.child, child, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.child + num_rows, child, num_rows * sizeof(tsk_id_t)), 0);\n    if (options & TSK_TABLE_NO_METADATA) {\n        CU_ASSERT_EQUAL(table.metadata, NULL);\n        CU_ASSERT_EQUAL(table.metadata_offset, NULL);\n    } else {\n        CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, 
metadata_offset,\n                            (num_rows + 1) * sizeof(tsk_size_t)),\n            0);\n        CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset + num_rows, metadata_offset,\n                            num_rows * sizeof(tsk_size_t)),\n            0);\n    }\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n    CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);\n    tsk_edge_table_print_state(&table, _devnull);\n    ret = tsk_edge_table_dump_text(&table, _devnull);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Test extend method */\n    ret = tsk_edge_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_edge_table_init(&table2, options);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Can't extend from self */\n    ret = tsk_edge_table_extend(&table, &table, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_CANNOT_EXTEND_FROM_SELF);\n\n    /* Two empty tables */\n    CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, 0));\n    ret = tsk_edge_table_extend(&table, &table2, table2.num_rows, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, 0));\n\n    /* Row out of bounds */\n    ret = tsk_edge_table_extend(&table, &table2, num_row_subset, row_subset, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);\n\n    /* Num rows out of bounds */\n    ret = tsk_edge_table_extend(&table, &table2, num_rows * 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);\n\n    /* Copy rows in order if index NULL */\n    if (options & TSK_TABLE_NO_METADATA) {\n        ret = tsk_edge_table_set_columns(\n            &table2, num_rows, left, right, parent, child, NULL, NULL);\n    } else {\n        ret = tsk_edge_table_set_columns(\n            &table2, num_rows, left, right, parent, child, metadata, metadata_offset);\n    }\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_edge_table_equals(&table, &table2, 0));\n    ret = tsk_edge_table_extend(&table, 
&table2, table2.num_rows, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, 0));\n\n    /* Copy nothing if index not NULL but length zero */\n    ret = tsk_edge_table_extend(&table, &table2, 0, row_subset, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, 0));\n\n    /* Copy first N rows in order if index NULL */\n    ret = tsk_edge_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_edge_table_extend(&table, &table2, num_rows / 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_edge_table_truncate(&table2, num_rows / 2);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, 0));\n    if (options & TSK_TABLE_NO_METADATA) {\n        ret = tsk_edge_table_set_columns(\n            &table2, num_rows, left, right, parent, child, NULL, NULL);\n    } else {\n        ret = tsk_edge_table_set_columns(\n            &table2, num_rows, left, right, parent, child, metadata, metadata_offset);\n    }\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    /* Copy a subset */\n    ret = tsk_edge_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_edge_table_equals(&table, &table2, 0));\n    ret = tsk_edge_table_extend(&table, &table2, num_row_subset, row_subset, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (j = 0; j < (tsk_id_t) num_row_subset; j++) {\n        ret = tsk_edge_table_get_row(&table, j, &edge);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_edge_table_get_row(&table2, row_subset[j], &edge2);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(edge.parent, edge2.parent);\n        CU_ASSERT_EQUAL(edge.child, edge2.child);\n        CU_ASSERT_EQUAL(edge.left, edge2.left);\n        CU_ASSERT_EQUAL(edge.right, edge2.right);\n        CU_ASSERT_EQUAL(edge.metadata_length, edge2.metadata_length);\n        CU_ASSERT_EQUAL(tsk_memcmp(edge.metadata, 
edge2.metadata,\n                            edge.metadata_length * sizeof(*edge.metadata)),\n            0);\n    }\n\n    ret = tsk_edge_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(table.metadata_schema_length, 0);\n    CU_ASSERT_EQUAL(table.metadata_schema, NULL);\n    const char *example = \"An example of metadata schema with unicode 🎄🌳🌴🌲🎋\";\n    tsk_size_t example_length = (tsk_size_t) strlen(example);\n    const char *example2 = \"A different example 🎄🌳🌴🌲🎋\";\n    tsk_size_t example2_length = (tsk_size_t) strlen(example2);\n    ret = tsk_edge_table_set_metadata_schema(&table, example, example_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(table.metadata_schema_length, example_length);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_schema, example, example_length), 0);\n\n    ret = tsk_edge_table_copy(&table, &table2, TSK_NO_INIT | options);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(table.metadata_schema_length, table2.metadata_schema_length);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.metadata_schema, table2.metadata_schema, example_length), 0);\n    ret = tsk_edge_table_set_metadata_schema(&table2, example, example_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, 0));\n    ret = tsk_edge_table_set_metadata_schema(&table2, example2, example2_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_edge_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n\n    ret = tsk_edge_table_clear(&table);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(table.num_rows, 0);\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n\n    ret = tsk_edge_table_free(&table);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_edge_table_free(&table2);\n    CU_ASSERT_EQUAL(ret, 0);\n    free(left);\n    free(right);\n    free(parent);\n    free(child);\n    
free(metadata);\n    free(metadata_offset);\n}\n\nstatic void\ntest_edge_table(void)\n{\n    test_edge_table_with_options(0);\n    test_edge_table_with_options(TSK_TABLE_NO_METADATA);\n}\n\nstatic void\ntest_edge_table_update_row(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_edge_table_t table;\n    tsk_edge_t row;\n    const char *metadata = \"ABC\";\n\n    ret = tsk_edge_table_init(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret_id = tsk_edge_table_add_row(&table, 0, 1.0, 2, 3, metadata, 1);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&table, 1, 2.0, 3, 4, metadata, 2);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&table, 2, 3.0, 4, 5, metadata, 3);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_edge_table_update_row(&table, 0, 1, 2.0, 3, 4, &metadata[1], 1);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_edge_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.left, 1);\n    CU_ASSERT_EQUAL_FATAL(row.right, 2.0);\n    CU_ASSERT_EQUAL_FATAL(row.parent, 3);\n    CU_ASSERT_EQUAL_FATAL(row.child, 4);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');\n\n    ret = tsk_edge_table_update_row(&table, 0, row.left + 1, row.right + 1,\n        row.parent + 1, row.child + 1, row.metadata, row.metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_edge_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.left, 2);\n    CU_ASSERT_EQUAL_FATAL(row.right, 3.0);\n    CU_ASSERT_EQUAL_FATAL(row.parent, 4);\n    CU_ASSERT_EQUAL_FATAL(row.child, 5);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');\n\n    ret = tsk_edge_table_update_row(&table, 0, 0, 0, 0, 0, metadata, 3);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_edge_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n 
   CU_ASSERT_EQUAL_FATAL(row.left, 0);\n    CU_ASSERT_EQUAL_FATAL(row.right, 0);\n    CU_ASSERT_EQUAL_FATAL(row.parent, 0);\n    CU_ASSERT_EQUAL_FATAL(row.child, 0);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 3);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[2], 'C');\n\n    ret = tsk_edge_table_update_row(&table, 1, 0, 0, 0, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_edge_table_get_row(&table, 1, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.left, 0);\n    CU_ASSERT_EQUAL_FATAL(row.right, 0);\n    CU_ASSERT_EQUAL_FATAL(row.parent, 0);\n    CU_ASSERT_EQUAL_FATAL(row.child, 0);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 0);\n\n    ret = tsk_edge_table_get_row(&table, 2, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.left, 2);\n    CU_ASSERT_EQUAL_FATAL(row.right, 3.0);\n    CU_ASSERT_EQUAL_FATAL(row.parent, 4);\n    CU_ASSERT_EQUAL_FATAL(row.child, 5);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 3);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[2], 'C');\n\n    ret = tsk_edge_table_update_row(&table, 3, 0, 0, 0, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);\n\n    tsk_edge_table_free(&table);\n}\n\nstatic void\ntest_edge_table_update_row_no_metadata(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_edge_table_t table;\n    tsk_edge_t row;\n    const char *metadata = \"ABC\";\n\n    ret = tsk_edge_table_init(&table, TSK_TABLE_NO_METADATA);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret_id = tsk_edge_table_add_row(&table, 0, 1.0, 2, 3, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&table, 1, 2.0, 3, 4, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&table, 2, 3.0, 4, 5, NULL, 0);\n    
CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_edge_table_update_row(&table, 0, 1, 2.0, 3, 4, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_edge_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.left, 1);\n    CU_ASSERT_EQUAL_FATAL(row.right, 2.0);\n    CU_ASSERT_EQUAL_FATAL(row.parent, 3);\n    CU_ASSERT_EQUAL_FATAL(row.child, 4);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 0);\n\n    ret = tsk_edge_table_update_row(&table, 0, row.left + 1, row.right + 1,\n        row.parent + 1, row.child + 1, row.metadata, row.metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_edge_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.left, 2);\n    CU_ASSERT_EQUAL_FATAL(row.right, 3.0);\n    CU_ASSERT_EQUAL_FATAL(row.parent, 4);\n    CU_ASSERT_EQUAL_FATAL(row.child, 5);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 0);\n\n    ret = tsk_edge_table_update_row(&table, 1, 0, 0, 0, 0, metadata, 3);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_METADATA_DISABLED);\n\n    tsk_edge_table_free(&table);\n}\n\nstatic void\ntest_edge_table_keep_rows(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_size_t j;\n    tsk_edge_table_t source, t1, t2;\n    tsk_edge_t row;\n    tsk_bool_t keep[3] = { 1, 1, 1 };\n    tsk_id_t id_map[3];\n    const char *metadata = \"ABC\";\n    tsk_id_t indexes[] = { 0, 1, 2 };\n\n    ret = tsk_edge_table_init(&source, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret_id = tsk_edge_table_add_row(&source, 0, 1.0, 2, 3, metadata, 1);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&source, 1, 2.0, 3, 4, metadata, 2);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&source, 2, 3.0, 4, 5, metadata, 3);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_edge_table_copy(&source, &t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_edge_table_keep_rows(&t1, keep, 0, id_map);\n    
CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_edge_table_equals(&t1, &source, 0));\n\n    ret = tsk_edge_table_keep_rows(&t1, keep, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_edge_table_equals(&t1, &source, 0));\n    CU_ASSERT_EQUAL_FATAL(id_map[0], 0);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);\n\n    keep[0] = 0;\n    keep[1] = 0;\n    keep[2] = 0;\n    ret = tsk_edge_table_keep_rows(&t1, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 0);\n    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);\n\n    ret = tsk_edge_table_copy(&source, &t1, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    keep[0] = 0;\n    keep[1] = 1;\n    keep[2] = 0;\n    ret = tsk_edge_table_keep_rows(&t1, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);\n    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);\n\n    ret = tsk_edge_table_get_row(&t1, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(row.left, 1);\n    CU_ASSERT_EQUAL_FATAL(row.right, 2.0);\n    CU_ASSERT_EQUAL_FATAL(row.parent, 3);\n    CU_ASSERT_EQUAL_FATAL(row.child, 4);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 2);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');\n\n    tsk_edge_table_free(&t1);\n\n    keep[0] = 0;\n    keep[1] = 0;\n    keep[2] = 0;\n    /* Keeping first n rows equivalent to truncate */\n    for (j = 0; j < source.num_rows; j++) {\n        ret = tsk_edge_table_copy(&source, &t2, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_edge_table_copy(&source, &t1, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_edge_table_truncate(&t1, j + 1);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        keep[j] = 1;\n        ret = 
tsk_edge_table_keep_rows(&t2, keep, 0, NULL);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_TRUE(tsk_edge_table_equals(&t1, &t2, 0));\n\n        /* Adding the remaining rows back on to the table gives the original\n         * table */\n        ret = tsk_edge_table_extend(\n            &t2, &source, source.num_rows - j - 1, indexes + j + 1, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_TRUE(tsk_edge_table_equals(&source, &t2, 0));\n\n        tsk_edge_table_free(&t1);\n        tsk_edge_table_free(&t2);\n    }\n\n    tsk_edge_table_free(&source);\n}\n\nstatic void\ntest_edge_table_keep_rows_no_metadata(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_size_t j;\n    tsk_edge_table_t source, t1, t2;\n    tsk_edge_t row;\n    tsk_bool_t keep[3] = { 1, 1, 1 };\n    tsk_id_t id_map[3];\n    tsk_id_t indexes[] = { 0, 1, 2 };\n\n    ret = tsk_edge_table_init(&source, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret_id = tsk_edge_table_add_row(&source, 0, 1.0, 2, 3, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&source, 1, 2.0, 3, 4, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&source, 2, 3.0, 4, 5, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_edge_table_copy(&source, &t1, TSK_TABLE_NO_METADATA);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_edge_table_keep_rows(&t1, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_edge_table_equals(&t1, &source, 0));\n\n    ret = tsk_edge_table_keep_rows(&t1, keep, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_edge_table_equals(&t1, &source, 0));\n    CU_ASSERT_EQUAL_FATAL(id_map[0], 0);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);\n\n    keep[0] = 0;\n    keep[1] = 0;\n    keep[2] = 0;\n    ret = tsk_edge_table_keep_rows(&t1, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 
0);\n    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);\n\n    ret = tsk_edge_table_copy(&source, &t1, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    keep[0] = 0;\n    keep[1] = 1;\n    keep[2] = 0;\n    ret = tsk_edge_table_keep_rows(&t1, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);\n    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);\n\n    ret = tsk_edge_table_get_row(&t1, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(row.left, 1);\n    CU_ASSERT_EQUAL_FATAL(row.right, 2.0);\n    CU_ASSERT_EQUAL_FATAL(row.parent, 3);\n    CU_ASSERT_EQUAL_FATAL(row.child, 4);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 0);\n\n    tsk_edge_table_free(&t1);\n\n    keep[0] = 0;\n    keep[1] = 0;\n    keep[2] = 0;\n    /* Keeping first n rows equivalent to truncate */\n    for (j = 0; j < source.num_rows; j++) {\n        ret = tsk_edge_table_copy(&source, &t2, TSK_TABLE_NO_METADATA);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_edge_table_copy(&source, &t1, TSK_TABLE_NO_METADATA);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_edge_table_truncate(&t1, j + 1);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        keep[j] = 1;\n        ret = tsk_edge_table_keep_rows(&t2, keep, 0, NULL);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_TRUE(tsk_edge_table_equals(&t1, &t2, 0));\n\n        /* Adding the remaining rows back on to the table gives the original\n         * table */\n        ret = tsk_edge_table_extend(\n            &t2, &source, source.num_rows - j - 1, indexes + j + 1, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_TRUE(tsk_edge_table_equals(&source, &t2, 0));\n\n        tsk_edge_table_free(&t1);\n        tsk_edge_table_free(&t2);\n    }\n\n    tsk_edge_table_free(&source);\n}\n\nstatic 
void\ntest_edge_table_takeset_with_options(tsk_flags_t table_options)\n{\n    int ret = 0;\n    tsk_id_t ret_id;\n    tsk_edge_table_t source_table, table;\n    tsk_size_t num_rows = 100;\n    tsk_id_t j;\n    double *left;\n    double *right;\n    tsk_id_t *parent;\n    tsk_id_t *child;\n    char *metadata;\n    tsk_size_t *metadata_offset;\n    const char *test_metadata = \"test\";\n    tsk_size_t test_metadata_length = 4;\n    tsk_size_t zeros[num_rows + 1];\n    tsk_id_t neg_ones[num_rows];\n\n    tsk_memset(zeros, 0, (num_rows + 1) * sizeof(tsk_size_t));\n    tsk_memset(neg_ones, 0xff, num_rows * sizeof(tsk_id_t));\n    /* Make a table to copy from */\n    ret = tsk_edge_table_init(&source_table, table_options);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (j = 0; j < (tsk_id_t) num_rows; j++) {\n        if (table_options & TSK_TABLE_NO_METADATA) {\n            ret_id = tsk_edge_table_add_row(\n                &source_table, (double) j, (double) j + 1, j + 2, j + 3, NULL, 0);\n\n        } else {\n            ret_id = tsk_edge_table_add_row(&source_table, (double) j, (double) j + 1,\n                j + 2, j + 3, test_metadata, test_metadata_length);\n        }\n        CU_ASSERT_EQUAL_FATAL(ret_id, j);\n    }\n\n    /* Prepare arrays to be taken */\n    left = tsk_malloc(num_rows * sizeof(double));\n    CU_ASSERT_FATAL(left != NULL);\n    tsk_memcpy(left, source_table.left, num_rows * sizeof(double));\n    right = tsk_malloc(num_rows * sizeof(double));\n    CU_ASSERT_FATAL(right != NULL);\n    tsk_memcpy(right, source_table.right, num_rows * sizeof(double));\n    parent = tsk_malloc(num_rows * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(parent != NULL);\n    tsk_memcpy(parent, source_table.parent, num_rows * sizeof(tsk_id_t));\n    child = tsk_malloc(num_rows * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(child != NULL);\n    tsk_memcpy(child, source_table.child, num_rows * sizeof(tsk_id_t));\n    if (table_options & TSK_TABLE_NO_METADATA) {\n        metadata = NULL;\n 
       metadata_offset = NULL;\n        test_metadata = NULL;\n        test_metadata_length = 0;\n    } else {\n        metadata = tsk_malloc(num_rows * test_metadata_length * sizeof(char));\n        CU_ASSERT_FATAL(metadata != NULL);\n        tsk_memcpy(metadata, source_table.metadata,\n            num_rows * test_metadata_length * sizeof(char));\n        metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n        CU_ASSERT_FATAL(metadata_offset != NULL);\n        tsk_memcpy(metadata_offset, source_table.metadata_offset,\n            (num_rows + 1) * sizeof(tsk_size_t));\n    }\n\n    ret = tsk_edge_table_init(&table, table_options);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Add one row so that we can check takeset frees it */\n    ret_id = tsk_edge_table_add_row(\n        &table, 1, 2, 3, 4, test_metadata, test_metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    ret = tsk_edge_table_takeset_columns(\n        &table, num_rows, left, right, parent, child, metadata, metadata_offset);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_edge_table_equals(&source_table, &table, 0));\n\n    /* Test error states, all of these must not take the array, or free existing */\n    /* metadata and metadata offset must be simultaneously NULL or not */\n    ret = tsk_edge_table_takeset_columns(\n        &table, num_rows, NULL, right, parent, child, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_edge_table_takeset_columns(\n        &table, num_rows, left, NULL, parent, child, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_edge_table_takeset_columns(\n        &table, num_rows, left, right, NULL, child, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_edge_table_takeset_columns(\n        &table, num_rows, left, right, parent, NULL, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, 
TSK_ERR_BAD_PARAM_VALUE);\n    if (table_options & TSK_TABLE_NO_METADATA) {\n        /* It isn't used, so any pointer does for testing that presence of metadata\n            fails */\n        ret = tsk_edge_table_takeset_columns(\n            &table, num_rows, left, right, parent, child, (char *) child, NULL);\n        CU_ASSERT_EQUAL(ret, TSK_ERR_METADATA_DISABLED);\n    } else {\n        ret = tsk_edge_table_takeset_columns(\n            &table, num_rows, left, right, parent, child, NULL, metadata_offset);\n        CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n        ret = tsk_edge_table_takeset_columns(\n            &table, num_rows, left, right, parent, child, metadata, NULL);\n        CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    }\n\n    /* Truncation after takeset keeps memory and max_rows */\n    ret = tsk_edge_table_clear(&table);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(table.max_rows, num_rows);\n\n    ret = tsk_edge_table_free(&table);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_edge_table_free(&source_table);\n    CU_ASSERT_EQUAL(ret, 0);\n}\n\nstatic void\ntest_edge_table_takeset(void)\n{\n    test_edge_table_takeset_with_options(TSK_TABLE_NO_METADATA);\n    test_edge_table_takeset_with_options(0);\n}\n\nstatic void\ntest_edge_table_copy_semantics(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t t1, t2;\n    tsk_edge_table_t edges;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    insert_edge_metadata(&t1);\n\n    /* t1 now has metadata. 
We should be able to copy to another table with metadata */\n    ret = tsk_table_collection_copy(&t1, &t2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n    tsk_table_collection_free(&t2);\n\n    /* We should not be able to copy into a table with no metadata */\n    ret = tsk_table_collection_copy(&t1, &t2, TSK_TC_NO_EDGE_METADATA);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_METADATA_DISABLED);\n    tsk_table_collection_free(&t2);\n\n    tsk_table_collection_free(&t1);\n    ret = tsk_treeseq_copy_tables(&ts, &t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    /* t1 has no metadata, but metadata is enabled. We should be able to copy\n     * into a table with either metadata enabled or disabled.\n     */\n    ret = tsk_table_collection_copy(&t1, &t2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n    tsk_table_collection_free(&t2);\n\n    ret = tsk_table_collection_copy(&t1, &t2, TSK_TC_NO_EDGE_METADATA);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n    tsk_table_collection_free(&t2);\n\n    /* Try copying into a table directly */\n    ret = tsk_edge_table_copy(&t1.edges, &edges, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_edge_table_equals(&t1.edges, &edges, 0));\n    tsk_edge_table_free(&edges);\n\n    tsk_table_collection_free(&t1);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_edge_table_squash(void)\n{\n    int ret;\n    tsk_table_collection_t tables;\n\n    const char *nodes_ex = \"1  0       -1   -1\\n\"\n                           \"1  0       -1   -1\\n\"\n                           \"0  0.253   -1   -1\\n\";\n    const char *edges_ex = \"0  2   2   0\\n\"\n                           \"2  10  2   0\\n\"\n                           \"0  2   2   1\\n\"\n                           \"2  10  2   1\\n\";\n\n    /*\n      2\n     / \\\n    0   1\n    */\n    ret = 
tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 10;\n\n    parse_nodes(nodes_ex, &tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 3);\n    parse_edges(edges_ex, &tables.edges);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 4);\n\n    ret = tsk_edge_table_squash(&tables.edges);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    // Check output.\n    CU_ASSERT_EQUAL(tables.edges.num_rows, 2);\n\n    // Free things.\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_edge_table_squash_multiple_parents(void)\n{\n    int ret;\n    tsk_table_collection_t tables;\n\n    const char *nodes_ex = \"1  0.000   -1    -1\\n\"\n                           \"1  0.000   -1    -1\\n\"\n                           \"1  0.000   -1    -1\\n\"\n                           \"1  0.000   -1    -1\\n\"\n                           \"0  1.000   -1    -1\\n\"\n                           \"0  1.000   -1    -1\\n\";\n    const char *edges_ex = \"5  10  5   3\\n\"\n                           \"5  10  5   2\\n\"\n                           \"0  5   5   3\\n\"\n                           \"0  5   5   2\\n\"\n                           \"4  10  4   1\\n\"\n                           \"0  4   4   1\\n\"\n                           \"4  10  4   0\\n\"\n                           \"0  4   4   0\\n\";\n    /*\n                4       5\n               / \\     / \\\n              0   1   2   3\n    */\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 10;\n\n    parse_nodes(nodes_ex, &tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 6);\n    parse_edges(edges_ex, &tables.edges);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 8);\n\n    ret = tsk_edge_table_squash(&tables.edges);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    // Check output.\n    CU_ASSERT_EQUAL(tables.edges.num_rows, 4);\n\n    // Free things.\n    
tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_edge_table_squash_empty(void)\n{\n    int ret;\n    tsk_table_collection_t tables;\n\n    const char *nodes_ex = \"1  0       -1   -1\\n\"\n                           \"1  0       -1   -1\\n\"\n                           \"0  0.253   -1   -1\\n\";\n    const char *edges_ex = \"\";\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 10;\n\n    parse_nodes(nodes_ex, &tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 3);\n    parse_edges(edges_ex, &tables.edges);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 0);\n\n    ret = tsk_edge_table_squash(&tables.edges);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    // Free things.\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_edge_table_squash_single_edge(void)\n{\n    int ret;\n    tsk_table_collection_t tables;\n\n    const char *nodes_ex = \"1  0   -1   -1\\n\"\n                           \"0  0   -1   -1\\n\";\n    const char *edges_ex = \"0  1   1   0\\n\";\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1;\n\n    parse_nodes(nodes_ex, &tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 2);\n    parse_edges(edges_ex, &tables.edges);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 1);\n\n    ret = tsk_edge_table_squash(&tables.edges);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    // Free things.\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_edge_table_squash_bad_intervals(void)\n{\n    int ret;\n    tsk_table_collection_t tables;\n\n    const char *nodes_ex = \"1  0   -1   -1\\n\"\n                           \"0  0   -1   -1\\n\";\n    const char *edges_ex = \"0  0.6   1   0\\n\"\n                           \"0.4  1   1   0\\n\";\n\n    ret = tsk_table_collection_init(&tables, TSK_TC_NO_EDGE_METADATA);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length 
= 1;\n\n    parse_nodes(nodes_ex, &tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 2);\n    parse_edges(edges_ex, &tables.edges);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 2);\n\n    ret = tsk_edge_table_squash(&tables.edges);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_EDGES_CONTRADICTORY_CHILDREN);\n\n    // Free things.\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_edge_table_squash_metadata(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_table_collection_t tables;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 10;\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0, 0, 1, 1, \"metadata\", 8);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    ret = tsk_edge_table_squash(&tables.edges);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_CANT_PROCESS_EDGES_WITH_METADATA);\n\n    tsk_table_collection_free(&tables);\n\n    ret = tsk_table_collection_init(&tables, TSK_TC_NO_EDGE_METADATA);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 10;\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0, 0, 1, 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    ret = tsk_edge_table_squash(&tables.edges);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_site_table(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_site_table_t table, table2;\n    tsk_size_t num_rows, j;\n    char *ancestral_state;\n    char *metadata;\n    double *position;\n    tsk_site_t site, site2;\n    tsk_size_t *ancestral_state_offset;\n    tsk_size_t *metadata_offset;\n    tsk_id_t row_subset[6] = { 1, 9, 1, 0, 2, 2 };\n    tsk_size_t num_row_subset = 6;\n\n    ret = tsk_site_table_init(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_site_table_set_max_rows_increment(&table, 1);\n    tsk_site_table_set_max_metadata_length_increment(&table, 1);\n    
tsk_site_table_set_max_ancestral_state_length_increment(&table, 1);\n    tsk_site_table_print_state(&table, _devnull);\n    ret = tsk_site_table_dump_text(&table, _devnull);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret_id = tsk_site_table_add_row(&table, 0, \"A\", 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    CU_ASSERT_EQUAL(table.position[0], 0);\n    CU_ASSERT_EQUAL(table.ancestral_state_offset[0], 0);\n    CU_ASSERT_EQUAL(table.ancestral_state_offset[1], 1);\n    CU_ASSERT_EQUAL(table.ancestral_state_length, 1);\n    CU_ASSERT_EQUAL(table.metadata_offset[0], 0);\n    CU_ASSERT_EQUAL(table.metadata_offset[1], 0);\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n    CU_ASSERT_EQUAL(table.num_rows, 1);\n\n    ret = tsk_site_table_get_row(&table, 0, &site);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(site.position, 0);\n    CU_ASSERT_EQUAL(site.ancestral_state_length, 1);\n    CU_ASSERT_NSTRING_EQUAL(site.ancestral_state, \"A\", 1);\n    CU_ASSERT_EQUAL(site.metadata_length, 0);\n\n    ret_id = tsk_site_table_add_row(&table, 1, \"AA\", 2, \"{}\", 2);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    CU_ASSERT_EQUAL(table.position[1], 1);\n    CU_ASSERT_EQUAL(table.ancestral_state_offset[2], 3);\n    CU_ASSERT_EQUAL(table.metadata_offset[1], 0);\n    CU_ASSERT_EQUAL(table.metadata_offset[2], 2);\n    CU_ASSERT_EQUAL(table.metadata_length, 2);\n    CU_ASSERT_EQUAL(table.num_rows, 2);\n\n    ret = tsk_site_table_get_row(&table, 1, &site);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(site.position, 1);\n    CU_ASSERT_EQUAL(site.ancestral_state_length, 2);\n    CU_ASSERT_NSTRING_EQUAL(site.ancestral_state, \"AA\", 2);\n    CU_ASSERT_EQUAL(site.metadata_length, 2);\n    CU_ASSERT_NSTRING_EQUAL(site.metadata, \"{}\", 2);\n\n    ret_id = tsk_site_table_add_row(&table, 2, \"A\", 1, \"metadata\", 8);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 2);\n    CU_ASSERT_EQUAL(table.position[1], 1);\n    CU_ASSERT_EQUAL(table.ancestral_state_offset[3], 4);\n    
CU_ASSERT_EQUAL(table.ancestral_state_length, 4);\n    CU_ASSERT_EQUAL(table.metadata_offset[3], 10);\n    CU_ASSERT_EQUAL(table.metadata_length, 10);\n    CU_ASSERT_EQUAL(table.num_rows, 3);\n\n    ret = tsk_site_table_get_row(&table, 3, &site);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);\n\n    tsk_site_table_print_state(&table, _devnull);\n    ret = tsk_site_table_dump_text(&table, _devnull);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_site_table_clear(&table);\n    CU_ASSERT_EQUAL(table.num_rows, 0);\n    CU_ASSERT_EQUAL(table.ancestral_state_length, 0);\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n    CU_ASSERT_EQUAL(table.ancestral_state_offset[0], 0);\n    CU_ASSERT_EQUAL(table.metadata_offset[0], 0);\n\n    num_rows = 100;\n    position = tsk_malloc(num_rows * sizeof(double));\n    CU_ASSERT_FATAL(position != NULL);\n    ancestral_state = tsk_malloc(num_rows * sizeof(char));\n    CU_ASSERT_FATAL(ancestral_state != NULL);\n    ancestral_state_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n    CU_ASSERT_FATAL(ancestral_state_offset != NULL);\n    metadata = tsk_malloc(num_rows * sizeof(char));\n    CU_ASSERT_FATAL(metadata != NULL);\n    metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n    CU_ASSERT_FATAL(metadata_offset != NULL);\n\n    for (j = 0; j < num_rows; j++) {\n        position[j] = (double) j;\n        ancestral_state[j] = (char) j;\n        ancestral_state_offset[j] = (tsk_size_t) j;\n        metadata[j] = (char) ('A' + j);\n        metadata_offset[j] = (tsk_size_t) j;\n    }\n    ancestral_state_offset[num_rows] = num_rows;\n    metadata_offset[num_rows] = num_rows;\n\n    ret = tsk_site_table_set_columns(&table, num_rows, position, ancestral_state,\n        ancestral_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.position, position, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(\n        
tsk_memcmp(table.ancestral_state, ancestral_state, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(table.ancestral_state_length, num_rows);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(table.metadata_length, num_rows);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n\n    /* Append another num rows */\n    ret = tsk_site_table_append_columns(&table, num_rows, position, ancestral_state,\n        ancestral_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.position, position, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.position + num_rows, position, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.ancestral_state, ancestral_state, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.ancestral_state + num_rows, ancestral_state,\n                        num_rows * sizeof(char)),\n        0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.metadata + num_rows, metadata, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);\n    CU_ASSERT_EQUAL(table.ancestral_state_length, 2 * num_rows);\n\n    /* truncate back to num_rows */\n    ret = tsk_site_table_truncate(&table, num_rows);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.position, position, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.ancestral_state, ancestral_state, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(table.ancestral_state_length, num_rows);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(table.metadata_length, num_rows);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n\n    ret = tsk_site_table_truncate(&table, num_rows + 1);\n    CU_ASSERT_EQUAL_FATAL(ret, 
TSK_ERR_BAD_TABLE_POSITION);\n\n    /* Test equality with and without metadata */\n    tsk_site_table_copy(&table, &table2, 0);\n    CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n    /* Change the metadata values */\n    table2.metadata[0] = 0;\n    CU_ASSERT_FALSE(tsk_site_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n    /* Change the last metadata entry */\n    table2.metadata_offset[table2.num_rows]\n        = table2.metadata_offset[table2.num_rows - 1];\n    CU_ASSERT_FALSE(tsk_site_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n    /* Delete all metadata */\n    tsk_memset(table2.metadata_offset, 0,\n        (table2.num_rows + 1) * sizeof(*table2.metadata_offset));\n    CU_ASSERT_FALSE(tsk_site_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n    tsk_site_table_free(&table2);\n\n    /* Inputs cannot be NULL */\n    ret = tsk_site_table_set_columns(&table, num_rows, NULL, ancestral_state,\n        ancestral_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_site_table_set_columns(&table, num_rows, position, NULL,\n        ancestral_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_site_table_set_columns(\n        &table, num_rows, position, ancestral_state, NULL, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    /* Metadata and metadata_offset must both be NULL or both be non-NULL */\n    ret = tsk_site_table_set_columns(&table, num_rows, position, ancestral_state,\n        ancestral_state_offset, NULL, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_site_table_set_columns(&table, num_rows, position, ancestral_state,\n        
ancestral_state_offset, metadata, NULL);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n\n    /* Set metadata to NULL */\n    ret = tsk_site_table_set_columns(\n        &table, num_rows, position, ancestral_state, ancestral_state_offset, NULL, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_memset(metadata_offset, 0, (num_rows + 1) * sizeof(tsk_size_t));\n    CU_ASSERT_EQUAL(tsk_memcmp(table.position, position, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.ancestral_state, ancestral_state, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(table.ancestral_state_length, num_rows);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,\n                        (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n\n    /* Test extend method */\n    ret = tsk_site_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_site_table_init(&table2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Can't extend from self */\n    ret = tsk_site_table_extend(&table, &table, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_CANNOT_EXTEND_FROM_SELF);\n\n    /* Two empty tables */\n    CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, 0));\n    ret = tsk_site_table_extend(&table, &table2, table2.num_rows, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, 0));\n\n    /* Row out of bounds */\n    ret = tsk_site_table_extend(&table, &table2, num_row_subset, row_subset, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);\n\n    /* Num rows out of bounds */\n    ret = tsk_site_table_extend(&table, &table2, num_rows * 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);\n\n    /* Copy rows in order if index NULL */\n    ret = tsk_site_table_set_columns(&table2, num_rows, position, ancestral_state,\n        
ancestral_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_site_table_equals(&table, &table2, 0));\n    ret = tsk_site_table_extend(&table, &table2, table2.num_rows, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, 0));\n\n    /* Copy nothing if index not NULL but length zero */\n    ret = tsk_site_table_extend(&table, &table2, 0, row_subset, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, 0));\n\n    /* Copy first N rows in order if index NULL */\n    ret = tsk_site_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_site_table_extend(&table, &table2, num_rows / 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_site_table_truncate(&table2, num_rows / 2);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, 0));\n    ret = tsk_site_table_set_columns(&table2, num_rows, position, ancestral_state,\n        ancestral_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Copy a subset */\n    ret = tsk_site_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_site_table_equals(&table, &table2, 0));\n    ret = tsk_site_table_extend(&table, &table2, num_row_subset, row_subset, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (j = 0; j < num_row_subset; j++) {\n        ret = tsk_site_table_get_row(&table, (tsk_id_t) j, &site);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_site_table_get_row(&table2, row_subset[j], &site2);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(site.position, site2.position);\n        CU_ASSERT_EQUAL(site.ancestral_state_length, site2.ancestral_state_length);\n        CU_ASSERT_EQUAL(site.metadata_length, site2.metadata_length);\n        CU_ASSERT_EQUAL(tsk_memcmp(site.ancestral_state, site2.ancestral_state,\n   
                         site.ancestral_state_length * sizeof(*site.ancestral_state)),\n            0);\n        CU_ASSERT_EQUAL(tsk_memcmp(site.metadata, site2.metadata,\n                            site.metadata_length * sizeof(*site.metadata)),\n            0);\n    }\n\n    /* Test for bad offsets */\n    ancestral_state_offset[0] = 1;\n    ret = tsk_site_table_set_columns(&table, num_rows, position, ancestral_state,\n        ancestral_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);\n    ancestral_state_offset[0] = 0;\n    ancestral_state_offset[num_rows] = 0;\n    ret = tsk_site_table_set_columns(&table, num_rows, position, ancestral_state,\n        ancestral_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);\n    ancestral_state_offset[0] = 0;\n\n    metadata_offset[0] = 0;\n    ret = tsk_site_table_set_columns(&table, num_rows, position, ancestral_state,\n        ancestral_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);\n    metadata_offset[0] = 0;\n    metadata_offset[num_rows] = 0;\n    ret = tsk_site_table_set_columns(&table, num_rows, position, ancestral_state,\n        ancestral_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);\n    ret = tsk_site_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(table.metadata_schema_length, 0);\n    CU_ASSERT_EQUAL(table.metadata_schema, NULL);\n    const char *example = \"An example of metadata schema with unicode 🎄🌳🌴🌲🎋\";\n    tsk_size_t example_length = (tsk_size_t) strlen(example);\n    const char *example2 = \"A different example 🎄🌳🌴🌲🎋\";\n    tsk_size_t example2_length = (tsk_size_t) strlen(example2);\n    tsk_site_table_set_metadata_schema(&table, example, example_length);\n    CU_ASSERT_EQUAL(table.metadata_schema_length, example_length);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_schema, example, example_length), 
0);\n\n    tsk_site_table_copy(&table, &table2, TSK_NO_INIT);\n    CU_ASSERT_EQUAL(table.metadata_schema_length, table2.metadata_schema_length);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.metadata_schema, table2.metadata_schema, example_length), 0);\n    tsk_site_table_set_metadata_schema(&table2, example, example_length);\n    CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, 0));\n    tsk_site_table_set_metadata_schema(&table2, example2, example2_length);\n    CU_ASSERT_FALSE(tsk_site_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n\n    ret = tsk_site_table_clear(&table);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(table.num_rows, 0);\n    CU_ASSERT_EQUAL(table.ancestral_state_length, 0);\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n\n    ret = tsk_site_table_free(&table);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_site_table_free(&table2);\n    CU_ASSERT_EQUAL(ret, 0);\n\n    free(position);\n    free(ancestral_state);\n    free(ancestral_state_offset);\n    free(metadata);\n    free(metadata_offset);\n}\n\nstatic void\ntest_site_table_takeset(void)\n{\n    int ret = 0;\n    tsk_id_t ret_id;\n    tsk_site_table_t source_table, table;\n    tsk_size_t num_rows = 100;\n    tsk_id_t j;\n    double *position;\n    char *ancestral_state;\n    tsk_size_t *ancestral_state_offset;\n    char *metadata;\n    tsk_size_t *metadata_offset;\n    const char *test_ancestral_state = \"red\";\n    tsk_size_t test_ancestral_state_length = 3;\n    const char *test_metadata = \"test\";\n    tsk_size_t test_metadata_length = 4;\n    tsk_size_t zeros[num_rows + 1];\n    tsk_id_t neg_ones[num_rows];\n\n    tsk_memset(zeros, 0, (num_rows + 1) * sizeof(tsk_size_t));\n    tsk_memset(neg_ones, 0xff, num_rows * sizeof(tsk_id_t));\n    /* Make a table to copy from */\n    ret = tsk_site_table_init(&source_table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (j = 0; j < (tsk_id_t) num_rows; j++) {\n        
ret_id = tsk_site_table_add_row(&source_table, (double) j, test_ancestral_state,\n            test_ancestral_state_length, test_metadata, test_metadata_length);\n        CU_ASSERT_EQUAL_FATAL(ret_id, j);\n    }\n\n    /* Prepare arrays to be taken */\n    position = tsk_malloc(num_rows * sizeof(double));\n    CU_ASSERT_FATAL(position != NULL);\n    tsk_memcpy(position, source_table.position, num_rows * sizeof(double));\n    ancestral_state = tsk_malloc(num_rows * test_ancestral_state_length * sizeof(char));\n    CU_ASSERT_FATAL(ancestral_state != NULL);\n    tsk_memcpy(ancestral_state, source_table.ancestral_state,\n        num_rows * test_ancestral_state_length * sizeof(char));\n    ancestral_state_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n    CU_ASSERT_FATAL(ancestral_state_offset != NULL);\n    tsk_memcpy(ancestral_state_offset, source_table.ancestral_state_offset,\n        (num_rows + 1) * sizeof(tsk_size_t));\n    metadata = tsk_malloc(num_rows * test_metadata_length * sizeof(char));\n    CU_ASSERT_FATAL(metadata != NULL);\n    tsk_memcpy(\n        metadata, source_table.metadata, num_rows * test_metadata_length * sizeof(char));\n    metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n    CU_ASSERT_FATAL(metadata_offset != NULL);\n    tsk_memcpy(metadata_offset, source_table.metadata_offset,\n        (num_rows + 1) * sizeof(tsk_size_t));\n\n    ret = tsk_site_table_init(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Add one row so that we can check takeset frees it */\n    ret_id = tsk_site_table_add_row(&table, 1, test_ancestral_state,\n        test_ancestral_state_length, test_metadata, test_metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    ret = tsk_site_table_takeset_columns(&table, num_rows, position, ancestral_state,\n        ancestral_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_site_table_equals(&source_table, &table, 0));\n\n    /* Test 
error states, all of these must not take the array, or free existing */\n    /* metadata and metadata offset must be simultaneously NULL or not */\n    ret = tsk_site_table_takeset_columns(&table, num_rows, NULL, ancestral_state,\n        ancestral_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_site_table_takeset_columns(&table, num_rows, position, NULL,\n        ancestral_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_site_table_takeset_columns(\n        &table, num_rows, position, ancestral_state, NULL, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_site_table_takeset_columns(&table, num_rows, position, ancestral_state,\n        ancestral_state_offset, NULL, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_site_table_takeset_columns(&table, num_rows, position, ancestral_state,\n        ancestral_state_offset, metadata, NULL);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    /* Check bad offset in ancestral_state */\n    ancestral_state_offset[0] = 1;\n    ret = tsk_site_table_takeset_columns(&table, num_rows, position, ancestral_state,\n        ancestral_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);\n\n    /* Truncation after takeset keeps memory and max_rows */\n    ret = tsk_site_table_clear(&table);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(table.max_rows, num_rows);\n\n    position = tsk_malloc(num_rows * sizeof(double));\n    CU_ASSERT_FATAL(position != NULL);\n    tsk_memcpy(position, source_table.position, num_rows * sizeof(double));\n    ancestral_state = tsk_malloc(num_rows * test_ancestral_state_length * sizeof(char));\n    CU_ASSERT_FATAL(ancestral_state != NULL);\n    tsk_memcpy(ancestral_state, source_table.ancestral_state,\n        num_rows * test_ancestral_state_length * 
sizeof(char));\n    ancestral_state_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n    CU_ASSERT_FATAL(ancestral_state_offset != NULL);\n    tsk_memcpy(ancestral_state_offset, source_table.ancestral_state_offset,\n        (num_rows + 1) * sizeof(tsk_size_t));\n    /* if metadata and offset are both null, all entries are zero length*/\n    num_rows = 10;\n    ret = tsk_site_table_takeset_columns(\n        &table, num_rows, position, ancestral_state, ancestral_state_offset, NULL, NULL);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.metadata_offset, zeros, (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n\n    ret = tsk_site_table_free(&table);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_site_table_free(&source_table);\n    CU_ASSERT_EQUAL(ret, 0);\n}\n\nstatic void\ntest_site_table_update_row(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_site_table_t table;\n    tsk_site_t row;\n    const char *ancestral_state = \"XYZ\";\n    const char *metadata = \"ABC\";\n\n    ret = tsk_site_table_init(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret_id = tsk_site_table_add_row(&table, 0, ancestral_state, 1, metadata, 1);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_site_table_add_row(&table, 1, ancestral_state, 2, metadata, 2);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_site_table_add_row(&table, 2, ancestral_state, 3, metadata, 3);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_site_table_update_row(\n        &table, 0, 1, &ancestral_state[1], 1, &metadata[1], 1);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_site_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.position, 1);\n    CU_ASSERT_EQUAL_FATAL(row.ancestral_state_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.ancestral_state[0], 'Y');\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);\n 
   CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');\n\n    ret = tsk_site_table_update_row(&table, 0, row.position + 1, row.ancestral_state,\n        row.ancestral_state_length, row.metadata, row.metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_site_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.position, 2);\n    CU_ASSERT_EQUAL_FATAL(row.ancestral_state_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.ancestral_state[0], 'Y');\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');\n\n    ret = tsk_site_table_update_row(&table, 0, row.position, row.ancestral_state,\n        row.ancestral_state_length, row.metadata, row.metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_site_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.position, 2);\n    CU_ASSERT_EQUAL_FATAL(row.ancestral_state_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.ancestral_state[0], 'Y');\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');\n\n    ret = tsk_site_table_update_row(\n        &table, 0, row.position, NULL, 0, row.metadata, row.metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_site_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.position, 2);\n    CU_ASSERT_EQUAL_FATAL(row.ancestral_state_length, 0);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');\n\n    ret = tsk_site_table_update_row(&table, 0, 2, ancestral_state, 3, metadata, 3);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_site_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.position, 2);\n    CU_ASSERT_EQUAL_FATAL(row.ancestral_state_length, 3);\n    CU_ASSERT_EQUAL_FATAL(row.ancestral_state[0], 'X');\n    
CU_ASSERT_EQUAL_FATAL(row.ancestral_state[1], 'Y');\n    CU_ASSERT_EQUAL_FATAL(row.ancestral_state[2], 'Z');\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 3);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[2], 'C');\n\n    ret = tsk_site_table_update_row(&table, 1, 5, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_site_table_get_row(&table, 1, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.position, 5);\n    CU_ASSERT_EQUAL_FATAL(row.ancestral_state_length, 0);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 0);\n\n    ret = tsk_site_table_get_row(&table, 2, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.position, 2);\n    CU_ASSERT_EQUAL_FATAL(row.ancestral_state_length, 3);\n    CU_ASSERT_EQUAL_FATAL(row.ancestral_state[0], 'X');\n    CU_ASSERT_EQUAL_FATAL(row.ancestral_state[1], 'Y');\n    CU_ASSERT_EQUAL_FATAL(row.ancestral_state[2], 'Z');\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 3);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[2], 'C');\n\n    ret = tsk_site_table_update_row(&table, 3, 0, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);\n\n    tsk_site_table_free(&table);\n}\n\nstatic void\ntest_site_table_keep_rows(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_size_t j;\n    tsk_site_table_t source, t1, t2;\n    tsk_site_t row;\n    const char *ancestral_state = \"XYZ\";\n    const char *metadata = \"ABC\";\n    tsk_bool_t keep[3] = { 1, 1, 1 };\n    tsk_id_t id_map[3];\n    tsk_id_t indexes[] = { 0, 1, 2 };\n\n    ret = tsk_site_table_init(&source, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret_id = tsk_site_table_add_row(&source, 0, ancestral_state, 1, metadata, 1);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_site_table_add_row(&source, 
1, ancestral_state, 2, metadata, 2);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_site_table_add_row(&source, 2, ancestral_state, 3, metadata, 3);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_site_table_copy(&source, &t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_site_table_keep_rows(&t1, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_site_table_equals(&t1, &source, 0));\n\n    ret = tsk_site_table_keep_rows(&t1, keep, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_site_table_equals(&t1, &source, 0));\n    CU_ASSERT_EQUAL_FATAL(id_map[0], 0);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);\n\n    keep[0] = 0;\n    keep[1] = 0;\n    keep[2] = 0;\n    ret = tsk_site_table_keep_rows(&t1, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 0);\n    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);\n\n    ret = tsk_site_table_copy(&source, &t1, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    keep[0] = 0;\n    keep[1] = 1;\n    keep[2] = 0;\n    ret = tsk_site_table_keep_rows(&t1, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);\n    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);\n\n    ret = tsk_site_table_get_row(&t1, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(row.position, 1);\n    CU_ASSERT_EQUAL_FATAL(row.ancestral_state_length, 2);\n    CU_ASSERT_EQUAL_FATAL(row.ancestral_state[0], 'X');\n    CU_ASSERT_EQUAL_FATAL(row.ancestral_state[1], 'Y');\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 2);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');\n\n    tsk_site_table_free(&t1);\n\n    keep[0] = 0;\n    keep[1] = 0;\n    keep[2] = 0;\n    /* Keeping first n rows 
equivalent to truncate */\n    for (j = 0; j < source.num_rows; j++) {\n        ret = tsk_site_table_copy(&source, &t2, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_site_table_copy(&source, &t1, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_site_table_truncate(&t1, j + 1);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        keep[j] = 1;\n        ret = tsk_site_table_keep_rows(&t2, keep, 0, NULL);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_TRUE(tsk_site_table_equals(&t1, &t2, 0));\n\n        /* Adding the remaining rows back on to the table gives the original\n         * table */\n        ret = tsk_site_table_extend(\n            &t2, &source, source.num_rows - j - 1, indexes + j + 1, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_TRUE(tsk_site_table_equals(&source, &t2, 0));\n\n        tsk_site_table_free(&t1);\n        tsk_site_table_free(&t2);\n    }\n\n    tsk_site_table_free(&source);\n}\n\nstatic void\ntest_mutation_table(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_mutation_table_t table, table2;\n    tsk_size_t num_rows = 100;\n    tsk_size_t max_len = 20;\n    tsk_size_t k, len;\n    tsk_id_t j;\n    tsk_id_t *node;\n    tsk_id_t *parent;\n    tsk_id_t *site;\n    double *time;\n    char *derived_state, *metadata;\n    char c[max_len + 1];\n    tsk_size_t *derived_state_offset, *metadata_offset;\n    tsk_mutation_t mutation, mutation2;\n    tsk_id_t row_subset[6] = { 1, 9, 1, 0, 2, 2 };\n    tsk_size_t num_row_subset = 6;\n\n    for (j = 0; j < (tsk_id_t) max_len; j++) {\n        c[j] = (char) ('A' + j);\n    }\n\n    ret = tsk_mutation_table_init(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_mutation_table_set_max_rows_increment(&table, 1);\n    tsk_mutation_table_set_max_metadata_length_increment(&table, 1);\n    tsk_mutation_table_set_max_derived_state_length_increment(&table, 1);\n    tsk_mutation_table_print_state(&table, _devnull);\n    ret = 
tsk_mutation_table_dump_text(&table, _devnull);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    len = 0;\n    for (j = 0; j < (tsk_id_t) num_rows; j++) {\n        k = TSK_MIN((tsk_size_t) j + 1, max_len);\n        ret_id = tsk_mutation_table_add_row(&table, j, j, j, (double) j, c, k, c, k);\n        CU_ASSERT_EQUAL_FATAL(ret_id, j);\n        CU_ASSERT_EQUAL(table.site[j], j);\n        CU_ASSERT_EQUAL(table.node[j], j);\n        CU_ASSERT_EQUAL(table.parent[j], j);\n        CU_ASSERT_EQUAL(table.time[j], j);\n        CU_ASSERT_EQUAL(table.derived_state_offset[j], len);\n        CU_ASSERT_EQUAL(table.metadata_offset[j], len);\n        CU_ASSERT_EQUAL(table.num_rows, (tsk_size_t) j + 1);\n        len += k;\n        CU_ASSERT_EQUAL(table.derived_state_offset[j + 1], len);\n        CU_ASSERT_EQUAL(table.derived_state_length, len);\n        CU_ASSERT_EQUAL(table.metadata_offset[j + 1], len);\n        CU_ASSERT_EQUAL(table.metadata_length, len);\n\n        ret = tsk_mutation_table_get_row(&table, (tsk_id_t) j, &mutation);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(mutation.id, j);\n        CU_ASSERT_EQUAL(mutation.site, j);\n        CU_ASSERT_EQUAL(mutation.node, j);\n        CU_ASSERT_EQUAL(mutation.parent, j);\n        CU_ASSERT_EQUAL(mutation.time, j);\n        CU_ASSERT_EQUAL(mutation.metadata_length, k);\n        CU_ASSERT_NSTRING_EQUAL(mutation.metadata, c, k);\n        CU_ASSERT_EQUAL(mutation.derived_state_length, k);\n        CU_ASSERT_NSTRING_EQUAL(mutation.derived_state, c, k);\n    }\n    ret = tsk_mutation_table_get_row(&table, (tsk_id_t) num_rows, &mutation);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);\n    tsk_mutation_table_print_state(&table, _devnull);\n    ret = tsk_mutation_table_dump_text(&table, _devnull);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    num_rows *= 2;\n    site = tsk_malloc(num_rows * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(site != NULL);\n    node = tsk_malloc(num_rows * sizeof(tsk_id_t));\n    
CU_ASSERT_FATAL(node != NULL);\n    parent = tsk_malloc(num_rows * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(parent != NULL);\n    time = tsk_malloc(num_rows * sizeof(double));\n    CU_ASSERT_FATAL(time != NULL);\n    derived_state = tsk_malloc(num_rows * sizeof(char));\n    CU_ASSERT_FATAL(derived_state != NULL);\n    derived_state_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n    CU_ASSERT_FATAL(derived_state_offset != NULL);\n    metadata = tsk_malloc(num_rows * sizeof(char));\n    CU_ASSERT_FATAL(metadata != NULL);\n    metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n    CU_ASSERT_FATAL(metadata_offset != NULL);\n\n    for (j = 0; j < (tsk_id_t) num_rows; j++) {\n        node[j] = j;\n        site[j] = j + 1;\n        parent[j] = j + 2;\n        time[j] = (double) (j + 3);\n        derived_state[j] = 'Y';\n        derived_state_offset[j] = (tsk_size_t) j;\n        metadata[j] = 'M';\n        metadata_offset[j] = (tsk_size_t) j;\n    }\n\n    derived_state_offset[num_rows] = num_rows;\n    metadata_offset[num_rows] = num_rows;\n    ret = tsk_mutation_table_set_columns(&table, num_rows, site, node, parent, time,\n        derived_state, derived_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.site, site, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.node, node, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.parent, parent, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.derived_state, derived_state, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n    CU_ASSERT_EQUAL(table.derived_state_length, num_rows);\n    CU_ASSERT_EQUAL(table.metadata_length, num_rows);\n\n    /* Append another 
num_rows */\n    ret = tsk_mutation_table_append_columns(&table, num_rows, site, node, parent, time,\n        derived_state, derived_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.site, site, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.site + num_rows, site, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.node, node, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.node + num_rows, node, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.parent, parent, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.parent + num_rows, parent, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.time + num_rows, time, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.derived_state, derived_state, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.derived_state + num_rows, derived_state,\n                        num_rows * sizeof(char)),\n        0);\n    CU_ASSERT_EQUAL(table.derived_state_length, 2 * num_rows);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.metadata + num_rows, metadata, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(table.metadata_length, 2 * num_rows);\n    CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);\n\n    /* Truncate back to num_rows */\n    ret = tsk_mutation_table_truncate(&table, num_rows);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.site, site, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.node, node, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.parent, parent, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 
0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.derived_state, derived_state, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n    CU_ASSERT_EQUAL(table.derived_state_length, num_rows);\n    CU_ASSERT_EQUAL(table.metadata_length, num_rows);\n\n    /* Test equality with and without metadata */\n    tsk_mutation_table_copy(&table, &table2, 0);\n    CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n    /* Change the metadata values */\n    table2.metadata[0] = 0;\n    CU_ASSERT_FALSE(tsk_mutation_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n    /* Change the last metadata entry */\n    table2.metadata_offset[table2.num_rows]\n        = table2.metadata_offset[table2.num_rows - 1];\n    CU_ASSERT_FALSE(tsk_mutation_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n    /* Delete all metadata */\n    tsk_memset(table2.metadata_offset, 0,\n        (table2.num_rows + 1) * sizeof(*table2.metadata_offset));\n    CU_ASSERT_FALSE(tsk_mutation_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n    tsk_mutation_table_free(&table2);\n\n    ret = tsk_mutation_table_truncate(&table, num_rows + 1);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TABLE_POSITION);\n\n    /* Check all this again, except with parent == NULL, time == NULL\n     * and metadata == NULL. 
*/\n    tsk_memset(parent, 0xff, num_rows * sizeof(tsk_id_t));\n    for (j = 0; j < (tsk_id_t) num_rows; j++) {\n        time[j] = TSK_UNKNOWN_TIME;\n    }\n    tsk_memset(metadata_offset, 0, (num_rows + 1) * sizeof(tsk_size_t));\n    ret = tsk_mutation_table_set_columns(&table, num_rows, site, node, NULL, NULL,\n        derived_state, derived_state_offset, NULL, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.site, site, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.node, node, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.parent, parent, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.derived_state, derived_state, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.derived_state_offset, derived_state_offset,\n                        num_rows * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,\n                        (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n    CU_ASSERT_EQUAL(table.derived_state_length, num_rows);\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n\n    /* Append another num_rows */\n    ret = tsk_mutation_table_append_columns(&table, num_rows, site, node, NULL, NULL,\n        derived_state, derived_state_offset, NULL, NULL);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.site, site, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.site + num_rows, site, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.node, node, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.node + num_rows, node, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.parent, parent, num_rows * sizeof(tsk_id_t)), 0);\n    
CU_ASSERT_EQUAL(\n        tsk_memcmp(table.parent + num_rows, parent, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.time + num_rows, time, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.derived_state, derived_state, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.derived_state + num_rows, derived_state,\n                        num_rows * sizeof(char)),\n        0);\n    CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);\n    CU_ASSERT_EQUAL(table.derived_state_length, 2 * num_rows);\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n\n    /* Inputs except parent, time, metadata and metadata_offset cannot be NULL*/\n    ret = tsk_mutation_table_set_columns(&table, num_rows, NULL, node, parent, time,\n        derived_state, derived_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_mutation_table_set_columns(&table, num_rows, site, NULL, parent, time,\n        derived_state, derived_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_mutation_table_set_columns(&table, num_rows, site, node, parent, time,\n        NULL, derived_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_mutation_table_set_columns(&table, num_rows, site, node, parent, time,\n        derived_state, NULL, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_mutation_table_set_columns(&table, num_rows, site, node, parent, time,\n        derived_state, derived_state_offset, NULL, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_mutation_table_set_columns(&table, num_rows, site, node, parent, time,\n        derived_state, derived_state_offset, metadata, NULL);\n    CU_ASSERT_EQUAL(ret, 
TSK_ERR_BAD_PARAM_VALUE);\n\n    /* Inputs except parent, time, metadata and metadata_offset cannot be NULL*/\n    ret = tsk_mutation_table_append_columns(&table, num_rows, NULL, node, parent, time,\n        derived_state, derived_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_mutation_table_append_columns(&table, num_rows, site, NULL, parent, time,\n        derived_state, derived_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_mutation_table_append_columns(&table, num_rows, site, node, parent, time,\n        NULL, derived_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_mutation_table_append_columns(&table, num_rows, site, node, parent, time,\n        derived_state, NULL, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_mutation_table_append_columns(&table, num_rows, site, node, parent, time,\n        derived_state, derived_state_offset, NULL, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_mutation_table_append_columns(&table, num_rows, site, node, parent, time,\n        derived_state, derived_state_offset, metadata, NULL);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n\n    /* Test extend method */\n    for (j = 0; j < (tsk_id_t) num_rows; j++) {\n        parent[j] = j + 2;\n        time[j] = (double) (j + 3);\n        metadata[j] = (char) ('A' + j);\n        metadata_offset[j] = (tsk_size_t) j;\n    }\n    metadata_offset[num_rows] = num_rows;\n    ret = tsk_mutation_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_mutation_table_init(&table2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Can't extend from self */\n    ret = tsk_mutation_table_extend(&table, &table, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_CANNOT_EXTEND_FROM_SELF);\n\n    /* Two 
empty tables */\n    CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, 0));\n    ret = tsk_mutation_table_extend(&table, &table2, table2.num_rows, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, 0));\n\n    /* Row out of bounds */\n    ret = tsk_mutation_table_extend(&table, &table2, num_row_subset, row_subset, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);\n\n    /* Num rows out of bounds */\n    ret = tsk_mutation_table_extend(&table, &table2, num_rows * 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);\n\n    /* Copy rows in order if index NULL */\n    ret = tsk_mutation_table_set_columns(&table2, num_rows, site, node, parent, time,\n        derived_state, derived_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_mutation_table_equals(&table, &table2, 0));\n    ret = tsk_mutation_table_extend(&table, &table2, table2.num_rows, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, 0));\n\n    /* Copy nothing if index not NULL but length zero */\n    ret = tsk_mutation_table_extend(&table, &table2, 0, row_subset, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, 0));\n\n    /* Copy first N rows in order if index NULL */\n    ret = tsk_mutation_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_mutation_table_extend(&table, &table2, num_rows / 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_mutation_table_truncate(&table2, num_rows / 2);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, 0));\n    ret = tsk_mutation_table_set_columns(&table2, num_rows, site, node, parent, time,\n        derived_state, derived_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Copy a 
subset */\n    ret = tsk_mutation_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_mutation_table_equals(&table, &table2, 0));\n    ret = tsk_mutation_table_extend(&table, &table2, num_row_subset, row_subset, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (k = 0; k < num_row_subset; k++) {\n        ret = tsk_mutation_table_get_row(&table, (tsk_id_t) k, &mutation);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_mutation_table_get_row(&table2, row_subset[k], &mutation2);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(mutation.site, mutation2.site);\n        CU_ASSERT_EQUAL(mutation.node, mutation2.node);\n        CU_ASSERT_EQUAL(mutation.parent, mutation2.parent);\n        CU_ASSERT_EQUAL(mutation.time, mutation2.time);\n        CU_ASSERT_EQUAL(mutation.derived_state_length, mutation2.derived_state_length);\n        CU_ASSERT_EQUAL(mutation.metadata_length, mutation2.metadata_length);\n        CU_ASSERT_EQUAL(\n            tsk_memcmp(mutation.derived_state, mutation2.derived_state,\n                mutation.derived_state_length * sizeof(*mutation.derived_state)),\n            0);\n        CU_ASSERT_EQUAL(tsk_memcmp(mutation.metadata, mutation2.metadata,\n                            mutation.metadata_length * sizeof(*mutation.metadata)),\n            0);\n    }\n\n    /* Test for bad offsets */\n    derived_state_offset[0] = 1;\n    ret = tsk_mutation_table_set_columns(&table, num_rows, site, node, parent, time,\n        derived_state, derived_state_offset, NULL, NULL);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);\n    derived_state_offset[0] = 0;\n    derived_state_offset[num_rows] = 0;\n    ret = tsk_mutation_table_set_columns(&table, num_rows, site, node, parent, time,\n        derived_state, derived_state_offset, NULL, NULL);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);\n\n    ret = tsk_mutation_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    
CU_ASSERT_EQUAL(table.metadata_schema_length, 0);\n    CU_ASSERT_EQUAL(table.metadata_schema, NULL);\n    const char *example = \"An example of metadata schema with unicode 🎄🌳🌴🌲🎋\";\n    tsk_size_t example_length = (tsk_size_t) strlen(example);\n    const char *example2 = \"A different example 🎄🌳🌴🌲🎋\";\n    tsk_size_t example2_length = (tsk_size_t) strlen(example2);\n    tsk_mutation_table_set_metadata_schema(&table, example, example_length);\n    CU_ASSERT_EQUAL(table.metadata_schema_length, example_length);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_schema, example, example_length), 0);\n\n    tsk_mutation_table_copy(&table, &table2, TSK_NO_INIT);\n    CU_ASSERT_EQUAL(table.metadata_schema_length, table2.metadata_schema_length);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.metadata_schema, table2.metadata_schema, example_length), 0);\n    tsk_mutation_table_set_metadata_schema(&table2, example, example_length);\n    CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, 0));\n    tsk_mutation_table_set_metadata_schema(&table2, example2, example2_length);\n    CU_ASSERT_FALSE(tsk_mutation_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n\n    ret = tsk_mutation_table_clear(&table);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(table.num_rows, 0);\n    CU_ASSERT_EQUAL(table.derived_state_length, 0);\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n\n    ret = tsk_mutation_table_free(&table);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_mutation_table_free(&table2);\n    CU_ASSERT_EQUAL(ret, 0);\n    free(site);\n    free(node);\n    free(parent);\n    free(time);\n    free(derived_state);\n    free(derived_state_offset);\n    free(metadata);\n    free(metadata_offset);\n}\n\nstatic void\ntest_mutation_table_takeset(void)\n{\n    int ret = 0;\n    tsk_id_t ret_id;\n    tsk_mutation_table_t source_table, table;\n    tsk_size_t num_rows = 100;\n    tsk_id_t j;\n    tsk_id_t *site;\n    tsk_id_t 
*node;\n    tsk_id_t *parent;\n    double *time;\n    char *derived_state;\n    tsk_size_t *derived_state_offset;\n    char *metadata;\n    tsk_size_t *metadata_offset;\n    const char *test_derived_state = \"red\";\n    tsk_size_t test_derived_state_length = 3;\n    const char *test_metadata = \"test\";\n    tsk_size_t test_metadata_length = 4;\n    tsk_size_t zeros[num_rows + 1];\n    tsk_id_t neg_ones[num_rows];\n    double unknown_times[num_rows];\n\n    tsk_memset(zeros, 0, (num_rows + 1) * sizeof(tsk_size_t));\n    tsk_memset(neg_ones, 0xff, num_rows * sizeof(tsk_id_t));\n    /* Make a table to copy from */\n    ret = tsk_mutation_table_init(&source_table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (j = 0; j < (tsk_id_t) num_rows; j++) {\n        unknown_times[j] = TSK_UNKNOWN_TIME;\n        ret_id = tsk_mutation_table_add_row(&source_table, j, j + 1, j + 2,\n            (double) j + 3, test_derived_state, test_derived_state_length, test_metadata,\n            test_metadata_length);\n        CU_ASSERT_EQUAL_FATAL(ret_id, j);\n    }\n\n    /* Prepare arrays to be taken */\n    site = tsk_malloc(num_rows * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(site != NULL);\n    tsk_memcpy(site, source_table.site, num_rows * sizeof(tsk_id_t));\n    node = tsk_malloc(num_rows * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(node != NULL);\n    tsk_memcpy(node, source_table.node, num_rows * sizeof(tsk_id_t));\n    parent = tsk_malloc(num_rows * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(parent != NULL);\n    tsk_memcpy(parent, source_table.parent, num_rows * sizeof(tsk_id_t));\n    time = tsk_malloc(num_rows * sizeof(double));\n    CU_ASSERT_FATAL(time != NULL);\n    tsk_memcpy(time, source_table.time, num_rows * sizeof(double));\n    derived_state = tsk_malloc(num_rows * test_derived_state_length * sizeof(char));\n    CU_ASSERT_FATAL(derived_state != NULL);\n    tsk_memcpy(derived_state, source_table.derived_state,\n        num_rows * test_derived_state_length * sizeof(char));\n  
  derived_state_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n    CU_ASSERT_FATAL(derived_state_offset != NULL);\n    tsk_memcpy(derived_state_offset, source_table.derived_state_offset,\n        (num_rows + 1) * sizeof(tsk_size_t));\n    metadata = tsk_malloc(num_rows * test_metadata_length * sizeof(char));\n    CU_ASSERT_FATAL(metadata != NULL);\n    tsk_memcpy(\n        metadata, source_table.metadata, num_rows * test_metadata_length * sizeof(char));\n    metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n    CU_ASSERT_FATAL(metadata_offset != NULL);\n    tsk_memcpy(metadata_offset, source_table.metadata_offset,\n        (num_rows + 1) * sizeof(tsk_size_t));\n\n    ret = tsk_mutation_table_init(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Add one row so that we can check takeset frees it */\n    ret_id = tsk_mutation_table_add_row(&table, 1, 1, 1, 1, test_derived_state,\n        test_derived_state_length, test_metadata, test_metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    ret = tsk_mutation_table_takeset_columns(&table, num_rows, site, node, parent, time,\n        derived_state, derived_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_mutation_table_equals(&source_table, &table, 0));\n\n    /* Test error states, all of these must not take the array, or free existing */\n    /* metadata and metadata offset must be simultaneously NULL or not */\n    ret = tsk_mutation_table_takeset_columns(&table, num_rows, NULL, node, parent, time,\n        derived_state, derived_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_mutation_table_takeset_columns(&table, num_rows, site, NULL, parent, time,\n        derived_state, derived_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    // Parent and time not tested as they have defaults\n    ret = 
tsk_mutation_table_takeset_columns(&table, num_rows, site, node, parent, time,\n        NULL, derived_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_mutation_table_takeset_columns(&table, num_rows, site, node, parent, time,\n        derived_state, NULL, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_mutation_table_takeset_columns(&table, num_rows, site, node, parent, time,\n        derived_state, derived_state_offset, NULL, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_mutation_table_takeset_columns(&table, num_rows, site, node, parent, time,\n        derived_state, derived_state_offset, metadata, NULL);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n\n    /* Check error on bad derived_state offset */\n    derived_state_offset[0] = 1;\n    ret = tsk_mutation_table_takeset_columns(&table, num_rows, site, node, parent, time,\n        derived_state, derived_state_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);\n\n    /* Truncation after takeset keeps memory and max_rows */\n    ret = tsk_mutation_table_clear(&table);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(table.max_rows, num_rows);\n\n    // Re init non-optional arrays\n    site = tsk_malloc(num_rows * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(site != NULL);\n    tsk_memcpy(site, source_table.site, num_rows * sizeof(tsk_id_t));\n    node = tsk_malloc(num_rows * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(node != NULL);\n    tsk_memcpy(node, source_table.node, num_rows * sizeof(tsk_id_t));\n    derived_state = tsk_malloc(num_rows * test_derived_state_length * sizeof(char));\n    CU_ASSERT_FATAL(derived_state != NULL);\n    tsk_memcpy(derived_state, source_table.derived_state,\n        num_rows * test_derived_state_length * sizeof(char));\n    derived_state_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n 
   CU_ASSERT_FATAL(derived_state_offset != NULL);\n    tsk_memcpy(derived_state_offset, source_table.derived_state_offset,\n        (num_rows + 1) * sizeof(tsk_size_t));\n    /* if metadata and offset are both null, all entries are zero length, if parent or\n     * time are NULL they default to null values*/\n    num_rows = 10;\n    ret = tsk_mutation_table_takeset_columns(&table, num_rows, site, node, NULL, NULL,\n        derived_state, derived_state_offset, NULL, NULL);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.parent, neg_ones, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.time, unknown_times, num_rows * sizeof(double)), 0);\n\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.metadata_offset, zeros, (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n\n    ret = tsk_mutation_table_free(&table);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_mutation_table_free(&source_table);\n    CU_ASSERT_EQUAL(ret, 0);\n}\n\nstatic void\ntest_mutation_table_update_row(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_mutation_table_t table;\n    tsk_mutation_t row;\n    const char *derived_state = \"XYZ\";\n    const char *metadata = \"ABC\";\n\n    ret = tsk_mutation_table_init(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret_id\n        = tsk_mutation_table_add_row(&table, 0, 1, 2, 3, derived_state, 1, metadata, 1);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id\n        = tsk_mutation_table_add_row(&table, 1, 2, 3, 4, derived_state, 2, metadata, 2);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id\n        = tsk_mutation_table_add_row(&table, 2, 3, 4, 5, derived_state, 3, metadata, 3);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_mutation_table_update_row(\n        &table, 0, 1, 2, 3, 4, &derived_state[1], 1, &metadata[1], 1);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = 
tsk_mutation_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.site, 1);\n    CU_ASSERT_EQUAL_FATAL(row.node, 2);\n    CU_ASSERT_EQUAL_FATAL(row.parent, 3);\n    CU_ASSERT_EQUAL_FATAL(row.time, 4);\n    CU_ASSERT_EQUAL_FATAL(row.derived_state_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.derived_state[0], 'Y');\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');\n\n    ret = tsk_mutation_table_update_row(&table, 0, row.site + 1, row.node + 1,\n        row.parent + 1, row.time + 1, row.derived_state, row.derived_state_length,\n        row.metadata, row.metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_mutation_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.site, 2);\n    CU_ASSERT_EQUAL_FATAL(row.node, 3);\n    CU_ASSERT_EQUAL_FATAL(row.parent, 4);\n    CU_ASSERT_EQUAL_FATAL(row.time, 5);\n    CU_ASSERT_EQUAL_FATAL(row.derived_state_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.derived_state[0], 'Y');\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');\n\n    ret = tsk_mutation_table_update_row(&table, 0, row.site, row.node, row.parent,\n        row.time, row.derived_state, row.derived_state_length, row.metadata,\n        row.metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_mutation_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.site, 2);\n    CU_ASSERT_EQUAL_FATAL(row.node, 3);\n    CU_ASSERT_EQUAL_FATAL(row.parent, 4);\n    CU_ASSERT_EQUAL_FATAL(row.time, 5);\n    CU_ASSERT_EQUAL_FATAL(row.derived_state_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.derived_state[0], 'Y');\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');\n\n    ret = tsk_mutation_table_update_row(&table, 0, row.site, row.node, row.parent,\n        row.time, NULL, 0, 
row.metadata, row.metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_mutation_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.site, 2);\n    CU_ASSERT_EQUAL_FATAL(row.node, 3);\n    CU_ASSERT_EQUAL_FATAL(row.parent, 4);\n    CU_ASSERT_EQUAL_FATAL(row.time, 5);\n    CU_ASSERT_EQUAL_FATAL(row.derived_state_length, 0);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');\n\n    ret = tsk_mutation_table_update_row(\n        &table, 0, 2, 3, 4, 5, derived_state, 3, metadata, 3);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_mutation_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.site, 2);\n    CU_ASSERT_EQUAL_FATAL(row.node, 3);\n    CU_ASSERT_EQUAL_FATAL(row.parent, 4);\n    CU_ASSERT_EQUAL_FATAL(row.time, 5);\n    CU_ASSERT_EQUAL_FATAL(row.derived_state_length, 3);\n    CU_ASSERT_EQUAL_FATAL(row.derived_state[0], 'X');\n    CU_ASSERT_EQUAL_FATAL(row.derived_state[1], 'Y');\n    CU_ASSERT_EQUAL_FATAL(row.derived_state[2], 'Z');\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 3);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[2], 'C');\n\n    ret = tsk_mutation_table_update_row(&table, 1, 5, 6, 7, 8, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_mutation_table_get_row(&table, 1, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.site, 5);\n    CU_ASSERT_EQUAL_FATAL(row.node, 6);\n    CU_ASSERT_EQUAL_FATAL(row.parent, 7);\n    CU_ASSERT_EQUAL_FATAL(row.time, 8);\n    CU_ASSERT_EQUAL_FATAL(row.derived_state_length, 0);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 0);\n\n    ret = tsk_mutation_table_get_row(&table, 2, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.site, 2);\n    CU_ASSERT_EQUAL_FATAL(row.node, 3);\n    
CU_ASSERT_EQUAL_FATAL(row.parent, 4);\n    CU_ASSERT_EQUAL_FATAL(row.time, 5);\n    CU_ASSERT_EQUAL_FATAL(row.derived_state_length, 3);\n    CU_ASSERT_EQUAL_FATAL(row.derived_state[0], 'X');\n    CU_ASSERT_EQUAL_FATAL(row.derived_state[1], 'Y');\n    CU_ASSERT_EQUAL_FATAL(row.derived_state[2], 'Z');\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 3);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[2], 'C');\n\n    ret = tsk_mutation_table_update_row(&table, 3, 0, 0, 0, 0, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);\n\n    tsk_mutation_table_free(&table);\n}\n\nstatic void\ntest_mutation_table_keep_rows(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_size_t j;\n    tsk_mutation_table_t source, t1, t2;\n    tsk_mutation_t row;\n    const char *derived_state = \"XYZ\";\n    const char *metadata = \"ABC\";\n    tsk_bool_t keep[3] = { 1, 1, 1 };\n    tsk_id_t id_map[3];\n    tsk_id_t indexes[] = { 0, 1, 2 };\n\n    ret = tsk_mutation_table_init(&source, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret_id = tsk_mutation_table_add_row(\n        &source, 0, 1, -1, 3.0, derived_state, 1, metadata, 1);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &source, 1, 2, -1, 4.0, derived_state, 2, metadata, 2);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &source, 2, 3, 0, 5.0, derived_state, 3, metadata, 3);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_mutation_table_copy(&source, &t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_mutation_table_keep_rows(&t1, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_mutation_table_equals(&t1, &source, 0));\n\n    ret = tsk_mutation_table_keep_rows(&t1, keep, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_mutation_table_equals(&t1, &source, 0));\n    
CU_ASSERT_EQUAL_FATAL(id_map[0], 0);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);\n\n    keep[0] = 0;\n    keep[1] = 0;\n    keep[2] = 0;\n    ret = tsk_mutation_table_keep_rows(&t1, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 0);\n    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);\n\n    ret = tsk_mutation_table_copy(&source, &t1, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    keep[0] = 0;\n    keep[1] = 1;\n    keep[2] = 0;\n    ret = tsk_mutation_table_keep_rows(&t1, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);\n    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);\n\n    /* The single surviving row must carry the values of source row 1 */\n    ret = tsk_mutation_table_get_row(&t1, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.site, 1);\n    CU_ASSERT_EQUAL_FATAL(row.node, 2);\n    CU_ASSERT_EQUAL_FATAL(row.parent, -1);\n    CU_ASSERT_EQUAL_FATAL(row.time, 4);\n    CU_ASSERT_EQUAL_FATAL(row.derived_state_length, 2);\n    CU_ASSERT_EQUAL_FATAL(row.derived_state[0], 'X');\n    CU_ASSERT_EQUAL_FATAL(row.derived_state[1], 'Y');\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 2);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');\n\n    tsk_mutation_table_free(&t1);\n\n    keep[0] = 0;\n    keep[1] = 0;\n    keep[2] = 0;\n    /* Keeping first n rows equivalent to truncate */\n    for (j = 0; j < source.num_rows; j++) {\n        ret = tsk_mutation_table_copy(&source, &t2, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_mutation_table_copy(&source, &t1, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_mutation_table_truncate(&t1, j + 1);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        keep[j] = 1;\n        ret = tsk_mutation_table_keep_rows(&t2, keep, 0, NULL);\n        
CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_TRUE(tsk_mutation_table_equals(&t1, &t2, 0));\n\n        /* Adding the remaining rows back on to the table gives the original\n         * table */\n        ret = tsk_mutation_table_extend(\n            &t2, &source, source.num_rows - j - 1, indexes + j + 1, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_TRUE(tsk_mutation_table_equals(&source, &t2, 0));\n\n        tsk_mutation_table_free(&t1);\n        tsk_mutation_table_free(&t2);\n    }\n\n    tsk_mutation_table_free(&source);\n}\n\nstatic void\ntest_mutation_table_keep_rows_parent_references(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_mutation_table_t source, t;\n    tsk_bool_t keep[4] = { 1, 1, 1, 1 };\n    tsk_id_t id_map[4];\n\n    ret = tsk_mutation_table_init(&source, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret_id = tsk_mutation_table_add_row(&source, 0, 1, -1, 3.0, \"A\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(&source, 1, 2, -1, 4.0, \"A\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(&source, 2, 3, 1, 5.0, \"A\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(&source, 3, 4, 1, 6.0, \"A\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_mutation_table_copy(&source, &t, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* OOB errors */\n    t.parent[0] = -2;\n    ret = tsk_mutation_table_keep_rows(&t, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(t.num_rows, 4);\n\n    t.parent[0] = 4;\n    ret = tsk_mutation_table_keep_rows(&t, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(t.num_rows, 4);\n    /* But ignored if row is not kept */\n    keep[0] = false;\n    ret = tsk_mutation_table_keep_rows(&t, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    
tsk_mutation_table_free(&t);\n\n    ret = tsk_mutation_table_copy(&source, &t, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    /* Try to remove referenced row 1 */\n    keep[0] = true;\n    keep[1] = false;\n    ret = tsk_mutation_table_keep_rows(&t, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_KEEP_ROWS_MAP_TO_DELETED);\n    CU_ASSERT_TRUE(tsk_mutation_table_equals(&source, &t, 0));\n    tsk_mutation_table_free(&t);\n\n    ret = tsk_mutation_table_copy(&source, &t, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    /* remove unreferenced row 0 */\n    keep[0] = false;\n    keep[1] = true;\n    ret = tsk_mutation_table_keep_rows(&t, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(t.num_rows, 3);\n    CU_ASSERT_EQUAL_FATAL(t.parent[0], TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(t.parent[1], 0);\n    CU_ASSERT_EQUAL_FATAL(t.parent[2], 0);\n    tsk_mutation_table_free(&t);\n\n    /* Check that we don't change the table in error cases. */\n    source.parent[3] = -2;\n    ret = tsk_mutation_table_copy(&source, &t, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    keep[0] = true;\n    ret = tsk_mutation_table_keep_rows(&t, keep, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);\n    CU_ASSERT_TRUE(tsk_mutation_table_equals(&source, &t, 0));\n    tsk_mutation_table_free(&t);\n\n    /* Check that we don't change the table in error cases. 
*/\n    source.parent[3] = 0;\n    ret = tsk_mutation_table_copy(&source, &t, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    keep[0] = false;\n    ret = tsk_mutation_table_keep_rows(&t, keep, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_KEEP_ROWS_MAP_TO_DELETED);\n    CU_ASSERT_TRUE(tsk_mutation_table_equals(&source, &t, 0));\n    tsk_mutation_table_free(&t);\n\n    tsk_mutation_table_free(&source);\n}\n\nstatic void\ntest_migration_table(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_migration_table_t table, table2;\n    tsk_size_t num_rows = 100;\n    tsk_id_t j;\n    tsk_id_t *node;\n    tsk_id_t *source, *dest;\n    double *left, *right, *time;\n    tsk_migration_t migration, migration2;\n    char *metadata;\n    tsk_size_t *metadata_offset;\n    const char *test_metadata = \"test\";\n    tsk_size_t test_metadata_length = 4;\n    char metadata_copy[test_metadata_length + 1];\n    tsk_id_t row_subset[6] = { 1, 9, 1, 0, 2, 2 };\n    tsk_size_t num_row_subset = 6;\n\n    metadata_copy[test_metadata_length] = '\\0';\n    ret = tsk_migration_table_init(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_migration_table_set_max_rows_increment(&table, 1);\n    tsk_migration_table_print_state(&table, _devnull);\n    ret = tsk_migration_table_dump_text(&table, _devnull);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    for (j = 0; j < (tsk_id_t) num_rows; j++) {\n        ret_id = tsk_migration_table_add_row(&table, (double) j, (double) j, j, j, j,\n            (double) j, test_metadata, test_metadata_length);\n        CU_ASSERT_EQUAL_FATAL(ret_id, j);\n        CU_ASSERT_EQUAL(table.left[j], j);\n        CU_ASSERT_EQUAL(table.right[j], j);\n        CU_ASSERT_EQUAL(table.node[j], j);\n        CU_ASSERT_EQUAL(table.source[j], j);\n        CU_ASSERT_EQUAL(table.dest[j], j);\n        CU_ASSERT_EQUAL(table.time[j], j);\n        CU_ASSERT_EQUAL(table.num_rows, (tsk_size_t) j + 1);\n        CU_ASSERT_EQUAL(\n            table.metadata_length, (tsk_size_t) (j + 1) * 
test_metadata_length);\n        CU_ASSERT_EQUAL(table.metadata_offset[j + 1], table.metadata_length);\n        /* check the metadata */\n        tsk_memcpy(metadata_copy, table.metadata + table.metadata_offset[j],\n            test_metadata_length);\n        CU_ASSERT_NSTRING_EQUAL(metadata_copy, test_metadata, test_metadata_length);\n\n        ret = tsk_migration_table_get_row(&table, (tsk_id_t) j, &migration);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(migration.id, j);\n        CU_ASSERT_EQUAL(migration.left, j);\n        CU_ASSERT_EQUAL(migration.right, j);\n        CU_ASSERT_EQUAL(migration.node, j);\n        CU_ASSERT_EQUAL(migration.source, j);\n        CU_ASSERT_EQUAL(migration.dest, j);\n        CU_ASSERT_EQUAL(migration.time, j);\n        CU_ASSERT_EQUAL(migration.metadata_length, test_metadata_length);\n        CU_ASSERT_NSTRING_EQUAL(migration.metadata, test_metadata, test_metadata_length);\n    }\n    ret = tsk_migration_table_get_row(&table, (tsk_id_t) num_rows, &migration);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATION_OUT_OF_BOUNDS);\n    tsk_migration_table_print_state(&table, _devnull);\n    ret = tsk_migration_table_dump_text(&table, _devnull);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    num_rows *= 2;\n    left = tsk_malloc(num_rows * sizeof(double));\n    CU_ASSERT_FATAL(left != NULL);\n    tsk_memset(left, 1, num_rows * sizeof(double));\n    right = tsk_malloc(num_rows * sizeof(double));\n    CU_ASSERT_FATAL(right != NULL);\n    tsk_memset(right, 2, num_rows * sizeof(double));\n    time = tsk_malloc(num_rows * sizeof(double));\n    CU_ASSERT_FATAL(time != NULL);\n    tsk_memset(time, 3, num_rows * sizeof(double));\n    node = tsk_malloc(num_rows * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(node != NULL);\n    tsk_memset(node, 4, num_rows * sizeof(tsk_id_t));\n    source = tsk_malloc(num_rows * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(source != NULL);\n    tsk_memset(source, 5, num_rows * sizeof(tsk_id_t));\n    dest = 
tsk_malloc(num_rows * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(dest != NULL);\n    tsk_memset(dest, 6, num_rows * sizeof(tsk_id_t));\n    metadata = tsk_malloc(num_rows * sizeof(char));\n    /* Only fill the buffer once the allocation is known to have succeeded */\n    CU_ASSERT_FATAL(metadata != NULL);\n    tsk_memset(metadata, 'a', num_rows * sizeof(char));\n    metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n    CU_ASSERT_FATAL(metadata_offset != NULL);\n    for (j = 0; j < (tsk_id_t) num_rows + 1; j++) {\n        metadata_offset[j] = (tsk_size_t) j;\n    }\n\n    ret = tsk_migration_table_set_columns(&table, num_rows, left, right, node, source,\n        dest, time, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.left, left, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.right, right, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.node, node, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.source, source, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.dest, dest, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,\n                        (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n    CU_ASSERT_EQUAL(table.metadata_length, num_rows);\n\n    /* Append another num_rows */\n    ret = tsk_migration_table_append_columns(&table, num_rows, left, right, node, source,\n        dest, time, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.left, left, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.left + num_rows, left, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.right, right, num_rows * sizeof(double)), 0);\n    
CU_ASSERT_EQUAL(\n        tsk_memcmp(table.right + num_rows, right, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.time + num_rows, time, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.node, node, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.node + num_rows, node, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.source, source, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.source + num_rows, source, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.dest, dest, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.dest + num_rows, dest, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.metadata + num_rows, metadata, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);\n    CU_ASSERT_EQUAL(table.metadata_length, 2 * num_rows);\n\n    /* Truncate back to num_rows */\n    ret = tsk_migration_table_truncate(&table, num_rows);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.left, left, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.right, right, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.node, node, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.source, source, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.dest, dest, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,\n                        (num_rows + 1) 
* sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n    CU_ASSERT_EQUAL(table.metadata_length, num_rows);\n\n    /* Test equality with and without metadata */\n    tsk_migration_table_copy(&table, &table2, 0);\n    CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n    /* Change the metadata values */\n    table2.metadata[0] = 0;\n    CU_ASSERT_FALSE(tsk_migration_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n    /* Change the last metadata entry */\n    table2.metadata_offset[table2.num_rows]\n        = table2.metadata_offset[table2.num_rows - 1];\n    CU_ASSERT_FALSE(tsk_migration_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n    /* Delete all metadata */\n    tsk_memset(table2.metadata_offset, 0,\n        (table2.num_rows + 1) * sizeof(*table2.metadata_offset));\n    CU_ASSERT_FALSE(tsk_migration_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n    tsk_migration_table_free(&table2);\n\n    ret = tsk_migration_table_truncate(&table, num_rows + 1);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TABLE_POSITION);\n\n    /* inputs cannot be NULL */\n    ret = tsk_migration_table_set_columns(&table, num_rows, NULL, right, node, source,\n        dest, time, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_migration_table_set_columns(&table, num_rows, left, NULL, node, source,\n        dest, time, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_migration_table_set_columns(&table, num_rows, left, right, NULL, source,\n        dest, time, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, 
TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_migration_table_set_columns(&table, num_rows, left, right, node, NULL,\n        dest, time, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_migration_table_set_columns(&table, num_rows, left, right, node, source,\n        NULL, time, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_migration_table_set_columns(&table, num_rows, left, right, node, source,\n        dest, NULL, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_migration_table_set_columns(\n        &table, num_rows, left, right, node, source, dest, time, NULL, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_migration_table_set_columns(\n        &table, num_rows, left, right, node, source, dest, time, metadata, NULL);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n\n    tsk_migration_table_clear(&table);\n    CU_ASSERT_EQUAL(table.num_rows, 0);\n\n    /* if metadata and metadata_offset are both null, all metadatas are zero length */\n    num_rows = 10;\n    tsk_memset(metadata_offset, 0, (num_rows + 1) * sizeof(tsk_size_t));\n    ret = tsk_migration_table_set_columns(\n        &table, num_rows, left, right, node, source, dest, time, NULL, NULL);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.left, left, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.right, right, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.node, node, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.source, source, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.dest, dest, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,\n                        (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n 
   CU_ASSERT_EQUAL(table.num_rows, num_rows);\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n    ret = tsk_migration_table_append_columns(\n        &table, num_rows, left, right, node, source, dest, time, NULL, NULL);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.left, left, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.left + num_rows, left, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.right, right, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.right + num_rows, right, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.time + num_rows, time, num_rows * sizeof(double)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.node, node, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.node + num_rows, node, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.source, source, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.source + num_rows, source, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.dest, dest, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.dest + num_rows, dest, num_rows * sizeof(tsk_id_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,\n                        (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset + num_rows, metadata_offset,\n                        (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n    tsk_migration_table_print_state(&table, _devnull);\n    ret = tsk_migration_table_dump_text(&table, _devnull);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Test extend method */\n    ret = 
tsk_migration_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_migration_table_init(&table2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Can't extend from self */\n    ret = tsk_migration_table_extend(&table, &table, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_CANNOT_EXTEND_FROM_SELF);\n\n    /* Two empty tables */\n    CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, 0));\n    ret = tsk_migration_table_extend(&table, &table2, table2.num_rows, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, 0));\n\n    /* Row out of bounds */\n    ret = tsk_migration_table_extend(&table, &table2, num_row_subset, row_subset, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATION_OUT_OF_BOUNDS);\n\n    /* Num rows out of bounds */\n    ret = tsk_migration_table_extend(&table, &table2, num_rows * 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATION_OUT_OF_BOUNDS);\n\n    /* Copy rows in order if index NULL */\n    ret = tsk_migration_table_set_columns(&table2, num_rows, left, right, node, source,\n        dest, time, metadata, metadata_offset);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_migration_table_equals(&table, &table2, 0));\n    ret = tsk_migration_table_extend(&table, &table2, table2.num_rows, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, 0));\n\n    /* Copy nothing if index not NULL but length zero */\n    ret = tsk_migration_table_extend(&table, &table2, 0, row_subset, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, 0));\n\n    /* Copy first N rows in order if index NULL */\n    ret = tsk_migration_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_migration_table_extend(&table, &table2, num_rows / 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = 
tsk_migration_table_truncate(&table2, num_rows / 2);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, 0));\n    ret = tsk_migration_table_set_columns(&table2, num_rows, left, right, node, source,\n        dest, time, metadata, metadata_offset);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Copy a subset */\n    ret = tsk_migration_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_migration_table_equals(&table, &table2, 0));\n    ret = tsk_migration_table_extend(&table, &table2, num_row_subset, row_subset, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (j = 0; j < (tsk_id_t) num_row_subset; j++) {\n        ret = tsk_migration_table_get_row(&table, j, &migration);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_migration_table_get_row(&table2, row_subset[j], &migration2);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(migration.source, migration2.source);\n        CU_ASSERT_EQUAL(migration.dest, migration2.dest);\n        CU_ASSERT_EQUAL(migration.node, migration2.node);\n        CU_ASSERT_EQUAL(migration.left, migration2.left);\n        CU_ASSERT_EQUAL(migration.right, migration2.right);\n        CU_ASSERT_EQUAL(migration.time, migration2.time);\n        CU_ASSERT_EQUAL(migration.metadata_length, migration2.metadata_length);\n        CU_ASSERT_EQUAL(tsk_memcmp(migration.metadata, migration2.metadata,\n                            migration.metadata_length * sizeof(*migration.metadata)),\n            0);\n    }\n\n    ret = tsk_migration_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(table.metadata_schema_length, 0);\n    CU_ASSERT_EQUAL(table.metadata_schema, NULL);\n    const char *example = \"An example of metadata schema with unicode 🎄🌳🌴🌲🎋\";\n    tsk_size_t example_length = (tsk_size_t) strlen(example);\n    const char *example2 = \"A different example 🎄🌳🌴🌲🎋\";\n    tsk_size_t example2_length = (tsk_size_t) 
strlen(example2);\n    tsk_migration_table_set_metadata_schema(&table, example, example_length);\n    CU_ASSERT_EQUAL(table.metadata_schema_length, example_length);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_schema, example, example_length), 0);\n\n    tsk_migration_table_copy(&table, &table2, TSK_NO_INIT);\n    CU_ASSERT_EQUAL(table.metadata_schema_length, table2.metadata_schema_length);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.metadata_schema, table2.metadata_schema, example_length), 0);\n    tsk_migration_table_set_metadata_schema(&table2, example, example_length);\n    CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, 0));\n    /* Each schema string is passed together with its own byte length */\n    tsk_migration_table_set_metadata_schema(&table2, example2, example2_length);\n    CU_ASSERT_FALSE(tsk_migration_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n\n    /* Capture each return value so the assertion checks the call just made */\n    ret = tsk_migration_table_clear(&table);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(table.num_rows, 0);\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n\n    ret = tsk_migration_table_free(&table);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_migration_table_free(&table2);\n    CU_ASSERT_EQUAL(ret, 0);\n\n    free(left);\n    free(right);\n    free(time);\n    free(node);\n    free(source);\n    free(dest);\n    free(metadata);\n    free(metadata_offset);\n}\n\nstatic void\ntest_migration_table_takeset(void)\n{\n    int ret = 0;\n    tsk_id_t ret_id;\n    tsk_migration_table_t source_table, table;\n    tsk_size_t num_rows = 100;\n    tsk_id_t j;\n    double *left;\n    double *right;\n    tsk_id_t *node;\n    tsk_id_t *source;\n    tsk_id_t *dest;\n    double *time;\n    char *metadata;\n    tsk_size_t *metadata_offset;\n    const char *test_metadata = \"test\";\n    tsk_size_t test_metadata_length = 4;\n    tsk_size_t zeros[num_rows + 1];\n\n    tsk_memset(zeros, 0, (num_rows + 1) * sizeof(tsk_size_t));\n    /* Make a table to copy from */\n    ret = 
tsk_migration_table_init(&source_table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (j = 0; j < (tsk_id_t) num_rows; j++) {\n        ret_id = tsk_migration_table_add_row(&source_table, (double) j, (double) j + 1,\n            j + 2, j + 3, j + 4, (double) j + 5, test_metadata, test_metadata_length);\n        CU_ASSERT_EQUAL_FATAL(ret_id, j);\n    }\n\n    /* Prepare arrays to be taken */\n    left = tsk_malloc(num_rows * sizeof(double));\n    CU_ASSERT_FATAL(left != NULL);\n    tsk_memcpy(left, source_table.left, num_rows * sizeof(double));\n    right = tsk_malloc(num_rows * sizeof(double));\n    CU_ASSERT_FATAL(right != NULL);\n    tsk_memcpy(right, source_table.right, num_rows * sizeof(double));\n    node = tsk_malloc(num_rows * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(node != NULL);\n    tsk_memcpy(node, source_table.node, num_rows * sizeof(tsk_id_t));\n    source = tsk_malloc(num_rows * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(source != NULL);\n    tsk_memcpy(source, source_table.source, num_rows * sizeof(tsk_id_t));\n    dest = tsk_malloc(num_rows * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(dest != NULL);\n    tsk_memcpy(dest, source_table.dest, num_rows * sizeof(tsk_id_t));\n    time = tsk_malloc(num_rows * sizeof(double));\n    CU_ASSERT_FATAL(time != NULL);\n    tsk_memcpy(time, source_table.time, num_rows * sizeof(double));\n    metadata = tsk_malloc(num_rows * test_metadata_length * sizeof(char));\n    CU_ASSERT_FATAL(metadata != NULL);\n    tsk_memcpy(\n        metadata, source_table.metadata, num_rows * test_metadata_length * sizeof(char));\n    metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n    CU_ASSERT_FATAL(metadata_offset != NULL);\n    tsk_memcpy(metadata_offset, source_table.metadata_offset,\n        (num_rows + 1) * sizeof(tsk_size_t));\n\n    ret = tsk_migration_table_init(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Add one row so that we can check takeset frees it */\n    ret_id = 
tsk_migration_table_add_row(\n        &table, 1, 1, 1, 1, 1, 1, test_metadata, test_metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    ret = tsk_migration_table_takeset_columns(&table, num_rows, left, right, node,\n        source, dest, time, metadata, metadata_offset);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_migration_table_equals(&source_table, &table, 0));\n\n    /* Test error states, all of these must not take the array, or free existing */\n    /* metadata and metadata offset must be simultaneously NULL or not */\n    ret = tsk_migration_table_takeset_columns(&table, num_rows, NULL, right, node,\n        source, dest, time, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_migration_table_takeset_columns(&table, num_rows, left, NULL, node, source,\n        dest, time, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_migration_table_takeset_columns(&table, num_rows, left, right, NULL,\n        source, dest, time, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_migration_table_takeset_columns(&table, num_rows, left, right, node, NULL,\n        dest, time, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_migration_table_takeset_columns(&table, num_rows, left, right, node,\n        source, NULL, time, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_migration_table_takeset_columns(&table, num_rows, left, right, node,\n        source, dest, NULL, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_migration_table_takeset_columns(\n        &table, num_rows, left, right, node, source, dest, time, NULL, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_migration_table_takeset_columns(\n        &table, num_rows, left, right, node, 
source, dest, time, metadata, NULL);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n\n    /* Truncation after takeset keeps memory and max_rows */\n    ret = tsk_migration_table_clear(&table);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(table.max_rows, num_rows);\n\n    // Re init non-optional arrays\n    left = tsk_malloc(num_rows * sizeof(double));\n    CU_ASSERT_FATAL(left != NULL);\n    tsk_memcpy(left, source_table.left, num_rows * sizeof(double));\n    right = tsk_malloc(num_rows * sizeof(double));\n    CU_ASSERT_FATAL(right != NULL);\n    tsk_memcpy(right, source_table.right, num_rows * sizeof(double));\n    node = tsk_malloc(num_rows * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(node != NULL);\n    tsk_memcpy(node, source_table.node, num_rows * sizeof(tsk_id_t));\n    source = tsk_malloc(num_rows * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(source != NULL);\n    tsk_memcpy(source, source_table.source, num_rows * sizeof(tsk_id_t));\n    dest = tsk_malloc(num_rows * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(dest != NULL);\n    tsk_memcpy(dest, source_table.dest, num_rows * sizeof(tsk_id_t));\n    time = tsk_malloc(num_rows * sizeof(double));\n    CU_ASSERT_FATAL(time != NULL);\n    tsk_memcpy(time, source_table.time, num_rows * sizeof(double));\n    /* if metadata and offset are both null, all entries are zero length */\n    num_rows = 10;\n    ret = tsk_migration_table_takeset_columns(\n        &table, num_rows, left, right, node, source, dest, time, NULL, NULL);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.metadata_offset, zeros, (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n\n    ret = tsk_migration_table_free(&table);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_migration_table_free(&source_table);\n    CU_ASSERT_EQUAL(ret, 0);\n}\n\nstatic void\ntest_migration_table_update_row(void)\n{\n    int ret;\n    
tsk_id_t ret_id;\n    tsk_migration_table_t table;\n    tsk_migration_t row;\n    const char *metadata = \"ABC\";\n\n    ret = tsk_migration_table_init(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret_id = tsk_migration_table_add_row(&table, 0, 1.0, 2, 3, 4, 5, metadata, 1);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_migration_table_add_row(&table, 1, 2.0, 3, 4, 5, 6, metadata, 2);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_migration_table_add_row(&table, 2, 3.0, 4, 5, 6, 7, metadata, 3);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_migration_table_update_row(&table, 0, 1, 2.0, 3, 4, 5, 6, &metadata[1], 1);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_migration_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.left, 1);\n    CU_ASSERT_EQUAL_FATAL(row.right, 2.0);\n    CU_ASSERT_EQUAL_FATAL(row.node, 3);\n    CU_ASSERT_EQUAL_FATAL(row.source, 4);\n    CU_ASSERT_EQUAL_FATAL(row.dest, 5);\n    CU_ASSERT_EQUAL_FATAL(row.time, 6);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');\n\n    ret = tsk_migration_table_update_row(&table, 0, row.left + 1, row.right + 1,\n        row.node + 1, row.source + 1, row.dest + 1, row.time + 1, row.metadata,\n        row.metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_migration_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.left, 2);\n    CU_ASSERT_EQUAL_FATAL(row.right, 3.0);\n    CU_ASSERT_EQUAL_FATAL(row.node, 4);\n    CU_ASSERT_EQUAL_FATAL(row.source, 5);\n    CU_ASSERT_EQUAL_FATAL(row.dest, 6);\n    CU_ASSERT_EQUAL_FATAL(row.time, 7);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');\n\n    ret = tsk_migration_table_update_row(&table, 0, 0, 0, 0, 0, 0, 0, metadata, 3);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_migration_table_get_row(&table, 0, &row);\n    
CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.left, 0);\n    CU_ASSERT_EQUAL_FATAL(row.right, 0);\n    CU_ASSERT_EQUAL_FATAL(row.node, 0);\n    CU_ASSERT_EQUAL_FATAL(row.source, 0);\n    CU_ASSERT_EQUAL_FATAL(row.dest, 0);\n    CU_ASSERT_EQUAL_FATAL(row.time, 0);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 3);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[2], 'C');\n\n    ret = tsk_migration_table_update_row(&table, 1, 0, 0, 0, 0, 0, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_migration_table_get_row(&table, 1, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.left, 0);\n    CU_ASSERT_EQUAL_FATAL(row.right, 0);\n    CU_ASSERT_EQUAL_FATAL(row.node, 0);\n    CU_ASSERT_EQUAL_FATAL(row.source, 0);\n    CU_ASSERT_EQUAL_FATAL(row.dest, 0);\n    CU_ASSERT_EQUAL_FATAL(row.time, 0);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 0);\n\n    ret = tsk_migration_table_get_row(&table, 2, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.left, 2);\n    CU_ASSERT_EQUAL_FATAL(row.right, 3.0);\n    CU_ASSERT_EQUAL_FATAL(row.node, 4);\n    CU_ASSERT_EQUAL_FATAL(row.source, 5);\n    CU_ASSERT_EQUAL_FATAL(row.dest, 6);\n    CU_ASSERT_EQUAL_FATAL(row.time, 7);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 3);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[2], 'C');\n\n    ret = tsk_migration_table_update_row(&table, 3, 0, 0, 0, 0, 0, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATION_OUT_OF_BOUNDS);\n\n    tsk_migration_table_free(&table);\n}\n\nstatic void\ntest_migration_table_keep_rows(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_size_t j;\n    tsk_migration_table_t source, t1, t2;\n    tsk_migration_t row;\n    const char *metadata = \"ABC\";\n    tsk_bool_t keep[3] = { 1, 1, 1 };\n    
tsk_id_t id_map[3];\n    tsk_id_t indexes[] = { 0, 1, 2 };\n\n    ret = tsk_migration_table_init(&source, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret_id = tsk_migration_table_add_row(&source, 0, 1.0, 2, 3, 4, 5, metadata, 1);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_migration_table_add_row(&source, 1, 2.0, 3, 4, 5, 6, metadata, 2);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_migration_table_add_row(&source, 2, 3.0, 4, 5, 6, 7, metadata, 3);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_migration_table_copy(&source, &t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_migration_table_keep_rows(&t1, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_migration_table_equals(&t1, &source, 0));\n\n    ret = tsk_migration_table_keep_rows(&t1, keep, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_migration_table_equals(&t1, &source, 0));\n    CU_ASSERT_EQUAL_FATAL(id_map[0], 0);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);\n\n    keep[0] = 0;\n    keep[1] = 0;\n    keep[2] = 0;\n    ret = tsk_migration_table_keep_rows(&t1, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 0);\n    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);\n\n    ret = tsk_migration_table_copy(&source, &t1, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    keep[0] = 0;\n    keep[1] = 1;\n    keep[2] = 0;\n    ret = tsk_migration_table_keep_rows(&t1, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);\n    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);\n\n    ret = tsk_migration_table_get_row(&t1, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(row.left, 1);\n    CU_ASSERT_EQUAL_FATAL(row.right, 2);\n    CU_ASSERT_EQUAL_FATAL(row.node, 3);\n  
  CU_ASSERT_EQUAL_FATAL(row.source, 4);\n    CU_ASSERT_EQUAL_FATAL(row.dest, 5);\n    CU_ASSERT_EQUAL_FATAL(row.time, 6);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 2);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');\n\n    tsk_migration_table_free(&t1);\n\n    keep[0] = 0;\n    keep[1] = 0;\n    keep[2] = 0;\n    /* Keeping first n rows equivalent to truncate */\n    for (j = 0; j < source.num_rows; j++) {\n        ret = tsk_migration_table_copy(&source, &t2, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_migration_table_copy(&source, &t1, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_migration_table_truncate(&t1, j + 1);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        keep[j] = 1;\n        ret = tsk_migration_table_keep_rows(&t2, keep, 0, NULL);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_TRUE(tsk_migration_table_equals(&t1, &t2, 0));\n\n        /* Adding the remaining rows back on to the table gives the original\n         * table */\n        ret = tsk_migration_table_extend(\n            &t2, &source, source.num_rows - j - 1, indexes + j + 1, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_TRUE(tsk_migration_table_equals(&source, &t2, 0));\n\n        tsk_migration_table_free(&t1);\n        tsk_migration_table_free(&t2);\n    }\n\n    tsk_migration_table_free(&source);\n}\n\nstatic void\ntest_individual_table(void)\n{\n    int ret = 0;\n    tsk_id_t ret_id;\n    tsk_individual_table_t table, table2;\n    tsk_size_t num_rows = 100;\n    tsk_id_t j;\n    tsk_size_t k;\n    tsk_flags_t *flags;\n    double *location;\n    tsk_id_t *parents;\n    char *metadata;\n    tsk_size_t *metadata_offset;\n    tsk_size_t *parents_offset;\n    tsk_size_t *location_offset;\n    tsk_individual_t individual;\n    tsk_individual_t individual2;\n    const char *test_metadata = \"test\";\n    tsk_size_t test_metadata_length = 4;\n    char 
metadata_copy[test_metadata_length + 1];\n    tsk_size_t spatial_dimension = 2;\n    tsk_size_t num_parents = 3;\n    double test_location[spatial_dimension];\n    tsk_id_t test_parents[num_parents];\n    tsk_size_t zeros[num_rows + 1];\n    tsk_id_t row_subset[6] = { 1, 9, 1, 0, 2, 2 };\n    tsk_size_t num_row_subset = 6;\n\n    tsk_memset(zeros, 0, (num_rows + 1) * sizeof(tsk_size_t));\n    for (k = 0; k < spatial_dimension; k++) {\n        test_location[k] = (double) k;\n    }\n    for (k = 0; k < num_parents; k++) {\n        test_parents[k] = (tsk_id_t) k + 42;\n    }\n    metadata_copy[test_metadata_length] = '\\0';\n    ret = tsk_individual_table_init(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_individual_table_set_max_rows_increment(&table, 1);\n    tsk_individual_table_set_max_metadata_length_increment(&table, 1);\n    tsk_individual_table_set_max_location_length_increment(&table, 1);\n    tsk_individual_table_set_max_parents_length_increment(&table, 1);\n\n    tsk_individual_table_print_state(&table, _devnull);\n\n    for (j = 0; j < (tsk_id_t) num_rows; j++) {\n        ret_id = tsk_individual_table_add_row(&table, (tsk_flags_t) j, test_location,\n            spatial_dimension, test_parents, num_parents, test_metadata,\n            test_metadata_length);\n        CU_ASSERT_EQUAL_FATAL(ret_id, j);\n        CU_ASSERT_EQUAL(table.flags[j], (tsk_flags_t) j);\n        for (k = 0; k < spatial_dimension; k++) {\n            test_location[k] = (double) k;\n            CU_ASSERT_EQUAL(\n                table.location[spatial_dimension * (size_t) j + k], test_location[k]);\n        }\n        CU_ASSERT_EQUAL(\n            table.metadata_length, (tsk_size_t) (j + 1) * test_metadata_length);\n        CU_ASSERT_EQUAL(table.metadata_offset[j + 1], table.metadata_length);\n        /* check the metadata */\n        tsk_memcpy(metadata_copy, table.metadata + table.metadata_offset[j],\n            test_metadata_length);\n        
CU_ASSERT_NSTRING_EQUAL(metadata_copy, test_metadata, test_metadata_length);\n\n        ret = tsk_individual_table_get_row(&table, (tsk_id_t) j, &individual);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(individual.id, j);\n        CU_ASSERT_EQUAL(individual.flags, (tsk_flags_t) j);\n        CU_ASSERT_EQUAL(individual.location_length, spatial_dimension);\n        CU_ASSERT_NSTRING_EQUAL(\n            individual.location, test_location, spatial_dimension * sizeof(double));\n        CU_ASSERT_EQUAL(individual.metadata_length, test_metadata_length);\n        CU_ASSERT_NSTRING_EQUAL(\n            individual.metadata, test_metadata, test_metadata_length);\n    }\n\n    /* Test equality with and without metadata */\n    tsk_individual_table_copy(&table, &table2, 0);\n    CU_ASSERT_TRUE(tsk_individual_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(\n        tsk_individual_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n    /* Change the metadata values */\n    table2.metadata[0] = 0;\n    CU_ASSERT_FALSE(tsk_individual_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(\n        tsk_individual_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n    /* Change the last metadata entry */\n    table2.metadata_offset[table2.num_rows]\n        = table2.metadata_offset[table2.num_rows - 1];\n    CU_ASSERT_FALSE(tsk_individual_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(\n        tsk_individual_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n    /* Delete all metadata */\n    tsk_memset(table2.metadata_offset, 0,\n        (table2.num_rows + 1) * sizeof(*table2.metadata_offset));\n    CU_ASSERT_FALSE(tsk_individual_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(\n        tsk_individual_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n    tsk_individual_table_free(&table2);\n\n    ret = tsk_individual_table_get_row(&table, (tsk_id_t) num_rows, &individual);\n    CU_ASSERT_EQUAL_FATAL(ret, 
TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n    tsk_individual_table_print_state(&table, _devnull);\n    tsk_individual_table_clear(&table);\n    CU_ASSERT_EQUAL(table.num_rows, 0);\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n\n    num_rows *= 2;\n    flags = tsk_malloc(num_rows * sizeof(tsk_flags_t));\n    CU_ASSERT_FATAL(flags != NULL);\n    for (k = 0; k < num_rows; k++) {\n        flags[k] = (tsk_flags_t) (k + num_rows);\n    }\n    location = tsk_malloc(spatial_dimension * num_rows * sizeof(double));\n    CU_ASSERT_FATAL(location != NULL);\n    for (k = 0; k < spatial_dimension * num_rows; k++) {\n        location[k] = (double) (k + (num_rows * 2));\n    }\n    location_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n    CU_ASSERT_FATAL(location_offset != NULL);\n    for (j = 0; j < (tsk_id_t) num_rows + 1; j++) {\n        location_offset[j] = (tsk_size_t) j * spatial_dimension;\n    }\n    parents = tsk_malloc(num_parents * num_rows * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(parents != NULL);\n    for (k = 0; k < num_parents * num_rows; k++) {\n        parents[k] = (tsk_id_t) (k + (num_rows * 4));\n    }\n    parents_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n    CU_ASSERT_FATAL(parents_offset != NULL);\n    for (j = 0; j < (tsk_id_t) num_rows + 1; j++) {\n        parents_offset[j] = (tsk_size_t) j * num_parents;\n    }\n    metadata = tsk_malloc(num_rows * sizeof(char));\n    CU_ASSERT_FATAL(metadata != NULL);\n    for (k = 0; k < num_rows; k++) {\n        metadata[k] = (char) ((k % 58) + 65);\n    }\n    metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n    CU_ASSERT_FATAL(metadata_offset != NULL);\n    for (j = 0; j < (tsk_id_t) num_rows + 1; j++) {\n        metadata_offset[j] = (tsk_size_t) j;\n    }\n    ret = tsk_individual_table_set_columns(&table, num_rows, flags, location,\n        location_offset, parents, parents_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, 0);\n    
CU_ASSERT_EQUAL(tsk_memcmp(table.flags, flags, num_rows * sizeof(tsk_flags_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.location, location,\n                        spatial_dimension * num_rows * sizeof(double)),\n        0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.location_offset, location_offset,\n                        (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.parents, parents, num_parents * num_rows * sizeof(tsk_id_t)),\n        0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.parents_offset, parents_offset,\n                        (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,\n                        (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n    CU_ASSERT_EQUAL(table.location_length, spatial_dimension * num_rows);\n    CU_ASSERT_EQUAL(table.parents_length, num_parents * num_rows);\n    CU_ASSERT_EQUAL(table.metadata_length, num_rows);\n    tsk_individual_table_print_state(&table, _devnull);\n\n    /* Append another num_rows onto the end */\n    ret = tsk_individual_table_append_columns(&table, num_rows, flags, location,\n        location_offset, parents, parents_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.flags, flags, num_rows * sizeof(tsk_flags_t)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.flags + num_rows, flags, num_rows * sizeof(tsk_flags_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.metadata + num_rows, metadata, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.location, location,\n                        spatial_dimension * num_rows * sizeof(double)),\n        0);\n    
CU_ASSERT_EQUAL(tsk_memcmp(table.location + spatial_dimension * num_rows, location,\n                        spatial_dimension * num_rows * sizeof(double)),\n        0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.parents, parents, num_parents * num_rows * sizeof(tsk_id_t)),\n        0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.parents + num_parents * num_rows, parents,\n                        num_parents * num_rows * sizeof(tsk_id_t)),\n        0);\n    CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);\n    CU_ASSERT_EQUAL(table.metadata_length, 2 * num_rows);\n    CU_ASSERT_EQUAL(table.parents_length, 2 * num_parents * num_rows);\n    CU_ASSERT_EQUAL(table.location_length, 2 * spatial_dimension * num_rows);\n    tsk_individual_table_print_state(&table, _devnull);\n    ret = tsk_individual_table_dump_text(&table, _devnull);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Truncate back to num_rows */\n    ret = tsk_individual_table_truncate(&table, num_rows);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.flags, flags, num_rows * sizeof(tsk_flags_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.location, location,\n                        spatial_dimension * num_rows * sizeof(double)),\n        0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.location_offset, location_offset,\n                        (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.parents, parents, num_parents * num_rows * sizeof(tsk_id_t)),\n        0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.parents_offset, parents_offset,\n                        (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,\n                        (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n    CU_ASSERT_EQUAL(table.location_length, spatial_dimension * 
num_rows);\n    CU_ASSERT_EQUAL(table.parents_length, num_parents * num_rows);\n    CU_ASSERT_EQUAL(table.metadata_length, num_rows);\n    tsk_individual_table_print_state(&table, _devnull);\n\n    ret = tsk_individual_table_truncate(&table, num_rows + 1);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TABLE_POSITION);\n\n    /* flags can't be NULL */\n    ret = tsk_individual_table_set_columns(&table, num_rows, NULL, location,\n        location_offset, parents, parents_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    /* location and location offset must be simultaneously NULL or not */\n    ret = tsk_individual_table_set_columns(&table, num_rows, flags, location, NULL,\n        parents, parents_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_individual_table_set_columns(&table, num_rows, flags, NULL,\n        location_offset, NULL, NULL, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    /* parents and parents offset must be simultaneously NULL or not */\n    ret = tsk_individual_table_set_columns(&table, num_rows, flags, location,\n        location_offset, parents, NULL, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_individual_table_set_columns(&table, num_rows, flags, location,\n        location_offset, NULL, parents_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    /* metadata and metadata offset must be simultaneously NULL or not */\n    ret = tsk_individual_table_set_columns(&table, num_rows, flags, location,\n        location_offset, parents, parents_offset, NULL, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_individual_table_set_columns(&table, num_rows, flags, location,\n        location_offset, parents, parents_offset, metadata, NULL);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n\n    /* if 
location and location_offset are both null, all locations are zero length */\n    num_rows = 10;\n    ret = tsk_individual_table_set_columns(\n        &table, num_rows, flags, NULL, NULL, NULL, NULL, NULL, NULL);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.location_offset, zeros, (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n    CU_ASSERT_EQUAL(table.location_length, 0);\n    ret = tsk_individual_table_append_columns(\n        &table, num_rows, flags, NULL, NULL, NULL, NULL, NULL, NULL);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.location_offset, zeros, (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.location_offset + num_rows, zeros,\n                        num_rows * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);\n    CU_ASSERT_EQUAL(table.location_length, 0);\n    tsk_individual_table_print_state(&table, _devnull);\n    ret = tsk_individual_table_dump_text(&table, _devnull);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* if parents and parents_offset are both null, all parents are zero length */\n    num_rows = 10;\n    ret = tsk_individual_table_set_columns(\n        &table, num_rows, flags, NULL, NULL, NULL, NULL, NULL, NULL);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.parents_offset, zeros, (num_rows + 1) * sizeof(tsk_size_t)), 0);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n    CU_ASSERT_EQUAL(table.parents_length, 0);\n    ret = tsk_individual_table_append_columns(\n        &table, num_rows, flags, NULL, NULL, NULL, NULL, NULL, NULL);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.parents_offset, zeros, (num_rows + 1) * sizeof(tsk_size_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.parents_offset + num_rows, zeros,\n                        num_rows * sizeof(tsk_size_t)),\n        0);\n    
CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);\n    CU_ASSERT_EQUAL(table.parents_length, 0);\n    tsk_individual_table_print_state(&table, _devnull);\n    ret = tsk_individual_table_dump_text(&table, _devnull);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* if metadata and metadata_offset are both null, all metadatas are zero length */\n    num_rows = 10;\n    ret = tsk_individual_table_set_columns(&table, num_rows, flags, location,\n        location_offset, parents, parents_offset, NULL, NULL);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.flags, flags, num_rows * sizeof(tsk_flags_t)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.location, location,\n                        spatial_dimension * num_rows * sizeof(double)),\n        0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.parents, parents, num_parents * num_rows * sizeof(tsk_id_t)),\n        0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.metadata_offset, zeros, (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n    ret = tsk_individual_table_append_columns(&table, num_rows, flags, location,\n        location_offset, parents, parents_offset, NULL, NULL);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.location, location,\n                        spatial_dimension * num_rows * sizeof(double)),\n        0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.location + spatial_dimension * num_rows, location,\n                        spatial_dimension * num_rows * sizeof(double)),\n        0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.parents, parents, num_parents * num_rows * sizeof(tsk_id_t)),\n        0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.parents + num_parents * num_rows, parents,\n                        num_parents * num_rows * sizeof(tsk_id_t)),\n        0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.metadata_offset, zeros, (num_rows + 1) * sizeof(tsk_size_t)),\n        
0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset + num_rows, zeros,\n                        num_rows * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n    tsk_individual_table_print_state(&table, _devnull);\n    tsk_individual_table_dump_text(&table, _devnull);\n\n    /* Test extend method */\n    ret = tsk_individual_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_individual_table_init(&table2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Can't extend from self */\n    ret = tsk_individual_table_extend(&table, &table, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_CANNOT_EXTEND_FROM_SELF);\n\n    /* Two empty tables */\n    CU_ASSERT_TRUE(tsk_individual_table_equals(&table, &table2, 0));\n    ret = tsk_individual_table_extend(&table, &table2, table2.num_rows, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_individual_table_equals(&table, &table2, 0));\n\n    /* Row out of bounds */\n    ret = tsk_individual_table_extend(&table, &table2, num_row_subset, row_subset, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n\n    /* Num rows out of bounds */\n    ret = tsk_individual_table_extend(&table, &table2, num_rows * 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n\n    /* Copy rows in order if index NULL */\n    ret = tsk_individual_table_set_columns(&table2, num_rows, flags, location,\n        location_offset, parents, parents_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_individual_table_equals(&table, &table2, 0));\n    ret = tsk_individual_table_extend(&table, &table2, table2.num_rows, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_individual_table_equals(&table, &table2, 0));\n\n    /* Copy nothing if index not NULL but length zero */\n    ret = 
tsk_individual_table_extend(&table, &table2, 0, row_subset, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_individual_table_equals(&table, &table2, 0));\n\n    /* Copy first N rows in order if index NULL */\n    ret = tsk_individual_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_individual_table_extend(&table, &table2, num_rows / 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_individual_table_truncate(&table2, num_rows / 2);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_individual_table_equals(&table, &table2, 0));\n    ret = tsk_individual_table_set_columns(&table2, num_rows, flags, location,\n        location_offset, parents, parents_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Copy a subset */\n    ret = tsk_individual_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_individual_table_equals(&table, &table2, 0));\n    ret = tsk_individual_table_extend(&table, &table2, num_row_subset, row_subset, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (k = 0; k < num_row_subset; k++) {\n        ret = tsk_individual_table_get_row(&table, (tsk_id_t) k, &individual);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_individual_table_get_row(&table2, row_subset[k], &individual2);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(individual.flags, individual2.flags);\n        CU_ASSERT_EQUAL(individual.location_length, individual2.location_length);\n        CU_ASSERT_EQUAL(individual.parents_length, individual2.parents_length);\n        CU_ASSERT_EQUAL(individual.metadata_length, individual2.metadata_length);\n        CU_ASSERT_EQUAL(tsk_memcmp(individual.location, individual2.location,\n                            individual.location_length * sizeof(*individual.location)),\n            0);\n        CU_ASSERT_EQUAL(tsk_memcmp(individual.parents, individual2.parents,\n                            
individual.parents_length * sizeof(*individual.parents)),\n            0);\n        CU_ASSERT_EQUAL(tsk_memcmp(individual.metadata, individual2.metadata,\n                            individual.metadata_length * sizeof(*individual.metadata)),\n            0);\n    }\n\n    ret = tsk_individual_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(table.metadata_schema_length, 0);\n    CU_ASSERT_EQUAL(table.metadata_schema, NULL);\n    const char *example = \"An example of metadata schema with unicode 🎄🌳🌴🌲🎋\";\n    tsk_size_t example_length = (tsk_size_t) strlen(example);\n    const char *example2 = \"A different example 🎄🌳🌴🌲🎋\";\n    tsk_size_t example2_length = (tsk_size_t) strlen(example2);\n    tsk_individual_table_set_metadata_schema(&table, example, example_length);\n    CU_ASSERT_EQUAL(table.metadata_schema_length, example_length);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_schema, example, example_length), 0);\n\n    tsk_individual_table_copy(&table, &table2, TSK_NO_INIT);\n    CU_ASSERT_EQUAL(table.metadata_schema_length, table2.metadata_schema_length);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.metadata_schema, table2.metadata_schema, example_length), 0);\n    tsk_individual_table_set_metadata_schema(&table2, example, example_length);\n    CU_ASSERT_TRUE(tsk_individual_table_equals(&table, &table2, 0));\n    tsk_individual_table_set_metadata_schema(&table2, example2, example2_length);\n    CU_ASSERT_FALSE(tsk_individual_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(\n        tsk_individual_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n\n    ret = tsk_individual_table_clear(&table);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(table.num_rows, 0);\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n\n    ret = tsk_individual_table_free(&table);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_individual_table_free(&table2);\n    CU_ASSERT_EQUAL(ret, 0);\n    free(flags);\n    free(location);\n    
free(location_offset);\n    free(parents);\n    free(parents_offset);\n    free(metadata);\n    free(metadata_offset);\n}\n\nstatic void\ntest_individual_table_takeset(void)\n{\n    int ret = 0;\n    tsk_id_t ret_id;\n    tsk_individual_table_t source_table, table;\n    tsk_size_t num_rows = 100;\n    tsk_id_t j;\n    tsk_size_t k;\n    tsk_flags_t *flags;\n    double *location;\n    tsk_id_t *parents;\n    char *metadata;\n    tsk_size_t *metadata_offset;\n    tsk_size_t *parents_offset;\n    tsk_size_t *location_offset;\n    tsk_size_t spatial_dimension = 2;\n    tsk_size_t num_parents = 3;\n    const char *test_metadata = \"test\";\n    tsk_size_t test_metadata_length = 4;\n    double test_location[spatial_dimension];\n    tsk_id_t test_parents[num_parents];\n    tsk_size_t zeros[num_rows + 1];\n\n    tsk_memset(zeros, 0, (num_rows + 1) * sizeof(tsk_size_t));\n    /* Make a table to copy from */\n    ret = tsk_individual_table_init(&source_table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (k = 0; k < spatial_dimension; k++) {\n        test_location[k] = (double) k;\n    }\n    for (k = 0; k < num_parents; k++) {\n        test_parents[k] = (tsk_id_t) k + 42;\n    }\n    for (j = 0; j < (tsk_id_t) num_rows; j++) {\n        ret_id = tsk_individual_table_add_row(&source_table, (tsk_flags_t) j,\n            test_location, spatial_dimension, test_parents, num_parents, test_metadata,\n            test_metadata_length);\n        CU_ASSERT_EQUAL_FATAL(ret_id, j);\n    }\n\n    /* Prepare arrays to be taken */\n    flags = tsk_malloc(num_rows * sizeof(tsk_flags_t));\n    CU_ASSERT_FATAL(flags != NULL);\n    tsk_memcpy(flags, source_table.flags, num_rows * sizeof(tsk_flags_t));\n    location = tsk_malloc(spatial_dimension * num_rows * sizeof(double));\n    CU_ASSERT_FATAL(location != NULL);\n    tsk_memcpy(\n        location, source_table.location, spatial_dimension * num_rows * sizeof(double));\n    location_offset = tsk_malloc((num_rows + 1) * 
sizeof(tsk_size_t));\n    CU_ASSERT_FATAL(location_offset != NULL);\n    tsk_memcpy(location_offset, source_table.location_offset,\n        (num_rows + 1) * sizeof(tsk_size_t));\n    parents = tsk_malloc(num_parents * num_rows * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(parents != NULL);\n    tsk_memcpy(parents, source_table.parents, num_parents * num_rows * sizeof(tsk_id_t));\n    parents_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n    CU_ASSERT_FATAL(parents_offset != NULL);\n    tsk_memcpy(parents_offset, source_table.parents_offset,\n        (num_rows + 1) * sizeof(tsk_size_t));\n    metadata = tsk_malloc(num_rows * test_metadata_length * sizeof(char));\n    CU_ASSERT_FATAL(metadata != NULL);\n    tsk_memcpy(\n        metadata, source_table.metadata, num_rows * test_metadata_length * sizeof(char));\n    metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n    CU_ASSERT_FATAL(metadata_offset != NULL);\n    tsk_memcpy(metadata_offset, source_table.metadata_offset,\n        (num_rows + 1) * sizeof(tsk_size_t));\n\n    ret = tsk_individual_table_init(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Add one row so that we can check takeset frees it */\n    ret_id = tsk_individual_table_add_row(&table, (tsk_flags_t) 1, test_location,\n        spatial_dimension, test_parents, num_parents, test_metadata,\n        test_metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    ret = tsk_individual_table_takeset_columns(&table, num_rows, flags, location,\n        location_offset, parents, parents_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_individual_table_equals(&source_table, &table, 0));\n\n    /* Test error states, all of these must not take the array, or free existing */\n    /* location and location offset must be simultaneously NULL or not */\n    ret = tsk_individual_table_takeset_columns(&table, num_rows, flags, location, NULL,\n        parents, parents_offset, 
metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_individual_table_takeset_columns(&table, num_rows, flags, NULL,\n        location_offset, NULL, NULL, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    /* parents and parents offset must be simultaneously NULL or not */\n    ret = tsk_individual_table_takeset_columns(&table, num_rows, flags, location,\n        location_offset, parents, NULL, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_individual_table_takeset_columns(&table, num_rows, flags, location,\n        location_offset, NULL, parents_offset, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    /* metadata and metadata offset must be simultaneously NULL or not */\n    ret = tsk_individual_table_takeset_columns(&table, num_rows, flags, location,\n        location_offset, parents, parents_offset, NULL, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_individual_table_takeset_columns(&table, num_rows, flags, location,\n        location_offset, parents, parents_offset, metadata, NULL);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n\n    /* Truncation after takeset keeps memory and max_rows */\n    ret = tsk_individual_table_clear(&table);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(table.max_rows, num_rows);\n\n    /* if ragged array and offset are both null, all entries are zero length,\n       NULL flags mean all zero entries */\n    num_rows = 10;\n    ret = tsk_individual_table_takeset_columns(\n        &table, num_rows, NULL, NULL, NULL, NULL, NULL, NULL, NULL);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.flags, zeros, num_rows * sizeof(tsk_flags_t)), 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.location_offset, zeros, (num_rows + 1) * sizeof(tsk_size_t)),\n    
    0);\n    CU_ASSERT_EQUAL(table.location_length, 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.parents_offset, zeros, (num_rows + 1) * sizeof(tsk_size_t)), 0);\n    CU_ASSERT_EQUAL(table.parents_length, 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.metadata_offset, zeros, (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n\n    ret = tsk_individual_table_free(&table);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_individual_table_free(&source_table);\n    CU_ASSERT_EQUAL(ret, 0);\n}\n\nstatic void\ntest_individual_table_update_row(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_individual_table_t table;\n    tsk_individual_t row;\n    double location[] = { 0, 1, 2 };\n    tsk_id_t parents[] = { 0, 1, 2 };\n    const char *metadata = \"ABC\";\n\n    ret = tsk_individual_table_init(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret_id\n        = tsk_individual_table_add_row(&table, 0, location, 1, parents, 1, metadata, 1);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id\n        = tsk_individual_table_add_row(&table, 1, location, 2, parents, 2, metadata, 2);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id\n        = tsk_individual_table_add_row(&table, 2, location, 3, parents, 3, metadata, 3);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_individual_table_update_row(\n        &table, 0, 1, &location[1], 1, &parents[1], 1, &metadata[1], 1);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_individual_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.flags, 1);\n    CU_ASSERT_EQUAL_FATAL(row.location_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.location[0], 1.0);\n    CU_ASSERT_EQUAL_FATAL(row.parents_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.parents[0], 1);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');\n\n    ret = tsk_individual_table_update_row(&table, 0, row.flags + 1, 
row.location,\n        row.location_length, row.parents, row.parents_length, row.metadata,\n        row.metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_individual_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.flags, 2);\n    CU_ASSERT_EQUAL_FATAL(row.location_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.location[0], 1.0);\n    CU_ASSERT_EQUAL_FATAL(row.parents_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.parents[0], 1);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');\n\n    ret = tsk_individual_table_update_row(&table, 0, row.flags, location, 1, row.parents,\n        row.parents_length, row.metadata, row.metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_individual_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.flags, 2);\n    CU_ASSERT_EQUAL_FATAL(row.location_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.location[0], 0.0);\n    CU_ASSERT_EQUAL_FATAL(row.parents_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.parents[0], 1);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');\n\n    ret = tsk_individual_table_update_row(&table, 0, row.flags, NULL, 0, row.parents,\n        row.parents_length, row.metadata, row.metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_individual_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.flags, 2);\n    CU_ASSERT_EQUAL_FATAL(row.location_length, 0);\n    CU_ASSERT_EQUAL_FATAL(row.parents_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.parents[0], 1);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');\n\n    ret = tsk_individual_table_update_row(\n        &table, 0, 2, location, 3, parents, 3, metadata, 3);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_individual_table_get_row(&table, 
0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.flags, 2);\n    CU_ASSERT_EQUAL_FATAL(row.location_length, 3);\n    CU_ASSERT_EQUAL_FATAL(row.location[0], 0);\n    CU_ASSERT_EQUAL_FATAL(row.location[1], 1);\n    CU_ASSERT_EQUAL_FATAL(row.location[2], 2);\n    CU_ASSERT_EQUAL_FATAL(row.parents_length, 3);\n    CU_ASSERT_EQUAL_FATAL(row.parents[0], 0);\n    CU_ASSERT_EQUAL_FATAL(row.parents[1], 1);\n    CU_ASSERT_EQUAL_FATAL(row.parents[2], 2);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 3);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[2], 'C');\n\n    ret = tsk_individual_table_update_row(&table, 1, 5, NULL, 0, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_individual_table_get_row(&table, 1, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.flags, 5);\n    CU_ASSERT_EQUAL_FATAL(row.location_length, 0);\n    CU_ASSERT_EQUAL_FATAL(row.parents_length, 0);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 0);\n\n    ret = tsk_individual_table_get_row(&table, 2, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.flags, 2);\n    CU_ASSERT_EQUAL_FATAL(row.location_length, 3);\n    CU_ASSERT_EQUAL_FATAL(row.location[0], 0);\n    CU_ASSERT_EQUAL_FATAL(row.location[1], 1);\n    CU_ASSERT_EQUAL_FATAL(row.location[2], 2);\n    CU_ASSERT_EQUAL_FATAL(row.parents_length, 3);\n    CU_ASSERT_EQUAL_FATAL(row.parents[0], 0);\n    CU_ASSERT_EQUAL_FATAL(row.parents[1], 1);\n    CU_ASSERT_EQUAL_FATAL(row.parents[2], 2);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 3);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[2], 'C');\n\n    ret = tsk_individual_table_update_row(&table, 3, 0, NULL, 0, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n\n    
tsk_individual_table_free(&table);\n}\n\nstatic void\ntest_individual_table_keep_rows(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_individual_t row;\n    double location[] = { 0, 1, 2 };\n    tsk_id_t parents[] = { -1, 1, -1 };\n    const char *metadata = \"ABC\";\n    tsk_bool_t keep[3] = { 1, 1, 1 };\n    tsk_id_t indexes[] = { 0, 1, 2 };\n    tsk_id_t id_map[3];\n    tsk_individual_table_t source, t1, t2;\n    tsk_size_t j;\n\n    ret = tsk_individual_table_init(&source, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret_id\n        = tsk_individual_table_add_row(&source, 0, location, 1, parents, 1, metadata, 1);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id\n        = tsk_individual_table_add_row(&source, 1, location, 2, parents, 2, metadata, 2);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id\n        = tsk_individual_table_add_row(&source, 2, location, 3, parents, 3, metadata, 3);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_individual_table_copy(&source, &t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_individual_table_keep_rows(&t1, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_individual_table_equals(&t1, &source, 0));\n\n    ret = tsk_individual_table_keep_rows(&t1, keep, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_individual_table_equals(&t1, &source, 0));\n    CU_ASSERT_EQUAL_FATAL(id_map[0], 0);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);\n\n    keep[0] = 0;\n    keep[1] = 0;\n    keep[2] = 0;\n    ret = tsk_individual_table_keep_rows(&t1, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 0);\n    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);\n\n    ret = tsk_individual_table_copy(&source, &t1, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    keep[0] = 0;\n    keep[1] = 1;\n    keep[2] = 0;\n    ret = 
tsk_individual_table_keep_rows(&t1, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);\n    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);\n\n    ret = tsk_individual_table_get_row(&t1, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(row.flags, 1);\n    CU_ASSERT_EQUAL_FATAL(row.parents_length, 2);\n    CU_ASSERT_EQUAL_FATAL(row.parents[0], -1);\n    CU_ASSERT_EQUAL_FATAL(row.parents[1], 0);\n    CU_ASSERT_EQUAL_FATAL(row.location_length, 2);\n    CU_ASSERT_EQUAL_FATAL(row.location[0], 0);\n    CU_ASSERT_EQUAL_FATAL(row.location[1], 1);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 2);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');\n\n    tsk_individual_table_free(&t1);\n\n    keep[0] = 0;\n    keep[1] = 0;\n    keep[2] = 0;\n    /* Keeping first n rows equivalent to truncate */\n    for (j = 0; j < source.num_rows; j++) {\n        ret = tsk_individual_table_copy(&source, &t2, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_individual_table_copy(&source, &t1, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_individual_table_truncate(&t1, j + 1);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        keep[j] = 1;\n        ret = tsk_individual_table_keep_rows(&t2, keep, 0, NULL);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_TRUE(tsk_individual_table_equals(&t1, &t2, 0));\n\n        /* Adding the remaining rows back on to the table gives the original\n         * table */\n        ret = tsk_individual_table_extend(\n            &t2, &source, source.num_rows - j - 1, indexes + j + 1, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_TRUE(tsk_individual_table_equals(&source, &t2, 0));\n\n        tsk_individual_table_free(&t1);\n        tsk_individual_table_free(&t2);\n    }\n\n    tsk_individual_table_free(&source);\n}\n\nstatic 
void\ntest_individual_table_keep_rows_parent_references(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_individual_table_t source, t;\n    tsk_bool_t keep[] = { 1, 1, 1, 1 };\n    tsk_id_t parents[] = { -1, 1, 2 };\n    tsk_id_t id_map[4];\n\n    ret = tsk_individual_table_init(&source, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret_id = tsk_individual_table_add_row(&source, 0, NULL, 0, parents, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_individual_table_add_row(&source, 0, NULL, 0, parents, 3, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_individual_table_add_row(&source, 0, NULL, 0, parents, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_individual_table_add_row(&source, 0, NULL, 0, parents, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_individual_table_copy(&source, &t, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* OOB errors */\n    t.parents[0] = -2;\n    ret = tsk_individual_table_keep_rows(&t, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(t.num_rows, 4);\n\n    t.parents[0] = 4;\n    ret = tsk_individual_table_keep_rows(&t, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(t.num_rows, 4);\n    /* But ignored if row is not kept */\n    keep[0] = false;\n    ret = tsk_individual_table_keep_rows(&t, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_individual_table_free(&t);\n\n    ret = tsk_individual_table_copy(&source, &t, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    /* Try to remove referenced row 2 */\n    keep[0] = true;\n    keep[2] = false;\n    ret = tsk_individual_table_keep_rows(&t, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_KEEP_ROWS_MAP_TO_DELETED);\n    CU_ASSERT_TRUE(tsk_individual_table_equals(&source, &t, 0));\n    tsk_individual_table_free(&t);\n\n    ret = tsk_individual_table_copy(&source, &t, 
0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    /* remove unreferenced row 0 */\n    keep[0] = false;\n    keep[2] = true;\n    ret = tsk_individual_table_keep_rows(&t, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(t.num_rows, 3);\n    CU_ASSERT_EQUAL_FATAL(t.parents[0], TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(t.parents[1], 0);\n    CU_ASSERT_EQUAL_FATAL(t.parents[2], 1);\n    tsk_individual_table_free(&t);\n\n    /* Check that we don't change the table in error cases. */\n    source.parents[1] = -2;\n    ret = tsk_individual_table_copy(&source, &t, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    keep[0] = true;\n    ret = tsk_individual_table_keep_rows(&t, keep, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n    CU_ASSERT_TRUE(tsk_individual_table_equals(&source, &t, 0));\n    tsk_individual_table_free(&t);\n\n    /* Check that we don't change the table in error cases. */\n    source.parents[1] = 0;\n    ret = tsk_individual_table_copy(&source, &t, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    keep[0] = false;\n    ret = tsk_individual_table_keep_rows(&t, keep, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_KEEP_ROWS_MAP_TO_DELETED);\n    CU_ASSERT_TRUE(tsk_individual_table_equals(&source, &t, 0));\n    tsk_individual_table_free(&t);\n\n    tsk_individual_table_free(&source);\n}\n\nstatic void\ntest_population_table(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_population_table_t table, table2;\n    tsk_size_t num_rows = 100;\n    tsk_size_t max_len = 20;\n    tsk_size_t k, len;\n    tsk_id_t j;\n    char *metadata;\n    char c[max_len + 1];\n    tsk_size_t *metadata_offset;\n    tsk_population_t population, population2;\n    tsk_id_t row_subset[6] = { 1, 9, 1, 0, 2, 2 };\n    tsk_size_t num_row_subset = 6;\n\n    for (j = 0; j < (tsk_id_t) max_len; j++) {\n        c[j] = (char) ('A' + j);\n    }\n\n    ret = tsk_population_table_init(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    
tsk_population_table_set_max_rows_increment(&table, 1);\n    tsk_population_table_set_max_metadata_length_increment(&table, 1);\n    tsk_population_table_print_state(&table, _devnull);\n    ret = tsk_population_table_dump_text(&table, _devnull);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    /* Adding zero length metadata with NULL should be fine */\n\n    ret_id = tsk_population_table_add_row(&table, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n    CU_ASSERT_EQUAL(table.num_rows, 1);\n    CU_ASSERT_EQUAL(table.metadata_offset[0], 0);\n    CU_ASSERT_EQUAL(table.metadata_offset[1], 0);\n    tsk_population_table_clear(&table);\n    CU_ASSERT_EQUAL(table.num_rows, 0);\n\n    len = 0;\n    for (j = 0; j < (tsk_id_t) num_rows; j++) {\n        k = TSK_MIN((tsk_size_t) j + 1, max_len);\n        ret_id = tsk_population_table_add_row(&table, c, k);\n        CU_ASSERT_EQUAL_FATAL(ret_id, j);\n        CU_ASSERT_EQUAL(table.metadata_offset[j], len);\n        CU_ASSERT_EQUAL(table.num_rows, (tsk_size_t) j + 1);\n        len += k;\n        CU_ASSERT_EQUAL(table.metadata_offset[j + 1], len);\n        CU_ASSERT_EQUAL(table.metadata_length, len);\n\n        ret = tsk_population_table_get_row(&table, (tsk_id_t) j, &population);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(population.id, j);\n        CU_ASSERT_EQUAL(population.metadata_length, k);\n        CU_ASSERT_NSTRING_EQUAL(population.metadata, c, k);\n    }\n\n    /* Test equality with and without metadata */\n    tsk_population_table_copy(&table, &table2, 0);\n    CU_ASSERT_TRUE(tsk_population_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(\n        tsk_population_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n    /* Change the metadata values */\n    table2.metadata[0] = 0;\n    CU_ASSERT_FALSE(tsk_population_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(\n        tsk_population_table_equals(&table, &table2, 
TSK_CMP_IGNORE_METADATA));\n    /* Change the last metadata entry */\n    table2.metadata_offset[table2.num_rows]\n        = table2.metadata_offset[table2.num_rows - 1];\n    CU_ASSERT_FALSE(tsk_population_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(\n        tsk_population_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n    /* Delete all metadata */\n    tsk_memset(table2.metadata_offset, 0,\n        (table2.num_rows + 1) * sizeof(*table2.metadata_offset));\n    CU_ASSERT_FALSE(tsk_population_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(\n        tsk_population_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));\n    tsk_population_table_free(&table2);\n\n    ret = tsk_population_table_get_row(&table, (tsk_id_t) num_rows, &population);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n    tsk_population_table_print_state(&table, _devnull);\n    ret = tsk_population_table_dump_text(&table, _devnull);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    num_rows *= 2;\n    metadata = tsk_malloc(num_rows * sizeof(char));\n    CU_ASSERT_FATAL(metadata != NULL);\n    metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n    CU_ASSERT_FATAL(metadata_offset != NULL);\n\n    for (j = 0; j < (tsk_id_t) num_rows; j++) {\n        metadata[j] = 'M';\n        metadata_offset[j] = (tsk_size_t) j;\n    }\n\n    metadata_offset[num_rows] = num_rows;\n    ret = tsk_population_table_set_columns(&table, num_rows, metadata, metadata_offset);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n    CU_ASSERT_EQUAL(table.metadata_length, num_rows);\n\n    /* Append another num_rows */\n    ret = tsk_population_table_append_columns(\n        &table, num_rows, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);\n    
CU_ASSERT_EQUAL(\n        tsk_memcmp(table.metadata + num_rows, metadata, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(table.metadata_length, 2 * num_rows);\n    CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);\n\n    /* Truncate back to num_rows */\n    ret = tsk_population_table_truncate(&table, num_rows);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n    CU_ASSERT_EQUAL(table.metadata_length, num_rows);\n\n    ret = tsk_population_table_truncate(&table, num_rows + 1);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TABLE_POSITION);\n\n    /* Metadata = NULL gives an error */\n    ret = tsk_population_table_set_columns(&table, num_rows, NULL, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_population_table_set_columns(&table, num_rows, metadata, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_population_table_set_columns(&table, num_rows, NULL, metadata_offset);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n\n    /* Test extend method */\n    ret = tsk_population_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_population_table_init(&table2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Can't extend from self */\n    ret = tsk_population_table_extend(&table, &table, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_CANNOT_EXTEND_FROM_SELF);\n\n    /* Two empty tables */\n    CU_ASSERT_TRUE(tsk_population_table_equals(&table, &table2, 0));\n    ret = tsk_population_table_extend(&table, &table2, table2.num_rows, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_population_table_equals(&table, &table2, 0));\n\n    /* Row out of bounds */\n    ret = tsk_population_table_extend(&table, &table2, num_row_subset, row_subset, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n\n    /* Num rows 
out of bounds */\n    ret = tsk_population_table_extend(&table, &table2, num_rows * 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n\n    /* Copy rows in order if index NULL */\n    ret = tsk_population_table_set_columns(&table2, num_rows, metadata, metadata_offset);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_population_table_equals(&table, &table2, 0));\n    ret = tsk_population_table_extend(&table, &table2, table2.num_rows, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_population_table_equals(&table, &table2, 0));\n\n    /* Copy nothing if index not NULL but length zero */\n    ret = tsk_population_table_extend(&table, &table2, 0, row_subset, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_population_table_equals(&table, &table2, 0));\n\n    /* Copy first N rows in order if index NULL */\n    ret = tsk_population_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_population_table_extend(&table, &table2, num_rows / 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_population_table_truncate(&table2, num_rows / 2);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_population_table_equals(&table, &table2, 0));\n    ret = tsk_population_table_set_columns(&table2, num_rows, metadata, metadata_offset);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Copy a subset */\n    ret = tsk_population_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_population_table_equals(&table, &table2, 0));\n    ret = tsk_population_table_extend(&table, &table2, num_row_subset, row_subset, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (k = 0; k < num_row_subset; k++) {\n        ret = tsk_population_table_get_row(&table, (tsk_id_t) k, &population);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_population_table_get_row(&table2, row_subset[k], &population2);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n      
  CU_ASSERT_EQUAL(population.metadata_length, population2.metadata_length);\n        CU_ASSERT_EQUAL(tsk_memcmp(population.metadata, population2.metadata,\n                            population.metadata_length * sizeof(*population.metadata)),\n            0);\n    }\n\n    /* Test for bad offsets */\n    metadata_offset[0] = 1;\n    ret = tsk_population_table_set_columns(&table, num_rows, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);\n    metadata_offset[0] = 0;\n    metadata_offset[num_rows] = 0;\n    ret = tsk_population_table_set_columns(&table, num_rows, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);\n\n    ret = tsk_population_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(table.metadata_schema_length, 0);\n    CU_ASSERT_EQUAL(table.metadata_schema, NULL);\n    const char *example = \"An example of metadata schema with unicode 🎄🌳🌴🌲🎋\";\n    tsk_size_t example_length = (tsk_size_t) strlen(example);\n    const char *example2 = \"A different example 🎄🌳🌴🌲🎋\";\n    tsk_size_t example2_length = (tsk_size_t) strlen(example2);\n    tsk_population_table_set_metadata_schema(&table, example, example_length);\n    CU_ASSERT_EQUAL(table.metadata_schema_length, example_length);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_schema, example, example_length), 0);\n\n    tsk_population_table_copy(&table, &table2, TSK_NO_INIT);\n    CU_ASSERT_EQUAL(table.metadata_schema_length, table2.metadata_schema_length);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.metadata_schema, table2.metadata_schema, example_length), 0);\n    tsk_population_table_set_metadata_schema(&table2, example, example_length);\n    CU_ASSERT_TRUE(tsk_population_table_equals(&table, &table2, 0));\n    tsk_population_table_set_metadata_schema(&table2, example2, example2_length);\n    CU_ASSERT_FALSE(tsk_population_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(\n        tsk_population_table_equals(&table, 
&table2, TSK_CMP_IGNORE_METADATA));\n\n    ret = tsk_population_table_clear(&table);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(table.num_rows, 0);\n    CU_ASSERT_EQUAL(table.metadata_length, 0);\n\n    ret = tsk_population_table_free(&table);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_population_table_free(&table2);\n    CU_ASSERT_EQUAL(ret, 0);\n\n    free(metadata);\n    free(metadata_offset);\n}\n\nstatic void\ntest_population_table_takeset(void)\n{\n    int ret = 0;\n    tsk_id_t ret_id;\n    tsk_population_table_t source_table, table;\n    tsk_size_t num_rows = 100;\n    tsk_id_t j;\n    char *metadata;\n    tsk_size_t *metadata_offset;\n    const char *test_metadata = \"test\";\n    tsk_size_t test_metadata_length = 4;\n    tsk_size_t zeros[num_rows + 1];\n\n    tsk_memset(zeros, 0, (num_rows + 1) * sizeof(tsk_size_t));\n    /* Make a table to copy from */\n    ret = tsk_population_table_init(&source_table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (j = 0; j < (tsk_id_t) num_rows; j++) {\n        ret_id = tsk_population_table_add_row(\n            &source_table, test_metadata, test_metadata_length);\n        CU_ASSERT_EQUAL_FATAL(ret_id, j);\n    }\n\n    /* Prepare arrays to be taken */\n    metadata = tsk_malloc(num_rows * test_metadata_length * sizeof(char));\n    CU_ASSERT_FATAL(metadata != NULL);\n    tsk_memcpy(\n        metadata, source_table.metadata, num_rows * test_metadata_length * sizeof(char));\n    metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n    CU_ASSERT_FATAL(metadata_offset != NULL);\n    tsk_memcpy(metadata_offset, source_table.metadata_offset,\n        (num_rows + 1) * sizeof(tsk_size_t));\n\n    ret = tsk_population_table_init(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Add one row so that we can check takeset frees it */\n    ret_id = tsk_population_table_add_row(&table, test_metadata, test_metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    ret = tsk_population_table_takeset_columns(\n    
    &table, num_rows, metadata, metadata_offset);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_population_table_equals(&source_table, &table, 0));\n\n    /* Test error states, all of these must not take the array, or free existing */\n    ret = tsk_population_table_takeset_columns(&table, num_rows, NULL, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_population_table_takeset_columns(&table, num_rows, metadata, NULL);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_population_table_takeset_columns(&table, num_rows, NULL, NULL);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n\n    /* Test bad offset */\n    metadata_offset[0] = 1;\n    ret = tsk_population_table_takeset_columns(\n        &table, num_rows, metadata, metadata_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);\n\n    /* Truncation after takeset keeps memory and max_rows */\n    ret = tsk_population_table_clear(&table);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(table.max_rows, num_rows);\n\n    ret = tsk_population_table_free(&table);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_population_table_free(&source_table);\n    CU_ASSERT_EQUAL(ret, 0);\n}\n\nstatic void\ntest_population_table_update_row(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_population_table_t table;\n    tsk_population_t row;\n    const char *metadata = \"ABC\";\n\n    ret = tsk_population_table_init(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret_id = tsk_population_table_add_row(&table, metadata, 1);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_population_table_add_row(&table, metadata, 2);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_population_table_add_row(&table, metadata, 3);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_population_table_update_row(&table, 0, &metadata[1], 1);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_population_table_get_row(&table, 0, &row);\n    
CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');\n\n    ret = tsk_population_table_update_row(&table, 0, row.metadata, row.metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_population_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');\n\n    ret = tsk_population_table_update_row(&table, 0, metadata, 3);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_population_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 3);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[2], 'C');\n\n    ret = tsk_population_table_update_row(&table, 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_population_table_get_row(&table, 1, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 0);\n\n    ret = tsk_population_table_get_row(&table, 2, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 3);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[2], 'C');\n\n    ret = tsk_population_table_update_row(&table, 3, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n\n    tsk_population_table_free(&table);\n}\n\nstatic void\ntest_population_table_keep_rows(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_size_t j;\n    tsk_population_table_t source, t1, t2;\n    tsk_population_t row;\n    const char *metadata = \"ABC\";\n    tsk_bool_t keep[3] = { 1, 1, 1 };\n    tsk_id_t id_map[3];\n    tsk_id_t indexes[] = { 0, 1, 2 };\n\n    ret = tsk_population_table_init(&source, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n   
 ret_id = tsk_population_table_add_row(&source, metadata, 1);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_population_table_add_row(&source, metadata, 2);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_population_table_add_row(&source, metadata, 3);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_population_table_copy(&source, &t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_population_table_keep_rows(&t1, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_population_table_equals(&t1, &source, 0));\n\n    ret = tsk_population_table_keep_rows(&t1, keep, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_population_table_equals(&t1, &source, 0));\n    CU_ASSERT_EQUAL_FATAL(id_map[0], 0);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);\n\n    keep[0] = 0;\n    keep[1] = 0;\n    keep[2] = 0;\n    ret = tsk_population_table_keep_rows(&t1, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 0);\n    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);\n\n    ret = tsk_population_table_copy(&source, &t1, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    keep[0] = 0;\n    keep[1] = 1;\n    keep[2] = 0;\n    ret = tsk_population_table_keep_rows(&t1, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);\n    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);\n\n    ret = tsk_population_table_get_row(&t1, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 2);\n    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');\n    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');\n\n    tsk_population_table_free(&t1);\n\n    keep[0] = 0;\n    keep[1] = 0;\n    keep[2] = 0;\n    /* Keeping first n rows equivalent to truncate */\n    for (j = 0; j < 
source.num_rows; j++) {\n        ret = tsk_population_table_copy(&source, &t2, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_population_table_copy(&source, &t1, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_population_table_truncate(&t1, j + 1);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        keep[j] = 1;\n        ret = tsk_population_table_keep_rows(&t2, keep, 0, NULL);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_TRUE(tsk_population_table_equals(&t1, &t2, 0));\n\n        /* Adding the remaining rows back on to the table gives the original\n         * table */\n        ret = tsk_population_table_extend(\n            &t2, &source, source.num_rows - j - 1, indexes + j + 1, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_TRUE(tsk_population_table_equals(&source, &t2, 0));\n\n        tsk_population_table_free(&t1);\n        tsk_population_table_free(&t2);\n    }\n\n    tsk_population_table_free(&source);\n}\n\nstatic void\ntest_provenance_table(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_provenance_table_t table, table2;\n    tsk_size_t num_rows = 100;\n    tsk_size_t j;\n    char *timestamp;\n    tsk_size_t *timestamp_offset;\n    const char *test_timestamp = \"2017-12-06T20:40:25+00:00\";\n    tsk_size_t test_timestamp_length = (tsk_size_t) strlen(test_timestamp);\n    char timestamp_copy[test_timestamp_length + 1];\n    char *record;\n    tsk_size_t *record_offset;\n    const char *test_record = \"{\\\"json\\\"=1234}\";\n    tsk_size_t test_record_length = (tsk_size_t) strlen(test_record);\n    char record_copy[test_record_length + 1];\n    tsk_provenance_t provenance, provenance2;\n    tsk_id_t row_subset[6] = { 1, 9, 1, 0, 2, 2 };\n    tsk_size_t num_row_subset = 6;\n\n    timestamp_copy[test_timestamp_length] = '\\0';\n    record_copy[test_record_length] = '\\0';\n    ret = tsk_provenance_table_init(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    
tsk_provenance_table_set_max_rows_increment(&table, 1);\n    tsk_provenance_table_set_max_timestamp_length_increment(&table, 1);\n    tsk_provenance_table_set_max_record_length_increment(&table, 1);\n    tsk_provenance_table_print_state(&table, _devnull);\n    ret = tsk_provenance_table_dump_text(&table, _devnull);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    for (j = 0; j < num_rows; j++) {\n        ret_id = tsk_provenance_table_add_row(&table, test_timestamp,\n            test_timestamp_length, test_record, test_record_length);\n        CU_ASSERT_EQUAL_FATAL(ret_id, (tsk_id_t) j);\n        CU_ASSERT_EQUAL(table.timestamp_length, (j + 1) * test_timestamp_length);\n        CU_ASSERT_EQUAL(table.timestamp_offset[j + 1], table.timestamp_length);\n        CU_ASSERT_EQUAL(table.record_length, (j + 1) * test_record_length);\n        CU_ASSERT_EQUAL(table.record_offset[j + 1], table.record_length);\n        /* check the timestamp */\n        tsk_memcpy(timestamp_copy, table.timestamp + table.timestamp_offset[j],\n            test_timestamp_length);\n        CU_ASSERT_NSTRING_EQUAL(timestamp_copy, test_timestamp, test_timestamp_length);\n        /* check the record */\n        tsk_memcpy(\n            record_copy, table.record + table.record_offset[j], test_record_length);\n        CU_ASSERT_NSTRING_EQUAL(record_copy, test_record, test_record_length);\n\n        ret = tsk_provenance_table_get_row(&table, (tsk_id_t) j, &provenance);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(provenance.id, (tsk_id_t) j);\n        CU_ASSERT_EQUAL(provenance.timestamp_length, test_timestamp_length);\n        CU_ASSERT_NSTRING_EQUAL(\n            provenance.timestamp, test_timestamp, test_timestamp_length);\n        CU_ASSERT_EQUAL(provenance.record_length, test_record_length);\n        CU_ASSERT_NSTRING_EQUAL(provenance.record, test_record, test_record_length);\n    }\n    ret = tsk_provenance_table_get_row(&table, (tsk_id_t) num_rows, &provenance);\n    
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_PROVENANCE_OUT_OF_BOUNDS);\n    tsk_provenance_table_print_state(&table, _devnull);\n    ret = tsk_provenance_table_dump_text(&table, _devnull);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_provenance_table_clear(&table);\n    CU_ASSERT_EQUAL(table.num_rows, 0);\n    CU_ASSERT_EQUAL(table.timestamp_length, 0);\n    CU_ASSERT_EQUAL(table.record_length, 0);\n\n    num_rows *= 2;\n    timestamp = tsk_malloc(num_rows * sizeof(char));\n    tsk_memset(timestamp, 'a', num_rows * sizeof(char));\n    CU_ASSERT_FATAL(timestamp != NULL);\n    timestamp_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n    CU_ASSERT_FATAL(timestamp_offset != NULL);\n    record = tsk_malloc(num_rows * sizeof(char));\n    tsk_memset(record, 'a', num_rows * sizeof(char));\n    CU_ASSERT_FATAL(record != NULL);\n    record_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n    CU_ASSERT_FATAL(record_offset != NULL);\n    for (j = 0; j < num_rows + 1; j++) {\n        timestamp_offset[j] = j;\n        record_offset[j] = j;\n    }\n    ret = tsk_provenance_table_set_columns(\n        &table, num_rows, timestamp, timestamp_offset, record, record_offset);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.timestamp, timestamp, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.timestamp_offset, timestamp_offset,\n                        (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.record, record, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.record_offset, record_offset,\n                        (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n    CU_ASSERT_EQUAL(table.timestamp_length, num_rows);\n    CU_ASSERT_EQUAL(table.record_length, num_rows);\n    tsk_provenance_table_print_state(&table, _devnull);\n\n    /* Append another num_rows onto the end */\n    ret = tsk_provenance_table_append_columns(\n  
      &table, num_rows, timestamp, timestamp_offset, record, record_offset);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.timestamp, timestamp, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.timestamp + num_rows, timestamp, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.record, record, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(table.record + num_rows, record, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);\n    CU_ASSERT_EQUAL(table.timestamp_length, 2 * num_rows);\n    CU_ASSERT_EQUAL(table.record_length, 2 * num_rows);\n    tsk_provenance_table_print_state(&table, _devnull);\n\n    /* Truncate back to num_rows */\n    ret = tsk_provenance_table_truncate(&table, num_rows);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.timestamp, timestamp, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.timestamp_offset, timestamp_offset,\n                        (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.record, record, num_rows * sizeof(char)), 0);\n    CU_ASSERT_EQUAL(tsk_memcmp(table.record_offset, record_offset,\n                        (num_rows + 1) * sizeof(tsk_size_t)),\n        0);\n    CU_ASSERT_EQUAL(table.num_rows, num_rows);\n    CU_ASSERT_EQUAL(table.timestamp_length, num_rows);\n    CU_ASSERT_EQUAL(table.record_length, num_rows);\n    tsk_provenance_table_print_state(&table, _devnull);\n\n    /* Test equality with and without timestamp */\n    tsk_provenance_table_copy(&table, &table2, 0);\n    CU_ASSERT_TRUE(tsk_provenance_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(\n        tsk_provenance_table_equals(&table, &table2, TSK_CMP_IGNORE_TIMESTAMPS));\n    /* Change the timestamp values */\n    table2.timestamp[0] = 0;\n    CU_ASSERT_FALSE(tsk_provenance_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(\n        
tsk_provenance_table_equals(&table, &table2, TSK_CMP_IGNORE_TIMESTAMPS));\n    /* Change the last timestamp entry */\n    table2.timestamp_offset[table2.num_rows]\n        = table2.timestamp_offset[table2.num_rows - 1];\n    CU_ASSERT_FALSE(tsk_provenance_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(\n        tsk_provenance_table_equals(&table, &table2, TSK_CMP_IGNORE_TIMESTAMPS));\n    /* Delete all timestamps */\n    tsk_memset(table2.timestamp_offset, 0,\n        (table2.num_rows + 1) * sizeof(*table2.timestamp_offset));\n    CU_ASSERT_FALSE(tsk_provenance_table_equals(&table, &table2, 0));\n    CU_ASSERT_TRUE(\n        tsk_provenance_table_equals(&table, &table2, TSK_CMP_IGNORE_TIMESTAMPS));\n    tsk_provenance_table_free(&table2);\n\n    /* Tables that differ only in record length are not equal */\n    tsk_provenance_table_copy(&table, &table2, 0);\n    table2.record_length = 0;\n    CU_ASSERT_FALSE(tsk_provenance_table_equals(&table, &table2, 0));\n    tsk_provenance_table_free(&table2);\n\n    ret = tsk_provenance_table_truncate(&table, num_rows + 1);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TABLE_POSITION);\n\n    /* No arguments can be null */\n    ret = tsk_provenance_table_set_columns(\n        &table, num_rows, NULL, timestamp_offset, record, record_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_provenance_table_set_columns(\n        &table, num_rows, timestamp, NULL, record, record_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_provenance_table_set_columns(\n        &table, num_rows, timestamp, timestamp_offset, NULL, record_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_provenance_table_set_columns(\n        &table, num_rows, timestamp, timestamp_offset, record, NULL);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n\n    /* Test extend method */\n    ret = tsk_provenance_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = 
tsk_provenance_table_init(&table2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Can't extend from self */\n    ret = tsk_provenance_table_extend(&table, &table, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_CANNOT_EXTEND_FROM_SELF);\n\n    /* Two empty tables */\n    CU_ASSERT_TRUE(tsk_provenance_table_equals(&table, &table2, 0));\n    ret = tsk_provenance_table_extend(&table, &table2, table2.num_rows, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_provenance_table_equals(&table, &table2, 0));\n\n    /* Row out of bounds */\n    ret = tsk_provenance_table_extend(&table, &table2, num_row_subset, row_subset, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_PROVENANCE_OUT_OF_BOUNDS);\n\n    /* Num rows out of bounds */\n    ret = tsk_provenance_table_extend(&table, &table2, num_rows * 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_PROVENANCE_OUT_OF_BOUNDS);\n\n    /* Copy rows in order if index NULL */\n    ret = tsk_provenance_table_set_columns(\n        &table2, num_rows, timestamp, timestamp_offset, record, record_offset);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_provenance_table_equals(&table, &table2, 0));\n    ret = tsk_provenance_table_extend(&table, &table2, table2.num_rows, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_provenance_table_equals(&table, &table2, 0));\n\n    /* Copy nothing if index not NULL but length zero */\n    ret = tsk_provenance_table_extend(&table, &table2, 0, row_subset, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_provenance_table_equals(&table, &table2, 0));\n\n    /* Copy first N rows in order if index NULL */\n    ret = tsk_provenance_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_provenance_table_extend(&table, &table2, num_rows / 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_provenance_table_truncate(&table2, num_rows / 2);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    
CU_ASSERT_TRUE(tsk_provenance_table_equals(&table, &table2, 0));\n    ret = tsk_provenance_table_set_columns(\n        &table2, num_rows, timestamp, timestamp_offset, record, record_offset);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Copy a subset */\n    ret = tsk_provenance_table_truncate(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_provenance_table_equals(&table, &table2, 0));\n    ret = tsk_provenance_table_extend(&table, &table2, num_row_subset, row_subset, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (j = 0; j < num_row_subset; j++) {\n        ret = tsk_provenance_table_get_row(&table, (tsk_id_t) j, &provenance);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_provenance_table_get_row(&table2, row_subset[j], &provenance2);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(provenance.timestamp_length, provenance2.timestamp_length);\n        CU_ASSERT_EQUAL(provenance.record_length, provenance2.record_length);\n        CU_ASSERT_EQUAL(tsk_memcmp(provenance.timestamp, provenance2.timestamp,\n                            provenance.timestamp_length * sizeof(*provenance.timestamp)),\n            0);\n        CU_ASSERT_EQUAL(tsk_memcmp(provenance.record, provenance2.record,\n                            provenance.record_length * sizeof(*provenance.record)),\n            0);\n    }\n\n    tsk_provenance_table_free(&table);\n    tsk_provenance_table_free(&table2);\n    free(timestamp);\n    free(timestamp_offset);\n    free(record);\n    free(record_offset);\n}\n\nstatic void\ntest_provenance_table_takeset(void)\n{\n    int ret = 0;\n    tsk_id_t ret_id;\n    tsk_provenance_table_t source_table, table;\n    tsk_size_t num_rows = 100;\n    tsk_id_t j;\n    char *timestamp;\n    tsk_size_t *timestamp_offset;\n    char *record;\n    tsk_size_t *record_offset;\n    const char *test_timestamp = \"red\";\n    tsk_size_t test_timestamp_length = 3;\n    const char *test_record = \"test\";\n    tsk_size_t 
test_record_length = 4;\n    tsk_size_t zeros[num_rows + 1];\n    tsk_id_t neg_ones[num_rows];\n\n    tsk_memset(zeros, 0, (num_rows + 1) * sizeof(tsk_size_t));\n    tsk_memset(neg_ones, 0xff, num_rows * sizeof(tsk_id_t));\n    /* Make a table to copy from */\n    ret = tsk_provenance_table_init(&source_table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (j = 0; j < (tsk_id_t) num_rows; j++) {\n        ret_id = tsk_provenance_table_add_row(&source_table, test_timestamp,\n            test_timestamp_length, test_record, test_record_length);\n        CU_ASSERT_EQUAL_FATAL(ret_id, j);\n    }\n\n    /* Prepare arrays to be taken */\n    timestamp = tsk_malloc(num_rows * test_timestamp_length * sizeof(char));\n    CU_ASSERT_FATAL(timestamp != NULL);\n    tsk_memcpy(timestamp, source_table.timestamp,\n        num_rows * test_timestamp_length * sizeof(char));\n    timestamp_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n    CU_ASSERT_FATAL(timestamp_offset != NULL);\n    tsk_memcpy(timestamp_offset, source_table.timestamp_offset,\n        (num_rows + 1) * sizeof(tsk_size_t));\n    record = tsk_malloc(num_rows * test_record_length * sizeof(char));\n    CU_ASSERT_FATAL(record != NULL);\n    tsk_memcpy(\n        record, source_table.record, num_rows * test_record_length * sizeof(char));\n    record_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));\n    CU_ASSERT_FATAL(record_offset != NULL);\n    tsk_memcpy(\n        record_offset, source_table.record_offset, (num_rows + 1) * sizeof(tsk_size_t));\n\n    ret = tsk_provenance_table_init(&table, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Add one row so that we can check takeset frees it */\n    ret_id = tsk_provenance_table_add_row(\n        &table, test_timestamp, test_timestamp_length, test_record, test_record_length);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    ret = tsk_provenance_table_takeset_columns(\n        &table, num_rows, timestamp, timestamp_offset, record, record_offset);\n    
CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_provenance_table_equals(&source_table, &table, 0));\n\n    /* Test error states, all of these must not take the array, or free existing */\n    ret = tsk_provenance_table_takeset_columns(\n        &table, num_rows, NULL, timestamp_offset, record, record_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_provenance_table_takeset_columns(\n        &table, num_rows, timestamp, NULL, record, record_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_provenance_table_takeset_columns(\n        &table, num_rows, timestamp, timestamp_offset, NULL, record_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_provenance_table_takeset_columns(\n        &table, num_rows, timestamp, timestamp_offset, record, NULL);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n\n    /* Bad offsets */\n    timestamp_offset[0] = 1;\n    ret = tsk_provenance_table_takeset_columns(\n        &table, num_rows, timestamp, timestamp_offset, record, record_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);\n    timestamp_offset[0] = 0;\n    record_offset[0] = 1;\n    ret = tsk_provenance_table_takeset_columns(\n        &table, num_rows, timestamp, timestamp_offset, record, record_offset);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);\n\n    /* Truncation after takeset keeps memory and max_rows */\n    ret = tsk_provenance_table_clear(&table);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(table.max_rows, num_rows);\n\n    ret = tsk_provenance_table_free(&table);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_provenance_table_free(&source_table);\n    CU_ASSERT_EQUAL(ret, 0);\n}\n\nstatic void\ntest_provenance_table_update_row(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_provenance_table_t table;\n    tsk_provenance_t row;\n    const char *timestamp = \"XYZ\";\n    const char *record = \"ABC\";\n\n    ret = tsk_provenance_table_init(&table, 
0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret_id = tsk_provenance_table_add_row(&table, timestamp, 1, record, 1);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_provenance_table_add_row(&table, timestamp, 2, record, 2);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_provenance_table_add_row(&table, timestamp, 3, record, 3);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_provenance_table_update_row(&table, 0, &timestamp[1], 1, &record[1], 1);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_provenance_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.timestamp_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.timestamp[0], 'Y');\n    CU_ASSERT_EQUAL_FATAL(row.record_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.record[0], 'B');\n\n    ret = tsk_provenance_table_update_row(\n        &table, 0, row.timestamp, row.timestamp_length, row.record, row.record_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_provenance_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.timestamp_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.timestamp[0], 'Y');\n    CU_ASSERT_EQUAL_FATAL(row.record_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.record[0], 'B');\n\n    ret = tsk_provenance_table_update_row(&table, 0, row.timestamp,\n        row.timestamp_length - 1, row.record, row.record_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_provenance_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.timestamp_length, 0);\n    CU_ASSERT_EQUAL_FATAL(row.record_length, 1);\n    CU_ASSERT_EQUAL_FATAL(row.record[0], 'B');\n\n    ret = tsk_provenance_table_update_row(&table, 0, timestamp, 3, record, 3);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_provenance_table_get_row(&table, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.timestamp_length, 3);\n    CU_ASSERT_EQUAL_FATAL(row.timestamp[0], 
'X');\n    CU_ASSERT_EQUAL_FATAL(row.timestamp[1], 'Y');\n    CU_ASSERT_EQUAL_FATAL(row.timestamp[2], 'Z');\n    CU_ASSERT_EQUAL_FATAL(row.record_length, 3);\n    CU_ASSERT_EQUAL_FATAL(row.record[0], 'A');\n    CU_ASSERT_EQUAL_FATAL(row.record[1], 'B');\n    CU_ASSERT_EQUAL_FATAL(row.record[2], 'C');\n\n    ret = tsk_provenance_table_update_row(&table, 1, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_provenance_table_get_row(&table, 1, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.timestamp_length, 0);\n    CU_ASSERT_EQUAL_FATAL(row.record_length, 0);\n\n    ret = tsk_provenance_table_get_row(&table, 2, &row);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(row.timestamp_length, 3);\n    CU_ASSERT_EQUAL_FATAL(row.timestamp[0], 'X');\n    CU_ASSERT_EQUAL_FATAL(row.timestamp[1], 'Y');\n    CU_ASSERT_EQUAL_FATAL(row.timestamp[2], 'Z');\n    CU_ASSERT_EQUAL_FATAL(row.record_length, 3);\n    CU_ASSERT_EQUAL_FATAL(row.record[0], 'A');\n    CU_ASSERT_EQUAL_FATAL(row.record[1], 'B');\n    CU_ASSERT_EQUAL_FATAL(row.record[2], 'C');\n\n    ret = tsk_provenance_table_update_row(&table, 3, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_PROVENANCE_OUT_OF_BOUNDS);\n\n    tsk_provenance_table_free(&table);\n}\n\nstatic void\ntest_provenance_table_keep_rows(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_size_t j;\n    tsk_provenance_table_t source, t1, t2;\n    tsk_provenance_t row;\n    const char *timestamp = \"XYZ\";\n    const char *record = \"ABC\";\n    tsk_bool_t keep[3] = { 1, 1, 1 };\n    tsk_id_t indexes[] = { 0, 1, 2 };\n    tsk_id_t id_map[3];\n\n    ret = tsk_provenance_table_init(&source, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret_id = tsk_provenance_table_add_row(&source, timestamp, 1, record, 1);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_provenance_table_add_row(&source, timestamp, 2, record, 2);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = 
tsk_provenance_table_add_row(&source, timestamp, 3, record, 3);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_provenance_table_copy(&source, &t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_provenance_table_keep_rows(&t1, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_provenance_table_equals(&t1, &source, 0));\n\n    ret = tsk_provenance_table_keep_rows(&t1, keep, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_provenance_table_equals(&t1, &source, 0));\n    CU_ASSERT_EQUAL_FATAL(id_map[0], 0);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);\n\n    keep[0] = 0;\n    keep[1] = 0;\n    keep[2] = 0;\n    ret = tsk_provenance_table_keep_rows(&t1, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 0);\n    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);\n\n    ret = tsk_provenance_table_copy(&source, &t1, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    keep[0] = 0;\n    keep[1] = 1;\n    keep[2] = 0;\n    ret = tsk_provenance_table_keep_rows(&t1, keep, 0, id_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);\n    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);\n    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);\n    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);\n\n    ret = tsk_provenance_table_get_row(&t1, 0, &row);\n    CU_ASSERT_EQUAL_FATAL(row.timestamp_length, 2);\n    CU_ASSERT_EQUAL_FATAL(row.timestamp[0], 'X');\n    CU_ASSERT_EQUAL_FATAL(row.timestamp[1], 'Y');\n    CU_ASSERT_EQUAL_FATAL(row.record_length, 2);\n    CU_ASSERT_EQUAL_FATAL(row.record[0], 'A');\n    CU_ASSERT_EQUAL_FATAL(row.record[1], 'B');\n\n    tsk_provenance_table_free(&t1);\n\n    keep[0] = 0;\n    keep[1] = 0;\n    keep[2] = 0;\n    /* Keeping first n rows equivalent to truncate */\n    for (j = 0; j < source.num_rows; j++) {\n        ret = 
tsk_provenance_table_copy(&source, &t2, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_provenance_table_copy(&source, &t1, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_provenance_table_truncate(&t1, j + 1);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        keep[j] = 1;\n        ret = tsk_provenance_table_keep_rows(&t2, keep, 0, NULL);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_TRUE(tsk_provenance_table_equals(&t1, &t2, 0));\n\n        /* Adding the remaining rows back on to the table gives the original\n         * table */\n        ret = tsk_provenance_table_extend(\n            &t2, &source, source.num_rows - j - 1, indexes + j + 1, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_TRUE(tsk_provenance_table_equals(&source, &t2, 0));\n\n        tsk_provenance_table_free(&t1);\n        tsk_provenance_table_free(&t2);\n    }\n\n    tsk_provenance_table_free(&source);\n}\n\nstatic void\ntest_table_size_increments(void)\n{\n    int ret;\n    tsk_table_collection_t tables;\n    tsk_size_t new_size;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_EQUAL_FATAL(tables.individuals.max_rows_increment, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.individuals.max_metadata_length_increment, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.individuals.max_location_length_increment, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.max_rows_increment, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.max_metadata_length_increment, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.max_rows_increment, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.max_metadata_length_increment, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.max_rows_increment, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.max_metadata_length_increment, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.max_ancestral_state_length_increment, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.mutations.max_rows_increment, 0);\n    
CU_ASSERT_EQUAL_FATAL(tables.mutations.max_metadata_length_increment, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.mutations.max_derived_state_length_increment, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.migrations.max_rows_increment, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.migrations.max_metadata_length_increment, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.populations.max_rows_increment, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.populations.max_metadata_length_increment, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.provenances.max_rows_increment, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.provenances.max_timestamp_length_increment, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.provenances.max_record_length_increment, 0);\n\n    /* Setting to non-zero sets to that size */\n    new_size = 1;\n    ret = tsk_individual_table_set_max_rows_increment(&tables.individuals, new_size);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.individuals.max_rows_increment, new_size);\n    ret = tsk_individual_table_set_max_metadata_length_increment(\n        &tables.individuals, new_size);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.individuals.max_metadata_length_increment, new_size);\n    ret = tsk_individual_table_set_max_location_length_increment(\n        &tables.individuals, new_size);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.individuals.max_location_length_increment, new_size);\n\n    ret = tsk_node_table_set_max_rows_increment(&tables.nodes, new_size);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.max_rows_increment, new_size);\n    ret = tsk_node_table_set_max_metadata_length_increment(&tables.nodes, new_size);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.max_metadata_length_increment, new_size);\n\n    ret = tsk_edge_table_set_max_rows_increment(&tables.edges, new_size);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.max_rows_increment, new_size);\n    
ret = tsk_edge_table_set_max_metadata_length_increment(&tables.edges, new_size);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.max_metadata_length_increment, new_size);\n\n    ret = tsk_site_table_set_max_rows_increment(&tables.sites, new_size);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.max_rows_increment, new_size);\n    ret = tsk_site_table_set_max_metadata_length_increment(&tables.sites, new_size);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.max_metadata_length_increment, new_size);\n    ret = tsk_site_table_set_max_ancestral_state_length_increment(\n        &tables.sites, new_size);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.max_ancestral_state_length_increment, new_size);\n\n    ret = tsk_mutation_table_set_max_rows_increment(&tables.mutations, new_size);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.mutations.max_rows_increment, new_size);\n    ret = tsk_mutation_table_set_max_metadata_length_increment(\n        &tables.mutations, new_size);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.mutations.max_metadata_length_increment, new_size);\n    ret = tsk_mutation_table_set_max_derived_state_length_increment(\n        &tables.mutations, new_size);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.mutations.max_derived_state_length_increment, new_size);\n\n    ret = tsk_migration_table_set_max_rows_increment(&tables.migrations, new_size);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.migrations.max_rows_increment, new_size);\n    ret = tsk_migration_table_set_max_metadata_length_increment(\n        &tables.migrations, new_size);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.migrations.max_metadata_length_increment, new_size);\n\n    ret = tsk_population_table_set_max_rows_increment(&tables.populations, new_size);\n    
CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.populations.max_rows_increment, new_size);\n    ret = tsk_population_table_set_max_metadata_length_increment(\n        &tables.populations, new_size);\n    CU_ASSERT_EQUAL_FATAL(tables.populations.max_metadata_length_increment, new_size);\n\n    ret = tsk_provenance_table_set_max_rows_increment(&tables.provenances, new_size);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.provenances.max_rows_increment, new_size);\n    ret = tsk_provenance_table_set_max_timestamp_length_increment(\n        &tables.provenances, new_size);\n    CU_ASSERT_EQUAL_FATAL(tables.provenances.max_timestamp_length_increment, new_size);\n    ret = tsk_provenance_table_set_max_record_length_increment(\n        &tables.provenances, new_size);\n    CU_ASSERT_EQUAL_FATAL(tables.provenances.max_record_length_increment, new_size);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_table_expansion(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_table_collection_t tables;\n    tsk_table_collection_t tables2;\n\n    ret = tsk_table_collection_init(&tables2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Individual table */\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_EQUAL_FATAL(tables.individuals.max_rows, 1);\n\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    /*Extending by a small amount results in 1024 rows in the first case*/\n    ret = tsk_individual_table_extend(\n        &tables.individuals, &tables2.individuals, 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.individuals.max_rows, 1024);\n\n    /*Extending by an amount that fits doesn't grow the table*/\n    ret = tsk_individual_table_extend(\n        &tables.individuals, &tables2.individuals, 1023, NULL, 0);\n 
   CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.individuals.max_rows, 1024);\n\n    /*Extending by an amount that doesn't fit doubles the table*/\n    ret = tsk_individual_table_extend(\n        &tables.individuals, &tables2.individuals, 1024, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.individuals.max_rows, 2048);\n\n    /*Extending by an amount greater than the next double extends to that amount*/\n    ret = tsk_individual_table_extend(\n        &tables.individuals, &tables2.individuals, 4096, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.individuals.max_rows, 4097);\n\n    /*After extending beyond 2^21 subsequent extension doesn't double but adds 2^21*/\n    ret = tsk_individual_table_extend(\n        &tables.individuals, &tables2.individuals, 2097152, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.individuals.max_rows, 2097153);\n    ret = tsk_individual_table_extend(\n        &tables.individuals, &tables2.individuals, 2097154, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.individuals.max_rows, 4194305);\n\n    /*Extending by more rows than possible results in overflow*/\n    ret = tsk_individual_table_extend(\n        &tables.individuals, &tables2.individuals, TSK_MAX_ID, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);\n    CU_ASSERT_EQUAL_FATAL(tables.individuals.max_rows, 4194305);\n\n    /*Setting a custom extension uses that*/\n    ret = tsk_individual_table_set_max_rows_increment(&tables.individuals, 42);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_individual_table_extend(\n        &tables.individuals, &tables2.individuals, 4194305, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.individuals.max_rows, 4194305 + 42);\n\n    /*Setting a custom extension that overflows errors*/\n    ret = 
tsk_individual_table_set_max_rows_increment(&tables.individuals, TSK_MAX_ID);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_individual_table_extend(\n        &tables.individuals, &tables2.individuals, 4194305 + 42 + 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);\n    CU_ASSERT_EQUAL_FATAL(tables.individuals.max_rows, 4194305 + 42);\n\n    tsk_table_collection_free(&tables);\n\n    /* Node table */\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.max_rows, 1);\n\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, 0, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    /*Extending by a small amount results in 1024 rows in the first case*/\n    ret = tsk_node_table_extend(&tables.nodes, &tables2.nodes, 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.max_rows, 1024);\n\n    /*Extending by an amount that fits doesn't grow the table*/\n    ret = tsk_node_table_extend(&tables.nodes, &tables2.nodes, 1023, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.max_rows, 1024);\n\n    /*Extending by an amount that doesn't fit doubles the table*/\n    ret = tsk_node_table_extend(&tables.nodes, &tables2.nodes, 1024, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.max_rows, 2048);\n\n    /*Extending by an amount greater than the next double extends to that amount*/\n    ret = tsk_node_table_extend(&tables.nodes, &tables2.nodes, 4096, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.max_rows, 4097);\n\n    /*After extending beyond 2^21 subsequent extension doesn't double but adds 2^21*/\n    ret = tsk_node_table_extend(&tables.nodes, &tables2.nodes, 2097152, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.max_rows, 
2097153);\n    ret = tsk_node_table_extend(&tables.nodes, &tables2.nodes, 2097154, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.max_rows, 4194305);\n\n    /*Extending by more rows than possible results in overflow*/\n    ret = tsk_node_table_extend(&tables.nodes, &tables2.nodes, TSK_MAX_ID, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.max_rows, 4194305);\n\n    /*Setting a custom extension uses that*/\n    ret = tsk_node_table_set_max_rows_increment(&tables.nodes, 42);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_node_table_extend(&tables.nodes, &tables2.nodes, 4194305, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.max_rows, 4194305 + 42);\n\n    /*Setting a custom extension that overflows errors*/\n    ret = tsk_node_table_set_max_rows_increment(&tables.nodes, TSK_MAX_ID);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_node_table_extend(\n        &tables.nodes, &tables2.nodes, 4194305 + 42 + 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.max_rows, 4194305 + 42);\n\n    tsk_table_collection_free(&tables);\n\n    /* Edge table */\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_EQUAL_FATAL(tables.edges.max_rows, 1);\n\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0, 0, 0, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    /*Extending by a small amount results in 1024 rows in the first case*/\n    ret = tsk_edge_table_extend(&tables.edges, &tables2.edges, 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.max_rows, 1024);\n\n    /*Extending by an amount that fits doesn't grow the table*/\n    ret = tsk_edge_table_extend(&tables.edges, &tables2.edges, 1023, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);\n    
CU_ASSERT_EQUAL_FATAL(tables.edges.max_rows, 1024);\n\n    /*Extending by an amount that doesn't fit doubles the table*/\n    ret = tsk_edge_table_extend(&tables.edges, &tables2.edges, 1024, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.max_rows, 2048);\n\n    /*Extending by an amount greater than the next double extends to that amount*/\n    ret = tsk_edge_table_extend(&tables.edges, &tables2.edges, 4096, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.max_rows, 4097);\n\n    /*After extending beyond 2^21 subsequent extension doesn't double but adds 2^21*/\n    ret = tsk_edge_table_extend(&tables.edges, &tables2.edges, 2097152, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.max_rows, 2097153);\n    ret = tsk_edge_table_extend(&tables.edges, &tables2.edges, 2097154, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.max_rows, 4194305);\n\n    /*Extending by more rows than possible results in overflow*/\n    ret = tsk_edge_table_extend(&tables.edges, &tables2.edges, TSK_MAX_ID, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.max_rows, 4194305);\n\n    /*Setting a custom extension uses that*/\n    ret = tsk_edge_table_set_max_rows_increment(&tables.edges, 42);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_edge_table_extend(&tables.edges, &tables2.edges, 4194305, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.max_rows, 4194305 + 42);\n\n    /*Setting a custom extension that overflows errors*/\n    ret = tsk_edge_table_set_max_rows_increment(&tables.edges, TSK_MAX_ID);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_edge_table_extend(\n        &tables.edges, &tables2.edges, 4194305 + 42 + 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.max_rows, 
4194305 + 42);\n\n    tsk_table_collection_free(&tables);\n\n    /* Migration table */\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_EQUAL_FATAL(tables.migrations.max_rows, 1);\n\n    ret_id = tsk_migration_table_add_row(&tables.migrations, 0, 0, 0, 0, 0, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    /*Extending by a small amount results in 1024 rows in the first case*/\n    ret = tsk_migration_table_extend(\n        &tables.migrations, &tables2.migrations, 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATION_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.migrations.max_rows, 1024);\n\n    /*Extending by an amount that fits doesn't grow the table*/\n    ret = tsk_migration_table_extend(\n        &tables.migrations, &tables2.migrations, 1023, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATION_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.migrations.max_rows, 1024);\n\n    /*Extending by an amount that doesn't fit doubles the table*/\n    ret = tsk_migration_table_extend(\n        &tables.migrations, &tables2.migrations, 1024, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATION_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.migrations.max_rows, 2048);\n\n    /*Extending by an amount greater than the next double extends to that amount*/\n    ret = tsk_migration_table_extend(\n        &tables.migrations, &tables2.migrations, 4096, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATION_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.migrations.max_rows, 4097);\n\n    /*After extending beyond 2^21 subsequent extension doesn't double but adds 2^21*/\n    ret = tsk_migration_table_extend(\n        &tables.migrations, &tables2.migrations, 2097152, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.migrations.max_rows, 2097153);\n    ret = tsk_migration_table_extend(\n        &tables.migrations, &tables2.migrations, 2097154, NULL, 0);\n    
CU_ASSERT_EQUAL_FATAL(tables.migrations.max_rows, 4194305);\n\n    /*Extending by more rows than possible results in overflow*/\n    ret = tsk_migration_table_extend(\n        &tables.migrations, &tables2.migrations, TSK_MAX_ID, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);\n    CU_ASSERT_EQUAL_FATAL(tables.migrations.max_rows, 4194305);\n\n    /*Setting a custom extension uses that*/\n    ret = tsk_migration_table_set_max_rows_increment(&tables.migrations, 42);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_migration_table_extend(\n        &tables.migrations, &tables2.migrations, 4194305, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATION_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.migrations.max_rows, 4194305 + 42);\n\n    /*Setting a custom extension that overflows errors*/\n    ret = tsk_migration_table_set_max_rows_increment(&tables.migrations, TSK_MAX_ID);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_migration_table_extend(\n        &tables.migrations, &tables2.migrations, 4194305 + 42 + 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);\n    CU_ASSERT_EQUAL_FATAL(tables.migrations.max_rows, 4194305 + 42);\n\n    tsk_table_collection_free(&tables);\n\n    /* Site table */\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_EQUAL_FATAL(tables.sites.max_rows, 1);\n\n    ret_id = tsk_site_table_add_row(&tables.sites, 0, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    /*Extending by a small amount results in 1024 rows in the first case*/\n    ret = tsk_site_table_extend(&tables.sites, &tables2.sites, 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.max_rows, 1024);\n\n    /*Extending by an amount that fits doesn't grow the table*/\n    ret = tsk_site_table_extend(&tables.sites, &tables2.sites, 1023, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 
TSK_ERR_SITE_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.max_rows, 1024);\n\n    /*Extending by an amount that doesn't fit doubles the table*/\n    ret = tsk_site_table_extend(&tables.sites, &tables2.sites, 1024, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.max_rows, 2048);\n\n    /*Extending by an amount greater than the next double extends to that amount*/\n    ret = tsk_site_table_extend(&tables.sites, &tables2.sites, 4096, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.max_rows, 4097);\n\n    /*After extending beyond 2^21 subsequent extension doesn't double but adds 2^21*/\n    ret = tsk_site_table_extend(&tables.sites, &tables2.sites, 2097152, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.max_rows, 2097153);\n    ret = tsk_site_table_extend(&tables.sites, &tables2.sites, 2097154, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.max_rows, 4194305);\n\n    /*Extending by more rows than possible results in overflow*/\n    ret = tsk_site_table_extend(&tables.sites, &tables2.sites, TSK_MAX_ID, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.max_rows, 4194305);\n\n    /*Setting a custom extension uses that*/\n    ret = tsk_site_table_set_max_rows_increment(&tables.sites, 42);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_site_table_extend(&tables.sites, &tables2.sites, 4194305, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.max_rows, 4194305 + 42);\n\n    /*Setting a custom extension that overflows errors*/\n    ret = tsk_site_table_set_max_rows_increment(&tables.sites, TSK_MAX_ID);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_site_table_extend(\n        &tables.sites, &tables2.sites, 4194305 + 42 + 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);\n    
CU_ASSERT_EQUAL_FATAL(tables.sites.max_rows, 4194305 + 42);\n\n    tsk_table_collection_free(&tables);\n\n    /* Mutation table */\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_EQUAL_FATAL(tables.mutations.max_rows, 1);\n\n    ret_id = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 0, 0, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    /*Extending by a small amount results in 1024 rows in the first case*/\n    ret = tsk_mutation_table_extend(&tables.mutations, &tables2.mutations, 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.mutations.max_rows, 1024);\n\n    /*Extending by an amount that fits doesn't grow the table*/\n    ret = tsk_mutation_table_extend(\n        &tables.mutations, &tables2.mutations, 1023, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.mutations.max_rows, 1024);\n\n    /*Extending by an amount that doesn't fit doubles the table*/\n    ret = tsk_mutation_table_extend(\n        &tables.mutations, &tables2.mutations, 1024, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.mutations.max_rows, 2048);\n\n    /*Extending by an amount greater than the next double extends to that amount*/\n    ret = tsk_mutation_table_extend(\n        &tables.mutations, &tables2.mutations, 4096, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.mutations.max_rows, 4097);\n\n    /*After extending beyond 2^21 subsequent extension doesn't double but adds 2^21*/\n    ret = tsk_mutation_table_extend(\n        &tables.mutations, &tables2.mutations, 2097152, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.mutations.max_rows, 2097153);\n    ret = tsk_mutation_table_extend(\n        &tables.mutations, &tables2.mutations, 2097154, NULL, 0);\n    
CU_ASSERT_EQUAL_FATAL(tables.mutations.max_rows, 4194305);\n\n    /*Extending by more rows than possible results in overflow*/\n    ret = tsk_mutation_table_extend(\n        &tables.mutations, &tables2.mutations, TSK_MAX_ID, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);\n    CU_ASSERT_EQUAL_FATAL(tables.mutations.max_rows, 4194305);\n\n    /*Setting a custom extension uses that*/\n    ret = tsk_mutation_table_set_max_rows_increment(&tables.mutations, 42);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_mutation_table_extend(\n        &tables.mutations, &tables2.mutations, 4194305, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.mutations.max_rows, 4194305 + 42);\n\n    /*Setting a custom extension that overflows errors*/\n    ret = tsk_mutation_table_set_max_rows_increment(&tables.mutations, TSK_MAX_ID);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_mutation_table_extend(\n        &tables.mutations, &tables2.mutations, 4194305 + 42 + 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);\n    CU_ASSERT_EQUAL_FATAL(tables.mutations.max_rows, 4194305 + 42);\n\n    tsk_table_collection_free(&tables);\n\n    /* Population table */\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_EQUAL_FATAL(tables.populations.max_rows, 1);\n\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    /*Extending by a small amount results in 1024 rows in the first case*/\n    ret = tsk_population_table_extend(\n        &tables.populations, &tables2.populations, 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.populations.max_rows, 1024);\n\n    /*Extending by an amount that fits doesn't grow the table*/\n    ret = tsk_population_table_extend(\n        &tables.populations, &tables2.populations, 1023, NULL, 
0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.populations.max_rows, 1024);\n\n    /*Extending by an amount that doesn't fit doubles the table*/\n    ret = tsk_population_table_extend(\n        &tables.populations, &tables2.populations, 1024, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.populations.max_rows, 2048);\n\n    /*Extending by an amount greater than the next double extends to that amount*/\n    ret = tsk_population_table_extend(\n        &tables.populations, &tables2.populations, 4096, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.populations.max_rows, 4097);\n\n    /*After extending beyond 2^21 subsequent extension doesn't double but adds 2^21*/\n    ret = tsk_population_table_extend(\n        &tables.populations, &tables2.populations, 2097152, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.populations.max_rows, 2097153);\n    ret = tsk_population_table_extend(\n        &tables.populations, &tables2.populations, 2097154, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.populations.max_rows, 4194305);\n\n    /*Extending by more rows than possible results in overflow*/\n    ret = tsk_population_table_extend(\n        &tables.populations, &tables2.populations, TSK_MAX_ID, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);\n    CU_ASSERT_EQUAL_FATAL(tables.populations.max_rows, 4194305);\n\n    /*Setting a custom extension uses that*/\n    ret = tsk_population_table_set_max_rows_increment(&tables.populations, 42);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_population_table_extend(\n        &tables.populations, &tables2.populations, 4194305, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.populations.max_rows, 4194305 + 42);\n\n    /*Setting a custom extension that overflows errors*/\n    ret = 
tsk_population_table_set_max_rows_increment(&tables.populations, TSK_MAX_ID);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_population_table_extend(\n        &tables.populations, &tables2.populations, 4194305 + 42 + 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);\n    CU_ASSERT_EQUAL_FATAL(tables.populations.max_rows, 4194305 + 42);\n\n    tsk_table_collection_free(&tables);\n\n    /* Provenance table */\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_EQUAL_FATAL(tables.provenances.max_rows, 1);\n\n    ret_id = tsk_provenance_table_add_row(&tables.provenances, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    /*Extending by a small amount results in 1024 rows in the first case*/\n    ret = tsk_provenance_table_extend(\n        &tables.provenances, &tables2.provenances, 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_PROVENANCE_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.provenances.max_rows, 1024);\n\n    /*Extending by an amount that fits doesn't grow the table*/\n    ret = tsk_provenance_table_extend(\n        &tables.provenances, &tables2.provenances, 1023, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_PROVENANCE_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.provenances.max_rows, 1024);\n\n    /*Extending by an amount that doesn't fit doubles the table*/\n    ret = tsk_provenance_table_extend(\n        &tables.provenances, &tables2.provenances, 1024, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_PROVENANCE_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.provenances.max_rows, 2048);\n\n    /*Extending by an amount greater than the next double extends to that amount*/\n    ret = tsk_provenance_table_extend(\n        &tables.provenances, &tables2.provenances, 4096, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_PROVENANCE_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.provenances.max_rows, 4097);\n\n    /*After extending beyond 2^21 
subsequent extension doesn't double but adds 2^21*/\n    ret = tsk_provenance_table_extend(\n        &tables.provenances, &tables2.provenances, 2097152, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.provenances.max_rows, 2097153);\n    ret = tsk_provenance_table_extend(\n        &tables.provenances, &tables2.provenances, 2097154, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.provenances.max_rows, 4194305);\n\n    /*Extending by more rows than possible results in overflow*/\n    ret = tsk_provenance_table_extend(\n        &tables.provenances, &tables2.provenances, TSK_MAX_ID, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);\n    CU_ASSERT_EQUAL_FATAL(tables.provenances.max_rows, 4194305);\n\n    /*Setting a custom extension uses that*/\n    ret = tsk_provenance_table_set_max_rows_increment(&tables.provenances, 42);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_provenance_table_extend(\n        &tables.provenances, &tables2.provenances, 4194305, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_PROVENANCE_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(tables.provenances.max_rows, 4194305 + 42);\n\n    /*Setting a custom extension that overflows errors*/\n    ret = tsk_provenance_table_set_max_rows_increment(&tables.provenances, TSK_MAX_ID);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_provenance_table_extend(\n        &tables.provenances, &tables2.provenances, 4194305 + 42 + 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);\n    CU_ASSERT_EQUAL_FATAL(tables.provenances.max_rows, 4194305 + 42);\n\n    tsk_table_collection_free(&tables);\n    tsk_table_collection_free(&tables2);\n}\n\nstatic void\ntest_ragged_expansion(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_table_collection_t tables;\n    char *data = tsk_malloc(104857600 * sizeof(char));\n\n    /* Test with node table metadata */\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    
CU_ASSERT_EQUAL_FATAL(tables.nodes.max_metadata_length, 1);\n\n    /*Extending by a small amount results in 65536 bytes in the first case*/\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, data, 2);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.max_metadata_length, 65536);\n\n    /*Extending by an amount that fits doesn't grow the column*/\n    ret_id\n        = tsk_node_table_add_row(&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, data, 65534);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.max_metadata_length, 65536);\n\n    /*Extending by an amount that doesn't fit doubles the column*/\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, data, 1);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 2);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.max_metadata_length, 65536 * 2);\n\n    /*Extending by an amount greater than the next double extends to that amount*/\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, data,\n        1 + (65536 * 2 * 2 - 2 - 65534 - 1));\n    CU_ASSERT_EQUAL_FATAL(ret_id, 3);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.max_metadata_length, 2 + 65534 + 1 + 196608);\n\n    /*After extending beyond 100MB subsequent extension doesn't double but adds 100MB*/\n    ret_id = tsk_node_table_add_row(\n        &tables.nodes, 0, 0, TSK_NULL, TSK_NULL, data, 104857600);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 4);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.max_metadata_length, 105119745);\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, data, 1);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 5);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.max_metadata_length, 105119745 + 104857600);\n\n    /*Extending by more bytes than possible results in overflow*/\n    ret_id = tsk_node_table_add_row(\n        &tables.nodes, 0, 0, TSK_NULL, TSK_NULL, data, TSK_MAX_SIZE);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n    
CU_ASSERT_EQUAL_FATAL(tables.nodes.max_metadata_length, 105119745 + 104857600);\n\n    tsk_node_table_free(&tables.nodes);\n    ret = tsk_node_table_init(&tables.nodes, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /*Setting a custom extension uses that*/\n    ret = tsk_node_table_set_max_metadata_length_increment(&tables.nodes, 42);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, data, 3);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.max_metadata_length, 43);\n\n    /*Setting a custom extension that overflows errors*/\n    ret = tsk_node_table_set_max_metadata_length_increment(&tables.nodes, TSK_MAX_SIZE);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, data, 41);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.max_metadata_length, 43);\n\n    tsk_table_collection_free(&tables);\n    tsk_safe_free(data);\n}\n\nstatic void\ntest_link_ancestors_input_errors(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    tsk_edge_table_t result;\n    tsk_id_t samples[] = { 0, 1 };\n    tsk_id_t ancestors[] = { 4, 6 };\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Add an edge with some metadata */\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 7);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0, 1, 7, 6, \"metadata\", 8);\n    CU_ASSERT_FATAL(ret_id > 0);\n\n    ret = tsk_edge_table_init(&result, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_link_ancestors(\n        &tables, NULL, 2, ancestors, 2, 0, &result);\n    
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_CANT_PROCESS_EDGES_WITH_METADATA);\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n    tsk_edge_table_free(&result);\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_edge_table_init(&result, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_table_collection_link_ancestors(\n        &tables, NULL, 2, ancestors, 2, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n\n    /* Bad sample IDs */\n    samples[0] = -1;\n    ret = tsk_table_collection_link_ancestors(\n        &tables, samples, 2, ancestors, 2, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    /* Bad ancestor IDs */\n    samples[0] = 0;\n    ancestors[0] = -1;\n    ret = tsk_table_collection_link_ancestors(\n        &tables, samples, 2, ancestors, 2, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    /* Duplicate sample IDs */\n    ancestors[0] = 4;\n    samples[0] = 1;\n    ret = tsk_table_collection_link_ancestors(\n        &tables, samples, 2, ancestors, 2, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);\n\n    /* Duplicate ancestor IDs */\n    ancestors[0] = 6;\n    samples[0] = 0;\n    ret = tsk_table_collection_link_ancestors(\n        &tables, samples, 2, ancestors, 2, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);\n\n    /* TODO more tests! 
*/\n\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n    tsk_edge_table_free(&result);\n}\n\nstatic void\ntest_link_ancestors_single_tree(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    tsk_edge_table_t result;\n    tsk_id_t samples[] = { 0, 1 };\n    tsk_id_t ancestors[] = { 4, 6 };\n    size_t i;\n    double res_left = 0;\n    double res_right = 1;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_edge_table_init(&result, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_table_collection_link_ancestors(\n        &tables, samples, 2, ancestors, 2, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    // Check we get the right result.\n    CU_ASSERT_EQUAL(result.num_rows, 3);\n    tsk_id_t res_parent[] = { 4, 4, 6 };\n    tsk_id_t res_child[] = { 0, 1, 4 };\n    for (i = 0; i < result.num_rows; i++) {\n        CU_ASSERT_EQUAL(res_parent[i], result.parent[i]);\n        CU_ASSERT_EQUAL(res_child[i], result.child[i]);\n        CU_ASSERT_EQUAL(res_left, result.left[i]);\n        CU_ASSERT_EQUAL(res_right, result.right[i]);\n    }\n\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n    tsk_edge_table_free(&result);\n}\n\nstatic void\ntest_link_ancestors_no_edges(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    tsk_edge_table_t result;\n    tsk_id_t samples[] = { 2 };\n    tsk_id_t ancestors[] = { 4 };\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_edge_table_init(&result, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_table_collection_link_ancestors(\n        &tables, samples, 1, 
ancestors, 1, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_table_collection_free(&tables);\n    tsk_edge_table_free(&result);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_link_ancestors_samples_and_ancestors_overlap(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    tsk_edge_table_t result;\n    tsk_id_t samples[] = { 0, 1, 2, 4 };\n    tsk_id_t ancestors[] = { 2 };\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_edge_table_init(&result, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_table_collection_link_ancestors(\n        &tables, samples, 4, ancestors, 1, 0, &result);\n\n    // tsk_edge_table_print_state(&result, stdout);\n\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    // Check we get the right result.\n    CU_ASSERT_EQUAL(result.num_rows, 2);\n    size_t i;\n    tsk_id_t res_parent = 4;\n    tsk_id_t res_child[] = { 0, 1 };\n    double res_left = 0;\n    double res_right = 1;\n    for (i = 0; i < result.num_rows; i++) {\n        CU_ASSERT_EQUAL(res_parent, result.parent[i]);\n        CU_ASSERT_EQUAL(res_child[i], result.child[i]);\n        CU_ASSERT_EQUAL(res_left, result.left[i]);\n        CU_ASSERT_EQUAL(res_right, result.right[i]);\n    }\n\n    tsk_table_collection_free(&tables);\n    tsk_edge_table_free(&result);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_link_ancestors_paper(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    tsk_edge_table_t result;\n    tsk_id_t samples[] = { 0, 1, 2 };\n    tsk_id_t ancestors[] = { 5, 6, 7 };\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    
ret = tsk_edge_table_init(&result, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_table_collection_link_ancestors(\n        &tables, samples, 3, ancestors, 3, 0, &result);\n\n    // tsk_edge_table_print_state(&result, stdout);\n\n    // Check we get the right result.\n    CU_ASSERT_EQUAL(result.num_rows, 6);\n    size_t i;\n    tsk_id_t res_parent[] = { 5, 5, 6, 6, 7, 7 };\n    tsk_id_t res_child[] = { 1, 2, 0, 5, 0, 5 };\n    double res_left[] = { 0, 2, 0, 0, 7, 7 };\n    double res_right[] = { 10, 10, 7, 7, 10, 10 };\n    for (i = 0; i < result.num_rows; i++) {\n        CU_ASSERT_EQUAL(res_parent[i], result.parent[i]);\n        CU_ASSERT_EQUAL(res_child[i], result.child[i]);\n        CU_ASSERT_EQUAL(res_left[i], result.left[i]);\n        CU_ASSERT_EQUAL(res_right[i], result.right[i]);\n    }\n\n    tsk_table_collection_free(&tables);\n    tsk_edge_table_free(&result);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_link_ancestors_multiple_to_single_tree(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    tsk_edge_table_t result;\n    tsk_id_t samples[] = { 1, 3 };\n    tsk_id_t ancestors[] = { 5 };\n    size_t i;\n    tsk_id_t res_parent = 5;\n    tsk_id_t res_child[] = { 1, 3 };\n    double res_left = 0;\n    double res_right = 10;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_edge_table_init(&result, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_table_collection_link_ancestors(\n        &tables, samples, 2, ancestors, 1, 0, &result);\n\n    CU_ASSERT_EQUAL(result.num_rows, 2);\n    for (i = 0; i < result.num_rows; i++) {\n        CU_ASSERT_EQUAL(res_parent, result.parent[i]);\n        CU_ASSERT_EQUAL(res_child[i], result.child[i]);\n        CU_ASSERT_EQUAL(res_left, result.left[i]);\n        
CU_ASSERT_EQUAL(res_right, result.right[i]);\n    }\n\n    tsk_table_collection_free(&tables);\n    tsk_edge_table_free(&result);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\nverify_ibd_segment_list(tsk_identity_segment_list_t *list, tsk_size_t num_nodes)\n{\n    tsk_identity_segment_t *seg;\n    double total_span = 0;\n    tsk_size_t num_segments = 0;\n    /* double last_right = 0; */\n\n    for (seg = list->head; seg != NULL; seg = seg->next) {\n        CU_ASSERT_FATAL(seg->left < seg->right);\n        CU_ASSERT_FATAL(seg->node >= 0);\n        CU_ASSERT_FATAL(seg->node < (tsk_id_t) num_nodes);\n        total_span += seg->right - seg->left;\n        num_segments++;\n\n        /* TODO the segments are not necessarily in order - issue #1682 */\n        /* CU_ASSERT_FATAL(seg->left >= last_right); */\n        /* last_right = seg->right; */\n    }\n    CU_ASSERT_EQUAL_FATAL(total_span, list->total_span);\n    CU_ASSERT_EQUAL_FATAL(num_segments, list->num_segments);\n}\n\nstatic void\nverify_ibd_result(tsk_identity_segments_t *result)\n{\n    int ret;\n    tsk_size_t j;\n    tsk_id_t a, b;\n    int64_t index;\n    tsk_size_t total_segments = 0;\n    double total_span = 0;\n    tsk_size_t num_pairs = tsk_identity_segments_get_num_pairs(result);\n    tsk_id_t *pairs\n        = tsk_malloc(2 * tsk_identity_segments_get_num_pairs(result) * sizeof(*pairs));\n    tsk_id_t *pairs2\n        = tsk_malloc(2 * tsk_identity_segments_get_num_pairs(result) * sizeof(*pairs));\n    tsk_identity_segment_list_t **lists\n        = tsk_malloc(tsk_identity_segments_get_num_pairs(result) * sizeof(*lists));\n    tsk_avl_node_int_t **avl_nodes\n        = tsk_malloc(result->pair_map.size * sizeof(*avl_nodes));\n\n    CU_ASSERT_FATAL(pairs != NULL);\n    CU_ASSERT_FATAL(pairs2 != NULL);\n    CU_ASSERT_FATAL(avl_nodes != NULL);\n    CU_ASSERT_FATAL(lists != NULL);\n    CU_ASSERT_EQUAL_FATAL(num_pairs, result->pair_map.size);\n    tsk_identity_segments_print_state(result, _devnull);\n\n    ret = 
tsk_identity_segments_get_keys(result, pairs);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_avl_tree_int_ordered_nodes(&result->pair_map, avl_nodes);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    for (j = 0; j < num_pairs; j++) {\n        a = pairs[2 * j];\n        b = pairs[2 * j + 1];\n        index = a * (int64_t) result->num_nodes + b;\n        CU_ASSERT(a < b);\n        CU_ASSERT_EQUAL(tsk_avl_tree_int_search(&result->pair_map, index), avl_nodes[j]);\n        index = b * (int64_t) result->num_nodes + a;\n        CU_ASSERT_EQUAL(tsk_avl_tree_int_search(&result->pair_map, index), NULL);\n    }\n\n    ret = tsk_identity_segments_get_items(result, pairs2, lists);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (j = 0; j < num_pairs; j++) {\n        CU_ASSERT_EQUAL_FATAL(pairs[2 * j], pairs2[2 * j]);\n        CU_ASSERT_EQUAL_FATAL(pairs[2 * j + 1], pairs2[2 * j + 1]);\n        verify_ibd_segment_list(lists[j], result->num_nodes);\n        total_segments += lists[j]->num_segments;\n        total_span += lists[j]->total_span;\n    }\n    CU_ASSERT_EQUAL_FATAL(result->num_segments, total_segments);\n    CU_ASSERT_DOUBLE_EQUAL(result->total_span, total_span, 1e-6);\n\n    free(pairs);\n    free(pairs2);\n    free(lists);\n    free(avl_nodes);\n}\n\nstatic void\ntest_ibd_segments_debug(void)\n{\n    tsk_treeseq_t ts;\n    int ret;\n    tsk_identity_segments_t result;\n    tsk_size_t sizes[] = { 2, 2 };\n    tsk_id_t samples[] = { 0, 1, 2, 3 };\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n\n    tsk_set_debug_stream(_devnull);\n    /* Run the DEBUG code */\n    ret = tsk_table_collection_ibd_within(\n        ts.tables, &result, NULL, 0, 0.0, DBL_MAX, TSK_DEBUG);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_identity_segments_free(&result);\n\n    ret = tsk_table_collection_ibd_between(\n        ts.tables, &result, 2, sizes, samples, 0.0, DBL_MAX, TSK_DEBUG);\n    CU_ASSERT_EQUAL_FATAL(ret, 
0);\n    tsk_identity_segments_free(&result);\n\n    ret = tsk_table_collection_ibd_within(\n        ts.tables, &result, NULL, 0, 0.0, DBL_MAX, TSK_DEBUG | TSK_IBD_STORE_PAIRS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_identity_segments_free(&result);\n\n    ret = tsk_table_collection_ibd_within(\n        ts.tables, &result, NULL, 0, 0.0, DBL_MAX, TSK_DEBUG | TSK_IBD_STORE_SEGMENTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_identity_segments_free(&result);\n\n    tsk_set_debug_stream(stdout);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_ibd_segments_caterpillar_tree(void)\n{\n    int ret;\n    tsk_identity_segments_t result;\n    tsk_treeseq_t *ts = caterpillar_tree(100, 1, 5);\n\n    /* We're just testing out the memory expansion in ibd_finder */\n    ret = tsk_table_collection_ibd_within(ts->tables, &result, NULL, 0, 0.0, DBL_MAX, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_identity_segments_free(&result);\n\n    tsk_treeseq_free(ts);\n    free(ts);\n}\n\nstatic void\ntest_ibd_segments_single_tree(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    tsk_id_t samples[] = { 0, 1 };\n    tsk_size_t sizes[] = { 1, 1 };\n    tsk_identity_segments_t result;\n    tsk_identity_segment_list_t *list = NULL;\n    tsk_identity_segment_t *seg = NULL;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Only get IBD segs for (0, 1) */\n    ret = tsk_table_collection_ibd_within(\n        &tables, &result, samples, 2, 0.0, DBL_MAX, TSK_IBD_STORE_SEGMENTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_identity_segments_get(&result, samples[0], samples[1], &list);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FATAL(list != NULL);\n    seg = list->head;\n    CU_ASSERT_EQUAL_FATAL(seg->next, NULL);\n    CU_ASSERT_EQUAL_FATAL(seg->left, 0);\n    
CU_ASSERT_EQUAL_FATAL(seg->right, 1);\n    CU_ASSERT_EQUAL_FATAL(seg->node, 4);\n\n    /* Queries for other sample pairs fail */\n    ret = tsk_identity_segments_get(&result, 0, 2, &list);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(list, NULL);\n    ret = tsk_identity_segments_get(&result, 1, 3, &list);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(list, NULL);\n\n    tsk_identity_segments_print_state(&result, _devnull);\n    CU_ASSERT_EQUAL_FATAL(tsk_identity_segments_get_num_segments(&result), 1);\n    CU_ASSERT_EQUAL_FATAL(tsk_identity_segments_get_total_span(&result), 1);\n    verify_ibd_result(&result);\n    tsk_identity_segments_free(&result);\n\n    /* Get IBD segs among all pairs of samples */\n    ret = tsk_table_collection_ibd_within(\n        &tables, &result, NULL, 0, 0.0, DBL_MAX, TSK_IBD_STORE_SEGMENTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* We have 4 samples, so 4 choose 2 sample pairs */\n    CU_ASSERT_EQUAL_FATAL(tsk_identity_segments_get_num_segments(&result), 6);\n    CU_ASSERT_EQUAL_FATAL(tsk_identity_segments_get_total_span(&result), 6);\n\n    ret = tsk_identity_segments_get(&result, 0, 1, &list);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    seg = list->head;\n    CU_ASSERT_FATAL(seg != NULL);\n    CU_ASSERT_EQUAL_FATAL(seg->next, NULL);\n    CU_ASSERT_EQUAL_FATAL(seg->left, 0);\n    CU_ASSERT_EQUAL_FATAL(seg->right, 1);\n    CU_ASSERT_EQUAL_FATAL(seg->node, 4);\n\n    ret = tsk_identity_segments_get(&result, 3, 0, &list);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    seg = list->head;\n    CU_ASSERT_FATAL(seg != NULL);\n    CU_ASSERT_EQUAL_FATAL(seg->next, NULL);\n    CU_ASSERT_EQUAL_FATAL(seg->left, 0);\n    CU_ASSERT_EQUAL_FATAL(seg->right, 1);\n    CU_ASSERT_EQUAL_FATAL(seg->node, 6);\n\n    verify_ibd_result(&result);\n    tsk_identity_segments_free(&result);\n\n    /* Get segs between {0} and {1} */\n    ret = tsk_table_collection_ibd_between(\n        ts.tables, &result, 2, sizes, samples, 0.0, 
DBL_MAX, TSK_IBD_STORE_SEGMENTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    verify_ibd_result(&result);\n    CU_ASSERT_EQUAL_FATAL(tsk_identity_segments_get_num_segments(&result), 1);\n    ret = tsk_identity_segments_get(&result, 0, 1, &list);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    seg = list->head;\n    CU_ASSERT_FATAL(seg != NULL);\n    CU_ASSERT_EQUAL_FATAL(seg->next, NULL);\n    CU_ASSERT_EQUAL_FATAL(seg->left, 0);\n    CU_ASSERT_EQUAL_FATAL(seg->right, 1);\n    CU_ASSERT_EQUAL_FATAL(seg->node, 4);\n\n    tsk_identity_segments_free(&result);\n\n    /* within an empty list gives no segments */\n    ret = tsk_table_collection_ibd_within(&tables, &result, samples, 0, 0.0, DBL_MAX, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tsk_identity_segments_get_num_segments(&result), 0);\n    tsk_identity_segments_free(&result);\n\n    /* Between an empty list gives no segments */\n    ret = tsk_table_collection_ibd_between(\n        ts.tables, &result, 0, sizes, samples, 0.0, DBL_MAX, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tsk_identity_segments_get_num_segments(&result), 0);\n    tsk_identity_segments_free(&result);\n\n    /* Between one empty list gives no segments*/\n    sizes[0] = 0;\n    ret = tsk_table_collection_ibd_between(\n        ts.tables, &result, 2, sizes, samples, 0.0, DBL_MAX, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tsk_identity_segments_get_num_segments(&result), 0);\n    tsk_identity_segments_free(&result);\n    sizes[0] = 2;\n\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_ibd_segments_single_tree_options(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    tsk_identity_segments_t result;\n    tsk_identity_segment_list_t *list = NULL;\n    tsk_id_t pairs[12];\n    tsk_identity_segment_list_t *lists[6];\n    tsk_flags_t options[2];\n    int k;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, 
single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_table_collection_ibd_within(&tables, &result, NULL, 0, 0.0, DBL_MAX, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* We have 4 samples, so 4 choose 2 sample pairs */\n    CU_ASSERT_EQUAL_FATAL(tsk_identity_segments_get_num_segments(&result), 6);\n    CU_ASSERT_EQUAL_FATAL(tsk_identity_segments_get_total_span(&result), 6);\n    /* out-of-bounds is still detected */\n    ret = tsk_identity_segments_get(&result, 0, 100, &list);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    /* By default all specific queries fail on the ibd_segments result */\n    ret = tsk_identity_segments_get(&result, 0, 1, &list);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_IBD_PAIRS_NOT_STORED);\n    ret = tsk_identity_segments_get_keys(&result, pairs);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_IBD_PAIRS_NOT_STORED);\n    ret = tsk_identity_segments_get_items(&result, pairs, lists);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_IBD_PAIRS_NOT_STORED);\n    tsk_identity_segments_free(&result);\n\n    ret = tsk_table_collection_ibd_within(\n        &tables, &result, NULL, 0, 0.0, DBL_MAX, TSK_IBD_STORE_PAIRS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    /* out-of-bounds is still detected */\n    ret = tsk_identity_segments_get(&result, 0, 100, &list);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    /* Getters for the lists now work, but the lists themselves are NULL */\n    ret = tsk_identity_segments_get(&result, 0, 1, &list);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(list->head, NULL);\n    CU_ASSERT_EQUAL_FATAL(list->total_span, 1);\n    CU_ASSERT_EQUAL_FATAL(list->num_segments, 1);\n    ret = tsk_identity_segments_get_keys(&result, pairs);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(pairs[0], 0);\n    CU_ASSERT_EQUAL_FATAL(pairs[1], 1);\n    ret = 
tsk_identity_segments_get_items(&result, pairs, lists);\n    CU_ASSERT_EQUAL_FATAL(pairs[0], 0);\n    CU_ASSERT_EQUAL_FATAL(pairs[1], 1);\n    CU_ASSERT_EQUAL_FATAL(lists[0]->head, NULL);\n    CU_ASSERT_EQUAL_FATAL(lists[0]->total_span, 1);\n    CU_ASSERT_EQUAL_FATAL(lists[0]->num_segments, 1);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_identity_segments_free(&result);\n\n    /* store_segments implies store_pairs */\n    options[0] = TSK_IBD_STORE_SEGMENTS;\n    options[1] = TSK_IBD_STORE_PAIRS | TSK_IBD_STORE_SEGMENTS;\n    for (k = 0; k < 2; k++) {\n\n        ret = tsk_table_collection_ibd_within(\n            &tables, &result, NULL, 0, 0.0, DBL_MAX, options[k]);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        /* out-of-bounds is still detected */\n        ret = tsk_identity_segments_get(&result, 0, 100, &list);\n        CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n        ret = tsk_identity_segments_get(&result, 0, 1, &list);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_FATAL(list->head != NULL);\n        CU_ASSERT_EQUAL_FATAL(list->head->left, 0);\n        CU_ASSERT_EQUAL_FATAL(list->head->right, 1);\n        CU_ASSERT_EQUAL_FATAL(list->head->next, NULL);\n        CU_ASSERT_EQUAL_FATAL(list->total_span, 1);\n        CU_ASSERT_EQUAL_FATAL(list->num_segments, 1);\n        ret = tsk_identity_segments_get_keys(&result, pairs);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL_FATAL(pairs[0], 0);\n        CU_ASSERT_EQUAL_FATAL(pairs[1], 1);\n        ret = tsk_identity_segments_get_items(&result, pairs, lists);\n        CU_ASSERT_EQUAL_FATAL(pairs[0], 0);\n        CU_ASSERT_EQUAL_FATAL(pairs[1], 1);\n        CU_ASSERT_FATAL(lists[0]->head != NULL);\n        CU_ASSERT_EQUAL_FATAL(lists[0]->head->left, 0);\n        CU_ASSERT_EQUAL_FATAL(lists[0]->head->right, 1);\n        CU_ASSERT_EQUAL_FATAL(lists[0]->head->next, NULL);\n        CU_ASSERT_EQUAL_FATAL(lists[0]->total_span, 1);\n        
CU_ASSERT_EQUAL_FATAL(lists[0]->num_segments, 1);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        tsk_identity_segments_free(&result);\n    }\n\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_ibd_segments_single_tree_between(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    tsk_id_t samples[] = { 0, 1, 2, 3 };\n    tsk_size_t sizes[] = { 2, 2 };\n    tsk_identity_segments_t result;\n    tsk_identity_segment_list_t *list = NULL;\n    tsk_identity_segment_t *seg = NULL;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Get segs between {0, 1} and {2, 3} */\n    ret = tsk_table_collection_ibd_between(\n        ts.tables, &result, 2, sizes, samples, 0.0, DBL_MAX, TSK_IBD_STORE_SEGMENTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    verify_ibd_result(&result);\n    CU_ASSERT_EQUAL_FATAL(tsk_identity_segments_get_num_segments(&result), 4);\n\n    ret = tsk_identity_segments_get(&result, 0, 2, &list);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    seg = list->head;\n    CU_ASSERT_FATAL(seg != NULL);\n    CU_ASSERT_EQUAL_FATAL(seg->next, NULL);\n    CU_ASSERT_EQUAL_FATAL(seg->left, 0);\n    CU_ASSERT_EQUAL_FATAL(seg->right, 1);\n    CU_ASSERT_EQUAL_FATAL(seg->node, 6);\n\n    ret = tsk_identity_segments_get(&result, 0, 3, &list);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    seg = list->head;\n    CU_ASSERT_FATAL(seg != NULL);\n    CU_ASSERT_EQUAL_FATAL(seg->next, NULL);\n    CU_ASSERT_EQUAL_FATAL(seg->left, 0);\n    CU_ASSERT_EQUAL_FATAL(seg->right, 1);\n    CU_ASSERT_EQUAL_FATAL(seg->node, 6);\n\n    ret = tsk_identity_segments_get(&result, 1, 2, &list);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    seg = list->head;\n    CU_ASSERT_FATAL(seg != NULL);\n    CU_ASSERT_EQUAL_FATAL(seg->next, NULL);\n    CU_ASSERT_EQUAL_FATAL(seg->left, 0);\n  
  CU_ASSERT_EQUAL_FATAL(seg->right, 1);\n    CU_ASSERT_EQUAL_FATAL(seg->node, 6);\n\n    ret = tsk_identity_segments_get(&result, 1, 3, &list);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    seg = list->head;\n    CU_ASSERT_FATAL(seg != NULL);\n    CU_ASSERT_EQUAL_FATAL(seg->next, NULL);\n    CU_ASSERT_EQUAL_FATAL(seg->left, 0);\n    CU_ASSERT_EQUAL_FATAL(seg->right, 1);\n    CU_ASSERT_EQUAL_FATAL(seg->node, 6);\n\n    tsk_identity_segments_free(&result);\n\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_ibd_segments_multiple_trees(void)\n{\n    int ret;\n    tsk_size_t j, k;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    tsk_id_t samples[] = { 0, 1, 2 };\n    tsk_id_t pairs[][2] = { { 0, 1 }, { 0, 2 } };\n    tsk_size_t num_samples = 3;\n    tsk_size_t num_pairs = 2;\n    tsk_identity_segments_t result;\n    double true_left[2][2] = { { 0.0, 0.75 }, { 0.75, 0.0 } };\n    double true_right[2][2] = { { 0.75, 1.0 }, { 1.0, 0.75 } };\n    double true_node[2][2] = { { 4, 5 }, { 5, 6 } };\n    tsk_identity_segment_list_t *list;\n    tsk_identity_segment_t *seg;\n\n    tsk_treeseq_from_text(&ts, 2, multiple_tree_ex_nodes, multiple_tree_ex_edges, NULL,\n        NULL, NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_table_collection_ibd_within(\n        &tables, &result, samples, num_samples, 0.0, DBL_MAX, TSK_IBD_STORE_SEGMENTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (j = 0; j < num_pairs; j++) {\n        ret = tsk_identity_segments_get(&result, pairs[j][0], pairs[j][1], &list);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL_FATAL(list->num_segments, 2);\n        k = 0;\n        for (seg = list->head; seg != NULL; seg = seg->next) {\n            CU_ASSERT_EQUAL_FATAL(seg->left, true_left[j][k]);\n            CU_ASSERT_EQUAL_FATAL(seg->right, true_right[j][k]);\n            CU_ASSERT_EQUAL_FATAL(seg->node, 
true_node[j][k]);\n            k++;\n        }\n        CU_ASSERT_EQUAL_FATAL(list->num_segments, k);\n    }\n\n    verify_ibd_result(&result);\n    tsk_identity_segments_free(&result);\n\n    ret = tsk_table_collection_ibd_within(\n        &tables, &result, NULL, 0, 0.0, DBL_MAX, TSK_IBD_STORE_SEGMENTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    verify_ibd_result(&result);\n    tsk_identity_segments_free(&result);\n\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_ibd_segments_empty_result(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    tsk_id_t samples[] = { 0, 1 };\n    tsk_identity_segments_t result;\n    tsk_identity_segment_list_t *list;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_table_collection_ibd_within(\n        &tables, &result, samples, 1, 0.0, 0.5, TSK_IBD_STORE_SEGMENTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_identity_segments_get(&result, samples[0], samples[1], &list);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FATAL(list == NULL);\n\n    verify_ibd_result(&result);\n    tsk_identity_segments_free(&result);\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_ibd_segments_min_span_max_time(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_identity_segments_t result;\n    tsk_identity_segment_list_t *list;\n    tsk_identity_segment_t *seg;\n\n    tsk_treeseq_from_text(&ts, 2, multiple_tree_ex_nodes, multiple_tree_ex_edges, NULL,\n        NULL, NULL, NULL, NULL, 0);\n\n    ret = tsk_table_collection_ibd_within(\n        ts.tables, &result, NULL, 0, 0.5, 3.0, TSK_IBD_STORE_SEGMENTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_identity_segments_get(&result, 0, 1, &list);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    
CU_ASSERT_EQUAL_FATAL(list->num_segments, 1);\n    seg = list->head;\n    CU_ASSERT_EQUAL_FATAL(seg->left, 0.0);\n    CU_ASSERT_EQUAL_FATAL(seg->right, 0.75);\n    CU_ASSERT_EQUAL_FATAL(seg->node, 4);\n\n    ret = tsk_identity_segments_get(&result, 1, 2, &list);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(list, NULL);\n\n    ret = tsk_identity_segments_get(&result, 0, 2, &list);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(list, NULL);\n\n    verify_ibd_result(&result);\n    tsk_identity_segments_free(&result);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_ibd_segments_errors(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    tsk_id_t samples[] = { 0, 1, 2 };\n    tsk_id_t duplicate_samples[] = { 0, 1, 0 };\n    tsk_id_t samples2[] = { -1, 1 };\n    tsk_size_t sample_set_sizes[] = { 3 };\n    tsk_identity_segments_t result;\n    tsk_identity_segment_list_t *list;\n\n    tsk_treeseq_from_text(&ts, 2, multiple_tree_ex_nodes, multiple_tree_ex_edges, NULL,\n        NULL, NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    // Invalid sample IDs\n    ret = tsk_table_collection_ibd_within(\n        &tables, &result, samples2, 1, 0.0, DBL_MAX, TSK_IBD_STORE_SEGMENTS);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    tsk_identity_segments_free(&result);\n\n    ret = tsk_table_collection_ibd_between(&tables, &result, 1, sample_set_sizes,\n        samples2, 0.0, DBL_MAX, TSK_IBD_STORE_SEGMENTS);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    tsk_identity_segments_free(&result);\n\n    // Bad length or time\n    ret = tsk_table_collection_ibd_within(&tables, &result, samples, 2, 0.0, -1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    tsk_identity_segments_free(&result);\n    ret = tsk_table_collection_ibd_within(&tables, &result, samples, 2, -1, 0.0, 0);\n    
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    tsk_identity_segments_free(&result);\n\n    ret = tsk_table_collection_ibd_between(&tables, &result, 1, sample_set_sizes,\n        samples, -1, DBL_MAX, TSK_IBD_STORE_SEGMENTS);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    tsk_identity_segments_free(&result);\n    ret = tsk_table_collection_ibd_between(\n        &tables, &result, 1, sample_set_sizes, samples, 0, -1, TSK_IBD_STORE_SEGMENTS);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    tsk_identity_segments_free(&result);\n\n    // Duplicate samples\n    ret = tsk_table_collection_ibd_within(\n        &tables, &result, duplicate_samples, 3, 0.0, DBL_MAX, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);\n    tsk_identity_segments_free(&result);\n\n    ret = tsk_table_collection_ibd_between(&tables, &result, 1, sample_set_sizes,\n        duplicate_samples, 0.0, DBL_MAX, TSK_IBD_STORE_SEGMENTS);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);\n    tsk_identity_segments_free(&result);\n\n    // Check for bad inputs to result\n    ret = tsk_table_collection_ibd_within(\n        &tables, &result, NULL, 0, 0.0, DBL_MAX, TSK_IBD_STORE_SEGMENTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_identity_segments_get(&result, 0, -1, &list);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    ret = tsk_identity_segments_get(&result, -1, 0, &list);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    ret = tsk_identity_segments_get(&result, 0, 100, &list);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    ret = tsk_identity_segments_get(&result, 100, 0, &list);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    ret = tsk_identity_segments_get(&result, 0, 5, &list);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(list, NULL);\n    /* TODO add more checks here */\n    ret = tsk_identity_segments_get(&result, 0, 0, &list);\n    
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SAME_NODES_IN_PAIR);\n\n    tsk_identity_segments_free(&result);\n\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_ibd_segments_samples_are_descendants(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_id_t samples[] = { 0, 1, 2, 3, 4, 5 };\n    tsk_size_t num_samples = 6;\n    tsk_identity_segments_t result;\n    tsk_id_t pairs[][2] = { { 0, 2 }, { 0, 4 }, { 2, 4 }, { 1, 3 }, { 1, 5 }, { 3, 5 } };\n    tsk_size_t num_pairs = 6;\n    tsk_id_t true_node[] = { 2, 4, 4, 3, 5, 5 };\n    tsk_size_t j;\n    tsk_identity_segment_list_t *list;\n    tsk_identity_segment_t *seg;\n\n    tsk_treeseq_from_text(&ts, 1, multi_root_tree_ex_nodes, multi_root_tree_ex_edges,\n        NULL, NULL, NULL, NULL, NULL, 0);\n\n    ret = tsk_table_collection_ibd_within(\n        ts.tables, &result, samples, num_samples, 0.0, DBL_MAX, TSK_IBD_STORE_SEGMENTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    for (j = 0; j < num_pairs; j++) {\n        tsk_identity_segments_get(&result, pairs[j][0], pairs[j][1], &list);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_FATAL(list != NULL);\n        CU_ASSERT_EQUAL_FATAL(list->num_segments, 1);\n        seg = list->head;\n\n        CU_ASSERT_EQUAL_FATAL(seg->left, 0);\n        CU_ASSERT_EQUAL_FATAL(seg->right, 1);\n        CU_ASSERT_EQUAL_FATAL(seg->node, true_node[j]);\n    }\n\n    verify_ibd_result(&result);\n    tsk_identity_segments_free(&result);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_ibd_segments_multiple_ibd_paths(void)\n{\n    int ret;\n    tsk_size_t j, k;\n    tsk_treeseq_t ts;\n    tsk_id_t pairs[][2] = { { 0, 1 }, { 0, 2 }, { 1, 2 } };\n    tsk_size_t num_pairs = 3;\n    tsk_identity_segments_t result;\n    double true_left[3][2] = { { 0.2, 0.0 }, { 0.2, 0.0 }, { 0.0, 0.2 } };\n    double true_right[3][2] = { { 1.0, 0.2 }, { 1.0, 0.2 }, { 0.2, 1.0 } };\n    double true_node[3][2] = { { 4, 5 }, { 3, 5 }, { 4, 4 } };\n    
tsk_identity_segment_list_t *list;\n    tsk_identity_segment_t *seg;\n\n    tsk_treeseq_from_text(&ts, 2, multi_path_tree_ex_nodes, multi_path_tree_ex_edges,\n        NULL, NULL, NULL, NULL, NULL, 0);\n\n    ret = tsk_table_collection_ibd_within(\n        ts.tables, &result, NULL, 0, 0.0, DBL_MAX, TSK_IBD_STORE_SEGMENTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (j = 0; j < num_pairs; j++) {\n        tsk_identity_segments_get(&result, pairs[j][0], pairs[j][1], &list);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n        k = 0;\n        for (seg = list->head; seg != NULL; seg = seg->next) {\n            CU_ASSERT_EQUAL_FATAL(seg->left, true_left[j][k]);\n            CU_ASSERT_EQUAL_FATAL(seg->right, true_right[j][k]);\n            CU_ASSERT_EQUAL_FATAL(seg->node, true_node[j][k]);\n            k++;\n        }\n        CU_ASSERT_EQUAL_FATAL(k, 2);\n    }\n\n    verify_ibd_result(&result);\n    tsk_identity_segments_free(&result);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_ibd_segments_odd_topologies(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    tsk_id_t samples[] = { 0, 1 };\n    tsk_id_t samples1[] = { 0, 2 };\n    tsk_identity_segments_t result;\n\n    tsk_treeseq_from_text(\n        &ts, 1, odd_tree1_ex_nodes, odd_tree1_ex_edges, NULL, NULL, NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    // Multiple roots.\n    ret = tsk_table_collection_ibd_within(\n        &tables, &result, samples, 1, 0, 0, TSK_IBD_STORE_SEGMENTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    verify_ibd_result(&result);\n    tsk_identity_segments_free(&result);\n\n    // Parent is a sample.\n    ret = tsk_table_collection_ibd_within(\n        &tables, &result, samples1, 1, 0, 0, TSK_IBD_STORE_SEGMENTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    verify_ibd_result(&result);\n    tsk_identity_segments_free(&result);\n\n    
tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplify_tables_drops_indexes(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    tsk_id_t samples[] = { 0, 1 };\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_TRUE(tsk_table_collection_has_index(&tables, 0))\n    ret = tsk_table_collection_simplify(&tables, samples, 2, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_table_collection_has_index(&tables, 0))\n\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplify_empty_tables(void)\n{\n    int ret;\n    tsk_table_collection_t tables;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1;\n\n    ret = tsk_table_collection_simplify(&tables, NULL, 0, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 0);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_simplify_metadata(void)\n{\n    int ret;\n    tsk_table_collection_t tables;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 10;\n    tsk_edge_table_add_row(&tables.edges, 0, 0, 1, 1, \"metadata\", 8);\n    ret = tsk_table_collection_simplify(&tables, NULL, 0, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_CANT_PROCESS_EDGES_WITH_METADATA);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_edge_update_invalidates_index(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, 
NULL, 0);\n\n    /* Any operations on the edge table should now invalidate the index */\n    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_has_index(&tables, 0))\n    ret = tsk_edge_table_clear(&tables.edges);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_table_collection_has_index(&tables, 0));\n    /* Even though the actual indexes still exist */\n    CU_ASSERT_FALSE(tables.indexes.edge_insertion_order == NULL);\n    CU_ASSERT_FALSE(tables.indexes.edge_removal_order == NULL);\n    CU_ASSERT_EQUAL_FATAL(tables.indexes.num_edges, tsk_treeseq_get_num_edges(&ts));\n\n    ret = tsk_treeseq_copy_tables(&ts, &tables, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_has_index(&tables, 0))\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0, 1, 0, 1, NULL, 0);\n    CU_ASSERT_TRUE(ret_id > 0);\n    CU_ASSERT_FALSE(tsk_table_collection_has_index(&tables, 0));\n    /* Even though the actual indexes still exist */\n    CU_ASSERT_FALSE(tables.indexes.edge_insertion_order == NULL);\n    CU_ASSERT_FALSE(tables.indexes.edge_removal_order == NULL);\n    CU_ASSERT_EQUAL_FATAL(tables.indexes.num_edges, tsk_treeseq_get_num_edges(&ts));\n\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_copy_table_collection(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables, tables_copy;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Add some migrations, population and provenance */\n    ret_id = tsk_migration_table_add_row(&tables.migrations, 0, 1, 2, 3, 4, 5, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_migration_table_add_row(&tables.migrations, 
1, 2, 3, 4, 5, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    ret_id = tsk_population_table_add_row(&tables.populations, \"metadata\", 8);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_population_table_add_row(&tables.populations, \"other\", 5);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    ret_id = tsk_provenance_table_add_row(&tables.provenances, \"time\", 4, \"record\", 6);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_provenance_table_add_row(&tables.provenances, \"time \", 5, \"record \", 7);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n\n    tsk_table_collection_copy(&tables, &tables_copy, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tables, &tables_copy, 0));\n\n    tsk_table_collection_free(&tables);\n    tsk_table_collection_free(&tables_copy);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_sort_tables_offsets(void)\n{\n    int ret;\n    tsk_treeseq_t *ts;\n    tsk_table_collection_t tables, copy;\n    tsk_bookmark_t bookmark;\n\n    ts = caterpillar_tree(10, 5, 5);\n    ret = tsk_treeseq_copy_tables(ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_table_collection_sort(&tables, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    /* Check that setting edge offset = len(edges) does nothing */\n    reverse_edges(&tables);\n    ret = tsk_table_collection_copy(&tables, &copy, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_memset(&bookmark, 0, sizeof(bookmark));\n    bookmark.edges = tables.edges.num_rows;\n    ret = tsk_table_collection_sort(&tables, &bookmark, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy, 0));\n\n    ret = tsk_table_collection_sort(&tables, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    /* Check that setting migration offset = len(migrations) does nothing */\n    reverse_migrations(&tables);\n    ret = tsk_table_collection_copy(&tables, &copy, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_memset(&bookmark, 0, 
sizeof(bookmark));\n    bookmark.migrations = tables.migrations.num_rows;\n    ret = tsk_table_collection_sort(&tables, &bookmark, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy, 0));\n\n    ret = tsk_table_collection_sort(&tables, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FATAL(tables.sites.num_rows > 2);\n    CU_ASSERT_FATAL(tables.mutations.num_rows > 2);\n\n    /* Check that setting mutation and site offset = to the len\n     * of the tables leaves them untouched. */\n    reverse_mutations(&tables);\n    /* Swap the positions of the first two sites, as a quick way\n     * to disorder the site table */\n    tables.sites.position[0] = tables.sites.position[1];\n    tables.sites.position[1] = 0;\n    ret = tsk_table_collection_copy(&tables, &copy, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_memset(&bookmark, 0, sizeof(bookmark));\n    bookmark.sites = tables.sites.num_rows;\n    bookmark.mutations = tables.mutations.num_rows;\n    ret = tsk_table_collection_sort(&tables, &bookmark, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy, 0));\n\n    /* Anything other than len(table) leads to an error for sites\n     * and mutations, and we can't specify one without the other. 
*/\n    tsk_memset(&bookmark, 0, sizeof(bookmark));\n    bookmark.sites = tables.sites.num_rows;\n    ret = tsk_table_collection_sort(&tables, &bookmark, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SORT_OFFSET_NOT_SUPPORTED);\n\n    tsk_memset(&bookmark, 0, sizeof(bookmark));\n    bookmark.mutations = tables.mutations.num_rows;\n    ret = tsk_table_collection_sort(&tables, &bookmark, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SORT_OFFSET_NOT_SUPPORTED);\n\n    tsk_memset(&bookmark, 0, sizeof(bookmark));\n    bookmark.sites = tables.sites.num_rows - 1;\n    bookmark.mutations = tables.mutations.num_rows - 1;\n    ret = tsk_table_collection_sort(&tables, &bookmark, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SORT_OFFSET_NOT_SUPPORTED);\n\n    /* Individuals must either all be sorted or all skipped */\n    ret = tsk_table_collection_sort(&tables, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    /* Add a parent relation that unsorts the table */\n    tables.individuals.parents[0] = 5;\n    ret = tsk_table_collection_copy(&tables, &copy, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_memset(&bookmark, 0, sizeof(bookmark));\n    bookmark.individuals = tables.individuals.num_rows;\n    ret = tsk_table_collection_sort(&tables, &bookmark, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tables, &copy, 0));\n\n    /* Check that sorting would have had no effect, as individuals are not in the\n     * default sort */\n    ret = tsk_table_collection_sort(&tables, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&tables, &copy, 0));\n\n    /* Individual bookmark ignored */\n    tsk_memset(&bookmark, 0, sizeof(bookmark));\n    bookmark.individuals = tables.individuals.num_rows - 1;\n    ret = tsk_table_collection_sort(&tables, &bookmark, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_table_collection_free(&tables);\n    tsk_table_collection_free(&copy);\n    tsk_treeseq_free(ts);\n    
free(ts);\n}\n\nstatic void\ntest_sort_tables_drops_indexes_with_options(tsk_flags_t tc_options)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &tables, tc_options);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_TRUE(tsk_table_collection_has_index(&tables, 0))\n    ret = tsk_table_collection_sort(&tables, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_table_collection_has_index(&tables, 0))\n\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_sort_tables_drops_indexes(void)\n{\n    test_sort_tables_drops_indexes_with_options(0);\n    test_sort_tables_drops_indexes_with_options(TSK_TC_NO_EDGE_METADATA);\n}\n\nstatic void\ntest_sort_tables_edge_metadata(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t t1, t2;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    insert_edge_metadata(&t1);\n    ret = tsk_table_collection_copy(&t1, &t2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n    reverse_edges(&t1);\n    CU_ASSERT_FALSE(tsk_table_collection_equals(&t1, &t2, 0));\n    ret = tsk_table_collection_sort(&t1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n\n    tsk_table_collection_free(&t1);\n    tsk_table_collection_free(&t2);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_sort_tables_no_edge_metadata(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t t1, t2;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 
0);\n    ret = tsk_treeseq_copy_tables(&ts, &t1, TSK_TC_NO_EDGE_METADATA);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FATAL(t1.edges.options & TSK_TABLE_NO_METADATA);\n    ret = tsk_table_collection_copy(&t1, &t2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(t2.edges.options & TSK_TABLE_NO_METADATA);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n    reverse_edges(&t1);\n    CU_ASSERT_FALSE(tsk_table_collection_equals(&t1, &t2, 0));\n    ret = tsk_table_collection_sort(&t1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n    tsk_table_collection_free(&t2);\n\n    ret = tsk_table_collection_copy(&t1, &t2, TSK_TC_NO_EDGE_METADATA);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(t1.edges.options & TSK_TABLE_NO_METADATA);\n    CU_ASSERT_TRUE(t2.edges.options & TSK_TABLE_NO_METADATA);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n    reverse_edges(&t1);\n    CU_ASSERT_FALSE(tsk_table_collection_equals(&t1, &t2, 0));\n    ret = tsk_table_collection_sort(&t1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n    tsk_table_collection_free(&t2);\n\n    tsk_table_collection_free(&t1);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_sort_tables_errors(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    tsk_bookmark_t pos;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_memset(&pos, 0, sizeof(pos));\n    /* Everything 0 should be fine */\n    ret = tsk_table_collection_sort(&tables, &pos, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Everything is sorted already */\n    pos.edges = tables.edges.num_rows;\n    ret = 
tsk_table_collection_sort(&tables, &pos, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    pos.edges = (tsk_size_t) -1;\n    ret = tsk_table_collection_sort(&tables, &pos, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);\n\n    pos.edges = tables.edges.num_rows + 1;\n    ret = tsk_table_collection_sort(&tables, &pos, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);\n\n    tsk_memset(&pos, 0, sizeof(pos));\n    pos.migrations = (tsk_size_t) -1;\n    ret = tsk_table_collection_sort(&tables, &pos, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATION_OUT_OF_BOUNDS);\n\n    pos.migrations = tables.migrations.num_rows + 1;\n    ret = tsk_table_collection_sort(&tables, &pos, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATION_OUT_OF_BOUNDS);\n\n    /* Node, population and provenance positions are ignored */\n    tsk_memset(&pos, 0, sizeof(pos));\n    pos.nodes = 1;\n    ret = tsk_table_collection_sort(&tables, &pos, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_memset(&pos, 0, sizeof(pos));\n    pos.populations = 1;\n    ret = tsk_table_collection_sort(&tables, &pos, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_memset(&pos, 0, sizeof(pos));\n    pos.provenances = 1;\n    ret = tsk_table_collection_sort(&tables, &pos, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Specifying only one of sites or mutations is an error */\n    tsk_memset(&pos, 0, sizeof(pos));\n    pos.sites = 1;\n    ret = tsk_table_collection_sort(&tables, &pos, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SORT_OFFSET_NOT_SUPPORTED);\n\n    tsk_memset(&pos, 0, sizeof(pos));\n    pos.mutations = 1;\n    ret = tsk_table_collection_sort(&tables, &pos, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SORT_OFFSET_NOT_SUPPORTED);\n\n    /* Test TSK_ERR_MUTATION_PARENT_INCONSISTENT */\n    ret = tsk_table_collection_clear(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1.0;\n\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0.0, 
TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_site_table_add_row(&tables.sites, 0.0, \"x\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret_id\n        = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 2, 0.0, \"a\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id\n        = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 3, 0.0, \"b\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id\n        = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 1, 0.0, \"c\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id\n        = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 2, 0.0, \"d\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_table_collection_sort(&tables, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_PARENT_INCONSISTENT);\n\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_sort_tables_mutation_times(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables, t1, t2;\n    const char *sites = \"0       0\\n\"\n                        \"0.1     0\\n\"\n                        \"0.2     0\\n\"\n                        \"0.3     0\\n\";\n    const char *mutations = \"0   0  1  -1  3\\n\"\n                            \"1   1  1  -1  3\\n\"\n                            \"2   4  1  -1  8\\n\"\n                            \"2   1  0  -1   4\\n\"\n                            \"2   2  1  -1  3\\n\"\n                            \"2   1  1  -1   2\\n\"\n                            \"3   6  1  -1  10\\n\";\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tables.sequence_length = 1;\n    parse_nodes(single_tree_ex_nodes, &tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 7);\n    tables.nodes.time[4] = 6;\n    tables.nodes.time[5] = 8;\n    tables.nodes.time[6] = 10;\n    parse_edges(single_tree_ex_edges, &tables.edges);\n    
CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 6);\n    parse_sites(sites, &tables.sites);\n    parse_mutations(mutations, &tables.mutations);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.num_rows, 4);\n    CU_ASSERT_EQUAL_FATAL(tables.mutations.num_rows, 7);\n    tables.sequence_length = 1.0;\n\n    ret = tsk_table_collection_build_index(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Check to make sure we have legal mutations */\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_COMPUTE_MUTATION_PARENTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_treeseq_copy_tables(&ts, &t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_table_collection_copy(&t1, &t2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n    reverse_mutations(&t1);\n    CU_ASSERT_FALSE(tsk_table_collection_equals(&t1, &t2, 0));\n    ret = tsk_table_collection_sort(&t1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n    tsk_table_collection_free(&t2);\n\n    tsk_table_collection_free(&t1);\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_sort_tables_mutations(void)\n{\n    int ret;\n    tsk_table_collection_t tables;\n\n    /* Sorting hierarchy:\n     * 1. site\n     * 2. time (when known)\n     * 3. node_time\n     * 4. num_descendants: parent mutations first\n     * 5. node_id\n     * 6. 
mutation_id\n     */\n\n    const char *sites = \"0.0   A\\n\"\n                        \"0.5   T\\n\"\n                        \"0.75  G\\n\";\n\n    const char *mutations_unsorted =\n        /* Test site criterion (primary) - site 1 should come after site 0 */\n        \"1   0  X  -1  0.0\\n\" /* mut 0: site 1, will be sorted after site 0 mutations */\n        \"0   0  Y  -1  0.0\\n\" /* mut 1: site 0, will be sorted before site 1 mutations */\n\n        /* Test time criterion - within same site, earlier time first */\n        \"0   4  B  -1  2.0\\n\" /* mut 2: site 0, node 4 (time 1.0), time 2.0 (later time)\n                               */\n        \"0   5  A  -1  2.5\\n\" /* mut 3: site 0, node 5 (time 2.0), time 2.5 (earlier\n                                 relative) */\n\n        /* Test unknown vs known times - unknown times at site 2, fall back to node_time\n           sorting */\n        \"2   4  U2  -1\\n\" /* mut 4: site 2, node 4 (time 1.0), unknown time - falls back\n                             to node_time */\n        \"2   4  U3  -1\\n\" /* mut 5: site 2, node 4 (time 1.0), unknown time - should use\n                             mutation_id as tiebreaker */\n        \"2   5  U1  -1\\n\" /* mut 6: site 2, node 5 (time 2.0), unknown time - falls back\n                             to node_time */\n\n        /* Test node_time criterion - same site, same mut time, different node times */\n        \"0   4  D  -1  1.5\\n\" /* mut 7: site 0, node 4 (time 1.0), mut time 1.5 */\n        \"0   5  C  -1  2.5\\n\" /* mut 8: site 0, node 5 (time 2.0), mut time 2.5 - same\n                                 mut time */\n\n        /* Test num_descendants criterion with mutation parent-child relationships */\n        \"0   2  P  -1  0.0\\n\"  /* mut 9: site 0, node 2, parent mutation (0 descendants\n                                  initially) */\n        \"0   1  C1  9  0.0\\n\"  /* mut 10: site 0, node 1, child of mut 9 (parent now has\n                          
        1+ descendants) */\n        \"0   1  C2  9  0.0\\n\"  /* mut 11: site 0, node 1, another child of mut 9 (parent\n                                  now has 2+ descendants) */\n        \"0   3  Q  -1  0.0\\n\"  /* mut 12: site 0, node 3, no children (0 descendants) */\n        \"0   0  C3  10  0.0\\n\" /* mut 13: site 0, node 0, child of mut 10 (making mut 9 a\n                                  grandparent) */\n\n        /* Test node and mutation_id criteria for final tiebreaking */\n        \"0   0  Z1  -1  0.0\\n\"  /* mut 14: site 0, node 0, no parent, will test node+id\n                                   ordering */\n        \"0   0  Z2  -1  0.0\\n\"; /* mut 15: site 0, node 0, no parent, later in input =\n                                   higher ID */\n\n    const char *mutations_sorted =\n        /* Site 0 mutations - known times first, sorted by time */\n        \"0   5  A  -1  2.5\\n\"\n        \"0   5  C  -1  2.5\\n\"\n        \"0   4  B  -1  2.0\\n\"\n        \"0   4  D  -1  1.5\\n\"\n        \"0   2  P  -1  0.0\\n\"\n        \"0   1  C1  4  0.0\\n\"\n        \"0   0  Y  -1  0.0\\n\"\n        \"0   0  C3  5  0.0\\n\"\n        \"0   0  Z1  -1  0.0\\n\"\n        \"0   0  Z2  -1  0.0\\n\"\n        \"0   1  C2  4  0.0\\n\"\n        \"0   3  Q  -1  0.0\\n\"\n\n        /* Site 1 mutations */\n        \"1   0  X  -1  0.0\\n\"\n\n        /* Site 2 mutations - unknown times, sorted by node_time then other criteria */\n        \"2   5  U1  -1\\n\"\n        \"2   4  U2  -1\\n\"\n        \"2   4  U3  -1\\n\";\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1.0;\n    parse_nodes(single_tree_ex_nodes, &tables.nodes);\n    parse_edges(single_tree_ex_edges, &tables.edges);\n\n    parse_sites(sites, &tables.sites);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.num_rows, 3);\n\n    parse_mutations(mutations_unsorted, &tables.mutations);\n    CU_ASSERT_EQUAL_FATAL(tables.mutations.num_rows, 16);\n\n    
ret = tsk_table_collection_sort(&tables, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_table_collection_t expected;\n    ret = tsk_table_collection_init(&expected, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    expected.sequence_length = 1.0;\n    parse_nodes(single_tree_ex_nodes, &expected.nodes);\n    parse_edges(single_tree_ex_edges, &expected.edges);\n    parse_sites(sites, &expected.sites);\n    parse_mutations(mutations_sorted, &expected.mutations);\n\n    CU_ASSERT_TRUE(tsk_mutation_table_equals(&tables.mutations, &expected.mutations, 0));\n\n    tsk_table_collection_free(&expected);\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_sort_tables_canonical_errors(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_table_collection_t tables;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1;\n    tsk_id_t null_p[] = { -1 };\n    tsk_id_t zero_p[] = { 0 };\n    tsk_id_t one_p[] = { 1 };\n\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0.0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_site_table_add_row(&tables.sites, 0.0, \"x\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id\n        = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 2, 0.0, \"a\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id\n        = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 3, 0.0, \"b\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id\n        = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 1, 0.0, \"c\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id\n        = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 2, 0.0, \"d\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_table_collection_canonicalise(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_PARENT_INCONSISTENT);\n\n    ret = tsk_mutation_table_clear(&tables.mutations);\n    
CU_ASSERT_FATAL(ret == 0);\n    ret_id\n        = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 2, 0.0, \"a\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id\n        = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 3, 0.0, \"b\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id\n        = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 1, 0.0, \"c\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id\n        = tsk_mutation_table_add_row(&tables.mutations, 0, 0, -1, 0.0, \"d\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_table_collection_canonicalise(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0.0, TSK_NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0.0, TSK_NULL, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, one_p, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, zero_p, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_table_collection_canonicalise(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_PARENT_CYCLE);\n\n    ret = tsk_individual_table_clear(&tables.individuals);\n    CU_ASSERT_FATAL(ret == 0);\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, zero_p, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, zero_p, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_table_collection_canonicalise(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_SELF_PARENT);\n\n    ret = tsk_individual_table_clear(&tables.individuals);\n    CU_ASSERT_FATAL(ret == 0);\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, 
NULL, 0, null_p, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, zero_p, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_table_collection_canonicalise(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_sort_tables_canonical(void)\n{\n    int ret;\n    tsk_table_collection_t t1, t2;\n    // this is single_tree_ex with individuals and populations\n    const char *nodes = \"1  0   -1    1\\n\"\n                        \"1  0    2    3\\n\"\n                        \"1  0    0   -1\\n\"\n                        \"1  0   -1    3\\n\"\n                        \"0  1    2   -1\\n\"\n                        \"0  2   -1    2\\n\"\n                        \"0  3   -1   -1\\n\";\n    const char *individuals = \"0 0.0 1\\n\"\n                              \"0 1.0 -1\\n\"\n                              \"0 2.0 1,3\\n\"\n                              \"0 3.0 -1,1\\n\";\n    const char *sites = \"0       0\\n\"\n                        \"0.2     0\\n\"\n                        \"0.1     0\\n\";\n    const char *mutations = \"0   0  2   3 0.5\\n\"\n                            \"2   1  1  -1 0.5\\n\"\n                            \"1   4  3  -1   3\\n\"\n                            \"0   4  1  -1 2.5\\n\"\n                            \"2   2  1  -1   2\\n\"\n                            \"1   1  5   7 0.5\\n\"\n                            \"1   2  1  -1   2\\n\"\n                            \"1   1  4   2 0.5\\n\"\n                            \"1   1  6   7 0.5\\n\";\n    const char *nodes_sorted = \"1  0   -1    0\\n\"\n                               \"1  0    0    1\\n\"\n                               \"1  0    1   -1\\n\"\n                               \"1  0   -1    1\\n\"\n                               \"0  1    0   -1\\n\"\n                               \"0  2   -1    2\\n\"\n                               
\"0  3   -1   -1\\n\";\n    const char *individuals_sorted = \"0 1.0 -1\\n\"\n                                     \"0 3.0 -1,0\\n\"\n                                     \"0 2.0 0,1\\n\";\n    const char *sites_sorted = \"0       0\\n\"\n                               \"0.1     0\\n\"\n                               \"0.2     0\\n\";\n    const char *mutations_sorted = \"0   4  1  -1 2.5\\n\"\n                                   \"0   0  2   0 0.5\\n\"\n                                   \"1   2  1  -1   2\\n\"\n                                   \"1   1  1  -1 0.5\\n\"\n                                   \"2   4  3  -1   3\\n\"\n                                   \"2   2  1  -1   2\\n\"\n                                   \"2   1  4   4 0.5\\n\"\n                                   \"2   1  5   6 0.5\\n\"\n                                   \"2   1  6   6 0.5\\n\";\n    const char *individuals_sorted_kept = \"0 1.0 -1\\n\"\n                                          \"0 3.0 -1,0\\n\"\n                                          \"0 2.0 0,1\\n\"\n                                          \"0 0.0 0\\n\";\n\n    ret = tsk_table_collection_init(&t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    t1.sequence_length = 1.0;\n    ret = tsk_table_collection_init(&t2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    t2.sequence_length = 1.0;\n\n    parse_nodes(nodes, &t1.nodes);\n    CU_ASSERT_EQUAL_FATAL(t1.nodes.num_rows, 7);\n    parse_individuals(individuals, &t1.individuals);\n    CU_ASSERT_EQUAL_FATAL(t1.individuals.num_rows, 4);\n    tsk_population_table_add_row(&t1.populations, \"A\", 1);\n    tsk_population_table_add_row(&t1.populations, \"B\", 1);\n    tsk_population_table_add_row(&t1.populations, \"C\", 1);\n    parse_edges(single_tree_ex_edges, &t1.edges);\n    CU_ASSERT_EQUAL_FATAL(t1.edges.num_rows, 6);\n    parse_sites(sites, &t1.sites);\n    CU_ASSERT_EQUAL_FATAL(t1.sites.num_rows, 3);\n    parse_mutations(mutations, &t1.mutations);\n    
CU_ASSERT_EQUAL_FATAL(t1.mutations.num_rows, 9);\n\n    ret = tsk_table_collection_canonicalise(&t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    parse_nodes(nodes_sorted, &t2.nodes);\n    tsk_population_table_add_row(&t2.populations, \"C\", 1);\n    tsk_population_table_add_row(&t2.populations, \"A\", 1);\n    CU_ASSERT_EQUAL_FATAL(t2.nodes.num_rows, 7);\n    parse_individuals(individuals_sorted, &t2.individuals);\n    CU_ASSERT_EQUAL_FATAL(t2.individuals.num_rows, 3);\n    parse_edges(single_tree_ex_edges, &t2.edges);\n    CU_ASSERT_EQUAL_FATAL(t2.edges.num_rows, 6);\n    parse_sites(sites_sorted, &t2.sites);\n    parse_mutations(mutations_sorted, &t2.mutations);\n    CU_ASSERT_EQUAL_FATAL(t2.sites.num_rows, 3);\n    CU_ASSERT_EQUAL_FATAL(t2.mutations.num_rows, 9);\n\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n\n    ret = tsk_table_collection_clear(&t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_clear(&t2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    // now with KEEP_UNREFERENCED\n    parse_nodes(nodes, &t1.nodes);\n    parse_individuals(individuals, &t1.individuals);\n    tsk_population_table_add_row(&t1.populations, \"A\", 1);\n    tsk_population_table_add_row(&t1.populations, \"B\", 1);\n    tsk_population_table_add_row(&t1.populations, \"C\", 1);\n    parse_edges(single_tree_ex_edges, &t1.edges);\n    parse_sites(sites, &t1.sites);\n    parse_mutations(mutations, &t1.mutations);\n\n    ret = tsk_table_collection_canonicalise(&t1, TSK_SUBSET_KEEP_UNREFERENCED);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    parse_nodes(nodes_sorted, &t2.nodes);\n    tsk_population_table_add_row(&t2.populations, \"C\", 1);\n    tsk_population_table_add_row(&t2.populations, \"A\", 1);\n    tsk_population_table_add_row(&t2.populations, \"B\", 1);\n    parse_individuals(individuals_sorted_kept, &t2.individuals);\n    CU_ASSERT_EQUAL_FATAL(t2.individuals.num_rows, 4);\n    parse_edges(single_tree_ex_edges, &t2.edges);\n    
parse_sites(sites_sorted, &t2.sites);\n    parse_mutations(mutations_sorted, &t2.mutations);\n\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n\n    tsk_table_collection_free(&t2);\n    tsk_table_collection_free(&t1);\n}\n\nstatic void\ntest_sort_tables_migrations(void)\n{\n    int ret;\n    tsk_treeseq_t *ts;\n    tsk_table_collection_t tables, copy;\n\n    ts = caterpillar_tree(13, 1, 1);\n    ret = tsk_treeseq_copy_tables(ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FATAL(tables.migrations.num_rows > 0);\n\n    ret = tsk_table_collection_copy(&tables, &copy, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy, 0));\n\n    reverse_migrations(&tables);\n    CU_ASSERT_FATAL(!tsk_table_collection_equals(&tables, &copy, 0));\n    ret = tsk_table_collection_sort(&tables, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FATAL(tsk_migration_table_equals(&tables.migrations, &copy.migrations, 0));\n    CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy, 0));\n\n    /* Make sure we test the deeper comparison keys. 
The full key is\n     * (time, source, dest, left, node) */\n    tsk_migration_table_clear(&tables.migrations);\n\n    /* params = left, right, node, source, dest, time */\n    tsk_migration_table_add_row(&tables.migrations, 0, 1, 0, 0, 1, 0, NULL, 0);\n    tsk_migration_table_add_row(&tables.migrations, 0, 1, 1, 0, 1, 0, NULL, 0);\n    ret = tsk_migration_table_copy(&tables.migrations, &copy.migrations, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    reverse_migrations(&tables);\n    CU_ASSERT_FATAL(!tsk_table_collection_equals(&tables, &copy, 0));\n    ret = tsk_table_collection_sort(&tables, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FATAL(tsk_migration_table_equals(&tables.migrations, &copy.migrations, 0));\n\n    tsk_table_collection_free(&tables);\n    tsk_table_collection_free(&copy);\n    tsk_treeseq_free(ts);\n    free(ts);\n}\n\nstatic void\ntest_sort_tables_individuals(void)\n{\n    int ret;\n    tsk_table_collection_t tables, copy;\n    const char *individuals = \"1      0.25   2,3 0\\n\"\n                              \"2      0.5    5,-1  1\\n\"\n                              \"3      0.3    -1  2\\n\"\n                              \"4      0.3    -1  3\\n\"\n                              \"5      0.3    3   4\\n\"\n                              \"6      0.3    4   5\\n\";\n    const char *individuals_cycle = \"1      0.2    2  0\\n\"\n                                    \"2      0.5    0  1\\n\"\n                                    \"3      0.3    1  2\\n\";\n    const tsk_id_t bad_parents[] = { 200 };\n    tsk_id_t ret_id;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1.0;\n    parse_individuals(individuals, &tables.individuals);\n\n    ret = tsk_table_collection_copy(&tables, &copy, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Table sort doesn't touch individuals by default*/\n    ret = tsk_table_collection_sort(&tables, NULL, 0);\n    
CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy, 0));\n\n    /* Not calling with TSK_CHECK_TREES so casting is safe */\n    ret = (int) tsk_table_collection_check_integrity(\n        &tables, TSK_CHECK_INDIVIDUAL_ORDERING);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_INDIVIDUALS);\n\n    ret = tsk_table_collection_individual_topological_sort(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = (int) tsk_table_collection_check_integrity(\n        &tables, TSK_CHECK_INDIVIDUAL_ORDERING);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Check that the sort is stable */\n    tsk_table_collection_free(&copy);\n    ret = tsk_table_collection_copy(&tables, &copy, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_individual_topological_sort(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy, 0));\n\n    /* Errors on bad table */\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, bad_parents, 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 6);\n    ret = tsk_table_collection_individual_topological_sort(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n\n    /* Errors on cycle */\n    tsk_individual_table_clear(&tables.individuals);\n    parse_individuals(individuals_cycle, &tables.individuals);\n    ret = tsk_table_collection_individual_topological_sort(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_INDIVIDUAL_PARENT_CYCLE);\n\n    tsk_table_collection_free(&tables);\n    tsk_table_collection_free(&copy);\n}\n\nstatic void\ntest_sorter_interface(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    tsk_table_sorter_t sorter;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, &tables, 0));\n\n    /* Nominal case */\n    reverse_edges(&tables);\n    CU_ASSERT_FALSE(tsk_table_collection_equals(ts.tables, &tables, 0));\n    ret = tsk_table_sorter_init(&sorter, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_sorter_run(&sorter, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, &tables, 0));\n    CU_ASSERT_EQUAL(sorter.user_data, NULL);\n    tsk_table_sorter_free(&sorter);\n\n    /* If we set the sort_edges function to NULL then we should leave the\n     * edge table as is. */\n    reverse_edges(&tables);\n    CU_ASSERT_FALSE(tsk_edge_table_equals(&ts.tables->edges, &tables.edges, 0));\n    ret = tsk_table_sorter_init(&sorter, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    sorter.sort_edges = NULL;\n    ret = tsk_table_sorter_run(&sorter, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_edge_table_equals(&ts.tables->edges, &tables.edges, 0));\n    tsk_table_sorter_free(&sorter);\n\n    /* Reversing again should make them equal */\n    reverse_edges(&tables);\n    CU_ASSERT_TRUE(tsk_edge_table_equals(&ts.tables->edges, &tables.edges, 0));\n\n    /* Do not check integrity before sorting */\n    reverse_edges(&tables);\n    CU_ASSERT_FALSE(tsk_table_collection_equals(ts.tables, &tables, 0));\n    ret = tsk_table_sorter_init(&sorter, &tables, TSK_NO_CHECK_INTEGRITY);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_sorter_run(&sorter, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, &tables, 0));\n    tsk_table_sorter_free(&sorter);\n\n    /* The user_data shouldn't be touched */\n    reverse_edges(&tables);\n    CU_ASSERT_FALSE(tsk_table_collection_equals(ts.tables, &tables, 0));\n    ret = tsk_table_sorter_init(&sorter, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    sorter.user_data = (void *) &ts;\n    ret = 
tsk_table_sorter_run(&sorter, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, &tables, 0));\n    CU_ASSERT_EQUAL_FATAL(sorter.user_data, &ts);\n    tsk_table_sorter_free(&sorter);\n\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_dump_unindexed_with_options(tsk_flags_t tc_options)\n{\n    tsk_table_collection_t tables, loaded;\n    int ret;\n\n    ret = tsk_table_collection_init(&tables, tc_options);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tables.sequence_length = 1;\n    parse_nodes(single_tree_ex_nodes, &tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 7);\n    parse_edges(single_tree_ex_edges, &tables.edges);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 6);\n    CU_ASSERT_FALSE(tsk_table_collection_has_index(&tables, 0));\n    ret = tsk_table_collection_dump(&tables, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_table_collection_has_index(&tables, 0));\n\n    ret = tsk_table_collection_load(&loaded, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_table_collection_has_index(&loaded, 0));\n    CU_ASSERT_TRUE(tsk_node_table_equals(&tables.nodes, &loaded.nodes, 0));\n    CU_ASSERT_TRUE(tsk_edge_table_equals(&tables.edges, &loaded.edges, 0));\n\n    tsk_table_collection_free(&loaded);\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_dump_unindexed(void)\n{\n    test_dump_unindexed_with_options(0);\n    test_dump_unindexed_with_options(TSK_TC_NO_EDGE_METADATA);\n}\n\nstatic void\ntest_dump_load_empty_with_options(tsk_flags_t tc_options)\n{\n    int ret;\n    tsk_table_collection_t t1, t2;\n\n    ret = tsk_table_collection_init(&t1, tc_options);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    t1.sequence_length = 1.0;\n    ret = tsk_table_collection_dump(&t1, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&t2, _tmp_file_name, 
0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n\n    tsk_table_collection_free(&t1);\n    tsk_table_collection_free(&t2);\n}\n\nstatic void\ntest_dump_load_empty(void)\n{\n    test_dump_load_empty_with_options(0);\n    test_dump_load_empty_with_options(TSK_TC_NO_EDGE_METADATA);\n}\n\nstatic void\ntest_dump_load_unsorted_with_options(tsk_flags_t tc_options)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_table_collection_t t1, t2;\n\n    ret = tsk_table_collection_init(&t1, tc_options);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    t1.sequence_length = 1.0;\n\n    ret_id = tsk_node_table_add_row(\n        &t1.nodes, TSK_NODE_IS_SAMPLE, 0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_node_table_add_row(\n        &t1.nodes, TSK_NODE_IS_SAMPLE, 0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    ret_id = tsk_node_table_add_row(\n        &t1.nodes, TSK_NODE_IS_SAMPLE, 0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 2);\n    ret_id = tsk_node_table_add_row(\n        &t1.nodes, TSK_NODE_IS_SAMPLE, 1, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 3);\n    ret_id = tsk_node_table_add_row(\n        &t1.nodes, TSK_NODE_IS_SAMPLE, 2, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 4);\n\n    ret_id = tsk_edge_table_add_row(&t1.edges, 0, 1, 3, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_edge_table_add_row(&t1.edges, 0, 1, 4, 3, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    ret_id = tsk_edge_table_add_row(&t1.edges, 0, 1, 3, 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 2);\n    ret_id = tsk_edge_table_add_row(&t1.edges, 0, 1, 4, 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 3);\n\n    /* Verify that it's unsorted */\n    ret = (int) tsk_table_collection_check_integrity(&t1, TSK_CHECK_EDGE_ORDERING);\n    CU_ASSERT_EQUAL_FATAL(ret, 
TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME);\n\n    ret = tsk_table_collection_dump(&t1, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_table_collection_has_index(&t1, 0));\n    ret = tsk_table_collection_load(&t2, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n    CU_ASSERT_FALSE(tsk_table_collection_has_index(&t1, 0));\n    CU_ASSERT_FALSE(tsk_table_collection_has_index(&t2, 0));\n\n    tsk_table_collection_free(&t1);\n    tsk_table_collection_free(&t2);\n}\n\nstatic void\ntest_dump_load_unsorted(void)\n{\n    test_dump_load_unsorted_with_options(0);\n    test_dump_load_unsorted_with_options(TSK_TC_NO_EDGE_METADATA);\n}\n\nstatic void\ntest_dump_load_metadata_schema(void)\n{\n    int ret;\n    tsk_table_collection_t t1, t2;\n\n    ret = tsk_table_collection_init(&t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    t1.sequence_length = 1.0;\n    char example[100] = \"An example of metadata schema with unicode 🎄🌳🌴🌲🎋\";\n    tsk_size_t example_length = (tsk_size_t) strlen(example) + 4;\n    tsk_node_table_set_metadata_schema(\n        &t1.nodes, strcat(example, \"node\"), example_length);\n    tsk_edge_table_set_metadata_schema(\n        &t1.edges, strcat(example, \"edge\"), example_length);\n    tsk_site_table_set_metadata_schema(\n        &t1.sites, strcat(example, \"site\"), example_length);\n    tsk_mutation_table_set_metadata_schema(\n        &t1.mutations, strcat(example, \"muta\"), example_length);\n    tsk_migration_table_set_metadata_schema(\n        &t1.migrations, strcat(example, \"migr\"), example_length);\n    tsk_individual_table_set_metadata_schema(\n        &t1.individuals, strcat(example, \"indi\"), example_length);\n    tsk_population_table_set_metadata_schema(\n        &t1.populations, strcat(example, \"popu\"), example_length);\n    ret = tsk_table_collection_dump(&t1, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = 
tsk_table_collection_load(&t2, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n\n    tsk_table_collection_free(&t1);\n    tsk_table_collection_free(&t2);\n}\n\nstatic void\ntest_dump_fail_no_file(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_table_collection_t t1;\n\n    ret = tsk_table_collection_init(&t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    t1.sequence_length = 1.0;\n\n    ret_id = tsk_node_table_add_row(\n        &t1.nodes, TSK_NODE_IS_SAMPLE, 0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_node_table_add_row(\n        &t1.nodes, TSK_NODE_IS_SAMPLE, 0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    ret_id = tsk_node_table_add_row(\n        &t1.nodes, TSK_NODE_IS_SAMPLE, 0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 2);\n    ret_id = tsk_node_table_add_row(\n        &t1.nodes, TSK_NODE_IS_SAMPLE, 1, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 3);\n    ret_id = tsk_node_table_add_row(\n        &t1.nodes, TSK_NODE_IS_SAMPLE, 2, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 4);\n\n    ret_id = tsk_edge_table_add_row(&t1.edges, 0, 1, 3, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_edge_table_add_row(&t1.edges, 0, 1, 4, 3, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    ret_id = tsk_edge_table_add_row(&t1.edges, 0, 1, 3, 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 2);\n    ret_id = tsk_edge_table_add_row(&t1.edges, 0, 1, 4, 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 3);\n\n    /* Verify that it's unsorted */\n    ret = (int) tsk_table_collection_check_integrity(&t1, TSK_CHECK_EDGE_ORDERING);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME);\n\n    /* Make sure the file doesn't exist beforehand. 
*/\n    unlink(_tmp_file_name);\n    errno = 0;\n\n    CU_ASSERT_EQUAL(access(_tmp_file_name, F_OK), -1);\n\n    tsk_table_collection_free(&t1);\n}\n\nstatic void\ntest_load_reindex(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_dump(&ts, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_drop_index(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_table_collection_has_index(&tables, 0));\n    ret = tsk_table_collection_build_index(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_has_index(&tables, 0));\n\n    ret = tsk_table_collection_drop_index(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    /* Dump the unindexed version */\n    ret = tsk_table_collection_dump(&tables, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_free(&tables);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_table_collection_has_index(&tables, 0));\n    ret = tsk_table_collection_build_index(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_has_index(&tables, 0));\n\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_table_overflow(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_table_collection_t tables;\n    tsk_size_t max_rows = ((tsk_size_t) TSK_MAX_ID);\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Simulate overflows */\n    tables.individuals.max_rows = max_rows;\n    tables.individuals.num_rows = max_rows;\n    
ret_id\n        = tsk_individual_table_add_row(&tables.individuals, 0, 0, 0, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLE_OVERFLOW);\n\n    tables.nodes.max_rows = max_rows;\n    tables.nodes.num_rows = max_rows;\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, 0, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLE_OVERFLOW);\n\n    tables.edges.max_rows = max_rows;\n    tables.edges.num_rows = max_rows;\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0, 0, 0, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLE_OVERFLOW);\n\n    tables.migrations.max_rows = max_rows;\n    tables.migrations.num_rows = max_rows;\n    ret_id = tsk_migration_table_add_row(&tables.migrations, 0, 0, 0, 0, 0, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLE_OVERFLOW);\n\n    tables.sites.max_rows = max_rows;\n    tables.sites.num_rows = max_rows;\n    ret_id = tsk_site_table_add_row(&tables.sites, 0, 0, 0, 0, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLE_OVERFLOW);\n\n    tables.mutations.max_rows = max_rows;\n    tables.mutations.num_rows = max_rows;\n    ret_id = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 0, 0, 0, 0, 0, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLE_OVERFLOW);\n\n    tables.provenances.max_rows = max_rows;\n    tables.provenances.num_rows = max_rows;\n    ret_id = tsk_provenance_table_add_row(&tables.provenances, 0, 0, 0, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLE_OVERFLOW);\n\n    tables.populations.max_rows = max_rows;\n    tables.populations.num_rows = max_rows;\n    ret_id = tsk_population_table_add_row(&tables.populations, 0, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLE_OVERFLOW);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_column_overflow(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_table_collection_t tables;\n    tsk_size_t too_big = TSK_MAX_SIZE;\n    double zero = 0;\n    char zeros[] = { 0, 0, 0, 0, 0, 0, 0, 0 };\n    
tsk_id_t id_zeros[] = { 0, 0, 0, 0, 0, 0, 0, 0 };\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    // location\n    /* We can't trigger a column overflow with one element because the parameter\n     * value is 32 bit */\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, &zero, 1, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    // Check normal overflow from additional length\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, too_big, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n    // Check overflow from minimum increment\n    ret = tsk_individual_table_set_max_location_length_increment(\n        &tables.individuals, too_big);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 1, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n    // parents\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, id_zeros, 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, NULL, too_big, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n    ret = tsk_individual_table_set_max_parents_length_increment(\n        &tables.individuals, too_big);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, NULL, 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n    // metadata\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, NULL, 0, zeros, 1);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 2);\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, NULL, 0, NULL, too_big);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n    ret = 
tsk_individual_table_set_max_metadata_length_increment(\n        &tables.individuals, too_big);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 1);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, 0, 0, zeros, 1);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, 0, 0, NULL, too_big);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n    ret = tsk_node_table_set_max_metadata_length_increment(&tables.nodes, too_big);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, 0, 0, NULL, 1);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0, 0, 0, 0, zeros, 1);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0, 0, 0, 0, NULL, too_big);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n    ret = tsk_edge_table_set_max_metadata_length_increment(&tables.edges, too_big);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0, 0, 0, 0, NULL, 1);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n\n    ret_id = tsk_site_table_add_row(&tables.sites, 0, zeros, 1, zeros, 1);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    // ancestral state\n    ret_id = tsk_site_table_add_row(&tables.sites, 0, NULL, too_big, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n    ret = tsk_site_table_set_max_ancestral_state_length_increment(\n        &tables.sites, too_big);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_site_table_add_row(&tables.sites, 0, NULL, 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n    // metadata\n    ret_id = tsk_site_table_add_row(&tables.sites, 0, NULL, 0, NULL, too_big);\n    
CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n    ret = tsk_site_table_set_max_metadata_length_increment(&tables.sites, too_big);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_site_table_add_row(&tables.sites, 0, NULL, 0, NULL, 1);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n\n    ret_id\n        = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 0, 0, zeros, 1, zeros, 1);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    // derived state\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 0, 0, 0, NULL, too_big, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n    ret = tsk_mutation_table_set_max_derived_state_length_increment(\n        &tables.mutations, too_big);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 0, 0, NULL, 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n    // metadata\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 0, 0, 0, NULL, 0, NULL, too_big);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n    ret = tsk_mutation_table_set_max_metadata_length_increment(\n        &tables.mutations, too_big);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 0, 0, NULL, 0, NULL, 1);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n\n    ret_id = tsk_provenance_table_add_row(&tables.provenances, zeros, 1, zeros, 1);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0)\n    // timestamp\n    ret_id = tsk_provenance_table_add_row(&tables.provenances, NULL, too_big, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n    ret = tsk_provenance_table_set_max_timestamp_length_increment(\n        &tables.provenances, too_big);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_provenance_table_add_row(&tables.provenances, NULL, 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n 
   // record\n    ret_id = tsk_provenance_table_add_row(&tables.provenances, NULL, 0, NULL, too_big);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n    ret = tsk_provenance_table_set_max_record_length_increment(\n        &tables.provenances, too_big);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_provenance_table_add_row(&tables.provenances, NULL, 0, NULL, 1);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n\n    ret_id = tsk_population_table_add_row(&tables.populations, zeros, 1);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, too_big);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n    ret = tsk_population_table_set_max_metadata_length_increment(\n        &tables.populations, too_big);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 1);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n\n    ret_id = tsk_migration_table_add_row(&tables.migrations, 0, 0, 0, 0, 0, 0, zeros, 1);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_migration_table_add_row(\n        &tables.migrations, 0, 0, 0, 0, 0, 0, NULL, too_big);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n    ret = tsk_migration_table_set_max_metadata_length_increment(\n        &tables.migrations, too_big);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_migration_table_add_row(&tables.migrations, 0, 0, 0, 0, 0, 0, NULL, 1);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_table_collection_check_integrity_with_options(tsk_flags_t tc_options)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_table_collection_t tables;\n    const char *individuals = \"1      0.25     -1\\n\"\n                              \"2      0.5,0.25 2\\n\"\n                              \"3      0.5,0.25 0\\n\";\n\n    ret = 
tsk_table_collection_init(&tables, tc_options);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1;\n\n    /* nodes */\n    ret_id = tsk_node_table_add_row(\n        &tables.nodes, TSK_NODE_IS_SAMPLE, INFINITY, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    /* Not calling with TSK_CHECK_TREES so casting is safe */\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TIME_NONFINITE);\n\n    ret = tsk_node_table_clear(&tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_node_table_add_row(\n        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, 0, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, ret_id);\n    ret = (int) tsk_table_collection_check_integrity(\n        &tables, TSK_NO_CHECK_POPULATION_REFS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n\n    ret = tsk_node_table_clear(&tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_node_table_add_row(\n        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, ret_id);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n\n    ret = tsk_node_table_clear(&tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_node_table_add_row(\n        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(\n        &tables.nodes, TSK_NODE_IS_SAMPLE, 1.0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* edges */\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, TSK_NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 
0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NULL_PARENT);\n\n    ret = tsk_edge_table_clear(&tables.edges);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 2, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    ret = tsk_edge_table_clear(&tables.edges);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 1, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NULL_CHILD);\n\n    ret = tsk_edge_table_clear(&tables.edges);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 1, 2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    ret = tsk_edge_table_clear(&tables.edges);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, INFINITY, 1, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_GENOME_COORDS_NONFINITE);\n\n    ret = tsk_edge_table_clear(&tables.edges);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_edge_table_add_row(&tables.edges, -1.0, 1.0, 1, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_LEFT_LESS_ZERO);\n\n    ret = tsk_edge_table_clear(&tables.edges);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.1, 1, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = (int) 
tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_RIGHT_GREATER_SEQ_LENGTH);\n\n    ret = tsk_edge_table_clear(&tables.edges);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0.5, 0.1, 1, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_EDGE_INTERVAL);\n\n    ret = tsk_edge_table_clear(&tables.edges);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 0.5, 0, 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NODE_TIME_ORDERING);\n\n    ret = tsk_edge_table_clear(&tables.edges);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* sites */\n    ret_id = tsk_site_table_add_row(&tables.sites, INFINITY, \"A\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SITE_POSITION);\n\n    ret = tsk_site_table_clear(&tables.sites);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_site_table_add_row(&tables.sites, -0.5, \"A\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SITE_POSITION);\n\n    ret = tsk_site_table_clear(&tables.sites);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_site_table_add_row(&tables.sites, 1.5, \"A\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SITE_POSITION);\n\n    ret = tsk_site_table_clear(&tables.sites);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_site_table_add_row(&tables.sites, 0.5, \"A\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = 
tsk_site_table_add_row(&tables.sites, 0.5, \"A\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, TSK_CHECK_SITE_DUPLICATES);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SITE_POSITION);\n\n    ret = tsk_site_table_clear(&tables.sites);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_site_table_add_row(&tables.sites, 0.5, \"A\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_site_table_add_row(&tables.sites, 0.4, \"A\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, TSK_CHECK_SITE_ORDERING);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_SITES);\n\n    ret = tsk_site_table_clear(&tables.sites);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_site_table_add_row(&tables.sites, 0.5, \"A\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_site_table_add_row(&tables.sites, 0.6, \"A\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    /* mutations */\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 2, 0, TSK_NULL, 0, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);\n\n    ret = tsk_mutation_table_clear(&tables.mutations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 2, TSK_NULL, 0, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    /* A mixture of known and unknown times on a site fails */\n    ret = 
tsk_mutation_table_clear(&tables.mutations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 0, TSK_NULL, 0, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(\n        &tables, TSK_CHECK_MUTATION_ORDERING);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_TIME_HAS_BOTH_KNOWN_AND_UNKNOWN);\n\n    /* But on different sites, passes */\n    ret = tsk_mutation_table_clear(&tables.mutations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 1, 0, TSK_NULL, 0, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(\n        &tables, TSK_CHECK_MUTATION_ORDERING);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_mutation_table_clear(&tables.mutations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_mutation_table_add_row(&tables.mutations, 0, 1, 2, 0, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);\n\n    ret = tsk_mutation_table_clear(&tables.mutations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id\n        = tsk_mutation_table_add_row(&tables.mutations, 0, 1, 0, 1.0, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(\n        &tables, TSK_CHECK_MUTATION_ORDERING);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_PARENT_EQUAL);\n\n    ret = tsk_mutation_table_clear(&tables.mutations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    
ret_id\n        = tsk_mutation_table_add_row(&tables.mutations, 0, 1, 1, 1.0, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 1, TSK_NULL, 1.0, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = (int) tsk_table_collection_check_integrity(\n        &tables, TSK_CHECK_MUTATION_ORDERING);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_PARENT_AFTER_CHILD);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_mutation_table_clear(&tables.mutations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 1, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 1, 1, 0, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(\n        &tables, TSK_CHECK_MUTATION_ORDERING);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_PARENT_DIFFERENT_SITE);\n\n    ret = tsk_mutation_table_clear(&tables.mutations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 1, 1, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 1, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = (int) tsk_table_collection_check_integrity(\n        &tables, TSK_CHECK_MUTATION_ORDERING);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_MUTATIONS);\n\n    /* Unknown times pass */\n    ret = 
tsk_mutation_table_clear(&tables.mutations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = (int) tsk_table_collection_check_integrity(\n        &tables, TSK_CHECK_MUTATION_ORDERING);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Correctly ordered times pass */\n    ret = tsk_mutation_table_clear(&tables.mutations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 0, TSK_NULL, 1, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 0, TSK_NULL, 1, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 0, TSK_NULL, 0, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = (int) tsk_table_collection_check_integrity(\n        &tables, TSK_CHECK_MUTATION_ORDERING);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Incorrectly ordered times fail */\n    ret = tsk_mutation_table_clear(&tables.mutations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 0, TSK_NULL, 0, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 0, TSK_NULL, 1, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = (int) 
tsk_table_collection_check_integrity(\n        &tables, TSK_CHECK_MUTATION_ORDERING);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_MUTATIONS);\n\n    /* Putting incorrectly ordered times on different sites passes */\n    ret = tsk_mutation_table_clear(&tables.mutations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 0, TSK_NULL, 1, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 0, TSK_NULL, 0, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 1, 0, TSK_NULL, 2, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 1, 0, TSK_NULL, 1, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = (int) tsk_table_collection_check_integrity(\n        &tables, TSK_CHECK_MUTATION_ORDERING);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_mutation_table_clear(&tables.mutations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 0, TSK_NULL, NAN, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TIME_NONFINITE);\n\n    ret = tsk_mutation_table_clear(&tables.mutations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 0, TSK_NULL, INFINITY, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TIME_NONFINITE);\n\n    ret = tsk_mutation_table_clear(&tables.mutations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_mutation_table_add_row(\n        
&tables.mutations, 1, 1, TSK_NULL, 0, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(\n        &tables, TSK_CHECK_MUTATION_ORDERING);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_TIME_YOUNGER_THAN_NODE);\n\n    ret = tsk_mutation_table_clear(&tables.mutations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 1, 1, TSK_NULL, 1, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(&tables.mutations, 1, 1, 0, 2, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(\n        &tables, TSK_CHECK_MUTATION_ORDERING);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_MUTATION);\n    ret = tsk_mutation_table_clear(&tables.mutations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = (int) tsk_table_collection_check_integrity(\n        &tables, TSK_CHECK_MUTATION_ORDERING);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* migrations */\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_migration_table_clear(&tables.migrations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_migration_table_add_row(\n        &tables.migrations, 0.0, 0.5, 2, 0, 1, 1.5, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    ret = tsk_migration_table_clear(&tables.migrations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_migration_table_add_row(\n        &tables.migrations, 0.0, 0.5, 1, 2, 1, 1.5, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 
TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n\n    ret = tsk_migration_table_clear(&tables.migrations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_migration_table_add_row(\n        &tables.migrations, 0.0, 0.5, 1, 0, 2, 1.5, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n\n    ret = tsk_migration_table_clear(&tables.migrations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_migration_table_add_row(\n        &tables.migrations, 0.0, 0.5, 1, 0, 1, INFINITY, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TIME_NONFINITE);\n\n    ret = tsk_migration_table_clear(&tables.migrations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_migration_table_add_row(\n        &tables.migrations, 0.0, 0.5, 1, 0, 1, 1.5, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_migration_table_add_row(\n        &tables.migrations, 0.0, 0.5, 1, 1, 0, 0.5, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = (int) tsk_table_collection_check_integrity(\n        &tables, TSK_CHECK_MIGRATION_ORDERING);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_MIGRATIONS);\n\n    ret = tsk_migration_table_clear(&tables.migrations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_migration_table_add_row(\n        &tables.migrations, 0.0, INFINITY, 1, 0, 1, 1.5, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_GENOME_COORDS_NONFINITE);\n\n    ret = tsk_migration_table_clear(&tables.migrations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_migration_table_add_row(\n        &tables.migrations, -0.3, 0.5, 1, 0, 1, 1.5, NULL, 0);\n  
  CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_LEFT_LESS_ZERO);\n\n    ret = tsk_migration_table_clear(&tables.migrations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_migration_table_add_row(\n        &tables.migrations, 0.0, 1.5, 1, 0, 1, 1.5, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_RIGHT_GREATER_SEQ_LENGTH);\n\n    ret = tsk_migration_table_clear(&tables.migrations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_migration_table_add_row(\n        &tables.migrations, 0.6, 0.5, 1, 0, 1, 1.5, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_EDGE_INTERVAL);\n    ret = tsk_migration_table_clear(&tables.migrations);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    parse_individuals(individuals, &tables.individuals);\n    CU_ASSERT_EQUAL_FATAL(tables.individuals.num_rows, 3);\n    ret = (int) tsk_table_collection_check_integrity(\n        &tables, TSK_CHECK_INDIVIDUAL_ORDERING);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_INDIVIDUALS);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Check that an individual can't be its own parent */\n    tables.individuals.parents[0] = 0;\n    tables.individuals.parents[1] = 1;\n    tables.individuals.parents[2] = 2;\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_SELF_PARENT);\n\n    tables.individuals.parents[0] = -2;\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_table_collection_check_integrity_no_populations(void)\n{\n    int 
ret;\n    tsk_id_t ret_id;\n    tsk_id_t ret_num_trees;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Add in some bad population references and check that we can use\n     * TSK_NO_CHECK_POPULATION_REFS with TSK_CHECK_TREES */\n    tables.nodes.population[0] = 10;\n    /* Not calling with TSK_CHECK_TREES so casting is safe */\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n    ret_num_trees = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);\n    CU_ASSERT_EQUAL_FATAL(ret_num_trees, TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n    ret = (int) tsk_table_collection_check_integrity(\n        &tables, TSK_NO_CHECK_POPULATION_REFS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_num_trees = tsk_table_collection_check_integrity(\n        &tables, TSK_CHECK_TREES | TSK_NO_CHECK_POPULATION_REFS);\n    /* CHECK_TREES returns the number of trees */\n    CU_ASSERT_EQUAL_FATAL(ret_num_trees, 3);\n    tables.nodes.population[0] = TSK_NULL;\n\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_migration_table_add_row(\n        &tables.migrations, 0.4, 0.5, 1, 0, 1, 1.5, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n    ret_num_trees = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);\n    CU_ASSERT_EQUAL_FATAL(ret_num_trees, TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n    ret = (int) tsk_table_collection_check_integrity(\n        &tables, TSK_NO_CHECK_POPULATION_REFS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    
ret_num_trees = tsk_table_collection_check_integrity(\n        &tables, TSK_CHECK_TREES | TSK_NO_CHECK_POPULATION_REFS);\n    CU_ASSERT_EQUAL_FATAL(ret_num_trees, 3);\n\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_table_collection_check_integrity(void)\n{\n    test_table_collection_check_integrity_with_options(0);\n    test_table_collection_check_integrity_with_options(TSK_TC_NO_EDGE_METADATA);\n}\n\nstatic void\ntest_table_collection_check_integrity_bad_indexes_example(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_table_collection_t tables;\n\n    /* We start with a concrete example where you can get bad trees\n     * by building some valid tables, clearing the edges, and then\n     * building new ones without rebuilding the indexes. */\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 5;\n    /* nodes */\n    ret_id = tsk_node_table_add_row(\n        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_node_table_add_row(\n        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    ret_id = tsk_node_table_add_row(\n        &tables.nodes, TSK_NODE_IS_SAMPLE, 1.0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 2);\n    /* edges */\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 5.0, 2, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 5.0, 2, 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    /* build index */\n    ret = tsk_table_collection_build_index(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* okay now build a new table without rebuilding the indexes */\n    tsk_edge_table_clear(&tables.edges);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 2, 0, NULL, 0);\n    
CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    /* make sure we don't use too-long indexes */\n    ret_id = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLES_NOT_INDEXED);\n\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 4.0, 2, 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n\n    /* should error, as tree sequence will be wrong */\n    ret_id = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLES_BAD_INDEXES);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_table_collection_check_integrity_bad_indexes(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_table_collection_t tables;\n\n    /* Now hit some other weird cases by manipulating the indexes directly */\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 5;\n    /* nodes */\n    ret_id = tsk_node_table_add_row(\n        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_node_table_add_row(\n        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    ret_id = tsk_node_table_add_row(\n        &tables.nodes, TSK_NODE_IS_SAMPLE, 1.0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 2);\n    /* edges */\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 2, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 1.0, 2.0, 2, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 2.0, 5.0, 2, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 2);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 1.0, 3.0, 2, 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 3);\n    /* build index */\n    ret = tsk_table_collection_build_index(&tables, 0);\n    
CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);\n    CU_ASSERT(ret_id > 0);\n\n    /* edge removed before it is added */\n    ret = tsk_table_collection_build_index(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.indexes.edge_insertion_order[0] = 1;\n    tables.indexes.edge_insertion_order[2] = 0;\n    ret_id = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLES_BAD_INDEXES);\n\n    /* edge added twice (implies another is never added) */\n    ret = tsk_table_collection_build_index(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.indexes.edge_insertion_order[0] = 0;\n    tables.indexes.edge_insertion_order[1] = 0;\n    tables.indexes.edge_removal_order[0] = 1;\n    tables.indexes.edge_removal_order[2] = 2;\n    ret_id = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLES_BAD_INDEXES);\n\n    /* edge never removed but should have been */\n    ret = tsk_table_collection_build_index(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.indexes.edge_removal_order[0] = 0;\n    tables.indexes.edge_removal_order[1] = 1;\n    tables.indexes.edge_removal_order[2] = 2;\n    tables.indexes.edge_removal_order[3] = 3;\n    ret_id = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLES_BAD_INDEXES);\n\n    /* edge progression out of order */\n    tables.edges.right[2] = 4.0;\n    ret_id = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLES_BAD_INDEXES);\n\n    /* edge never used */\n    ret = tsk_table_collection_build_index(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.indexes.edge_insertion_order[0] = 0;\n    tables.indexes.edge_insertion_order[1] = 3;\n    tables.indexes.edge_insertion_order[2] = 0;\n    
tables.indexes.edge_insertion_order[3] = 3;\n    tables.indexes.edge_removal_order[0] = 0;\n    tables.indexes.edge_removal_order[1] = 3;\n    tables.indexes.edge_removal_order[2] = 0;\n    tables.indexes.edge_removal_order[3] = 3;\n    ret_id = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLES_BAD_INDEXES);\n\n    /* make sure we don't use the too-short indexes */\n    ret_id = tsk_edge_table_add_row(&tables.edges, 4.0, 5.0, 2, 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 4);\n    ret_id = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLES_NOT_INDEXED);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_check_integrity_bad_mutation_parent_topology(void)\n{\n    int ret;\n    tsk_id_t ret_trees;\n    tsk_table_collection_t tables;\n    const char *sites = \"0       0\\n\";\n    /* Make a mutation on a parallel branch the parent */\n    const char *bad_mutations = \"0   0  1  -1\\n\"\n                                \"0   1  1  0\\n\";\n\n    /* A mutation above is set as the child */\n    const char *reverse_mutations = \"0   0  1  -1\\n\"\n                                    \"0   4  1  0\\n\";\n\n    const char *reverse_sites = \"0.5       0\\n\"\n                                \"0       0\\n\";\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tables.sequence_length = 1;\n    parse_nodes(single_tree_ex_nodes, &tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 7);\n    parse_edges(single_tree_ex_edges, &tables.edges);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 6);\n    parse_sites(sites, &tables.sites);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.num_rows, 1);\n    parse_mutations(bad_mutations, &tables.mutations);\n    CU_ASSERT_EQUAL_FATAL(tables.mutations.num_rows, 2);\n    tables.sequence_length = 1.0;\n\n    ret = tsk_table_collection_build_index(&tables, 
0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret_trees = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);\n    CU_ASSERT_EQUAL_FATAL(ret_trees, 1);\n    ret_trees\n        = tsk_table_collection_check_integrity(&tables, TSK_CHECK_MUTATION_PARENTS);\n    CU_ASSERT_EQUAL_FATAL(ret_trees, TSK_ERR_BAD_MUTATION_PARENT);\n\n    parse_mutations(reverse_mutations, &tables.mutations);\n    ret_trees = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);\n    CU_ASSERT_EQUAL_FATAL(ret_trees, 1);\n    ret_trees\n        = tsk_table_collection_check_integrity(&tables, TSK_CHECK_MUTATION_PARENTS);\n    CU_ASSERT_EQUAL_FATAL(ret_trees, TSK_ERR_MUTATION_PARENT_AFTER_CHILD);\n\n    /* Now check that TSK_CHECK_MUTATION_PARENTS implies TSK_CHECK_TREES\n       by triggering an error with reversed sites */\n    parse_sites(reverse_sites, &tables.sites);\n    ret_trees\n        = tsk_table_collection_check_integrity(&tables, TSK_CHECK_MUTATION_PARENTS);\n    CU_ASSERT_EQUAL_FATAL(ret_trees, TSK_ERR_UNSORTED_SITES);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_table_collection_compute_mutation_parents_tolerates_invalid_input(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_table_collection_t tables;\n    tsk_id_t site;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1.0;\n\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 1.0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(\n        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 0, 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    site = tsk_site_table_add_row(&tables.sites, 0.0, \"A\", 1, NULL, 0);\n    CU_ASSERT_FATAL(site >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, site, 1, TSK_NULL, 
TSK_UNKNOWN_TIME, \"C\", 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    ret = tsk_table_collection_build_index(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.mutations.parent[0] = 42;\n\n    ret = tsk_table_collection_compute_mutation_parents(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FATAL(tables.mutations.parent[0] == TSK_NULL);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_table_collection_compute_mutation_parents_restores_on_error(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_table_collection_t tables;\n    tsk_id_t site;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1.0;\n\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 1.0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(\n        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 0, 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    site = tsk_site_table_add_row(&tables.sites, 0.5, \"A\", 1, NULL, 0);\n    CU_ASSERT_FATAL(site >= 0);\n\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, site, 1, TSK_NULL, TSK_UNKNOWN_TIME, \"C\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, site, 0, TSK_NULL, TSK_UNKNOWN_TIME, \"G\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_table_collection_build_index(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tables.mutations.parent[0] = 111;\n    tables.mutations.parent[1] = 222;\n\n    ret = tsk_table_collection_compute_mutation_parents(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_PARENT_AFTER_CHILD);\n    CU_ASSERT_EQUAL(tables.mutations.parent[0], 111);\n    CU_ASSERT_EQUAL(tables.mutations.parent[1], 222);\n\n    
tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_table_collection_subset_with_options(tsk_flags_t options)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_table_collection_t tables;\n    tsk_table_collection_t tables_copy;\n    int k;\n    tsk_id_t nodes[4];\n    tsk_id_t zero_p[] = { 0 };\n    tsk_id_t one_p[] = { 1 };\n\n    ret = tsk_table_collection_init(&tables, options);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1;\n    ret = tsk_table_collection_init(&tables_copy, options);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    // does not error on empty tables\n    ret = tsk_table_collection_subset(&tables, NULL, 0, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    // four nodes from two diploids; the first is from pop 0\n    ret_id\n        = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, 0, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id\n        = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 1.0, 0, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(\n        &tables.nodes, TSK_NODE_IS_SAMPLE, 2.0, TSK_NULL, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(\n        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    // unused individual who is the parent of others\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0);\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, zero_p, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, one_p, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    // unused individual\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, one_p, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_population_table_add_row(&tables.populations, 
NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    // unused population\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 1, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 2, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_site_table_add_row(&tables.sites, 0.2, \"A\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_site_table_add_row(&tables.sites, 0.4, \"A\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    // unused site\n    ret_id = tsk_site_table_add_row(&tables.sites, 0.5, \"C\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 0, 0, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 1, 1, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    // empty nodes should get empty tables\n    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT | options);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_subset(&tables_copy, NULL, 0, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tables_copy.nodes.num_rows, 0);\n    CU_ASSERT_EQUAL_FATAL(tables_copy.individuals.num_rows, 0);\n    CU_ASSERT_EQUAL_FATAL(tables_copy.populations.num_rows, 0);\n    CU_ASSERT_EQUAL_FATAL(tables_copy.sites.num_rows, 0);\n    CU_ASSERT_EQUAL_FATAL(tables_copy.mutations.num_rows, 0);\n\n    // unless NO_CHANGE_POPULATIONS is provided\n    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT | options);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = 
tsk_table_collection_subset(\n        &tables_copy, NULL, 0, TSK_SUBSET_NO_CHANGE_POPULATIONS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tables_copy.nodes.num_rows, 0);\n    CU_ASSERT_EQUAL_FATAL(tables_copy.individuals.num_rows, 0);\n    CU_ASSERT_EQUAL_FATAL(tables_copy.sites.num_rows, 0);\n    CU_ASSERT_EQUAL_FATAL(tables_copy.mutations.num_rows, 0);\n    CU_ASSERT_FATAL(\n        tsk_population_table_equals(&tables.populations, &tables_copy.populations, 0));\n\n    // or KEEP_UNREFERENCED\n    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT | options);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_subset(\n        &tables_copy, NULL, 0, TSK_SUBSET_KEEP_UNREFERENCED);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tables_copy.nodes.num_rows, 0);\n    CU_ASSERT_FATAL(\n        tsk_individual_table_equals(&tables.individuals, &tables_copy.individuals, 0));\n    CU_ASSERT_EQUAL_FATAL(tables_copy.populations.num_rows, 2);\n    CU_ASSERT_EQUAL_FATAL(tables_copy.mutations.num_rows, 0);\n    CU_ASSERT_FATAL(tsk_site_table_equals(&tables.sites, &tables_copy.sites, 0));\n\n    // or both\n    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT | options);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_subset(&tables_copy, NULL, 0,\n        TSK_SUBSET_KEEP_UNREFERENCED | TSK_SUBSET_NO_CHANGE_POPULATIONS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tables_copy.nodes.num_rows, 0);\n    CU_ASSERT_FATAL(\n        tsk_individual_table_equals(&tables.individuals, &tables_copy.individuals, 0));\n    CU_ASSERT_EQUAL_FATAL(tables_copy.mutations.num_rows, 0);\n    CU_ASSERT_FATAL(\n        tsk_population_table_equals(&tables.populations, &tables_copy.populations, 0));\n    CU_ASSERT_FATAL(tsk_site_table_equals(&tables.sites, &tables_copy.sites, 0));\n\n    // the identity transformation, since unused pops are at the end\n    for (k = 0; k < 4; k++) 
{\n        nodes[k] = k;\n    }\n    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT | options);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_subset(\n        &tables_copy, nodes, 4, TSK_SUBSET_KEEP_UNREFERENCED);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &tables_copy, 0));\n\n    // or, remove unused things:\n    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT | options);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_subset(&tables_copy, nodes, 4, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables_copy, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FATAL(tsk_node_table_equals(&tables.nodes, &tables_copy.nodes, 0));\n    CU_ASSERT_EQUAL_FATAL(tables_copy.individuals.num_rows, 2);\n    CU_ASSERT_EQUAL_FATAL(tables_copy.populations.num_rows, 1);\n    CU_ASSERT_EQUAL_FATAL(tables_copy.sites.num_rows, 2);\n    CU_ASSERT_FATAL(\n        tsk_mutation_table_equals(&tables.mutations, &tables_copy.mutations, 0));\n\n    // reverse twice should get back to the start, since unused pops are at the end\n    for (k = 0; k < 4; k++) {\n        nodes[k] = 3 - k;\n    }\n    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT | options);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_subset(\n        &tables_copy, nodes, 4, TSK_SUBSET_KEEP_UNREFERENCED);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_subset(\n        &tables_copy, nodes, 4, TSK_SUBSET_KEEP_UNREFERENCED);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables_copy, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &tables_copy, 0));\n\n    tsk_table_collection_free(&tables_copy);\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_table_collection_subset(void)\n{\n   
 test_table_collection_subset_with_options(0);\n    test_table_collection_subset_with_options(TSK_TC_NO_EDGE_METADATA);\n}\n\nstatic void\ntest_table_collection_subset_unsorted(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_table_collection_t tables;\n    tsk_table_collection_t tables_copy;\n    int k;\n    tsk_id_t nodes[3];\n    tsk_id_t one_p[] = { 1 };\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1;\n    ret = tsk_table_collection_init(&tables_copy, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    // these tables are a big mess\n    ret_id = tsk_node_table_add_row(\n        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(\n        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.5, TSK_NULL, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 1.0, TSK_NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, one_p, 1, NULL, 0);\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 0.5, 2, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 1, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0.5, 1.0, 2, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_site_table_add_row(&tables.sites, 0.2, \"A\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_site_table_add_row(&tables.sites, 0.4, \"A\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 0, 2, TSK_UNKNOWN_TIME, \"B\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = 
tsk_mutation_table_add_row(\n        &tables.mutations, 1, 1, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    // but still, this should leave them unchanged\n    for (k = 0; k < 3; k++) {\n        nodes[k] = k;\n    }\n    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_subset(\n        &tables_copy, nodes, 3, TSK_SUBSET_KEEP_UNREFERENCED);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &tables_copy, 0));\n\n    tsk_table_collection_free(&tables_copy);\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_table_collection_subset_errors(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_table_collection_t tables;\n    tsk_table_collection_t tables_copy;\n    tsk_id_t nodes[4] = { 0, 1, 2, 3 };\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1;\n    ret = tsk_table_collection_init(&tables_copy, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    // four nodes from two diploids; the first is from pop 0\n    ret_id\n        = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, 0, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id\n        = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 1.0, 0, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(\n        &tables.nodes, TSK_NODE_IS_SAMPLE, 2.0, TSK_NULL, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(\n        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0);\n   
 CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 1, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = tsk_table_collection_build_index(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Migrations are not supported */\n    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_migration_table_add_row(&tables_copy.migrations, 0, 1, 0, 0, 0, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(tables_copy.migrations.num_rows, 1);\n    ret = tsk_table_collection_subset(&tables_copy, nodes, 4, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATIONS_NOT_SUPPORTED);\n\n    // test out of bounds nodes\n    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    nodes[0] = -1;\n    ret = tsk_table_collection_subset(&tables_copy, nodes, 4, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    nodes[0] = 6;\n    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_subset(&tables_copy, nodes, 4, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    // check integrity\n    nodes[0] = 0;\n    nodes[1] = 1;\n    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_node_table_truncate(&tables_copy.nodes, 3);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_node_table_add_row(\n        &tables_copy.nodes, TSK_NODE_IS_SAMPLE, 0.0, -2, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = tsk_table_collection_subset(&tables_copy, nodes, 4, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 
TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n\n    tsk_table_collection_free(&tables);\n    tsk_table_collection_free(&tables_copy);\n}\n\nstatic void\ntest_table_collection_union(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_table_collection_t tables;\n    tsk_table_collection_t tables_empty;\n    tsk_table_collection_t tables_copy;\n    tsk_id_t node_mapping[3];\n    tsk_id_t parents[2] = { -1, -1 };\n    char example_metadata[100] = \"An example of metadata with unicode 🎄🌳🌴🌲🎋\";\n    tsk_size_t example_metadata_length = (tsk_size_t) strlen(example_metadata);\n\n    tsk_memset(node_mapping, 0xff, sizeof(node_mapping));\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1;\n    ret = tsk_table_collection_init(&tables_empty, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables_empty.sequence_length = 1;\n    ret = tsk_table_collection_init(&tables_copy, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    // does not error on empty tables\n    ret = tsk_table_collection_union(&tables, &tables_empty, node_mapping, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    // does not error on empty tables but that differ on top level metadata\n    ret = tsk_table_collection_set_metadata(\n        &tables, example_metadata, example_metadata_length);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_table_collection_union(&tables, &tables_empty, node_mapping, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    // three nodes, two pop, three ind, two edge, two site, two mut\n    ret_id\n        = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, 0, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id\n        = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, 1, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id\n        = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.5, 1, 2, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_individual_table_add_row(\n        
&tables.individuals, 0, NULL, 0, parents, 2, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    parents[0] = 0;\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, parents, 2, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    parents[1] = 1;\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, parents, 2, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 2, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 2, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_site_table_add_row(&tables.sites, 0.4, \"T\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_site_table_add_row(&tables.sites, 0.2, \"A\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 1, 1, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = tsk_table_collection_build_index(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_sort(&tables, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    // union with empty should not change\n    // other is empty\n    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_union(\n        &tables_copy, &tables_empty, node_mapping, TSK_UNION_NO_CHECK_SHARED);\n    CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &tables_copy, 0));\n    // self is empty\n    ret = tsk_table_collection_clear(&tables_copy, 
0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_union(\n        &tables_copy, &tables, node_mapping, TSK_UNION_NO_CHECK_SHARED);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &tables_copy, 0));\n\n    // union all shared nodes + subset original nodes = original table\n    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_union(\n        &tables_copy, &tables, node_mapping, TSK_UNION_NO_CHECK_SHARED);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    node_mapping[0] = 0;\n    node_mapping[1] = 1;\n    node_mapping[2] = 2;\n    ret = tsk_table_collection_subset(&tables_copy, node_mapping, 3, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &tables_copy, 0));\n\n    // union with one shared node\n    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    node_mapping[0] = TSK_NULL;\n    node_mapping[1] = TSK_NULL;\n    node_mapping[2] = 2;\n    ret = tsk_table_collection_union(&tables_copy, &tables, node_mapping, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(\n        tables_copy.populations.num_rows, tables.populations.num_rows + 2);\n    CU_ASSERT_EQUAL_FATAL(\n        tables_copy.individuals.num_rows, tables.individuals.num_rows + 2);\n    CU_ASSERT_EQUAL_FATAL(tables_copy.nodes.num_rows, tables.nodes.num_rows + 2);\n    CU_ASSERT_EQUAL_FATAL(tables_copy.edges.num_rows, tables.edges.num_rows + 2);\n    CU_ASSERT_EQUAL_FATAL(tables_copy.sites.num_rows, tables.sites.num_rows);\n    CU_ASSERT_EQUAL_FATAL(tables_copy.mutations.num_rows, tables.mutations.num_rows + 2);\n\n    // union with one shared node, but no add pop\n    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    node_mapping[0] = TSK_NULL;\n    node_mapping[1] = TSK_NULL;\n    
node_mapping[2] = 2;\n    ret = tsk_table_collection_union(\n        &tables_copy, &tables, node_mapping, TSK_UNION_NO_ADD_POP);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tables_copy.populations.num_rows, tables.populations.num_rows);\n    CU_ASSERT_EQUAL_FATAL(\n        tables_copy.individuals.num_rows, tables.individuals.num_rows + 2);\n    CU_ASSERT_EQUAL_FATAL(tables_copy.nodes.num_rows, tables.nodes.num_rows + 2);\n    CU_ASSERT_EQUAL_FATAL(tables_copy.edges.num_rows, tables.edges.num_rows + 2);\n    CU_ASSERT_EQUAL_FATAL(tables_copy.sites.num_rows, tables.sites.num_rows);\n    CU_ASSERT_EQUAL_FATAL(tables_copy.mutations.num_rows, tables.mutations.num_rows + 2);\n\n    tsk_table_collection_free(&tables_copy);\n    tsk_table_collection_free(&tables_empty);\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_table_collection_disjoint_union(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_table_collection_t tables;\n    tsk_table_collection_t tables1;\n    tsk_table_collection_t tables2;\n    tsk_table_collection_t tables12;\n    tsk_id_t node_mapping[4];\n\n    tsk_memset(node_mapping, 0xff, sizeof(node_mapping));\n\n    ret = tsk_table_collection_init(&tables1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables1.sequence_length = 2;\n\n    // set up nodes, which will be shared\n    // flags, time, pop, ind, metadata, metadata_length\n    ret_id = tsk_node_table_add_row(\n        &tables1.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(\n        &tables1.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(&tables1.nodes, 0, 0.5, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(&tables1.nodes, 0, 1.5, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = 
tsk_table_collection_copy(&tables1, &tables2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    // for tables1:\n    // on [0, 1] we have 0, 1 inherit from 2\n    // left, right, parent, child, metadata, metadata_length\n    ret_id = tsk_edge_table_add_row(&tables1.edges, 0.0, 1.0, 2, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&tables1.edges, 0.0, 1.0, 2, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_site_table_add_row(&tables1.sites, 0.4, \"T\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables1.mutations, ret_id, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = tsk_table_collection_build_index(&tables1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_sort(&tables1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    // all this goes in tables12 so far\n    ret = tsk_table_collection_copy(&tables1, &tables12, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    // for tables2; and need to add to tables12 also:\n    // on [1, 2] we have 0, 1 inherit from 3\n    // left, right, parent, child, metadata, metadata_length\n    ret_id = tsk_edge_table_add_row(&tables2.edges, 1.0, 2.0, 3, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&tables2.edges, 1.0, 2.0, 3, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_site_table_add_row(&tables2.sites, 1.4, \"A\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables2.mutations, ret_id, 1, TSK_NULL, TSK_UNKNOWN_TIME, \"T\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = tsk_table_collection_build_index(&tables2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_sort(&tables2, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    // also tables12\n    ret_id = tsk_edge_table_add_row(&tables12.edges, 1.0, 2.0, 3, 0, 
NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&tables12.edges, 1.0, 2.0, 3, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_site_table_add_row(&tables12.sites, 1.4, \"A\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables12.mutations, ret_id, 1, TSK_NULL, TSK_UNKNOWN_TIME, \"T\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = tsk_table_collection_build_index(&tables12, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_sort(&tables12, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    // now disjoint union-ing tables1 and tables2 should get tables12\n    ret = tsk_table_collection_copy(&tables1, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    node_mapping[0] = 0;\n    node_mapping[1] = 1;\n    node_mapping[2] = 2;\n    node_mapping[3] = 3;\n    ret = tsk_table_collection_union(&tables, &tables2, node_mapping,\n        TSK_UNION_NO_CHECK_SHARED | TSK_UNION_ALL_EDGES | TSK_UNION_ALL_MUTATIONS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FATAL(\n        tsk_table_collection_equals(&tables, &tables12, TSK_CMP_IGNORE_PROVENANCE));\n\n    tsk_table_collection_free(&tables12);\n    tsk_table_collection_free(&tables2);\n    tsk_table_collection_free(&tables1);\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_table_collection_union_middle_merge(void)\n{\n    /* Test ability to have non-shared history both above and below the\n     * shared bits. 
The full genealogy, in `tu`, is:\n     *  3   4\n     *   \\ /\n     *    2\n     *   / \\\n     *  0   1\n     * and the left lineage is in `ta` and right in `tb` */\n    int ret;\n    tsk_id_t ret_id;\n    tsk_id_t node_mapping[] = { TSK_NULL, 1, TSK_NULL };\n    tsk_id_t node_order[] = { 0, 3, 1, 2, 4 };\n    tsk_table_collection_t ta, tb, tu;\n    ret = tsk_table_collection_init(&ta, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ta.sequence_length = 1;\n    ret = tsk_table_collection_init(&tb, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tb.sequence_length = 1;\n    ret = tsk_table_collection_init(&tu, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tu.sequence_length = 1;\n\n    ret_id = tsk_node_table_add_row(\n        &tu.nodes, TSK_NODE_IS_SAMPLE, 0, TSK_NULL, TSK_NULL, NULL, 0); // node u0\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(\n        &ta.nodes, TSK_NODE_IS_SAMPLE, 0, TSK_NULL, TSK_NULL, NULL, 0); // node a0 = u0\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(\n        &tu.nodes, TSK_NODE_IS_SAMPLE, 0, TSK_NULL, TSK_NULL, NULL, 0); // node u1\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(\n        &tb.nodes, TSK_NODE_IS_SAMPLE, 0, TSK_NULL, TSK_NULL, NULL, 0); // node b0 = u1\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(\n        &tu.nodes, 0, 1, TSK_NULL, TSK_NULL, NULL, 0); // node u2\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&tu.edges, 0, 1, 2, 0, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&tu.edges, 0, 1, 2, 1, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(\n        &ta.nodes, 0, 1, TSK_NULL, TSK_NULL, NULL, 0); // node a1 = u2\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&ta.edges, 0, 1, 1, 0, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(\n        &tb.nodes, 0, 1, TSK_NULL, TSK_NULL, NULL, 0); // node b1 = u2\n    CU_ASSERT(ret_id 
>= 0);\n    ret_id = tsk_edge_table_add_row(&tb.edges, 0, 1, 1, 0, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(\n        &tu.nodes, 0, 2, TSK_NULL, TSK_NULL, NULL, 0); // node u3\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&tu.edges, 0, 0.5, 3, 2, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(\n        &ta.nodes, 0, 2, TSK_NULL, TSK_NULL, NULL, 0); // node a2 = u3\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&ta.edges, 0, 0.5, 2, 1, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(\n        &tu.nodes, 0, 2, TSK_NULL, TSK_NULL, NULL, 0); // node u4\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&tu.edges, 0.5, 1, 4, 2, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_node_table_add_row(\n        &tb.nodes, 0, 2, TSK_NULL, TSK_NULL, NULL, 0); // node b2 = u4\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&tb.edges, 0.5, 1, 2, 1, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n\n    ret_id = tsk_site_table_add_row(&ta.sites, 0.25, \"A\", 1, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_site_table_add_row(&ta.sites, 0.75, \"X\", 1, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_site_table_add_row(&tb.sites, 0.25, \"A\", 1, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_site_table_add_row(&tb.sites, 0.75, \"X\", 1, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_site_table_add_row(&tu.sites, 0.25, \"A\", 1, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_site_table_add_row(&tu.sites, 0.75, \"X\", 1, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n\n    ret_id = tsk_mutation_table_add_row(\n        &tu.mutations, 0, 3, TSK_NULL, 3.5, \"B\", 1, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &ta.mutations, 0, 2, TSK_NULL, 3.5, \"B\", 1, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n       
 &tu.mutations, 0, 2, TSK_NULL, 1.5, \"D\", 1, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &ta.mutations, 0, 1, TSK_NULL, 1.5, \"D\", 1, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tb.mutations, 0, 1, TSK_NULL, 1.5, \"D\", 1, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tu.mutations, 0, 2, TSK_NULL, 1.2, \"E\", 1, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &ta.mutations, 0, 1, TSK_NULL, 1.2, \"E\", 1, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tb.mutations, 0, 1, TSK_NULL, 1.2, \"E\", 1, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tu.mutations, 0, 0, TSK_NULL, 0.5, \"C\", 1, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &ta.mutations, 0, 0, TSK_NULL, 0.5, \"C\", 1, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tu.mutations, 1, 4, TSK_NULL, 2.4, \"Y\", 1, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tb.mutations, 1, 2, TSK_NULL, 2.4, \"Y\", 1, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tu.mutations, 1, 1, TSK_NULL, 0.4, \"Z\", 1, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tb.mutations, 1, 0, TSK_NULL, 0.4, \"Z\", 1, NULL, 0);\n    CU_ASSERT(ret_id >= 0);\n\n    ret = tsk_table_collection_build_index(&ta, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_table_collection_compute_mutation_parents(&ta, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_table_collection_build_index(&tb, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_table_collection_compute_mutation_parents(&tb, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_table_collection_build_index(&tu, 0);\n    
CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_table_collection_compute_mutation_parents(&tu, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n\n    ret = tsk_table_collection_union(&ta, &tb, node_mapping, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_table_collection_subset(&ta, node_order, 5, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_provenance_table_clear(&ta.provenances);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_FATAL(tsk_table_collection_equals(&tu, &ta, 0));\n\n    tsk_table_collection_free(&ta);\n    tsk_table_collection_free(&tb);\n    tsk_table_collection_free(&tu);\n}\n\nstatic void\ntest_table_collection_union_errors(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_table_collection_t tables;\n    tsk_table_collection_t tables_copy;\n    tsk_id_t node_mapping[] = { 0, 1 };\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1;\n    ret = tsk_table_collection_init(&tables_copy, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    // two nodes, two pop, two ind, one edge, one site, one mut\n    ret_id\n        = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, 0, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id\n        = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.5, 1, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 1, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = 
tsk_site_table_add_row(&tables.sites, 0.2, \"A\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    // trigger diff histories error\n    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables_copy.mutations, 0, 1, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = tsk_table_collection_union(&tables_copy, &tables, node_mapping, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNION_DIFF_HISTORIES);\n\n    // Migrations are not supported\n    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_migration_table_add_row(&tables_copy.migrations, 0, 1, 0, 0, 0, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(tables_copy.migrations.num_rows, 1);\n    ret = tsk_table_collection_union(\n        &tables_copy, &tables, node_mapping, TSK_UNION_NO_CHECK_SHARED);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATIONS_NOT_SUPPORTED);\n\n    // test out of bounds node_mapping\n    node_mapping[0] = -4;\n    node_mapping[1] = 6;\n    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_union(&tables_copy, &tables, node_mapping, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNION_BAD_MAP);\n\n    // check integrity\n    node_mapping[0] = 0;\n    node_mapping[1] = 1;\n    ret_id = tsk_node_table_add_row(\n        &tables_copy.nodes, TSK_NODE_IS_SAMPLE, 0.0, -2, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = tsk_table_collection_union(&tables_copy, &tables, node_mapping, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);\n    
CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id\n        = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, -2, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret = tsk_table_collection_union(&tables, &tables_copy, node_mapping, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n\n    tsk_table_collection_free(&tables_copy);\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_table_collection_clear_with_options(tsk_flags_t options)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_table_collection_t tables;\n    bool clear_provenance = !!(options & TSK_CLEAR_PROVENANCE);\n    bool clear_metadata_schemas = !!(options & TSK_CLEAR_METADATA_SCHEMAS);\n    bool clear_ts_metadata = !!(options & TSK_CLEAR_TS_METADATA_AND_SCHEMA);\n    tsk_bookmark_t num_rows;\n    tsk_bookmark_t expected_rows = { .provenances = clear_provenance ? 0 : 1 };\n    tsk_size_t expected_len = clear_metadata_schemas ? 0 : 4;\n    tsk_size_t expected_len_ts = clear_ts_metadata ? 
0 : 4;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1;\n\n    ret_id\n        = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, 0, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id\n        = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.5, 1, 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 1, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_site_table_add_row(&tables.sites, 0.2, \"A\", 1, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n    ret_id = tsk_migration_table_add_row(&tables.migrations, 0, 1, 0, 0, 0, 0, NULL, 0);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_table_collection_build_index(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_individual_table_set_metadata_schema(&tables.individuals, \"test\", 4);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_node_table_set_metadata_schema(&tables.nodes, \"test\", 4);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_edge_table_set_metadata_schema(&tables.edges, \"test\", 4);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_migration_table_set_metadata_schema(&tables.migrations, \"test\", 4);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = 
tsk_site_table_set_metadata_schema(&tables.sites, \"test\", 4);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_mutation_table_set_metadata_schema(&tables.mutations, \"test\", 4);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_population_table_set_metadata_schema(&tables.populations, \"test\", 4);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_table_collection_set_time_units(&tables, \"test\", 4);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_set_metadata(&tables, \"test\", 4);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_set_metadata_schema(&tables, \"test\", 4);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret_id = tsk_provenance_table_add_row(&tables.provenances, \"today\", 5, \"test\", 4);\n    CU_ASSERT_FATAL(ret_id >= 0);\n\n    ret = tsk_table_collection_clear(&tables, options);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_table_collection_record_num_rows(&tables, &num_rows);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(num_rows.individuals, expected_rows.individuals);\n    CU_ASSERT_EQUAL(num_rows.nodes, expected_rows.nodes);\n    CU_ASSERT_EQUAL(num_rows.edges, expected_rows.edges);\n    CU_ASSERT_EQUAL(num_rows.migrations, expected_rows.migrations);\n    CU_ASSERT_EQUAL(num_rows.sites, expected_rows.sites);\n    CU_ASSERT_EQUAL(num_rows.mutations, expected_rows.mutations);\n    CU_ASSERT_EQUAL(num_rows.populations, expected_rows.populations);\n    CU_ASSERT_EQUAL(num_rows.provenances, expected_rows.provenances);\n\n    CU_ASSERT_FALSE(tsk_table_collection_has_index(&tables, 0));\n\n    CU_ASSERT_EQUAL(tables.individuals.metadata_schema_length, expected_len);\n    CU_ASSERT_EQUAL(tables.nodes.metadata_schema_length, expected_len);\n    CU_ASSERT_EQUAL(tables.edges.metadata_schema_length, expected_len);\n    CU_ASSERT_EQUAL(tables.migrations.metadata_schema_length, expected_len);\n    CU_ASSERT_EQUAL(tables.sites.metadata_schema_length, expected_len);\n    
CU_ASSERT_EQUAL(tables.mutations.metadata_schema_length, expected_len);\n    CU_ASSERT_EQUAL(tables.populations.metadata_schema_length, expected_len);\n    CU_ASSERT_EQUAL(tables.metadata_schema_length, expected_len_ts);\n    CU_ASSERT_EQUAL(tables.metadata_length, expected_len_ts);\n    CU_ASSERT_EQUAL(tables.time_units_length, 4);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_table_collection_clear(void)\n{\n    test_table_collection_clear_with_options(0);\n    test_table_collection_clear_with_options(TSK_CLEAR_PROVENANCE);\n    test_table_collection_clear_with_options(TSK_CLEAR_METADATA_SCHEMAS);\n    test_table_collection_clear_with_options(TSK_CLEAR_TS_METADATA_AND_SCHEMA);\n    test_table_collection_clear_with_options(\n        TSK_CLEAR_PROVENANCE | TSK_CLEAR_METADATA_SCHEMAS);\n    test_table_collection_clear_with_options(\n        TSK_CLEAR_PROVENANCE | TSK_CLEAR_TS_METADATA_AND_SCHEMA);\n    test_table_collection_clear_with_options(\n        TSK_CLEAR_METADATA_SCHEMAS | TSK_CLEAR_TS_METADATA_AND_SCHEMA);\n    test_table_collection_clear_with_options(TSK_CLEAR_PROVENANCE\n                                             | TSK_CLEAR_METADATA_SCHEMAS\n                                             | TSK_CLEAR_TS_METADATA_AND_SCHEMA);\n}\n\nstatic void\ntest_table_collection_takeset_indexes(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t t1, t2;\n    tsk_id_t *ins;\n    tsk_id_t *rem;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ins = tsk_malloc(t1.edges.num_rows * sizeof(*ins));\n    CU_ASSERT_FATAL(ins != NULL);\n    rem = tsk_malloc(t1.edges.num_rows * sizeof(*rem));\n    CU_ASSERT_FATAL(rem != NULL);\n    memcpy(ins, t1.indexes.edge_insertion_order,\n        (size_t) (t1.edges.num_rows * sizeof(*ins)));\n    memcpy(\n        rem, 
t1.indexes.edge_removal_order, (size_t) (t1.edges.num_rows * sizeof(*rem)));\n\n    ret = tsk_table_collection_copy(&t1, &t2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_drop_index(&t2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_takeset_indexes(&t2, ins, rem);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(\n        tsk_memcmp(t1.indexes.edge_insertion_order, t2.indexes.edge_insertion_order,\n            t1.edges.num_rows * sizeof(*ins)),\n        0);\n    CU_ASSERT_EQUAL(tsk_memcmp(t1.indexes.edge_removal_order,\n                        t2.indexes.edge_removal_order, t1.edges.num_rows * sizeof(*rem)),\n        0);\n\n    ret = tsk_table_collection_takeset_indexes(&t2, ins, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_table_collection_takeset_indexes(&t2, NULL, rem);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n\n    tsk_table_collection_free(&t1);\n    tsk_table_collection_free(&t2);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_table_collection_delete_older(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t t;\n\n    const char *mutations = \"0      2   1   -1\\n\"\n                            \"0      2   0   0\\n\"\n                            \"1      0   1   -1\\n\"\n                            \"2      5   1   -1\\n\";\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        mutations, paper_ex_individuals, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &t, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_treeseq_free(&ts);\n\n    /* Add some migrations */\n    tsk_population_table_add_row(&t.populations, NULL, 0);\n    tsk_population_table_add_row(&t.populations, NULL, 0);\n    tsk_migration_table_add_row(&t.migrations, 0, 10, 0, 0, 1, 0.05, NULL, 0);\n    tsk_migration_table_add_row(&t.migrations, 0, 10, 0, 1, 0, 0.09, NULL, 0);\n    
tsk_migration_table_add_row(&t.migrations, 0, 10, 0, 0, 1, 0.10, NULL, 0);\n    CU_ASSERT_EQUAL(t.migrations.num_rows, 3);\n\n    /* Note: trees 1 and 2 are identical now\n     *\n    0.09┊    5    ┊     5   ┊     5   ┊\n        ┊   ┏┻┓   ┊   ┏━┻┓  ┊   ┏━┻┓  ┊\n    0.07┊   ┃ ┃   ┊   ┃  4  ┊   ┃  4  ┊\n        ┊   ┃ ┃   ┊   ┃ ┏┻┓ ┊   ┃ ┏┻┓ ┊\n    0.00┊ 0 1 3 2 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊\n      0.00      2.00      7.00      10.00\n    */\n\n    ret = tsk_table_collection_delete_older(&t, 0.09, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_treeseq_init(&ts, &t, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 9);\n    /* Lost the mutation over 5 */\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 3);\n    /* We delete the migration at exactly 0.09. */\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_migrations(&ts), 1);\n\n    tsk_table_collection_free(&t);\n    tsk_treeseq_free(&ts);\n}\n\nint\nmain(int argc, char **argv)\n{\n    CU_TestInfo tests[] = {\n        { \"test_node_table\", test_node_table },\n        { \"test_node_table_update_row\", test_node_table_update_row },\n        { \"test_node_table_keep_rows\", test_node_table_keep_rows },\n        { \"test_node_table_takeset\", test_node_table_takeset },\n        { \"test_edge_table\", test_edge_table },\n        { \"test_edge_table_update_row\", test_edge_table_update_row },\n        { \"test_edge_table_update_row_no_metadata\",\n            test_edge_table_update_row_no_metadata },\n        { \"test_edge_table_keep_rows\", test_edge_table_keep_rows },\n        { \"test_edge_table_keep_rows_no_metadata\",\n            test_edge_table_keep_rows_no_metadata },\n        { \"test_edge_table_takeset\", test_edge_table_takeset },\n        { \"test_edge_table_copy_semantics\", test_edge_table_copy_semantics },\n        { \"test_edge_table_squash\", test_edge_table_squash },\n        { 
\"test_edge_table_squash_multiple_parents\",\n            test_edge_table_squash_multiple_parents },\n        { \"test_edge_table_squash_empty\", test_edge_table_squash_empty },\n        { \"test_edge_table_squash_single_edge\", test_edge_table_squash_single_edge },\n        { \"test_edge_table_squash_bad_intervals\", test_edge_table_squash_bad_intervals },\n        { \"test_edge_table_squash_metadata\", test_edge_table_squash_metadata },\n        { \"test_site_table\", test_site_table },\n        { \"test_site_table_update_row\", test_site_table_update_row },\n        { \"test_site_table_keep_rows\", test_site_table_keep_rows },\n        { \"test_site_table_takeset\", test_site_table_takeset },\n        { \"test_mutation_table\", test_mutation_table },\n        { \"test_mutation_table_update_row\", test_mutation_table_update_row },\n        { \"test_mutation_table_takeset\", test_mutation_table_takeset },\n        { \"test_mutation_table_keep_rows\", test_mutation_table_keep_rows },\n        { \"test_mutation_table_keep_rows_parent_references\",\n            test_mutation_table_keep_rows_parent_references },\n        { \"test_migration_table\", test_migration_table },\n        { \"test_migration_table_update_row\", test_migration_table_update_row },\n        { \"test_migration_table_keep_rows\", test_migration_table_keep_rows },\n        { \"test_migration_table_takeset\", test_migration_table_takeset },\n        { \"test_individual_table\", test_individual_table },\n        { \"test_individual_table_takeset\", test_individual_table_takeset },\n        { \"test_individual_table_update_row\", test_individual_table_update_row },\n        { \"test_individual_table_keep_rows\", test_individual_table_keep_rows },\n        { \"test_individual_table_keep_rows_parent_references\",\n            test_individual_table_keep_rows_parent_references },\n        { \"test_population_table\", test_population_table },\n        { \"test_population_table_update_row\", 
test_population_table_update_row },\n        { \"test_population_table_keep_rows\", test_population_table_keep_rows },\n        { \"test_population_table_takeset\", test_population_table_takeset },\n        { \"test_provenance_table\", test_provenance_table },\n        { \"test_provenance_table_update_row\", test_provenance_table_update_row },\n        { \"test_provenance_table_keep_rows\", test_provenance_table_keep_rows },\n        { \"test_provenance_table_takeset\", test_provenance_table_takeset },\n        { \"test_table_size_increments\", test_table_size_increments },\n        { \"test_table_expansion\", test_table_expansion },\n        { \"test_ragged_expansion\", test_ragged_expansion },\n        { \"test_table_collection_equals_options\", test_table_collection_equals_options },\n        { \"test_table_collection_simplify_errors\",\n            test_table_collection_simplify_errors },\n        { \"test_table_collection_time_units\", test_table_collection_time_units },\n        { \"test_table_collection_reference_sequence\",\n            test_table_collection_reference_sequence },\n        { \"test_table_collection_has_reference_sequence\",\n            test_table_collection_has_reference_sequence },\n        { \"test_table_collection_metadata\", test_table_collection_metadata },\n        { \"test_reference_sequence_state_machine\",\n            test_reference_sequence_state_machine },\n        { \"test_reference_sequence_take\", test_reference_sequence_take },\n        { \"test_reference_sequence\", test_reference_sequence },\n\n        { \"test_simplify_tables_drops_indexes\", test_simplify_tables_drops_indexes },\n        { \"test_simplify_empty_tables\", test_simplify_empty_tables },\n        { \"test_simplify_metadata\", test_simplify_metadata },\n        { \"test_link_ancestors_no_edges\", test_link_ancestors_no_edges },\n        { \"test_link_ancestors_input_errors\", test_link_ancestors_input_errors },\n        { \"test_link_ancestors_single_tree\", 
test_link_ancestors_single_tree },\n        { \"test_link_ancestors_paper\", test_link_ancestors_paper },\n        { \"test_link_ancestors_samples_and_ancestors_overlap\",\n            test_link_ancestors_samples_and_ancestors_overlap },\n        { \"test_link_ancestors_multiple_to_single_tree\",\n            test_link_ancestors_multiple_to_single_tree },\n        { \"test_ibd_segments_debug\", test_ibd_segments_debug },\n        { \"test_ibd_segments_caterpillar_tree\", test_ibd_segments_caterpillar_tree },\n        { \"test_ibd_segments_single_tree\", test_ibd_segments_single_tree },\n        { \"test_ibd_segments_single_tree_options\",\n            test_ibd_segments_single_tree_options },\n        { \"test_ibd_segments_multiple_trees\", test_ibd_segments_multiple_trees },\n        { \"test_ibd_segments_empty_result\", test_ibd_segments_empty_result },\n        { \"test_ibd_segments_min_span_max_time\", test_ibd_segments_min_span_max_time },\n        { \"test_ibd_segments_single_tree_between\",\n            test_ibd_segments_single_tree_between },\n        { \"test_ibd_segments_samples_are_descendants\",\n            test_ibd_segments_samples_are_descendants },\n        { \"test_ibd_segments_multiple_ibd_paths\", test_ibd_segments_multiple_ibd_paths },\n        { \"test_ibd_segments_odd_topologies\", test_ibd_segments_odd_topologies },\n        { \"test_ibd_segments_errors\", test_ibd_segments_errors },\n        { \"test_sorter_interface\", test_sorter_interface },\n        { \"test_sort_tables_canonical_errors\", test_sort_tables_canonical_errors },\n        { \"test_sort_tables_canonical\", test_sort_tables_canonical },\n        { \"test_sort_tables_drops_indexes\", test_sort_tables_drops_indexes },\n        { \"test_sort_tables_edge_metadata\", test_sort_tables_edge_metadata },\n        { \"test_sort_tables_errors\", test_sort_tables_errors },\n        { \"test_sort_tables_individuals\", test_sort_tables_individuals },\n        { 
\"test_sort_tables_mutation_times\", test_sort_tables_mutation_times },\n        { \"test_sort_tables_mutations\", test_sort_tables_mutations },\n        { \"test_sort_tables_migrations\", test_sort_tables_migrations },\n        { \"test_sort_tables_no_edge_metadata\", test_sort_tables_no_edge_metadata },\n        { \"test_sort_tables_offsets\", test_sort_tables_offsets },\n        { \"test_edge_update_invalidates_index\", test_edge_update_invalidates_index },\n        { \"test_copy_table_collection\", test_copy_table_collection },\n        { \"test_dump_unindexed\", test_dump_unindexed },\n        { \"test_dump_load_empty\", test_dump_load_empty },\n        { \"test_dump_load_unsorted\", test_dump_load_unsorted },\n        { \"test_dump_load_metadata_schema\", test_dump_load_metadata_schema },\n        { \"test_dump_fail_no_file\", test_dump_fail_no_file },\n        { \"test_load_reindex\", test_load_reindex },\n        { \"test_table_overflow\", test_table_overflow },\n        { \"test_column_overflow\", test_column_overflow },\n        { \"test_table_collection_check_integrity\",\n            test_table_collection_check_integrity },\n        { \"test_table_collection_check_integrity_no_populations\",\n            test_table_collection_check_integrity_no_populations },\n        { \"test_table_collection_check_integrity_bad_indexes_example\",\n            test_table_collection_check_integrity_bad_indexes_example },\n        { \"test_table_collection_check_integrity_bad_indexes\",\n            test_table_collection_check_integrity_bad_indexes },\n        { \"test_check_integrity_bad_mutation_parent_topology\",\n            test_check_integrity_bad_mutation_parent_topology },\n        { \"test_table_collection_compute_mutation_parents_tolerates_invalid_input\",\n            test_table_collection_compute_mutation_parents_tolerates_invalid_input },\n        { \"test_table_collection_compute_mutation_parents_restores_on_error\",\n            
test_table_collection_compute_mutation_parents_restores_on_error },\n        { \"test_table_collection_subset\", test_table_collection_subset },\n        { \"test_table_collection_subset_unsorted\",\n            test_table_collection_subset_unsorted },\n        { \"test_table_collection_subset_errors\", test_table_collection_subset_errors },\n        { \"test_table_collection_union\", test_table_collection_union },\n        { \"test_table_collection_disjoint_union\", test_table_collection_disjoint_union },\n        { \"test_table_collection_union_middle_merge\",\n            test_table_collection_union_middle_merge },\n        { \"test_table_collection_union_errors\", test_table_collection_union_errors },\n        { \"test_table_collection_clear\", test_table_collection_clear },\n        { \"test_table_collection_takeset_indexes\",\n            test_table_collection_takeset_indexes },\n        { \"test_table_collection_delete_older\", test_table_collection_delete_older },\n        { NULL, NULL },\n    };\n\n    return test_main(tests, argc, argv);\n}\n"
  },
  {
    "path": "c/tests/test_trees.c",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2024 Tskit Developers\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n#include \"testlib.h\"\n#include <tskit/trees.h>\n#include <tskit/genotypes.h>\n\n#include <unistd.h>\n#include <stdlib.h>\n\n/*=======================================================\n * Verification utilities.\n *======================================================*/\n\n/* Checks if the specified trees are topologically equivalent, i.e, represent\n * the same tree without checking state specific to seeking.*/\nstatic void\ncheck_trees_equal(tsk_tree_t *self, tsk_tree_t *other)\n{\n    tsk_size_t N = self->num_nodes;\n\n    CU_ASSERT_FATAL(self->tree_sequence == other->tree_sequence);\n    CU_ASSERT_FATAL(self->index == other->index);\n    CU_ASSERT_FATAL(self->interval.left == other->interval.left);\n    CU_ASSERT_FATAL(self->interval.right == other->interval.right);\n    
CU_ASSERT_FATAL(self->sites_length == other->sites_length);\n    CU_ASSERT_FATAL(self->sites == other->sites);\n    CU_ASSERT_FATAL(self->samples == other->samples);\n    CU_ASSERT_FATAL(self->num_edges == other->num_edges);\n    CU_ASSERT_FATAL(tsk_memcmp(self->parent, other->parent, N * sizeof(tsk_id_t)) == 0);\n    CU_ASSERT_FATAL(tsk_tree_equals(self, other));\n}\n\nstatic void\ncheck_trees_identical(tsk_tree_t *self, tsk_tree_t *other)\n{\n    tsk_size_t N = self->num_nodes;\n\n    check_trees_equal(self, other);\n    CU_ASSERT_FATAL(self->left_index == other->left_index);\n    CU_ASSERT_FATAL(self->right_index == other->right_index);\n    CU_ASSERT_FATAL(self->direction == other->direction);\n\n    CU_ASSERT_FATAL(\n        tsk_memcmp(self->left_child, other->left_child, N * sizeof(tsk_id_t)) == 0);\n    CU_ASSERT_FATAL(\n        tsk_memcmp(self->right_child, other->right_child, N * sizeof(tsk_id_t)) == 0);\n    CU_ASSERT_FATAL(\n        tsk_memcmp(self->left_sib, other->left_sib, N * sizeof(tsk_id_t)) == 0);\n    CU_ASSERT_FATAL(\n        tsk_memcmp(self->right_sib, other->right_sib, N * sizeof(tsk_id_t)) == 0);\n    CU_ASSERT_FATAL(\n        tsk_memcmp(self->num_children, other->num_children, N * sizeof(tsk_id_t)) == 0);\n    CU_ASSERT_FATAL(tsk_memcmp(self->edge, other->edge, N * sizeof(tsk_id_t)) == 0);\n\n    CU_ASSERT_EQUAL_FATAL(self->num_samples == NULL, other->num_samples == NULL)\n    CU_ASSERT_EQUAL_FATAL(\n        self->num_tracked_samples == NULL, other->num_tracked_samples == NULL)\n    if (self->num_samples != NULL) {\n        CU_ASSERT_FATAL(tsk_memcmp(self->num_samples, other->num_samples,\n                            N * sizeof(*self->num_samples))\n                        == 0);\n        CU_ASSERT_FATAL(tsk_memcmp(self->num_tracked_samples, other->num_tracked_samples,\n                            N * sizeof(*self->num_tracked_samples))\n                        == 0);\n    }\n\n    CU_ASSERT_EQUAL_FATAL(self->left_sample == NULL, 
other->left_sample == NULL)\n    CU_ASSERT_EQUAL_FATAL(self->right_sample == NULL, other->right_sample == NULL)\n    CU_ASSERT_EQUAL_FATAL(self->next_sample == NULL, other->next_sample == NULL)\n    if (self->left_sample != NULL) {\n        CU_ASSERT_FATAL(tsk_memcmp(self->left_sample, other->left_sample,\n                            N * sizeof(*self->left_sample))\n                        == 0);\n        CU_ASSERT_FATAL(tsk_memcmp(self->right_sample, other->right_sample,\n                            N * sizeof(*self->right_sample))\n                        == 0);\n        CU_ASSERT_FATAL(\n            tsk_memcmp(self->next_sample, other->next_sample,\n                self->tree_sequence->num_samples * sizeof(*self->next_sample))\n            == 0);\n    }\n}\n\nstatic void\nverify_compute_mutation_parents(tsk_treeseq_t *ts)\n{\n    int ret;\n    tsk_size_t size = tsk_treeseq_get_num_mutations(ts) * sizeof(tsk_id_t);\n    tsk_id_t *parent = tsk_malloc(size);\n    tsk_table_collection_t tables;\n\n    CU_ASSERT_FATAL(parent != NULL);\n    ret = tsk_treeseq_copy_tables(ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_memcpy(parent, tables.mutations.parent, size);\n    /* tsk_table_collection_print_state(&tables, stdout); */\n    /* Make sure the tables are actually updated */\n    tsk_memset(tables.mutations.parent, 0xff, size);\n\n    ret = tsk_table_collection_compute_mutation_parents(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tsk_memcmp(parent, tables.mutations.parent, size), 0);\n    /* printf(\"after\\n\"); */\n    /* tsk_table_collection_print_state(&tables, stdout); */\n\n    free(parent);\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\nverify_compute_mutation_times(tsk_treeseq_t *ts)\n{\n    int ret;\n    tsk_size_t j;\n    tsk_size_t size = tsk_treeseq_get_num_mutations(ts) * sizeof(tsk_id_t);\n    tsk_id_t *time = tsk_malloc(size);\n    tsk_table_collection_t tables;\n\n    CU_ASSERT_FATAL(time != 
NULL);\n    ret = tsk_treeseq_copy_tables(ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_memcpy(time, tables.mutations.time, size);\n    /* Time should be set to TSK_UNKNOWN_TIME before computing */\n    for (j = 0; j < tables.mutations.num_rows; j++) {\n        tables.mutations.time[j] = TSK_UNKNOWN_TIME;\n    }\n\n    ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tsk_memcmp(time, tables.mutations.time, size), 0);\n\n    free(time);\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\nverify_individual_nodes(tsk_treeseq_t *ts)\n{\n    int ret;\n    tsk_individual_t individual;\n    tsk_id_t k;\n    tsk_size_t num_nodes = tsk_treeseq_get_num_nodes(ts);\n    tsk_size_t num_individuals = tsk_treeseq_get_num_individuals(ts);\n    tsk_size_t j;\n\n    for (k = 0; k < (tsk_id_t) num_individuals; k++) {\n        ret = tsk_treeseq_get_individual(ts, k, &individual);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        for (j = 0; j < individual.nodes_length; j++) {\n            CU_ASSERT_FATAL(individual.nodes[j] < (tsk_id_t) num_nodes);\n            CU_ASSERT_EQUAL_FATAL(k, ts->tables->nodes.individual[individual.nodes[j]]);\n        }\n    }\n}\n\nstatic void\nverify_tree_pos(const tsk_treeseq_t *ts, tsk_size_t num_trees, tsk_id_t *tree_parents)\n{\n    int ret;\n    const tsk_size_t N = tsk_treeseq_get_num_nodes(ts);\n    const tsk_id_t *edges_parent = ts->tables->edges.parent;\n    const tsk_id_t *edges_child = ts->tables->edges.child;\n    const double *restrict edges_left = ts->tables->edges.left;\n    const double *restrict edges_right = ts->tables->edges.right;\n    tsk_tree_position_t tree_pos;\n    tsk_id_t *known_parent;\n    tsk_id_t *parent = tsk_malloc(N * sizeof(*parent));\n    tsk_id_t u, index, j, e;\n    bool valid;\n\n    CU_ASSERT_FATAL(parent != NULL);\n\n    ret = tsk_tree_position_init(&tree_pos, ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    for (u = 0; u < 
(tsk_id_t) N; u++) {\n        parent[u] = TSK_NULL;\n    }\n\n    for (index = 0; index < (tsk_id_t) num_trees; index++) {\n        known_parent = tree_parents + N * (tsk_size_t) index;\n\n        valid = tsk_tree_position_next(&tree_pos);\n        CU_ASSERT_TRUE(valid);\n        CU_ASSERT_EQUAL(index, tree_pos.index);\n\n        for (j = tree_pos.out.start; j < tree_pos.out.stop; j++) {\n            e = tree_pos.out.order[j];\n            parent[edges_child[e]] = TSK_NULL;\n        }\n\n        for (j = tree_pos.in.start; j < tree_pos.in.stop; j++) {\n            e = tree_pos.in.order[j];\n            parent[edges_child[e]] = edges_parent[e];\n        }\n\n        for (u = 0; u < (tsk_id_t) N; u++) {\n            CU_ASSERT_EQUAL(parent[u], known_parent[u]);\n        }\n    }\n\n    valid = tsk_tree_position_next(&tree_pos);\n    CU_ASSERT_FALSE(valid);\n    for (j = tree_pos.out.start; j < tree_pos.out.stop; j++) {\n        e = tree_pos.out.order[j];\n        parent[edges_child[e]] = TSK_NULL;\n    }\n    for (u = 0; u < (tsk_id_t) N; u++) {\n        CU_ASSERT_EQUAL(parent[u], TSK_NULL);\n    }\n\n    for (index = (tsk_id_t) num_trees - 1; index >= 0; index--) {\n        known_parent = tree_parents + N * (tsk_size_t) index;\n\n        valid = tsk_tree_position_prev(&tree_pos);\n        CU_ASSERT_TRUE(valid);\n        CU_ASSERT_EQUAL(index, tree_pos.index);\n\n        for (j = tree_pos.out.start; j > tree_pos.out.stop; j--) {\n            e = tree_pos.out.order[j];\n            parent[edges_child[e]] = TSK_NULL;\n        }\n\n        for (j = tree_pos.in.start; j > tree_pos.in.stop; j--) {\n            CU_ASSERT_FATAL(j >= 0);\n            e = tree_pos.in.order[j];\n            parent[edges_child[e]] = edges_parent[e];\n        }\n\n        for (u = 0; u < (tsk_id_t) N; u++) {\n            CU_ASSERT_EQUAL(parent[u], known_parent[u]);\n        }\n    }\n\n    valid = tsk_tree_position_prev(&tree_pos);\n    CU_ASSERT_FALSE(valid);\n    for (j = tree_pos.out.start; j 
> tree_pos.out.stop; j--) {\n        e = tree_pos.out.order[j];\n        parent[edges_child[e]] = TSK_NULL;\n    }\n    for (u = 0; u < (tsk_id_t) N; u++) {\n        CU_ASSERT_EQUAL(parent[u], TSK_NULL);\n    }\n\n    for (index = 0; index < (tsk_id_t) num_trees; index++) {\n        known_parent = tree_parents + N * (tsk_size_t) index;\n        ret = tsk_tree_position_init(&tree_pos, ts, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n        ret = tsk_tree_position_seek_forward(&tree_pos, index);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(index, tree_pos.index);\n\n        for (j = tree_pos.in.start; j != tree_pos.in.stop; j++) {\n            e = tree_pos.in.order[j];\n            if (edges_left[e] <= tree_pos.interval.left\n                && tree_pos.interval.left < edges_right[e]) {\n                parent[edges_child[e]] = edges_parent[e];\n            }\n        }\n        for (u = 0; u < (tsk_id_t) N; u++) {\n            CU_ASSERT_EQUAL(parent[u], known_parent[u]);\n        }\n\n        tsk_tree_position_free(&tree_pos);\n        for (u = 0; u < (tsk_id_t) N; u++) {\n            parent[u] = TSK_NULL;\n        }\n    }\n\n    valid = tsk_tree_position_next(&tree_pos);\n    CU_ASSERT_FALSE(valid);\n\n    for (index = (tsk_id_t) num_trees - 1; index >= 0; index--) {\n        known_parent = tree_parents + N * (tsk_size_t) index;\n        ret = tsk_tree_position_init(&tree_pos, ts, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n        ret = tsk_tree_position_seek_backward(&tree_pos, index);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(index, tree_pos.index);\n\n        for (j = tree_pos.in.start; j != tree_pos.in.stop; j--) {\n            e = tree_pos.in.order[j];\n            if (edges_right[e] >= tree_pos.interval.right\n                && tree_pos.interval.right > edges_left[e]) {\n                parent[edges_child[e]] = edges_parent[e];\n            }\n        }\n\n        for (u = 0; u < (tsk_id_t) N; u++) {\n       
     CU_ASSERT_EQUAL(parent[u], known_parent[u]);\n        }\n\n        for (u = 0; u < (tsk_id_t) N; u++) {\n            parent[u] = TSK_NULL;\n        }\n        tsk_tree_position_free(&tree_pos);\n    }\n\n    tsk_safe_free(parent);\n}\n\nstatic void\nverify_trees(tsk_treeseq_t *ts, tsk_size_t num_trees, tsk_id_t *parents)\n{\n    int ret;\n    tsk_id_t u, j, v;\n    uint32_t mutation_index, site_index;\n    tsk_size_t k, l, tree_sites_length;\n    const tsk_site_t *sites = NULL;\n    tsk_tree_t tree, skip_tree;\n    tsk_size_t num_edges;\n    tsk_size_t num_nodes = tsk_treeseq_get_num_nodes(ts);\n    tsk_size_t num_sites = tsk_treeseq_get_num_sites(ts);\n    tsk_size_t num_mutations = tsk_treeseq_get_num_mutations(ts);\n    const double *breakpoints = tsk_treeseq_get_breakpoints(ts);\n\n    ret = tsk_tree_init(&tree, ts, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_tree_init(&skip_tree, ts, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(ts), num_trees);\n\n    CU_ASSERT_EQUAL(tree.index, -1);\n    site_index = 0;\n    mutation_index = 0;\n    j = 0;\n    for (ret = tsk_tree_first(&tree); ret == TSK_TREE_OK; ret = tsk_tree_next(&tree)) {\n        CU_ASSERT_EQUAL(j, (tsk_id_t) tree.index);\n        tsk_tree_print_state(&tree, _devnull);\n        /* tsk_tree_print_state(&tree, stdout); */\n        CU_ASSERT_EQUAL(tree.interval.left, breakpoints[j]);\n        num_edges = 0;\n        for (u = 0; u < (tsk_id_t) num_nodes; u++) {\n            ret = tsk_tree_get_parent(&tree, u, &v);\n            CU_ASSERT_EQUAL(ret, 0);\n            CU_ASSERT_EQUAL(v, parents[j * (tsk_id_t) num_nodes + u]);\n            if (v != TSK_NULL) {\n                num_edges++;\n            }\n        }\n        CU_ASSERT_EQUAL(num_edges, tree.num_edges);\n        ret = tsk_tree_get_sites(&tree, &sites, &tree_sites_length);\n        CU_ASSERT_EQUAL(ret, 0);\n        for (k = 0; k < tree_sites_length; k++) {\n            CU_ASSERT_EQUAL(sites[k].id, (tsk_id_t) site_index);\n            
for (l = 0; l < sites[k].mutations_length; l++) {\n                CU_ASSERT_EQUAL(sites[k].mutations[l].id, (tsk_id_t) mutation_index);\n                CU_ASSERT_EQUAL(sites[k].mutations[l].site, (tsk_id_t) site_index);\n                mutation_index++;\n            }\n            site_index++;\n        }\n        /* Check the skip tree */\n        ret = tsk_tree_first(&skip_tree);\n        CU_ASSERT_EQUAL(ret, TSK_TREE_OK);\n        ret = tsk_tree_seek(&skip_tree, breakpoints[j], TSK_SEEK_SKIP);\n        CU_ASSERT_EQUAL(ret, 0);\n        /* Calling print_state here also verifies the integrity of the tree */\n        tsk_tree_print_state(&skip_tree, _devnull);\n        check_trees_equal(&tree, &skip_tree);\n        ret = tsk_tree_last(&skip_tree);\n        CU_ASSERT_EQUAL(ret, TSK_TREE_OK);\n        ret = tsk_tree_seek(&skip_tree, breakpoints[j], TSK_SEEK_SKIP);\n        CU_ASSERT_EQUAL(ret, 0);\n        tsk_tree_print_state(&skip_tree, _devnull);\n        check_trees_equal(&tree, &skip_tree);\n\n        j++;\n    }\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(site_index, num_sites);\n    CU_ASSERT_EQUAL(mutation_index, num_mutations);\n    CU_ASSERT_EQUAL(tree.index, -1);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(ts), breakpoints[j]);\n\n    tsk_tree_free(&tree);\n    tsk_tree_free(&skip_tree);\n\n    verify_tree_pos(ts, num_trees, parents);\n}\n\nstatic tsk_tree_t *\nget_tree_list(tsk_treeseq_t *ts)\n{\n    int ret;\n    tsk_tree_t t, *trees;\n    tsk_size_t num_trees;\n\n    num_trees = tsk_treeseq_get_num_trees(ts);\n    ret = tsk_tree_init(&t, ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    trees = tsk_malloc(num_trees * sizeof(tsk_tree_t));\n    CU_ASSERT_FATAL(trees != NULL);\n    for (ret = tsk_tree_first(&t); ret == TSK_TREE_OK; ret = tsk_tree_next(&t)) {\n        CU_ASSERT_FATAL(t.index < (tsk_id_t) num_trees);\n        ret = tsk_tree_copy(&t, &trees[t.index], 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        
check_trees_equal(&trees[t.index], &t);\n        /* Make sure the left and right coordinates are also OK */\n        CU_ASSERT_EQUAL(trees[t.index].interval.left, t.interval.left);\n        CU_ASSERT_EQUAL(trees[t.index].interval.right, t.interval.right);\n    }\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_free(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    return trees;\n}\n\nstatic void\nverify_tree_next_prev(tsk_treeseq_t *ts)\n{\n    int ret;\n    tsk_tree_t *trees, t;\n    tsk_id_t j;\n    tsk_id_t num_trees = (tsk_id_t) tsk_treeseq_get_num_trees(ts);\n\n    trees = get_tree_list(ts);\n    ret = tsk_tree_init(&t, ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Single forward pass */\n    j = 0;\n    for (ret = tsk_tree_first(&t); ret == TSK_TREE_OK; ret = tsk_tree_next(&t)) {\n        CU_ASSERT_EQUAL_FATAL(j, t.index);\n        check_trees_equal(&t, &trees[t.index]);\n        j++;\n    }\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(j, num_trees);\n\n    /* Single reverse pass */\n    j = num_trees;\n    for (ret = tsk_tree_last(&t); ret == TSK_TREE_OK; ret = tsk_tree_prev(&t)) {\n        CU_ASSERT_EQUAL_FATAL(j - 1, t.index);\n        check_trees_equal(&t, &trees[t.index]);\n        j--;\n    }\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(j, 0);\n\n    /* Full forward, then reverse */\n    j = 0;\n    for (ret = tsk_tree_first(&t); ret == TSK_TREE_OK; ret = tsk_tree_next(&t)) {\n        CU_ASSERT_EQUAL_FATAL(j, t.index);\n        check_trees_equal(&t, &trees[t.index]);\n        j++;\n    }\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(j, num_trees);\n    while ((ret = tsk_tree_prev(&t)) == TSK_TREE_OK) {\n        CU_ASSERT_EQUAL_FATAL(j - 1, t.index);\n        check_trees_equal(&t, &trees[t.index]);\n        j--;\n    }\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(j, 0);\n    CU_ASSERT_EQUAL_FATAL(t.index, -1);\n\n    /* Full reverse then forward */\n    j = num_trees;\n    
for (ret = tsk_tree_last(&t); ret == TSK_TREE_OK; ret = tsk_tree_prev(&t)) {\n        CU_ASSERT_EQUAL_FATAL(j - 1, t.index);\n        check_trees_equal(&t, &trees[t.index]);\n        j--;\n    }\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(j, 0);\n    while ((ret = tsk_tree_next(&t)) == TSK_TREE_OK) {\n        CU_ASSERT_EQUAL_FATAL(j, t.index);\n        check_trees_equal(&t, &trees[t.index]);\n        j++;\n    }\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(j, num_trees);\n    CU_ASSERT_EQUAL_FATAL(t.index, -1);\n\n    /* Do a zigzagging traversal */\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n    for (j = 1; j < TSK_MIN(10, num_trees / 2); j++) {\n        while (t.index < num_trees - j) {\n            ret = tsk_tree_next(&t);\n            CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n        }\n        CU_ASSERT_EQUAL_FATAL(t.index, num_trees - j);\n        check_trees_equal(&t, &trees[t.index]);\n        while (t.index > j) {\n            ret = tsk_tree_prev(&t);\n            CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n        }\n        CU_ASSERT_EQUAL_FATAL(t.index, j);\n        check_trees_equal(&t, &trees[t.index]);\n    }\n\n    ret = tsk_tree_clear(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    /* Calling next() on a cleared tree should be the same as first() */\n    j = 0;\n    while ((ret = tsk_tree_next(&t)) == TSK_TREE_OK) {\n        CU_ASSERT_EQUAL_FATAL(j, t.index);\n        check_trees_equal(&t, &trees[t.index]);\n        j++;\n    }\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(j, num_trees);\n\n    ret = tsk_tree_free(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_init(&t, ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    /* Calling prev() on an uninitialised tree should be the same as last() */\n\n    j = num_trees;\n    while ((ret = tsk_tree_prev(&t)) == TSK_TREE_OK) {\n        CU_ASSERT_EQUAL_FATAL(j - 1, t.index);\n        check_trees_equal(&t, 
&trees[t.index]);\n        j--;\n    }\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(j, 0);\n\n    /* Free the trees. */\n    ret = tsk_tree_free(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (j = 0; j < (tsk_id_t) tsk_treeseq_get_num_trees(ts); j++) {\n        tsk_tree_free(&trees[j]);\n    }\n    free(trees);\n}\n\nstatic void\nverify_edge_array_single_tree(\n    tsk_tree_t *tree, tsk_edge_table_t *edge_table, tsk_size_t num_nodes)\n{\n    int ret;\n    tsk_id_t c, edge_id;\n    tsk_edge_t edge;\n    tsk_size_t count_edges = 0;\n\n    for (c = 0; c <= (tsk_id_t) num_nodes; c++) {\n        edge_id = tree->edge[c];\n        if (edge_id == TSK_NULL) {\n            /*c is either (virtual) root,\n            or is not associated with an edge along this tree */\n            CU_ASSERT_EQUAL(tree->parent[c], TSK_NULL);\n        } else {\n            ret = tsk_edge_table_get_row(edge_table, edge_id, &edge);\n            CU_ASSERT_EQUAL_FATAL(ret, 0);\n            CU_ASSERT_EQUAL(edge.id, edge_id);\n            CU_ASSERT_EQUAL(edge.parent, tree->parent[c]);\n            CU_ASSERT_EQUAL(edge.child, c);\n            count_edges++;\n        }\n    }\n\n    CU_ASSERT_EQUAL(count_edges, tree->num_edges);\n}\n\nstatic void\nverify_edge_array_trees(tsk_treeseq_t *ts)\n{\n    int ret;\n    tsk_tree_t t;\n    tsk_edge_table_t edge_table;\n    tsk_size_t num_nodes;\n    tsk_id_t c;\n\n    num_nodes = tsk_treeseq_get_num_nodes(ts);\n\n    edge_table = ts->tables->edges;\n    ret = tsk_tree_init(&t, ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    /* verify initialized edge array */\n    for (c = 0; c <= (tsk_id_t) num_nodes; c++) {\n        CU_ASSERT_EQUAL(t.edge[c], TSK_NULL)\n    }\n    /* verify edge array for each tree in treesequence */\n    for (ret = tsk_tree_first(&t); ret == TSK_TREE_OK; ret = tsk_tree_next(&t)) {\n        verify_edge_array_single_tree(&t, &edge_table, num_nodes);\n    }\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    /* verify cleared edge 
array */\n    for (c = 0; c <= (tsk_id_t) num_nodes; c++) {\n        CU_ASSERT_EQUAL(t.edge[c], TSK_NULL)\n    }\n\n    tsk_tree_free(&t);\n}\n\n/* When we keep all sites in simplify, the genotypes for the subset of the\n * samples should be the same as the original */\nstatic void\nverify_simplify_genotypes(tsk_treeseq_t *ts, tsk_treeseq_t *subset,\n    const tsk_id_t *samples, tsk_size_t num_samples)\n{\n    int ret;\n    tsk_size_t m = tsk_treeseq_get_num_sites(ts);\n    tsk_vargen_t vargen, subset_vargen;\n    tsk_variant_t *variant, *subset_variant;\n    tsk_size_t j, k;\n    int32_t a1, a2;\n    const tsk_id_t *sample_index_map;\n\n    sample_index_map = tsk_treeseq_get_sample_index_map(ts);\n\n    /* tsk_treeseq_print_state(ts, stdout); */\n    /* tsk_treeseq_print_state(subset, stdout); */\n\n    ret = tsk_vargen_init(&vargen, ts, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_vargen_init(\n        &subset_vargen, subset, NULL, 0, NULL, TSK_ISOLATED_NOT_MISSING);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(m, tsk_treeseq_get_num_sites(subset));\n\n    for (j = 0; j < m; j++) {\n        ret = tsk_vargen_next(&vargen, &variant);\n        CU_ASSERT_EQUAL_FATAL(ret, 1);\n        ret = tsk_vargen_next(&subset_vargen, &subset_variant);\n        CU_ASSERT_EQUAL_FATAL(ret, 1);\n        CU_ASSERT_EQUAL(variant->site.id, (tsk_id_t) j)\n        CU_ASSERT_EQUAL(subset_variant->site.id, (tsk_id_t) j)\n        CU_ASSERT_EQUAL(variant->site.position, subset_variant->site.position);\n        for (k = 0; k < num_samples; k++) {\n            CU_ASSERT_FATAL(sample_index_map[samples[k]] < (tsk_id_t) ts->num_samples);\n            a1 = variant->genotypes[sample_index_map[samples[k]]];\n            a2 = subset_variant->genotypes[k];\n            /* printf(\"a1 = %d, a2 = %d\\n\", a1, a2); */\n            /* printf(\"k = %d original node = %d \" */\n            /*         \"original_index = %d a1=%.*s a2=%.*s\\n\", */\n            /*     
    (int) k, samples[k], sample_index_map[samples[k]], */\n            /*         variant->allele_lengths[a1], variant->alleles[a1], */\n            /*         subset_variant->allele_lengths[a2], subset_variant->alleles[a2]);\n             */\n            CU_ASSERT_FATAL(a1 < (int) variant->num_alleles);\n            CU_ASSERT_FATAL(a2 < (int) subset_variant->num_alleles);\n            CU_ASSERT_EQUAL_FATAL(\n                variant->allele_lengths[a1], subset_variant->allele_lengths[a2]);\n            CU_ASSERT_NSTRING_EQUAL_FATAL(variant->alleles[a1],\n                subset_variant->alleles[a2], variant->allele_lengths[a1]);\n        }\n    }\n    tsk_vargen_free(&vargen);\n    tsk_vargen_free(&subset_vargen);\n}\n\nstatic void\nverify_simplify_properties(tsk_treeseq_t *ts, tsk_treeseq_t *subset,\n    const tsk_id_t *samples, tsk_size_t num_samples, tsk_id_t *node_map)\n{\n    int ret;\n    tsk_node_t n1, n2;\n    tsk_tree_t full_tree, subset_tree;\n    const tsk_site_t *tree_sites;\n    tsk_size_t tree_sites_length;\n    uint32_t j, k;\n    tsk_id_t u, mrca1, mrca2;\n    tsk_size_t total_sites;\n\n    CU_ASSERT_EQUAL(\n        tsk_treeseq_get_sequence_length(ts), tsk_treeseq_get_sequence_length(subset));\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(subset), num_samples);\n    CU_ASSERT(tsk_treeseq_get_num_nodes(ts) >= tsk_treeseq_get_num_nodes(subset));\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(subset), num_samples);\n\n    /* Check the sample properties */\n    for (j = 0; j < num_samples; j++) {\n        ret = tsk_treeseq_get_node(ts, samples[j], &n1);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(node_map[samples[j]], (tsk_id_t) j);\n        ret = tsk_treeseq_get_node(subset, node_map[samples[j]], &n2);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL_FATAL(n1.population, n2.population);\n        CU_ASSERT_EQUAL_FATAL(n1.time, n2.time);\n        CU_ASSERT_EQUAL_FATAL(n1.flags, n2.flags);\n        
CU_ASSERT_EQUAL_FATAL(n1.metadata_length, n2.metadata_length);\n        CU_ASSERT_NSTRING_EQUAL(n1.metadata, n2.metadata, n2.metadata_length);\n    }\n    /* Check that node mappings are correct */\n    for (j = 0; j < tsk_treeseq_get_num_nodes(ts); j++) {\n        ret = tsk_treeseq_get_node(ts, (tsk_id_t) j, &n1);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        if (node_map[j] != TSK_NULL) {\n            ret = tsk_treeseq_get_node(subset, node_map[j], &n2);\n            CU_ASSERT_EQUAL_FATAL(ret, 0);\n            CU_ASSERT_EQUAL_FATAL(n1.population, n2.population);\n            CU_ASSERT_EQUAL_FATAL(n1.time, n2.time);\n            CU_ASSERT_EQUAL_FATAL(n1.flags, n2.flags);\n            CU_ASSERT_EQUAL_FATAL(n1.metadata_length, n2.metadata_length);\n            CU_ASSERT_NSTRING_EQUAL(n1.metadata, n2.metadata, n2.metadata_length);\n        }\n    }\n    if (num_samples == 0) {\n        CU_ASSERT_EQUAL(tsk_treeseq_get_num_edges(subset), 0);\n        CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(subset), 0);\n    } else if (num_samples == 1) {\n        CU_ASSERT_EQUAL(tsk_treeseq_get_num_edges(subset), 0);\n        CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(subset), 1);\n    }\n    /* Check the pairwise MRCAs */\n    ret = tsk_tree_init(&full_tree, ts, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_tree_init(&subset_tree, subset, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_tree_first(&full_tree);\n    CU_ASSERT_EQUAL(ret, TSK_TREE_OK);\n    ret = tsk_tree_first(&subset_tree);\n    CU_ASSERT_EQUAL(ret, TSK_TREE_OK);\n\n    total_sites = 0;\n    while (1) {\n        while (full_tree.interval.right <= subset_tree.interval.right) {\n            for (j = 0; j < num_samples; j++) {\n                for (k = j + 1; k < num_samples; k++) {\n                    ret = tsk_tree_get_mrca(&full_tree, samples[j], samples[k], &mrca1);\n                    CU_ASSERT_EQUAL_FATAL(ret, 0);\n                    ret = tsk_tree_get_mrca(&subset_tree, node_map[samples[j]],\n             
           node_map[samples[k]], &mrca2);\n                    CU_ASSERT_EQUAL_FATAL(ret, 0);\n                    if (mrca1 == TSK_NULL) {\n                        CU_ASSERT_EQUAL_FATAL(mrca2, TSK_NULL);\n                    } else {\n                        CU_ASSERT_EQUAL(node_map[mrca1], mrca2);\n                    }\n                }\n            }\n            ret = tsk_tree_next(&full_tree);\n            CU_ASSERT_FATAL(ret >= 0);\n            if (ret != 1) {\n                break;\n            }\n        }\n        /* Check the sites in this tree */\n        ret = tsk_tree_get_sites(&subset_tree, &tree_sites, &tree_sites_length);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n        for (j = 0; j < tree_sites_length; j++) {\n            CU_ASSERT(subset_tree.interval.left <= tree_sites[j].position);\n            CU_ASSERT(tree_sites[j].position < subset_tree.interval.right);\n            for (k = 0; k < tree_sites[j].mutations_length; k++) {\n                ret = tsk_tree_get_parent(\n                    &subset_tree, tree_sites[j].mutations[k].node, &u);\n                CU_ASSERT_EQUAL(ret, 0);\n            }\n            total_sites++;\n        }\n        ret = tsk_tree_next(&subset_tree);\n        if (ret != 1) {\n            break;\n        }\n    }\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(subset), total_sites);\n\n    tsk_tree_free(&subset_tree);\n    tsk_tree_free(&full_tree);\n}\n\nstatic void\nverify_simplify(tsk_treeseq_t *ts)\n{\n    int ret;\n    tsk_size_t n = tsk_treeseq_get_num_samples(ts);\n    tsk_size_t num_samples[] = { 0, 1, 2, 3, n / 2, n - 1, n };\n    tsk_size_t j;\n    const tsk_id_t *sample;\n    tsk_id_t *node_map = tsk_malloc(tsk_treeseq_get_num_nodes(ts) * sizeof(tsk_id_t));\n    tsk_treeseq_t subset;\n    tsk_flags_t options = TSK_SIMPLIFY_FILTER_SITES;\n\n    CU_ASSERT_FATAL(node_map != NULL);\n    sample = tsk_treeseq_get_samples(ts);\n    if (tsk_treeseq_get_num_migrations(ts) > 0) 
{\n        ret = tsk_treeseq_simplify(ts, sample, 2, 0, &subset, NULL);\n        CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SIMPLIFY_MIGRATIONS_NOT_SUPPORTED);\n        /* Exiting early here because simplify isn't supported with migrations. */\n        goto out;\n    }\n\n    for (j = 0; j < sizeof(num_samples) / sizeof(*num_samples); j++) {\n        if (num_samples[j] <= n) {\n            ret = tsk_treeseq_simplify(\n                ts, sample, num_samples[j], options, &subset, node_map);\n            /* printf(\"ret = %s\\n\", tsk_strerror(ret)); */\n            CU_ASSERT_EQUAL_FATAL(ret, 0);\n            verify_simplify_properties(ts, &subset, sample, num_samples[j], node_map);\n            tsk_treeseq_free(&subset);\n\n            /* Keep all sites */\n            ret = tsk_treeseq_simplify(ts, sample, num_samples[j], 0, &subset, node_map);\n            CU_ASSERT_EQUAL_FATAL(ret, 0);\n            verify_simplify_properties(ts, &subset, sample, num_samples[j], node_map);\n            verify_simplify_genotypes(ts, &subset, sample, num_samples[j]);\n            tsk_treeseq_free(&subset);\n        }\n    }\nout:\n    free(node_map);\n}\n\ntypedef struct {\n    tsk_id_t tree_index;\n    tsk_id_t node;\n    tsk_size_t count;\n} sample_count_test_t;\n\nstatic void\nverify_sample_counts(tsk_treeseq_t *ts, tsk_size_t num_tests, sample_count_test_t *tests,\n    tsk_flags_t seek_options)\n{\n    int ret;\n    tsk_size_t j, num_samples, n, k;\n    tsk_id_t stop, sample_index;\n    tsk_tree_t tree;\n    const tsk_id_t *samples;\n\n    n = tsk_treeseq_get_num_samples(ts);\n    samples = tsk_treeseq_get_samples(ts);\n\n    /* First run with the TSK_NO_SAMPLE_COUNTS feature */\n\n    ret = tsk_tree_init(&tree, ts, TSK_NO_SAMPLE_COUNTS);\n    CU_ASSERT_EQUAL(ret, 0);\n    for (j = 0; j < num_tests; j++) {\n        ret = tsk_tree_seek_index(&tree, tests[j].tree_index, seek_options);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_tree_get_num_samples(&tree, tests[j].node, 
&num_samples);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(tests[j].count, num_samples);\n        /* all operations depending on tracked samples should fail. */\n        ret = tsk_tree_get_num_tracked_samples(&tree, 0, &num_samples);\n        CU_ASSERT_EQUAL(ret, TSK_ERR_UNSUPPORTED_OPERATION);\n        /* The root should be NULL */\n        CU_ASSERT_EQUAL(tsk_tree_get_left_root(&tree), TSK_NULL);\n    }\n    tsk_tree_free(&tree);\n\n    /* Now run with TSK_SAMPLE_COUNTS but with no samples tracked. */\n    ret = tsk_tree_init(&tree, ts, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_tree_first(&tree);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n    for (j = 0; j < num_tests; j++) {\n        ret = tsk_tree_seek_index(&tree, tests[j].tree_index, seek_options);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_tree_get_num_samples(&tree, tests[j].node, &num_samples);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(tests[j].count, num_samples);\n        /* No samples are tracked, so the tracked sample count should be zero. 
*/\n        ret = tsk_tree_get_num_tracked_samples(&tree, 0, &num_samples);\n        CU_ASSERT_EQUAL(ret, 0);\n        CU_ASSERT_EQUAL(num_samples, 0);\n        /* The root should not be NULL */\n        CU_ASSERT_NOT_EQUAL(tree.virtual_root, TSK_NULL);\n    }\n    tsk_tree_free(&tree);\n\n    /* Run with TSK_SAMPLE_LISTS and TSK_NO_SAMPLE_COUNTS */\n    ret = tsk_tree_init(&tree, ts, TSK_SAMPLE_LISTS | TSK_NO_SAMPLE_COUNTS);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_tree_first(&tree);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n    for (j = 0; j < num_tests; j++) {\n        ret = tsk_tree_seek_index(&tree, tests[j].tree_index, seek_options);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_tree_get_num_samples(&tree, tests[j].node, &num_samples);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(tests[j].count, num_samples);\n        /* all operations depending on tracked samples should fail. */\n        ret = tsk_tree_get_num_tracked_samples(&tree, 0, &num_samples);\n        CU_ASSERT_EQUAL(ret, TSK_ERR_UNSUPPORTED_OPERATION);\n\n        sample_index = tree.left_sample[tests[j].node];\n        k = 0;\n        if (sample_index != TSK_NULL) {\n            stop = tree.right_sample[tests[j].node];\n            while (true) {\n                k++;\n                CU_ASSERT_FATAL(k <= tests[j].count);\n                if (sample_index == stop) {\n                    break;\n                }\n                sample_index = tree.next_sample[sample_index];\n            }\n        }\n        CU_ASSERT_EQUAL(tests[j].count, k);\n    }\n    tsk_tree_free(&tree);\n\n    /* Now use TSK_SAMPLE_LISTS */\n    ret = tsk_tree_init(&tree, ts, TSK_SAMPLE_LISTS);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_tree_set_tracked_samples(&tree, n, samples);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_tree_first(&tree);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n    for (j = 0; j < num_tests; j++) {\n        ret = tsk_tree_seek_index(&tree, 
tests[j].tree_index, seek_options);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_tree_get_num_samples(&tree, tests[j].node, &num_samples);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(tests[j].count, num_samples);\n\n        /* We're tracking all samples, so the count should be the same */\n        ret = tsk_tree_get_num_tracked_samples(&tree, tests[j].node, &num_samples);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(tests[j].count, num_samples);\n\n        sample_index = tree.left_sample[tests[j].node];\n        k = 0;\n        if (sample_index != TSK_NULL) {\n            stop = tree.right_sample[tests[j].node];\n            while (true) {\n                k++;\n                if (sample_index == stop) {\n                    break;\n                }\n                sample_index = tree.next_sample[sample_index];\n            }\n        }\n        CU_ASSERT_EQUAL(tests[j].count, k);\n    }\n    tsk_tree_free(&tree);\n}\n\nstatic void\nverify_sample_sets_for_tree(tsk_tree_t *tree)\n{\n    int ret, stack_top, j;\n    tsk_id_t u, v;\n    tsk_size_t tmp, n, num_nodes, num_samples;\n    tsk_id_t *stack, *samples;\n    const tsk_treeseq_t *ts = tree->tree_sequence;\n    tsk_id_t *sample_index_map = ts->sample_index_map;\n    const tsk_id_t *list_left = tree->left_sample;\n    const tsk_id_t *list_right = tree->right_sample;\n    const tsk_id_t *list_next = tree->next_sample;\n    tsk_id_t stop, sample_index;\n\n    n = tsk_treeseq_get_num_samples(ts);\n    num_nodes = tsk_treeseq_get_num_nodes(ts);\n    stack = tsk_malloc(n * sizeof(tsk_id_t));\n    samples = tsk_malloc(n * sizeof(tsk_id_t));\n    CU_ASSERT_FATAL(stack != NULL);\n    CU_ASSERT_FATAL(samples != NULL);\n    for (u = 0; u < (tsk_id_t) num_nodes; u++) {\n        if (tree->left_child[u] == TSK_NULL && !tsk_treeseq_is_sample(ts, u)) {\n            CU_ASSERT_EQUAL(list_left[u], TSK_NULL);\n            CU_ASSERT_EQUAL(list_right[u], TSK_NULL);\n        } 
else {\n            stack_top = 0;\n            num_samples = 0;\n            stack[stack_top] = u;\n            while (stack_top >= 0) {\n                v = stack[stack_top];\n                stack_top--;\n                if (tsk_treeseq_is_sample(ts, v)) {\n                    samples[num_samples] = v;\n                    num_samples++;\n                }\n                for (v = tree->right_child[v]; v != TSK_NULL; v = tree->left_sib[v]) {\n                    stack_top++;\n                    stack[stack_top] = v;\n                }\n            }\n            ret = tsk_tree_get_num_samples(tree, u, &tmp);\n            CU_ASSERT_EQUAL(ret, 0);\n            CU_ASSERT_EQUAL_FATAL(num_samples, tmp);\n\n            j = 0;\n            sample_index = list_left[u];\n            if (sample_index != TSK_NULL) {\n                stop = list_right[u];\n                while (true) {\n                    CU_ASSERT_TRUE_FATAL(j < (tsk_id_t) n);\n                    CU_ASSERT_EQUAL_FATAL(sample_index, sample_index_map[samples[j]]);\n                    j++;\n                    if (sample_index == stop) {\n                        break;\n                    }\n                    sample_index = list_next[sample_index];\n                }\n            }\n            CU_ASSERT_EQUAL_FATAL(j, (int) num_samples);\n        }\n    }\n    free(stack);\n    free(samples);\n}\n\nstatic void\nverify_sample_sets(tsk_treeseq_t *ts)\n{\n    int ret;\n    tsk_tree_t t;\n    tsk_id_t j;\n\n    ret = tsk_tree_init(&t, ts, TSK_SAMPLE_LISTS);\n    CU_ASSERT_EQUAL(ret, 0);\n\n    for (ret = tsk_tree_first(&t); ret == TSK_TREE_OK; ret = tsk_tree_next(&t)) {\n        verify_sample_sets_for_tree(&t);\n    }\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (ret = tsk_tree_last(&t); ret == TSK_TREE_OK; ret = tsk_tree_prev(&t)) {\n        verify_sample_sets_for_tree(&t);\n    }\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    for (j = 0; j < (tsk_id_t) tsk_treeseq_get_num_trees(ts); j++) {\n        ret = 
tsk_tree_first(&t);\n        CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n        ret = tsk_tree_seek_index(&t, j, TSK_SEEK_SKIP);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        verify_sample_sets_for_tree(&t);\n\n        ret = tsk_tree_last(&t);\n        CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n        ret = tsk_tree_seek_index(&t, j, TSK_SEEK_SKIP);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        verify_sample_sets_for_tree(&t);\n    }\n\n    tsk_tree_free(&t);\n}\n\nstatic void\nverify_empty_tree_sequence(tsk_treeseq_t *ts, double sequence_length)\n{\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_edges(ts), 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(ts), 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(ts), 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_migrations(ts), 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(ts), 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(ts), sequence_length);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(ts), 1);\n}\n\n/*=======================================================\n * Simplest test cases.\n *======================================================*/\n\nstatic void\ntest_simplest_discrete_genome(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\";\n    const char *edges = \"0  1   2   0,1\\n\";\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    tsk_id_t ret_id;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    CU_ASSERT_TRUE(tsk_treeseq_get_discrete_genome(&ts));\n\n    ret = tsk_table_collection_copy(ts.tables, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_treeseq_free(&ts);\n\n    tables.sequence_length = 1.001;\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_treeseq_get_discrete_genome(&ts));\n    tsk_treeseq_free(&ts);\n    tables.sequence_length = 1;\n\n    
ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_treeseq_get_discrete_genome(&ts));\n    tsk_treeseq_free(&ts);\n\n    tables.edges.right[0] = 0.999;\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_treeseq_get_discrete_genome(&ts));\n    tsk_treeseq_free(&ts);\n    tables.edges.right[0] = 1.0;\n\n    tables.edges.left[0] = 0.999;\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_treeseq_get_discrete_genome(&ts));\n    tsk_treeseq_free(&ts);\n    tables.edges.left[0] = 0;\n\n    ret_id = tsk_site_table_add_row(&tables.sites, 0, \"A\", 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_treeseq_get_discrete_genome(&ts));\n    tsk_treeseq_free(&ts);\n\n    tables.sites.position[0] = 0.001;\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_treeseq_get_discrete_genome(&ts));\n    tsk_treeseq_free(&ts);\n    tables.sites.position[0] = 0;\n\n    /* Need another population for a migration */\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n\n    ret_id\n        = tsk_migration_table_add_row(&tables.migrations, 0, 1, 0, 0, 1, 1.0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_treeseq_get_discrete_genome(&ts));\n    tsk_treeseq_free(&ts);\n\n    tables.migrations.left[0] = 0.001;\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    
CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_treeseq_get_discrete_genome(&ts));\n    tsk_treeseq_free(&ts);\n    tables.migrations.left[0] = 0;\n\n    tables.migrations.right[0] = 0.999;\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_treeseq_get_discrete_genome(&ts));\n    tsk_treeseq_free(&ts);\n    tables.migrations.right[0] = 1;\n\n    /* An empty tree sequence has a discrete genome. */\n    tsk_table_collection_clear(&tables, 0);\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_treeseq_get_discrete_genome(&ts));\n    tsk_treeseq_free(&ts);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_simplest_discrete_time(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\"\n                        \"0  0   0\\n\"\n                        \"0  0   0\";\n    const char *edges = \"0  1   2   0,1,3,4\\n\";\n    const char *sites = \"0.1  0\\n\"\n                        \"0.2  0\\n\"\n                        \"0.3  0\\n\"\n                        \"0.4  0\\n\";\n    const char *mutations = \"0    0     1\\n\"\n                            \"1    1     1\\n\"\n                            \"2    3     1\\n\"\n                            \"3    4     1\";\n    const char *migrations = \"0  1  0  0  1  1\";\n\n    tsk_treeseq_from_text(\n        &ts, 1, nodes, edges, migrations, sites, mutations, NULL, NULL, 0);\n    CU_ASSERT_TRUE(tsk_treeseq_get_discrete_time(&ts));\n\n    ret = tsk_table_collection_copy(ts.tables, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_treeseq_free(&ts);\n\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    
CU_ASSERT_TRUE(tsk_treeseq_get_discrete_time(&ts));\n    tsk_treeseq_free(&ts);\n\n    tables.nodes.time[0] = 0.0001;\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_treeseq_get_discrete_time(&ts));\n    tsk_treeseq_free(&ts);\n    tables.nodes.time[0] = 0;\n\n    tables.mutations.time[0] = 0.001;\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_treeseq_get_discrete_time(&ts));\n    tsk_treeseq_free(&ts);\n    tables.mutations.time[0] = 0;\n\n    tables.migrations.time[0] = 0.001;\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_treeseq_get_discrete_time(&ts));\n    tsk_treeseq_free(&ts);\n    tables.migrations.time[0] = 0;\n\n    tables.mutations.time[0] = TSK_UNKNOWN_TIME;\n    tables.mutations.time[1] = TSK_UNKNOWN_TIME;\n    tables.mutations.time[2] = TSK_UNKNOWN_TIME;\n    tables.mutations.time[3] = TSK_UNKNOWN_TIME;\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_treeseq_get_discrete_time(&ts));\n    tsk_treeseq_free(&ts);\n\n    /* An empty tree sequence has discrete time. 
*/\n    tsk_table_collection_clear(&tables, 0);\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_treeseq_get_discrete_time(&ts));\n    tsk_treeseq_free(&ts);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_simplest_min_time(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n\n    const char *nodes = \"1  0.1 0  -1\\n\"\n                        \"1  0.1 0  -1\\n\"\n                        \"1  0.1 0  -1\\n\"\n                        \"0  1 0  -1\\n\"\n                        \"0  2 0  -1\\n\";\n    const char *edges = \"0  2   3   0,1\\n\"\n                        \"0  2   4   2,3\\n\";\n    const char *sites = \"0  0\\n\"\n                        \"1  0\\n\";\n    const char *mutations = \"0  2   1   -1  0.5\\n\"\n                            \"1  3   1   -1  1.5\\n\";\n\n    tsk_treeseq_from_text(&ts, 2, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);\n    CU_ASSERT_DOUBLE_EQUAL(tsk_treeseq_get_min_time(&ts), 0.1, 1E-6);\n\n    ret = tsk_table_collection_copy(ts.tables, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_treeseq_free(&ts);\n\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_DOUBLE_EQUAL(tsk_treeseq_get_min_time(&ts), 0.1, 1E-6);\n    tsk_treeseq_free(&ts);\n\n    /* Setting mutation times to unknown should have no effect on min time. */\n    tables.mutations.time[0] = TSK_UNKNOWN_TIME;\n    tables.mutations.time[1] = TSK_UNKNOWN_TIME;\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_DOUBLE_EQUAL(tsk_treeseq_get_min_time(&ts), 0.1, 1E-6);\n    tsk_treeseq_free(&ts);\n    tables.mutations.time[0] = 0.5;\n    tables.mutations.time[1] = 1.5;\n\n    /* An empty tree sequence has infinity min time. 
*/\n    tsk_table_collection_clear(&tables, 0);\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_min_time(&ts), INFINITY);\n    tsk_treeseq_free(&ts);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_simplest_max_time(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n\n    const char *nodes = \"1  0.1 0  -1\\n\"\n                        \"1  0.1 0  -1\\n\"\n                        \"1  0.1 0  -1\\n\"\n                        \"0  1 0  -1\\n\"\n                        \"0  2 0  -1\\n\";\n    const char *edges = \"0  2   3   0,1\\n\"\n                        \"0  2   4   2,3\\n\";\n    const char *sites = \"0  0\\n\"\n                        \"1  0\\n\";\n    const char *mutations = \"0  2   1   -1  0.5\\n\"\n                            \"1  3   1   -1  1.5\\n\";\n\n    tsk_treeseq_from_text(&ts, 2, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);\n    CU_ASSERT_DOUBLE_EQUAL(tsk_treeseq_get_max_time(&ts), 2.0, 1E-6);\n\n    ret = tsk_table_collection_copy(ts.tables, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_treeseq_free(&ts);\n\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_DOUBLE_EQUAL(tsk_treeseq_get_max_time(&ts), 2.0, 1E-6);\n    tsk_treeseq_free(&ts);\n\n    /* Setting mutation times to unknown should have no effect on max time. */\n    tables.mutations.time[0] = TSK_UNKNOWN_TIME;\n    tables.mutations.time[1] = TSK_UNKNOWN_TIME;\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_DOUBLE_EQUAL(tsk_treeseq_get_max_time(&ts), 2.0, 1E-6);\n    tsk_treeseq_free(&ts);\n    tables.mutations.time[0] = 0.5;\n    tables.mutations.time[1] = 1.5;\n\n    /* An empty tree sequence has negative infinity max time. 
*/\n    tsk_table_collection_clear(&tables, 0);\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_max_time(&ts), -INFINITY);\n    tsk_treeseq_free(&ts);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_simplest_records(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\";\n    const char *edges = \"0  1   2   0,1\\n\";\n    tsk_treeseq_t ts, simplified;\n    tsk_id_t sample_ids[] = { 0, 1 };\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 1.0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);\n\n    ret = tsk_treeseq_simplify(&ts, sample_ids, 2, 0, &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));\n    tsk_treeseq_free(&simplified);\n\n    ret = tsk_treeseq_simplify(&ts, sample_ids, 2,\n        TSK_SIMPLIFY_KEEP_UNARY | TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS, &simplified,\n        NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_KEEP_UNARY_MUTUALLY_EXCLUSIVE);\n    tsk_treeseq_free(&simplified);\n\n    ret = tsk_treeseq_simplify(\n        &ts, sample_ids, 2, TSK_SIMPLIFY_KEEP_UNARY, &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));\n    tsk_treeseq_free(&simplified);\n\n    ret = tsk_treeseq_simplify(\n        &ts, sample_ids, 2, TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS, &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));\n    
tsk_treeseq_free(&simplified);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_nonbinary_records(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\";\n    const char *edges = \"0  1   4   0,1,2,3\\n\";\n    tsk_treeseq_t ts, simplified;\n    tsk_tree_t t;\n    tsk_id_t sample_ids[] = { 0, 1, 2, 3 };\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 4);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 1.0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 5);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);\n\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL(t.num_children[4], 4);\n    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&t), 1);\n    tsk_tree_free(&t);\n\n    ret = tsk_treeseq_simplify(&ts, sample_ids, 4, 0, &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));\n    tsk_treeseq_free(&simplified);\n\n    ret = tsk_treeseq_simplify(\n        &ts, sample_ids, 4, TSK_SIMPLIFY_KEEP_UNARY, &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));\n    tsk_treeseq_free(&simplified);\n\n    ret = tsk_treeseq_simplify(\n        &ts, sample_ids, 4, TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS, &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));\n    tsk_treeseq_free(&simplified);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_unary_records(void)\n{\n    int ret;\n    const char *nodes = \"1  
0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\"\n                        \"0  1   0\\n\"\n                        \"0  2   0\";\n    const char *edges = \"0  1   2   0\\n\"\n                        \"0  1   3   1\\n\"\n                        \"0  1   4   2,3\\n\";\n    tsk_treeseq_t ts, simplified, simplified_other;\n    tsk_tree_t t;\n    tsk_id_t sample_ids[] = { 0, 1 };\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 1.0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 5);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_populations(&ts), 1);\n\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL(t.num_children[2], 1);\n    CU_ASSERT_EQUAL(t.num_children[4], 2);\n    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&t), 1);\n    tsk_tree_free(&t);\n\n    ret = tsk_treeseq_simplify(&ts, sample_ids, 2, 0, &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&simplified), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&simplified), 1.0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&simplified), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_edges(&simplified), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&simplified), 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&simplified), 1);\n    tsk_treeseq_free(&simplified);\n\n    ret = tsk_treeseq_simplify(\n        &ts, sample_ids, 2, TSK_SIMPLIFY_KEEP_UNARY, &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));\n    tsk_treeseq_free(&simplified);\n\n    ret = tsk_treeseq_simplify(\n        
&ts, sample_ids, 2, TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS, &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));\n    ret = tsk_treeseq_simplify(&ts, sample_ids, 2, 0, &simplified_other, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(\n        tsk_table_collection_equals(simplified.tables, simplified_other.tables, 0));\n    tsk_treeseq_free(&simplified);\n    tsk_treeseq_free(&simplified_other);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_unary_with_individuals(void)\n{\n    int ret;\n    const char *nodes = \"1  0   0   -1\\n\"\n                        \"1  0   0   0\\n\"\n                        \"0  1   0   -1\\n\"\n                        \"0  1   0   1\\n\"\n                        \"0  2   0   -1\\n\"\n                        \"0  3   0   -1\\n\"\n                        \"0  3   0   2\\n\"\n                        \"0  1   0   -1\\n\"\n                        \"0  1   0   3\\n\"\n                        \"0  0   0   -1\\n\"\n                        \"0  0   0   4\\n\"\n                        \"0  1   0   3\\n\";\n    const char *edges = \"0  2   2   0\\n\"\n                        \"0  2   3   1\\n\"\n                        \"2  3   7   0\\n\"\n                        \"2  3   8   1,9\\n\"\n                        \"2  3   11   10\\n\"\n                        \"0  2   4   2,3\\n\"\n                        \"0  1   5   4\\n\"\n                        \"1  2   6   4\\n\";\n    const char *individuals = \"0    0.5     -1,-1\\n\"\n                              \"0    1.5,3.1 -1,-1\\n\"\n                              \"0    2.1     0,1\\n\"\n                              \"0    3.2     1,2\\n\"\n                              \"0    4.2     2,3\\n\";\n    const char *nodes_expect = \"1  0   0   -1\\n\"\n                               \"1  0   0   0\\n\"\n                               \"0  1   0   1\\n\"\n                               \"0  
1   0   3\\n\"\n                               \"0  2   0   -1\\n\"\n                               \"0  3   0   2\\n\";\n    const char *edges_expect = \"0  2   2   1\\n\"\n                               \"2  3   3   1\\n\"\n                               \"0  2   4   0,2\\n\"\n                               \"1  2   5   4\\n\";\n    const char *individuals_expect = \"0    0.5     -1,-1\\n\"\n                                     \"0    1.5,3.1 -1,-1\\n\"\n                                     \"0    2.1     0,1\\n\"\n                                     \"0    3.2     1,2\\n\";\n    tsk_treeseq_t ts, simplified, expected;\n    tsk_id_t sample_ids[] = { 0, 1 };\n\n    tsk_treeseq_from_text(&ts, 3, nodes, edges, NULL, NULL, NULL, individuals, NULL, 0);\n    tsk_treeseq_from_text(&expected, 3, nodes_expect, edges_expect, NULL, NULL, NULL,\n        individuals_expect, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 3.0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 12);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_individuals(&ts), 5);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_populations(&ts), 1);\n\n    ret = tsk_treeseq_simplify(&ts, sample_ids, 2,\n        TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS | TSK_SIMPLIFY_FILTER_INDIVIDUALS,\n        &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(simplified.tables, expected.tables, 0));\n    tsk_treeseq_free(&simplified);\n\n    tsk_treeseq_free(&expected);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_non_sample_leaf_records(void)\n{\n    int ret;\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\"\n                        \"0  0   0\\n\"\n                        \"0  0   0\";\n    const 
char *edges = \"0  1   2   0,1,3,4\\n\";\n    const char *sites = \"0.1  0\\n\"\n                        \"0.2  0\\n\"\n                        \"0.3  0\\n\"\n                        \"0.4  0\\n\";\n    const char *mutations = \"0    0     1\\n\"\n                            \"1    1     1\\n\"\n                            \"2    3     1\\n\"\n                            \"3    4     1\";\n    tsk_treeseq_t ts, simplified;\n    tsk_id_t sample_ids[] = { 0, 1 };\n    tsk_vargen_t vargen;\n    tsk_variant_t *var;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 1.0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 5);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 4);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);\n\n    ret = tsk_vargen_init(&vargen, &ts, NULL, 0, NULL, 0);\n    tsk_vargen_print_state(&vargen, _devnull);\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"0\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"1\", 1);\n    CU_ASSERT_EQUAL(var->genotypes[0], 1);\n    CU_ASSERT_EQUAL(var->genotypes[1], 0);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"0\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"1\", 1);\n    CU_ASSERT_EQUAL(var->genotypes[0], 0);\n    CU_ASSERT_EQUAL(var->genotypes[1], 1);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"0\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"1\", 1);\n    CU_ASSERT_EQUAL(var->genotypes[0], 0);\n    CU_ASSERT_EQUAL(var->genotypes[1], 0);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"0\", 
1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"1\", 1);\n    CU_ASSERT_EQUAL(var->genotypes[0], 0);\n    CU_ASSERT_EQUAL(var->genotypes[1], 0);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_vargen_free(&vargen);\n\n    ret = tsk_treeseq_simplify(&ts, sample_ids, 2, 0, &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&simplified), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&simplified), 1.0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&simplified), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&simplified), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&simplified), 1);\n\n    tsk_treeseq_free(&ts);\n    tsk_treeseq_free(&simplified);\n}\n\nstatic void\ntest_simplest_degenerate_multiple_root_records(void)\n{\n\n    int ret;\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\"\n                        \"0  1   0\\n\";\n    const char *edges = \"0  1   2   0\\n\"\n                        \"0  1   3   1\\n\";\n    tsk_treeseq_t ts, simplified;\n    tsk_tree_t t;\n    tsk_id_t sample_ids[] = { 0, 1 };\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 1.0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 4);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);\n\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL(ret, TSK_TREE_OK);\n    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&t), 2);\n    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&t), 2);\n    CU_ASSERT_EQUAL(tsk_tree_get_right_root(&t), 3);\n    CU_ASSERT_EQUAL(t.num_edges, 2);\n    CU_ASSERT_EQUAL(t.right_sib[2], 3);\n    
CU_ASSERT_EQUAL(t.right_sib[3], TSK_NULL);\n    CU_ASSERT_EQUAL(t.num_children[2], 1);\n    CU_ASSERT_EQUAL(t.num_children[0], 0);\n\n    ret = tsk_treeseq_simplify(&ts, sample_ids, 2, 0, &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&simplified), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&simplified), 1.0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&simplified), 2);\n    tsk_treeseq_free(&simplified);\n\n    ret = tsk_treeseq_simplify(\n        &ts, sample_ids, 2, TSK_SIMPLIFY_KEEP_UNARY, &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));\n    tsk_treeseq_free(&simplified);\n\n    tsk_treeseq_free(&ts);\n    tsk_tree_free(&t);\n}\n\nstatic void\ntest_simplest_multiple_root_records(void)\n{\n    int ret;\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\"\n                        \"0  1   0\\n\";\n    const char *edges = \"0  1   4   0,1\\n\"\n                        \"0  1   5   2,3\\n\";\n    tsk_treeseq_t ts, simplified;\n    tsk_id_t sample_ids[] = { 0, 1, 2, 3 };\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 4);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 1.0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 6);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);\n\n    ret = tsk_treeseq_simplify(&ts, sample_ids, 4, 0, &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&simplified), 4);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&simplified), 1.0);\n    
CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&simplified), 6);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&simplified), 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&simplified), 1);\n    tsk_treeseq_free(&simplified);\n\n    /* Make one tree degenerate */\n    ret = tsk_treeseq_simplify(&ts, sample_ids, 3, 0, &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&simplified), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&simplified), 1.0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&simplified), 4);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&simplified), 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&simplified), 1);\n    tsk_treeseq_free(&simplified);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_zero_root_tree(void)\n{\n    int ret;\n    const char *nodes = \"0  0   0\\n\"\n                        \"0  0   0\\n\"\n                        \"0  0   0\\n\"\n                        \"0  0   0\\n\"\n                        \"0  1   0\\n\"\n                        \"0  1   0\\n\";\n    const char *edges = \"0  1   4   0,1\\n\"\n                        \"0  1   5   2,3\\n\";\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 1.0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 6);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);\n\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL(ret, TSK_TREE_OK);\n    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&t), 0);\n    CU_ASSERT_EQUAL(t.num_edges, 4);\n    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&t), TSK_NULL);\n    CU_ASSERT_EQUAL(tsk_tree_get_right_root(&t), TSK_NULL);\n    
CU_ASSERT_EQUAL(t.right_sib[2], 3);\n    CU_ASSERT_EQUAL(t.right_sib[3], TSK_NULL);\n    CU_ASSERT_EQUAL(t.num_children[0], 0);\n    CU_ASSERT_EQUAL(t.num_children[4], 2);\n\n    tsk_tree_free(&t);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_multi_root_tree(void)\n{\n    int ret;\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\";\n    const char *edges = \"0  1   3   1,2\\n\";\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 1.0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 4);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);\n\n    ret = tsk_tree_init(&t, &ts, 0);\n\n    tsk_tree_print_state(&t, _devnull);\n\n    /* Make sure the initial roots are set correctly */\n    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&t), 0);\n    CU_ASSERT_EQUAL(t.left_sib[0], TSK_NULL);\n    CU_ASSERT_EQUAL(t.right_sib[0], 1);\n    CU_ASSERT_EQUAL(t.left_sib[1], 0);\n    CU_ASSERT_EQUAL(t.right_sib[1], 2);\n    CU_ASSERT_EQUAL(t.left_sib[2], 1);\n    CU_ASSERT_EQUAL(t.right_sib[2], TSK_NULL);\n\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL(ret, TSK_TREE_OK);\n    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&t), 2);\n    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&t), 0);\n    CU_ASSERT_EQUAL(t.right_sib[0], 3);\n    CU_ASSERT_EQUAL(t.num_edges, 2);\n    CU_ASSERT_EQUAL(t.num_children[0], 0);\n    CU_ASSERT_EQUAL(t.num_children[3], 2);\n\n    tsk_tree_print_state(&t, _devnull);\n\n    CU_ASSERT_EQUAL(tsk_tree_set_root_threshold(&t, 1), TSK_ERR_UNSUPPORTED_OPERATION);\n    ret = tsk_tree_next(&t);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_tree_set_root_threshold(&t, 0), TSK_ERR_BAD_PARAM_VALUE);\n    ret = 
tsk_tree_set_root_threshold(&t, 2);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_tree_get_root_threshold(&t), 2);\n\n    ret = tsk_tree_next(&t);\n    CU_ASSERT_EQUAL(ret, TSK_TREE_OK);\n    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&t), 1);\n    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&t), 3);\n\n    tsk_tree_free(&t);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_tree_mrca(void)\n{\n    int ret;\n    tsk_table_collection_t tables;\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n    tsk_id_t mrca, ret_id;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1;\n    ret_id = tsk_node_table_add_row(\n        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 1);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 1.0);\n\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_tree_get_mrca(&t, 0, 0, &mrca);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(mrca, 0);\n\n    tsk_tree_free(&t);\n    tsk_treeseq_free(&ts);\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_simplest_root_mutations(void)\n{\n    int ret;\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\";\n    const char *edges = \"0  1   2   0,1\\n\";\n    const char *sites = \"0.1 0\";\n    const char *mutations = \"0    2     1\";\n    tsk_flags_t options = 0;\n    tsk_id_t sample_ids[] = { 0, 1 };\n    tsk_treeseq_t ts, simplified;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 1.0);\n    
CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 1);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 1);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);\n\n    ret = tsk_treeseq_simplify(&ts, sample_ids, 2, options, &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&simplified), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&simplified), 1.0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&simplified), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&simplified), 1);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&simplified), 1);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&simplified), 1);\n    tsk_treeseq_free(&simplified);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_back_mutations(void)\n{\n    int ret;\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\"\n                        \"0  2   0\\n\";\n    const char *edges = \"0  1   3   0,1\\n\"\n                        \"0  1   4   2,3\\n\";\n    const char *sites = \"0.5 0\";\n    const char *mutations = \"0    3     1   -1\\n\"\n                            \"0    0     0   0\";\n    tsk_treeseq_t ts;\n    tsk_vargen_t vargen;\n    tsk_variant_t *var;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 1.0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 5);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 1);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);\n\n    ret = tsk_vargen_init(&vargen, &ts, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_vargen_next(&vargen, &var);\n    
CU_ASSERT_EQUAL_FATAL(ret, 1);\n    CU_ASSERT_EQUAL(var->num_alleles, 2);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[0], \"0\", 1);\n    CU_ASSERT_NSTRING_EQUAL(var->alleles[1], \"1\", 1);\n    CU_ASSERT_EQUAL(var->genotypes[0], 0);\n    CU_ASSERT_EQUAL(var->genotypes[1], 1);\n    CU_ASSERT_EQUAL(var->genotypes[2], 0);\n    CU_ASSERT_EQUAL(var->site.id, 0);\n    CU_ASSERT_EQUAL(var->site.mutations_length, 2);\n    tsk_vargen_free(&vargen);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_general_samples(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"0  1   0\\n\"\n                        \"1  0   0\";\n    const char *edges = \"0  1   1   0,2\\n\";\n    const char *sites = \"0.5  0\\n\"\n                        \"0.75 0\\n\";\n    const char *mutations = \"0    2     1\\n\"\n                            \"1    0     1\";\n    const tsk_id_t samples[2] = { 0, 2 };\n    const tsk_id_t *s;\n    int ret;\n\n    tsk_treeseq_t ts, simplified;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 1.0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);\n\n    s = tsk_treeseq_get_samples(&ts);\n    CU_ASSERT_FATAL(s != NULL);\n    CU_ASSERT_EQUAL(s[0], 0);\n    CU_ASSERT_EQUAL(s[1], 2);\n\n    ret = tsk_treeseq_simplify(&ts, samples, 2, 0, &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    s = tsk_treeseq_get_samples(&simplified);\n    CU_ASSERT_FATAL(s != NULL);\n    CU_ASSERT_EQUAL(s[0], 0);\n    CU_ASSERT_EQUAL(s[1], 1);\n\n    tsk_treeseq_free(&simplified);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_holey_tree_sequence(void)\n{\n    const char *nodes_txt = \"1  0   
0\\n\"\n                            \"1  0   0\\n\"\n                            \"0  1   0\";\n    const char *edges_txt = \"0  1   2   0\\n\"\n                            \"2  3   2   0\\n\"\n                            \"0  1   2   1\\n\"\n                            \"2  3   2   1\\n\";\n    const char *sites_txt = \"0.5  0\\n\"\n                            \"1.5  0\\n\"\n                            \"2.5  0\\n\";\n    const char *mutations_txt = \"0    0     1\\n\"\n                                \"1    1     1\\n\"\n                                \"2    2     1\\n\";\n    int ret;\n    tsk_treeseq_t ts, simplified;\n    tsk_id_t sample_ids[] = { 0, 1 };\n\n    tsk_treeseq_from_text(\n        &ts, 3, nodes_txt, edges_txt, NULL, sites_txt, mutations_txt, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 3.0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 3);\n\n    ret = tsk_treeseq_simplify(&ts, sample_ids, 2, 0, &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));\n    tsk_treeseq_free(&simplified);\n\n    ret = tsk_treeseq_simplify(\n        &ts, sample_ids, 2, TSK_SIMPLIFY_KEEP_UNARY, &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));\n    tsk_treeseq_free(&simplified);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_holey_tsk_treeseq_mutation_parents(void)\n{\n    const char *nodes_txt = \"1  0   0\\n\"\n                            \"1  0   0\\n\"\n                            \"0  1   0\";\n    const char *edges_txt = \"0  1   2   0\\n\"\n                            \"2  3   2   0\\n\"\n                      
      \"0  1   2   1\\n\"\n                            \"2  3   2   1\\n\";\n    const char *sites_txt = \"0.5  0\\n\"\n                            \"1.5  0\\n\"\n                            \"2.5  0\\n\";\n    const char *mutations_txt = \"0    0     1\\n\"\n                                \"0    0     1\\n\"\n                                \"1    1     1\\n\"\n                                \"1    1     1\\n\"\n                                \"2    2     1\\n\"\n                                \"2    2     1\\n\";\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    int ret;\n\n    tsk_treeseq_from_text(\n        &ts, 3, nodes_txt, edges_txt, NULL, sites_txt, mutations_txt, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 6);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 3);\n    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_compute_mutation_parents(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tables.mutations.parent[0], -1);\n    CU_ASSERT_EQUAL(tables.mutations.parent[1], 0);\n    CU_ASSERT_EQUAL(tables.mutations.parent[2], -1);\n    CU_ASSERT_EQUAL(tables.mutations.parent[3], 2);\n    CU_ASSERT_EQUAL(tables.mutations.parent[4], -1);\n    CU_ASSERT_EQUAL(tables.mutations.parent[5], 4);\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_initial_gap_tree_sequence(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\";\n    const char *edges = \"2  3   2   0,1\\n\";\n    const char *sites = \"0.5  0\\n\"\n                        \"1.5  0\\n\"\n                        \"2.5  0\\n\";\n    const char *mutations = \"0    0     1\\n\"\n                            \"1    1     1\\n\"\n                            \"2    2     1\";\n    int ret;\n    
tsk_treeseq_t ts, simplified;\n    const tsk_id_t z = TSK_NULL;\n    tsk_id_t parents[] = {\n        z,\n        z,\n        z,\n        2,\n        2,\n        z,\n    };\n    tsk_size_t num_trees = 2;\n    tsk_id_t sample_ids[] = { 0, 1 };\n\n    tsk_treeseq_from_text(&ts, 3, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 3.0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 2);\n\n    verify_trees(&ts, num_trees, parents);\n\n    ret = tsk_treeseq_simplify(&ts, sample_ids, 2, 0, &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));\n    tsk_treeseq_free(&simplified);\n\n    ret = tsk_treeseq_simplify(\n        &ts, sample_ids, 2, TSK_SIMPLIFY_KEEP_UNARY, &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));\n    tsk_treeseq_free(&simplified);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_initial_gap_zero_roots(void)\n{\n    const char *nodes = \"0  0   0\\n\"\n                        \"0  0   0\\n\"\n                        \"0  1   0\";\n    const char *edges = \"2  3   2   0,1\\n\";\n    int ret;\n    tsk_treeseq_t ts;\n    const tsk_id_t z = TSK_NULL;\n    tsk_id_t parents[] = {\n        z,\n        z,\n        z,\n        2,\n        2,\n        z,\n    };\n    uint32_t num_trees = 2;\n    tsk_tree_t tree;\n\n    tsk_treeseq_from_text(&ts, 3, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 3.0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 3);\n    
CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 2);\n\n    verify_trees(&ts, num_trees, parents);\n\n    ret = tsk_tree_init(&tree, &ts, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_tree_first(&tree);\n    CU_ASSERT_EQUAL(ret, TSK_TREE_OK);\n    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&tree), TSK_NULL);\n    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 0);\n\n    ret = tsk_tree_next(&tree);\n    CU_ASSERT_EQUAL(ret, TSK_TREE_OK);\n    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&tree), TSK_NULL);\n    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 0);\n    CU_ASSERT_EQUAL(tree.parent[0], 2);\n    CU_ASSERT_EQUAL(tree.parent[1], 2);\n    CU_ASSERT_EQUAL(tree.num_children[2], 2);\n\n    tsk_tree_free(&tree);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_holey_tsk_treeseq_zero_roots(void)\n{\n    const char *nodes_txt = \"0  0   0\\n\"\n                            \"0  0   0\\n\"\n                            \"0  1   0\";\n    const char *edges_txt = \"0  1   2   0\\n\"\n                            \"2  3   2   0\\n\"\n                            \"0  1   2   1\\n\"\n                            \"2  3   2   1\\n\";\n    int ret;\n    tsk_treeseq_t ts;\n    const tsk_id_t z = TSK_NULL;\n    tsk_id_t parents[] = {\n        2,\n        2,\n        z,\n        z,\n        z,\n        z,\n        2,\n        2,\n        z,\n    };\n    uint32_t num_trees = 3;\n    tsk_tree_t tree;\n\n    tsk_treeseq_from_text(&ts, 3, nodes_txt, edges_txt, NULL, NULL, NULL, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 3.0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 3);\n\n    verify_trees(&ts, num_trees, parents);\n\n    ret = tsk_tree_init(&tree, &ts, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_tree_first(&tree);\n    CU_ASSERT_EQUAL(ret, TSK_TREE_OK);\n    
CU_ASSERT_EQUAL(tsk_tree_get_left_root(&tree), TSK_NULL);\n    CU_ASSERT_EQUAL(tree.parent[0], 2);\n    CU_ASSERT_EQUAL(tree.parent[1], 2);\n    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 0);\n    CU_ASSERT_EQUAL(tree.num_children[2], 2);\n\n    ret = tsk_tree_next(&tree);\n    CU_ASSERT_EQUAL(ret, TSK_TREE_OK);\n    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&tree), TSK_NULL);\n    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 0);\n    CU_ASSERT_EQUAL(tree.num_children[2], 0);\n\n    ret = tsk_tree_next(&tree);\n    CU_ASSERT_EQUAL(ret, TSK_TREE_OK);\n    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&tree), TSK_NULL);\n    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 0);\n    CU_ASSERT_EQUAL(tree.parent[0], 2);\n    CU_ASSERT_EQUAL(tree.parent[1], 2);\n    CU_ASSERT_EQUAL(tree.num_children[2], 2);\n\n    tsk_tree_free(&tree);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_initial_gap_tsk_treeseq_mutation_parents(void)\n{\n    const char *nodes_txt = \"1  0   0\\n\"\n                            \"1  0   0\\n\"\n                            \"0  1   0\";\n    const char *edges_txt = \"2  3   2   0,1\\n\";\n    const char *sites_txt = \"0.5  0\\n\"\n                            \"1.5  0\\n\"\n                            \"2.5  0\\n\";\n    const char *mutations_txt = \"0    0     1\\n\"\n                                \"0    0     1\\n\"\n                                \"1    1     1\\n\"\n                                \"1    1     1\\n\"\n                                \"2    2     1\\n\"\n                                \"2    2     1\\n\";\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    int ret;\n\n    tsk_treeseq_from_text(\n        &ts, 3, nodes_txt, edges_txt, NULL, sites_txt, mutations_txt, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 6);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 2);\n    ret = tsk_treeseq_copy_tables(&ts, &tables, 
0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_compute_mutation_parents(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tables.mutations.parent[0], -1);\n    CU_ASSERT_EQUAL(tables.mutations.parent[1], 0);\n    CU_ASSERT_EQUAL(tables.mutations.parent[2], -1);\n    CU_ASSERT_EQUAL(tables.mutations.parent[3], 2);\n    CU_ASSERT_EQUAL(tables.mutations.parent[4], -1);\n    CU_ASSERT_EQUAL(tables.mutations.parent[5], 4);\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_final_gap_tree_sequence(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\";\n    const char *edges = \"0  2   2   0,1\\n\";\n    const char *sites = \"0.5  0\\n\"\n                        \"1.5  0\\n\"\n                        \"2.5  0\\n\";\n    const char *mutations = \"0    0     1\\n\"\n                            \"1    1     1\\n\"\n                            \"2    0     1\";\n    tsk_treeseq_t ts;\n    const tsk_id_t z = TSK_NULL;\n    tsk_id_t parents[] = {\n        2,\n        2,\n        z,\n        z,\n        z,\n        z,\n    };\n    uint32_t num_trees = 2;\n\n    tsk_treeseq_from_text(&ts, 3, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 3.0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 2);\n\n    verify_trees(&ts, num_trees, parents);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_final_gap_tsk_treeseq_mutation_parents(void)\n{\n    const char *nodes_txt = \"1  0   0\\n\"\n                            \"1  0   0\\n\"\n                            \"0  1   0\";\n    const char *edges_txt = \"0  2  
 2   0,1\\n\";\n    const char *sites_txt = \"0.5  0\\n\"\n                            \"1.5  0\\n\"\n                            \"2.5  0\\n\";\n    const char *mutations_txt = \"0    0     1\\n\"\n                                \"0    0     1\\n\"\n                                \"1    1     1\\n\"\n                                \"1    1     1\\n\"\n                                \"2    0     1\\n\"\n                                \"2    0     1\\n\";\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    int ret;\n\n    tsk_treeseq_from_text(\n        &ts, 3, nodes_txt, edges_txt, NULL, sites_txt, mutations_txt, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 6);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 2);\n    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_compute_mutation_parents(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tables.mutations.parent[0], -1);\n    CU_ASSERT_EQUAL(tables.mutations.parent[1], 0);\n    CU_ASSERT_EQUAL(tables.mutations.parent[2], -1);\n    CU_ASSERT_EQUAL(tables.mutations.parent[3], 2);\n    CU_ASSERT_EQUAL(tables.mutations.parent[4], -1);\n    CU_ASSERT_EQUAL(tables.mutations.parent[5], 4);\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_individuals(void)\n{\n    const char *individuals = \"1      0.25     -1,-1\\n\"\n                              \"2      0.5,0.25 -1,-1\\n\"\n                              \"3      0.75     0,1\\n\";\n    const char *nodes = \"1  0   -1  -1\\n\"\n                        \"1  0   -1  1\\n\"\n                        \"0  0   -1  -1\\n\"\n                        \"1  0   -1  0\\n\"\n                        \"0  0   -1  1\\n\"\n                        \"0  0   -1  2\\n\";\n    tsk_table_collection_t tables;\n    tsk_treeseq_t ts;\n    tsk_node_t 
node;\n    tsk_individual_t individual;\n    tsk_flags_t load_flags = TSK_TS_INIT_BUILD_INDEXES;\n    int ret;\n    tsk_id_t pat_id, mat_id;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tables.sequence_length = 1.0;\n    parse_individuals(individuals, &tables.individuals);\n    CU_ASSERT_EQUAL_FATAL(tables.individuals.num_rows, 3);\n\n    parse_nodes(nodes, &tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 6);\n\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_treeseq_get_node(&ts, 0, &node);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(node.individual, TSK_NULL);\n\n    ret = tsk_treeseq_get_node(&ts, 1, &node);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(node.individual, 1);\n\n    ret = tsk_treeseq_get_individual(&ts, 0, &individual);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(individual.id, 0);\n    CU_ASSERT_EQUAL_FATAL(individual.flags, 1);\n    CU_ASSERT_EQUAL_FATAL(individual.location_length, 1);\n    CU_ASSERT_EQUAL_FATAL(individual.location[0], 0.25);\n    CU_ASSERT_EQUAL_FATAL(individual.parents_length, 2);\n    CU_ASSERT_EQUAL_FATAL(individual.parents[0], -1);\n    CU_ASSERT_EQUAL_FATAL(individual.parents[1], -1);\n    pat_id = individual.id;\n    CU_ASSERT_EQUAL_FATAL(individual.nodes_length, 1);\n    CU_ASSERT_EQUAL_FATAL(individual.nodes[0], 3);\n\n    ret = tsk_treeseq_get_individual(&ts, 1, &individual);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(individual.id, 1);\n    CU_ASSERT_EQUAL_FATAL(individual.flags, 2);\n    CU_ASSERT_EQUAL_FATAL(individual.location_length, 2);\n    CU_ASSERT_EQUAL_FATAL(individual.location[0], 0.5);\n    CU_ASSERT_EQUAL_FATAL(individual.location[1], 0.25);\n    CU_ASSERT_EQUAL_FATAL(individual.parents_length, 2);\n    CU_ASSERT_EQUAL_FATAL(individual.parents[0], -1);\n    
CU_ASSERT_EQUAL_FATAL(individual.parents[1], -1);\n    mat_id = individual.id;\n    CU_ASSERT_EQUAL_FATAL(individual.nodes_length, 2);\n    CU_ASSERT_EQUAL_FATAL(individual.nodes[0], 1);\n    CU_ASSERT_EQUAL_FATAL(individual.nodes[1], 4);\n\n    ret = tsk_treeseq_get_individual(&ts, 2, &individual);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(individual.id, 2);\n    CU_ASSERT_EQUAL_FATAL(individual.flags, 3);\n    CU_ASSERT_EQUAL_FATAL(individual.location_length, 1);\n    CU_ASSERT_EQUAL_FATAL(individual.location[0], 0.75);\n    CU_ASSERT_EQUAL_FATAL(individual.parents_length, 2);\n    CU_ASSERT_EQUAL_FATAL(individual.parents[0], pat_id);\n    CU_ASSERT_EQUAL_FATAL(individual.parents[1], mat_id);\n    CU_ASSERT_EQUAL_FATAL(individual.nodes_length, 1);\n    CU_ASSERT_EQUAL_FATAL(individual.nodes[0], 5);\n\n    ret = tsk_treeseq_get_individual(&ts, 3, &individual);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n    tsk_treeseq_free(&ts);\n\n    /* NaN/infinity values are allowed in locations as they do not\n     * affect the integrity of the model. 
*/\n    tables.individuals.location[0] = NAN;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_treeseq_get_individual(&ts, 0, &individual);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT(!tsk_isfinite(individual.location[0]));\n    tsk_treeseq_free(&ts);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_simplest_bad_individuals(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\";\n    const char *edges = \"0  1   2   0\\n\"\n                        \"0  1   2   1\\n\"\n                        \"0  1   4   3\\n\";\n    const char *individuals = \"1      0.25     -1\\n\"\n                              \"2      0.5,0.25 0\\n\";\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    tsk_flags_t load_flags = TSK_TS_INIT_BUILD_INDEXES;\n    tsk_id_t ret_id;\n    int ret;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tables.sequence_length = 1.0;\n    parse_nodes(nodes, &tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 5);\n    parse_edges(edges, &tables.edges);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 3);\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    /* Make sure we have a good set of records */\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_treeseq_free(&ts);\n\n    /* Bad individual ID */\n    tables.nodes.individual[0] = -2;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n    tsk_treeseq_free(&ts);\n    tables.nodes.individual[0] = TSK_NULL;\n\n    /* Bad individual ID */\n    tables.nodes.individual[0] = 0;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n  
  CU_ASSERT_EQUAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n    tsk_treeseq_free(&ts);\n    tables.nodes.individual[0] = TSK_NULL;\n\n    /* Add two individuals */\n    parse_individuals(individuals, &tables.individuals);\n    CU_ASSERT_EQUAL_FATAL(tables.individuals.num_rows, 2);\n\n    /* Make sure we have a good set of records */\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_treeseq_free(&ts);\n\n    /* Bad individual ID */\n    tables.nodes.individual[0] = 2;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n    tsk_treeseq_free(&ts);\n    tables.nodes.individual[0] = TSK_NULL;\n\n    /* Bad parent ID */\n    tables.individuals.parents[0] = -2;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n    tsk_treeseq_free(&ts);\n    tables.individuals.parents[0] = 42;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n    tsk_treeseq_free(&ts);\n    tables.individuals.parents[0] = TSK_NULL;\n\n    /* Parent is self */\n    tables.individuals.parents[0] = 0;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_INDIVIDUAL_SELF_PARENT);\n    tsk_treeseq_free(&ts);\n    tables.individuals.parents[0] = TSK_NULL;\n\n    /* Unsorted individuals are OK*/\n    tables.individuals.parents[0] = 1;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, 0);\n    tsk_treeseq_free(&ts);\n    tables.individuals.parents[0] = TSK_NULL;\n\n    tsk_treeseq_free(&ts);\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_simplest_bad_edges(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\";\n    const char 
*edges = \"0  1   2   0\\n\"\n                        \"0  1   2   1\\n\"\n                        \"0  1   4   3\\n\";\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    int ret;\n    tsk_id_t ret_id;\n    tsk_flags_t load_flags = TSK_TS_INIT_BUILD_INDEXES;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tables.sequence_length = 1.0;\n    parse_nodes(nodes, &tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 5);\n    parse_edges(edges, &tables.edges);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 3);\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    /* Make sure we have a good set of records */\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_treeseq_free(&ts);\n\n    /* Bad population ID */\n    tables.nodes.population[0] = -2;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n    tsk_treeseq_free(&ts);\n    tables.nodes.population[0] = 0;\n\n    /* Bad population ID */\n    tables.nodes.population[0] = 1;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n    tsk_treeseq_free(&ts);\n    tables.nodes.population[0] = 0;\n\n    /* Bad interval */\n    tables.edges.right[0] = 0.0;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_EDGE_INTERVAL);\n    tsk_treeseq_free(&ts);\n    tables.edges.right[0] = 1.0;\n\n    /* Nonfinite coords */\n    tables.edges.left[0] = NAN;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_GENOME_COORDS_NONFINITE);\n    tsk_treeseq_free(&ts);\n    tables.edges.left[0] = 1.0;\n\n    tables.edges.left[0] = INFINITY;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_GENOME_COORDS_NONFINITE);\n    
tsk_treeseq_free(&ts);\n    tables.edges.left[0] = 1.0;\n\n    tables.edges.right[0] = NAN;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_GENOME_COORDS_NONFINITE);\n    tsk_treeseq_free(&ts);\n    tables.edges.right[0] = 1.0;\n\n    tables.edges.right[0] = -INFINITY;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_GENOME_COORDS_NONFINITE);\n    tsk_treeseq_free(&ts);\n    tables.edges.right[0] = 1.0;\n\n    /* Left coordinate < 0. */\n    tables.edges.left[0] = -1;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_LEFT_LESS_ZERO);\n    tsk_treeseq_free(&ts);\n    tables.edges.left[0] = 0.0;\n\n    /* Right coordinate > sequence length. */\n    tables.edges.right[0] = 2.0;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_RIGHT_GREATER_SEQ_LENGTH);\n    tsk_treeseq_free(&ts);\n    tables.edges.right[0] = 1.0;\n\n    /* Duplicate records */\n    tables.edges.child[0] = 1;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_DUPLICATE_EDGES);\n    tsk_treeseq_free(&ts);\n    tables.edges.child[0] = 0;\n\n    /* Unsorted left coordinates */\n    tables.edges.child[0] = 1;\n    tables.edges.left[0] = 0.5;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_EDGES_NOT_SORTED_LEFT);\n    tsk_treeseq_free(&ts);\n    tables.edges.child[0] = 0;\n    tables.edges.left[0] = 0.0;\n\n    /* child node == parent */\n    tables.edges.child[1] = 2;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_NODE_TIME_ORDERING);\n    tsk_treeseq_free(&ts);\n    tables.edges.child[1] = 1;\n\n    /* Unsorted child nodes */\n    tables.edges.child[0] = 1;\n    tables.edges.child[1] = 0;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_EDGES_NOT_SORTED_CHILD);\n    tsk_treeseq_free(&ts);\n    
tables.edges.child[0] = 0;\n    tables.edges.child[1] = 1;\n\n    /* discontinuous parent nodes */\n    /* Swap rows 1 and 2 */\n    tables.edges.parent[1] = 4;\n    tables.edges.child[1] = 3;\n    tables.edges.parent[2] = 2;\n    tables.edges.child[2] = 1;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_EDGES_NONCONTIGUOUS_PARENTS);\n    tsk_treeseq_free(&ts);\n    tables.edges.parent[2] = 4;\n    tables.edges.child[2] = 3;\n    tables.edges.parent[1] = 2;\n    tables.edges.child[1] = 1;\n\n    /* Null parent */\n    tables.edges.parent[0] = TSK_NULL;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_NULL_PARENT);\n    tsk_treeseq_free(&ts);\n    tables.edges.parent[0] = 2;\n\n    /* parent not in nodes list */\n    tables.nodes.num_rows = 2;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    tsk_treeseq_free(&ts);\n    tables.nodes.num_rows = 5;\n\n    /* parent negative */\n    tables.edges.parent[0] = -2;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    tsk_treeseq_free(&ts);\n    tables.edges.parent[0] = 2;\n\n    /* Null child */\n    tables.edges.child[0] = TSK_NULL;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_NULL_CHILD);\n    tsk_treeseq_free(&ts);\n    tables.edges.child[0] = 0;\n\n    /* child node reference out of bounds */\n    tables.edges.child[0] = 100;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    tsk_treeseq_free(&ts);\n    tables.edges.child[0] = 0;\n\n    /* child node reference negative */\n    tables.edges.child[0] = -2;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    tsk_treeseq_free(&ts);\n    tables.edges.child[0] = 0;\n\n    /* Make sure we've preserved a 
good tree sequence */\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, 0);\n    tsk_treeseq_free(&ts);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_simplest_bad_indexes(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\";\n    const char *edges = \"0  1   2   0\\n\"\n                        \"0  1   2   1\\n\"\n                        \"0  1   4   3\\n\";\n    tsk_table_collection_t tables;\n    tsk_id_t bad_indexes[] = { -1, 3, 4, 1000 };\n    tsk_size_t j;\n    tsk_id_t ret_id;\n    tsk_id_t ret_num_trees;\n    int ret;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tables.sequence_length = 1.0;\n    parse_nodes(nodes, &tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 5);\n    parse_edges(edges, &tables.edges);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 3);\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    /* Make sure we have a good set of records */\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = (int) tsk_table_collection_check_integrity(&tables, TSK_CHECK_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLES_NOT_INDEXED);\n    ret = tsk_table_collection_build_index(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_num_trees = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);\n    /* TSK_CHECK_TREES returns the number of trees */\n    CU_ASSERT_EQUAL_FATAL(ret_num_trees, 1);\n\n    for (j = 0; j < sizeof(bad_indexes) / sizeof(*bad_indexes); j++) {\n        tables.indexes.edge_insertion_order[0] = bad_indexes[j];\n        ret_num_trees = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);\n        
CU_ASSERT_EQUAL_FATAL(ret_num_trees, TSK_ERR_EDGE_OUT_OF_BOUNDS);\n        tables.indexes.edge_insertion_order[0] = 0;\n\n        tables.indexes.edge_removal_order[0] = bad_indexes[j];\n        ret_num_trees = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);\n        CU_ASSERT_EQUAL_FATAL(ret_num_trees, TSK_ERR_EDGE_OUT_OF_BOUNDS);\n        tables.indexes.edge_removal_order[0] = 0;\n    }\n\n    ret = tsk_table_collection_drop_index(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = (int) tsk_table_collection_check_integrity(&tables, TSK_CHECK_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLES_NOT_INDEXED);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_simplest_bad_migrations(void)\n{\n    tsk_table_collection_t tables;\n    int ret;\n    tsk_id_t ret_id;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1;\n\n    /* insert two populations and one node to refer to. */\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0.0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    /* One migration, node 0 goes from population 0 to 1. 
*/\n    ret_id\n        = tsk_migration_table_add_row(&tables.migrations, 0, 1, 0, 0, 1, 1.0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    /* We only need basic integrity checks for migrations */\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Bad node reference */\n    tables.migrations.node[0] = -1;\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    tables.migrations.node[0] = 0;\n\n    /* Bad node reference */\n    tables.migrations.node[0] = 1;\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    tables.migrations.node[0] = 0;\n\n    /* Bad population reference */\n    tables.migrations.source[0] = -1;\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n    tables.migrations.source[0] = 0;\n\n    /* Bad population reference */\n    tables.migrations.source[0] = 2;\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n    tables.migrations.source[0] = 0;\n\n    /* Bad population reference */\n    tables.migrations.dest[0] = -1;\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n    tables.migrations.dest[0] = 1;\n\n    /* Bad population reference */\n    tables.migrations.dest[0] = 2;\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n    tables.migrations.dest[0] = 1;\n\n    /* Bad time values */\n    tables.migrations.time[0] = NAN;\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_TIME_NONFINITE);\n    tables.migrations.time[0] = 1.0;\n\n    tables.migrations.time[0] = INFINITY;\n    
ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_TIME_NONFINITE);\n    tables.migrations.time[0] = 1.0;\n\n    /* Bad left coordinate */\n    tables.migrations.left[0] = -1;\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_LEFT_LESS_ZERO);\n    tables.migrations.left[0] = 0;\n\n    tables.migrations.left[0] = NAN;\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_GENOME_COORDS_NONFINITE);\n    tables.migrations.left[0] = 0;\n\n    tables.migrations.left[0] = -INFINITY;\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_GENOME_COORDS_NONFINITE);\n    tables.migrations.left[0] = 0;\n\n    /* Bad right coordinate */\n    tables.migrations.right[0] = 2;\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_RIGHT_GREATER_SEQ_LENGTH);\n    tables.migrations.right[0] = 1;\n\n    tables.migrations.right[0] = NAN;\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_GENOME_COORDS_NONFINITE);\n    tables.migrations.right[0] = 1;\n\n    tables.migrations.right[0] = INFINITY;\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_GENOME_COORDS_NONFINITE);\n    tables.migrations.right[0] = 1;\n\n    /* Bad interval coordinate */\n    tables.migrations.right[0] = 0;\n    ret = (int) tsk_table_collection_check_integrity(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_EDGE_INTERVAL);\n    tables.migrations.right[0] = 1;\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_simplest_migration_simplify(void)\n{\n    tsk_table_collection_t tables;\n    int ret;\n    tsk_id_t ret_id;\n    tsk_id_t samples[] = { 0, 1 };\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 
1;\n\n    /* insert two populations and two nodes to refer to. */\n    ret_id = tsk_node_table_add_row(\n        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_node_table_add_row(\n        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    /* One migration, node 0 goes from population 0 to 1. */\n    ret_id\n        = tsk_migration_table_add_row(&tables.migrations, 0, 1, 0, 0, 1, 1.0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    ret = tsk_table_collection_simplify(&tables, samples, 2, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SIMPLIFY_MIGRATIONS_NOT_SUPPORTED);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_simplest_overlapping_parents(void)\n{\n    const char *nodes = \"1  0   -1\\n\"\n                        \"1  0   -1\\n\"\n                        \"0  1   -1\\n\";\n    const char *edges = \"0  1   2   0\\n\"\n                        \"0  1   2   1\\n\";\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    tsk_tree_t tree;\n    int ret;\n    tsk_flags_t load_flags = TSK_TS_INIT_BUILD_INDEXES;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tables.sequence_length = 1;\n    parse_nodes(nodes, &tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 3);\n    parse_edges(edges, &tables.edges);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 2);\n\n    tables.edges.left[0] = 0;\n    tables.edges.parent[0] = 2;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_init(&tree, &ts, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = 
tsk_tree_first(&tree);\n    CU_ASSERT_EQUAL(ret, TSK_TREE_OK);\n    CU_ASSERT_EQUAL(tree.parent[0], 2);\n    CU_ASSERT_EQUAL(tree.parent[1], 2);\n    CU_ASSERT_EQUAL(tree.left_sib[2], TSK_NULL);\n    CU_ASSERT_EQUAL(tree.right_sib[2], TSK_NULL);\n    CU_ASSERT_EQUAL(tree.left_child[2], 0);\n    CU_ASSERT_EQUAL(tree.right_child[2], 1);\n    CU_ASSERT_EQUAL(tree.left_sib[0], TSK_NULL);\n    CU_ASSERT_EQUAL(tree.right_sib[0], 1);\n    CU_ASSERT_EQUAL(tree.left_sib[1], 0);\n    CU_ASSERT_EQUAL(tree.right_sib[1], TSK_NULL);\n    CU_ASSERT_EQUAL(tree.num_children[2], 2);\n\n    tsk_tree_free(&tree);\n    tsk_treeseq_free(&ts);\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_simplest_contradictory_children(void)\n{\n    const char *nodes = \"1  0   -1\\n\"\n                        \"1  1   -1\\n\"\n                        \"0  1   -1\\n\";\n    const char *edges = \"0  1   1   0\\n\"\n                        \"0  1   2   0\\n\";\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    int ret;\n    tsk_flags_t load_flags = TSK_TS_INIT_BUILD_INDEXES;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    parse_nodes(nodes, &tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 3);\n    parse_edges(edges, &tables.edges);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 2);\n    tables.sequence_length = 1.0;\n\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_EDGES_CONTRADICTORY_CHILDREN);\n\n    tsk_treeseq_free(&ts);\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_simplest_overlapping_edges_simplify(void)\n{\n    const char *nodes = \"1  0   -1\\n\"\n                        \"1  0   -1\\n\"\n                        \"1  0   -1\\n\"\n                        \"0  1   -1\";\n    const char *edges = \"0  2   3   0\\n\"\n                        \"1  3   3   1\\n\"\n                        \"0  3   3   2\\n\";\n    tsk_id_t samples[] = { 
0, 1, 2 };\n    tsk_table_collection_t tables;\n    int ret;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tables.sequence_length = 3;\n    parse_nodes(nodes, &tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 4);\n    parse_edges(edges, &tables.edges);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 3);\n\n    ret = tsk_table_collection_simplify(&tables, samples, 3, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_EQUAL(tables.nodes.num_rows, 4);\n    CU_ASSERT_EQUAL(tables.edges.num_rows, 3);\n\n    /* Identical to the input.\n    0  2   3   0\n    1  3   3   1\n    0  3   3   2\n    */\n    CU_ASSERT_EQUAL(tables.edges.left[0], 0);\n    CU_ASSERT_EQUAL(tables.edges.left[1], 1);\n    CU_ASSERT_EQUAL(tables.edges.left[2], 0);\n    CU_ASSERT_EQUAL(tables.edges.right[0], 2);\n    CU_ASSERT_EQUAL(tables.edges.right[1], 3);\n    CU_ASSERT_EQUAL(tables.edges.right[2], 3);\n    CU_ASSERT_EQUAL(tables.edges.parent[0], 3);\n    CU_ASSERT_EQUAL(tables.edges.parent[1], 3);\n    CU_ASSERT_EQUAL(tables.edges.parent[2], 3);\n    CU_ASSERT_EQUAL(tables.edges.child[0], 0);\n    CU_ASSERT_EQUAL(tables.edges.child[1], 1);\n    CU_ASSERT_EQUAL(tables.edges.child[2], 2);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_simplest_overlapping_unary_edges_simplify(void)\n{\n    const char *nodes = \"1  0   -1\\n\"\n                        \"1  0   -1\\n\"\n                        \"0  1   -1\";\n    const char *edges = \"0  2   2   0\\n\"\n                        \"1  3   2   1\\n\";\n    tsk_id_t samples[] = { 0, 1 };\n    tsk_table_collection_t tables;\n    int ret;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tables.sequence_length = 3;\n    parse_nodes(nodes, &tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 3);\n    parse_edges(edges, &tables.edges);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 2);\n\n    ret = 
tsk_table_collection_simplify(&tables, samples, 2, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_EQUAL(tables.nodes.num_rows, 3);\n    CU_ASSERT_EQUAL(tables.edges.num_rows, 2);\n\n    /* Because we only sample 0 and 1, the flanking unary edges are removed\n     1       2       2       0\n     1       2       2       1\n     */\n    CU_ASSERT_EQUAL(tables.edges.left[0], 1);\n    CU_ASSERT_EQUAL(tables.edges.right[0], 2);\n    CU_ASSERT_EQUAL(tables.edges.parent[0], 2);\n    CU_ASSERT_EQUAL(tables.edges.child[0], 0);\n    CU_ASSERT_EQUAL(tables.edges.left[1], 1);\n    CU_ASSERT_EQUAL(tables.edges.right[1], 2);\n    CU_ASSERT_EQUAL(tables.edges.parent[1], 2);\n    CU_ASSERT_EQUAL(tables.edges.child[1], 1);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_simplest_overlapping_unary_edges_internal_samples_simplify(void)\n{\n    const char *nodes = \"1  0   -1\\n\"\n                        \"1  0   -1\\n\"\n                        \"1  1   -1\";\n    const char *edges = \"0  2   2   0\\n\"\n                        \"1  3   2   1\\n\";\n    tsk_id_t samples[] = { 0, 1, 2 };\n    tsk_table_collection_t tables;\n    int ret;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tables.sequence_length = 3;\n    parse_nodes(nodes, &tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 3);\n    parse_edges(edges, &tables.edges);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 2);\n\n    ret = tsk_table_collection_simplify(&tables, samples, 3, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_EQUAL(tables.nodes.num_rows, 3);\n    CU_ASSERT_EQUAL(tables.edges.num_rows, 2);\n    /* Identical to the input.\n        0  2   2   0\n        1  3   2   1\n     */\n    CU_ASSERT_EQUAL(tables.edges.left[0], 0);\n    CU_ASSERT_EQUAL(tables.edges.left[1], 1);\n    CU_ASSERT_EQUAL(tables.edges.right[0], 2);\n    CU_ASSERT_EQUAL(tables.edges.right[1], 3);\n    
CU_ASSERT_EQUAL(tables.edges.parent[0], 2);\n    CU_ASSERT_EQUAL(tables.edges.parent[1], 2);\n    CU_ASSERT_EQUAL(tables.edges.child[0], 0);\n    CU_ASSERT_EQUAL(tables.edges.child[1], 1);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_simplest_reduce_site_topology(void)\n{\n    /* Two trees side by side, with a site on the second one. The first\n     * tree should disappear. */\n    const char *nodes = \"1  0   -1\\n\"\n                        \"1  0   -1\\n\"\n                        \"0  1   -1\\n\"\n                        \"0  2   -1\\n\";\n    const char *edges = \"0  1   2   0\\n\"\n                        \"0  1   2   1\\n\"\n                        \"1  2   3   0\\n\"\n                        \"1  2   3   1\\n\";\n    const char *sites = \"1.0  0\\n\";\n    tsk_id_t samples[] = { 0, 1 };\n    tsk_table_collection_t tables;\n    int ret;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tables.sequence_length = 2;\n    parse_nodes(nodes, &tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 4);\n    parse_edges(edges, &tables.edges);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 4);\n    parse_sites(sites, &tables.sites);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.num_rows, 1);\n\n    ret = tsk_table_collection_simplify(\n        &tables, samples, 2, TSK_SIMPLIFY_REDUCE_TO_SITE_TOPOLOGY, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_EQUAL(tables.nodes.num_rows, 3);\n    CU_ASSERT_EQUAL(tables.edges.num_rows, 2);\n    CU_ASSERT_EQUAL(tables.edges.left[0], 0);\n    CU_ASSERT_EQUAL(tables.edges.left[1], 0);\n    CU_ASSERT_EQUAL(tables.edges.right[0], 2);\n    CU_ASSERT_EQUAL(tables.edges.right[1], 2);\n    CU_ASSERT_EQUAL(tables.edges.parent[0], 2);\n    CU_ASSERT_EQUAL(tables.edges.parent[1], 2);\n    CU_ASSERT_EQUAL(tables.edges.child[0], 0);\n    CU_ASSERT_EQUAL(tables.edges.child[1], 1);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic 
void\ntest_simplest_simplify_defragment(void)\n{\n    const char *nodes = \"0        2     -1\\n\"\n                        \"0        2     -1\\n\"\n                        \"0        2     -1\\n\"\n                        \"0        2     -1\\n\"\n                        \"0        2     -1\\n\"\n                        \"0        2     -1\\n\"\n                        \"0        1     -1\\n\"\n                        \"0        1     -1\\n\"\n                        \"0        1     -1\\n\"\n                        \"0        1     -1\\n\"\n                        \"0        1     -1\\n\"\n                        \"0        1     -1\\n\"\n                        \"1        0     -1\\n\"\n                        \"1        0     -1\\n\"\n                        \"1        0     -1\\n\"\n                        \"1        0     -1\\n\"\n                        \"1        0     -1\\n\"\n                        \"1        0     -1\\n\";\n    const char *edges = \"0.00000000      0.20784841      8       12\\n\"\n                        \"0.00000000      0.42202433      8       15\\n\"\n                        \"0.00000000      0.63541014      8       16\\n\"\n                        \"0.42202433      1.00000000      9       15\\n\"\n                        \"0.00000000      1.00000000      9       17\\n\"\n                        \"0.00000000      1.00000000      10      14\\n\"\n                        \"0.20784841      1.00000000      11      12\\n\"\n                        \"0.00000000      1.00000000      11      13\\n\"\n                        \"0.63541014      1.00000000      11      16\\n\"\n                        \"0.00000000      1.00000000      0       10\\n\"\n                        \"0.62102072      1.00000000      1       9\\n\"\n                        \"0.00000000      1.00000000      1       11\\n\"\n                        \"0.00000000      0.26002984      2       6\\n\"\n                        \"0.26002984      1.00000000      2       6\\n\"\n  
                      \"0.00000000      0.62102072      2       9\\n\"\n                        \"0.55150554      1.00000000      3       8\\n\"\n                        \"0.00000000      1.00000000      4       7\\n\"\n                        \"0.00000000      0.55150554      5       8\\n\";\n\n    tsk_id_t samples[] = { 12, 13, 14, 15, 16, 17 };\n    tsk_table_collection_t tables;\n    int ret;\n\n    /* This was the simplest example I could find that exercised the\n     * inner loops of the simplifier_extract_ancestry function */\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1;\n    parse_nodes(nodes, &tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 18);\n    parse_edges(edges, &tables.edges);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 18);\n\n    ret = tsk_table_collection_simplify(&tables, samples, 6, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_EQUAL(tables.nodes.num_rows, 10);\n    CU_ASSERT_EQUAL(tables.edges.num_rows, 10);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_simplest_population_filter(void)\n{\n    tsk_table_collection_t tables;\n    tsk_id_t samples[] = { 0, 1 };\n    int ret;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tables.sequence_length = 1;\n    tsk_population_table_add_row(&tables.populations, \"0\", 1);\n    tsk_population_table_add_row(&tables.populations, \"1\", 1);\n    tsk_population_table_add_row(&tables.populations, \"2\", 1);\n    /* Two nodes referring to population 1 */\n    tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, 1, TSK_NULL, NULL, 0);\n    tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, 1, TSK_NULL, NULL, 0);\n\n    ret = tsk_table_collection_simplify(&tables, samples, 2, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tables.nodes.num_rows, 2);\n    
CU_ASSERT_EQUAL(tables.populations.num_rows, 3);\n    CU_ASSERT_EQUAL(tables.populations.metadata[0], '0');\n    CU_ASSERT_EQUAL(tables.populations.metadata[1], '1');\n    CU_ASSERT_EQUAL(tables.populations.metadata[2], '2');\n\n    ret = tsk_table_collection_simplify(\n        &tables, samples, 2, TSK_SIMPLIFY_FILTER_POPULATIONS, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tables.nodes.num_rows, 2);\n    CU_ASSERT_EQUAL(tables.nodes.population[0], 0);\n    CU_ASSERT_EQUAL(tables.nodes.population[1], 0);\n    CU_ASSERT_EQUAL(tables.populations.num_rows, 1);\n    CU_ASSERT_EQUAL(tables.populations.metadata[0], '1');\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_simplest_individual_filter(void)\n{\n    tsk_table_collection_t tables;\n    tsk_id_t samples[] = { 0, 1 };\n    int ret;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tables.sequence_length = 1;\n    tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, 0, \"0\", 1);\n    tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, 0, \"1\", 1);\n    tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, 0, \"2\", 1);\n    /* Two nodes referring to individual 1 */\n    tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, 1, NULL, 0);\n    tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, 1, NULL, 0);\n\n    ret = tsk_table_collection_simplify(&tables, samples, 2, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tables.nodes.num_rows, 2);\n    CU_ASSERT_EQUAL(tables.individuals.num_rows, 3);\n    CU_ASSERT_EQUAL(tables.individuals.metadata[0], '0');\n    CU_ASSERT_EQUAL(tables.individuals.metadata[1], '1');\n    CU_ASSERT_EQUAL(tables.individuals.metadata[2], '2');\n\n    ret = tsk_table_collection_simplify(\n        &tables, samples, 2, TSK_SIMPLIFY_FILTER_INDIVIDUALS, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    
CU_ASSERT_EQUAL(tables.nodes.num_rows, 2);\n    CU_ASSERT_EQUAL(tables.nodes.individual[0], 0);\n    CU_ASSERT_EQUAL(tables.nodes.individual[1], 0);\n    CU_ASSERT_EQUAL(tables.individuals.num_rows, 1);\n    CU_ASSERT_EQUAL(tables.individuals.metadata[0], '1');\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_simplest_no_node_filter(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\"\n                        \"0  1   0\"; /* unreferenced node */\n    const char *edges = \"0  1   2   0,1\\n\";\n    tsk_treeseq_t ts, simplified;\n    tsk_id_t sample_ids[] = { 0, 1 };\n    tsk_id_t node_map[] = { -1, -1, -1, -1 };\n    tsk_id_t j;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n\n    ret = tsk_treeseq_simplify(\n        &ts, NULL, 0, TSK_SIMPLIFY_NO_FILTER_NODES, &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));\n    tsk_treeseq_free(&simplified);\n\n    ret = tsk_treeseq_simplify(\n        &ts, sample_ids, 2, TSK_SIMPLIFY_NO_FILTER_NODES, &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));\n    tsk_treeseq_free(&simplified);\n\n    /* Reversing sample order makes no difference */\n    sample_ids[0] = 1;\n    sample_ids[1] = 0;\n    ret = tsk_treeseq_simplify(\n        &ts, sample_ids, 2, TSK_SIMPLIFY_NO_FILTER_NODES, &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));\n    tsk_treeseq_free(&simplified);\n\n    ret = tsk_treeseq_simplify(\n        &ts, sample_ids, 1, TSK_SIMPLIFY_NO_FILTER_NODES, &simplified, node_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&simplified), 4);\n    
CU_ASSERT_EQUAL(tsk_treeseq_get_num_edges(&simplified), 0);\n    for (j = 0; j < 4; j++) {\n        CU_ASSERT_EQUAL(node_map[j], j);\n    }\n    tsk_treeseq_free(&simplified);\n\n    ret = tsk_treeseq_simplify(&ts, sample_ids, 1,\n        TSK_SIMPLIFY_NO_FILTER_NODES | TSK_SIMPLIFY_KEEP_INPUT_ROOTS\n            | TSK_SIMPLIFY_KEEP_UNARY,\n        &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&simplified), 4);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_edges(&simplified), 1);\n    tsk_treeseq_free(&simplified);\n\n    sample_ids[0] = 0;\n    sample_ids[1] = 0;\n    ret = tsk_treeseq_simplify(\n        &ts, sample_ids, 2, TSK_SIMPLIFY_NO_FILTER_NODES, &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);\n    tsk_treeseq_free(&simplified);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_no_update_flags(void)\n{\n    const char *nodes = \"0  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\";\n    const char *edges = \"0  1   2   0,1\\n\";\n    tsk_treeseq_t ts, simplified;\n    tsk_id_t sample_ids[] = { 0, 1 };\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n\n    /* We have a mixture of sample and non-samples in the input tables */\n    ret = tsk_treeseq_simplify(\n        &ts, sample_ids, 2, TSK_SIMPLIFY_NO_UPDATE_SAMPLE_FLAGS, &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));\n    tsk_treeseq_free(&simplified);\n\n    ret = tsk_treeseq_simplify(&ts, sample_ids, 2,\n        TSK_SIMPLIFY_NO_UPDATE_SAMPLE_FLAGS | TSK_SIMPLIFY_NO_FILTER_NODES, &simplified,\n        NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));\n    tsk_treeseq_free(&simplified);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic 
void\ntest_simplest_map_mutations(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\";\n    const char *edges = \"0  1   2   0,1\\n\";\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n    int32_t genotypes[] = { 0, 0 };\n    tsk_size_t num_transitions;\n    tsk_state_transition_t *transitions;\n    int32_t ancestral_state;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_tree_next(&t));\n\n    ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);\n    CU_ASSERT_EQUAL_FATAL(num_transitions, 0);\n    free(transitions);\n\n    genotypes[0] = 1;\n    ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);\n    CU_ASSERT_EQUAL_FATAL(num_transitions, 1);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].node, 0);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].parent, TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].state, 1);\n    free(transitions);\n\n    genotypes[0] = -1;\n    ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);\n    CU_ASSERT_EQUAL_FATAL(num_transitions, 0);\n    free(transitions);\n\n    /* Check the null tree */\n    genotypes[0] = 1;\n    CU_ASSERT_FALSE(tsk_tree_next(&t));\n    ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);\n    CU_ASSERT_EQUAL_FATAL(num_transitions, 
1);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].node, 0);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].parent, TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].state, 1);\n    free(transitions);\n\n    /* Assign the ancestral_state */\n    genotypes[0] = 1;\n    genotypes[1] = 1;\n    ancestral_state = 0;\n    ret = tsk_tree_map_mutations(&t, genotypes, NULL, TSK_MM_FIXED_ANCESTRAL_STATE,\n        &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);\n    CU_ASSERT_EQUAL_FATAL(num_transitions, 2);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].node, 1);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].parent, TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].state, 1);\n    CU_ASSERT_EQUAL_FATAL(transitions[1].node, 0);\n    CU_ASSERT_EQUAL_FATAL(transitions[1].parent, TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(transitions[1].state, 1);\n    free(transitions);\n\n    tsk_tree_free(&t);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_nonbinary_map_mutations(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\";\n    const char *edges = \"0  1   4   0,1,2,3\\n\";\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n    int32_t genotypes[] = { 0, 0, 0, 0 };\n    tsk_size_t num_transitions;\n    tsk_state_transition_t *transitions;\n    int32_t ancestral_state;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_tree_next(&t));\n\n    ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);\n    CU_ASSERT_EQUAL_FATAL(num_transitions, 0);\n    free(transitions);\n\n   
 genotypes[0] = 1;\n    ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);\n    CU_ASSERT_EQUAL_FATAL(num_transitions, 1);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].node, 0);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].parent, TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].state, 1);\n    free(transitions);\n\n    tsk_tree_free(&t);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_unary_map_mutations(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\"\n                        \"0  1   0\\n\"\n                        \"0  2   0\";\n    const char *edges = \"0  1   2   0\\n\"\n                        \"0  1   3   1\\n\"\n                        \"0  1   4   2,3\\n\";\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n    int32_t genotypes[] = { 0, 0 };\n    tsk_size_t num_transitions;\n    tsk_state_transition_t *transitions;\n    int32_t ancestral_state;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_tree_next(&t));\n\n    ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);\n    CU_ASSERT_EQUAL_FATAL(num_transitions, 0);\n    free(transitions);\n\n    genotypes[0] = 1;\n    ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);\n    CU_ASSERT_EQUAL_FATAL(num_transitions, 1);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].node, 2);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].parent, TSK_NULL);\n    
CU_ASSERT_EQUAL_FATAL(transitions[0].state, 1);\n    free(transitions);\n\n    tsk_tree_free(&t);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_non_sample_leaf_map_mutations(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\"\n                        \"0  0   0\\n\"\n                        \"0  0   0\";\n    const char *edges = \"0  1   2   0,1,3,4\\n\";\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n    int32_t genotypes[] = { 0, 0 };\n    tsk_size_t num_transitions;\n    tsk_state_transition_t *transitions;\n    int32_t ancestral_state;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_tree_next(&t));\n\n    ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);\n    CU_ASSERT_EQUAL_FATAL(num_transitions, 0);\n    free(transitions);\n\n    genotypes[0] = 1;\n    ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);\n    CU_ASSERT_EQUAL_FATAL(num_transitions, 1);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].node, 0);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].parent, TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].state, 1);\n    free(transitions);\n\n    tsk_tree_free(&t);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_internal_sample_map_mutations(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  1   0\";\n    const char *edges = \"0  1   2   0,1\\n\";\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n    int32_t genotypes[] = { 0, 0, 0 };\n    tsk_size_t 
num_transitions;\n    tsk_state_transition_t *transitions;\n    int32_t ancestral_state;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_tree_next(&t));\n\n    ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);\n    CU_ASSERT_EQUAL_FATAL(num_transitions, 0);\n    free(transitions);\n\n    genotypes[0] = 1;\n    ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);\n    CU_ASSERT_EQUAL_FATAL(num_transitions, 1);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].node, 0);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].parent, TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].state, 1);\n    free(transitions);\n\n    genotypes[2] = 1;\n    ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ancestral_state, 1);\n    CU_ASSERT_EQUAL_FATAL(num_transitions, 1);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].node, 1);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].parent, TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].state, 0);\n    free(transitions);\n\n    tsk_tree_free(&t);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_multiple_root_map_mutations(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\"\n                        \"0  1   0\\n\";\n    const char *edges = \"0  1   4   0,1\\n\"\n                        \"0  1   5   2,3\\n\";\n    tsk_treeseq_t 
ts;\n    tsk_tree_t t;\n    int32_t genotypes[] = { 0, 0, 0, 0 };\n    tsk_size_t num_transitions;\n    tsk_state_transition_t *transitions;\n    int32_t ancestral_state;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_tree_next(&t));\n\n    ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);\n    CU_ASSERT_EQUAL_FATAL(num_transitions, 0);\n    free(transitions);\n\n    genotypes[0] = 1;\n    ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);\n    CU_ASSERT_EQUAL_FATAL(num_transitions, 1);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].node, 0);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].parent, TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].state, 1);\n    free(transitions);\n\n    genotypes[1] = 1;\n    ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);\n    CU_ASSERT_EQUAL_FATAL(num_transitions, 1);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].node, 4);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].parent, TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].state, 1);\n    free(transitions);\n\n    tsk_tree_free(&t);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_chained_map_mutations(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  1   0\\n\"\n                        \"1  1   0\\n\"\n                        \"0  2   0\";\n    const char *edges = \"0  1   2   0\\n\"\n                        \"0  1   3   
1\\n\"\n                        \"0  1   4   2,3\\n\";\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n    int32_t genotypes[] = { 0, 0, 0, 0 };\n    tsk_size_t num_transitions;\n    tsk_state_transition_t *transitions;\n    int32_t ancestral_state;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_tree_next(&t));\n\n    ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);\n    CU_ASSERT_EQUAL_FATAL(num_transitions, 0);\n    free(transitions);\n\n    genotypes[2] = 1;\n    ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);\n    CU_ASSERT_EQUAL_FATAL(num_transitions, 2);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].node, 2);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].parent, TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].state, 1);\n    CU_ASSERT_EQUAL_FATAL(transitions[1].node, 0);\n    CU_ASSERT_EQUAL_FATAL(transitions[1].parent, 0);\n    CU_ASSERT_EQUAL_FATAL(transitions[1].state, 0);\n    free(transitions);\n\n    tsk_tree_free(&t);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplest_mutation_edges(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"0  1   0\\n\"\n                        \"0  1   0\";\n    const char *edges = \"0  1   1   0\\n\"\n                        \"1  2   2   0\\n\";\n    const char *sites = \"0.5  0\\n\"\n                        \"1.5  0\\n\";\n    const char *mutations = \"0    2     1\\n\"\n                            \"0    1     1\\n\"\n                            \"0    0     1\\n\"\n                            \"1    2     1\\n\"\n                            \"1    1     
1\\n\"\n                            \"1    0     1\\n\";\n    tsk_treeseq_t ts;\n    tsk_tree_t tree;\n    /* We have mutations over roots, samples and just isolated nodes */\n    tsk_id_t mutation_edges[] = { -1, -1, 0, -1, -1, 1 };\n    tsk_size_t i, j, k, t;\n    tsk_mutation_t mut;\n    tsk_site_t site;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 2, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);\n\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 6);\n\n    for (j = 0; j < tsk_treeseq_get_num_mutations(&ts); j++) {\n        ret = tsk_treeseq_get_mutation(&ts, (tsk_id_t) j, &mut);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(mut.edge, mutation_edges[j]);\n    }\n\n    ret = tsk_tree_init(&tree, &ts, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    i = 0;\n    for (t = 0; t < 2; t++) {\n        ret = tsk_tree_next(&tree);\n        CU_ASSERT_EQUAL(ret, TSK_TREE_OK);\n        for (j = 0; j < tree.sites_length; j++) {\n            site = tree.sites[j];\n            for (k = 0; k < site.mutations_length; k++) {\n                CU_ASSERT_EQUAL(site.mutations[k].edge, mutation_edges[i]);\n                i++;\n            }\n        }\n    }\n    CU_ASSERT_EQUAL(i, 6);\n\n    tsk_tree_free(&tree);\n    tsk_treeseq_free(&ts);\n}\n\n/*=======================================================\n * Single tree tests.\n *======================================================*/\n\nstatic void\ntest_single_tree_good_records(void)\n{\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 4);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 1.0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 7);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 0);\n    
CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);\n    verify_edge_array_trees(&ts);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_nonbinary_tree_good_records(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\"\n                        \"0  2   0\\n\"\n                        \"0  3   0\\n\";\n    const char *edges = \"0 1 7 0,1,2,3\\n\"\n                        \"0 1 8 4,5\\n\"\n                        \"0 1 9 6,7,8\";\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 7);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 1.0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 10);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);\n    verify_edge_array_trees(&ts);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_bad_records(void)\n{\n    int ret = 0;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    tsk_flags_t load_flags = TSK_TS_INIT_BUILD_INDEXES;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tables.sequence_length = 1;\n    parse_nodes(single_tree_ex_nodes, &tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 7);\n    parse_edges(single_tree_ex_edges, &tables.edges);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 6);\n\n    /* Not sorted in time order */\n    tables.nodes.time[5] = 0.5;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME);\n    tsk_treeseq_free(&ts);\n    tables.nodes.time[5] = 2.0;\n\n    /* Left value 
greater than sequence right */\n    tables.edges.left[2] = 2.0;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_EDGE_INTERVAL);\n    tsk_treeseq_free(&ts);\n    tables.edges.left[2] = 0.0;\n\n    /* Non finite */\n    tables.nodes.time[5] = INFINITY;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_TIME_NONFINITE);\n    tsk_treeseq_free(&ts);\n    tables.nodes.time[5] = 2.0;\n\n    tables.nodes.time[5] = NAN;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_TIME_NONFINITE);\n    tsk_treeseq_free(&ts);\n    tables.nodes.time[5] = 2.0;\n\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, 0);\n    tsk_treeseq_free(&ts);\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_single_tree_good_mutations(void)\n{\n    tsk_treeseq_t ts;\n    tsk_size_t j;\n    tsk_size_t num_sites = 3;\n    tsk_size_t num_mutations = 7;\n    tsk_site_t other_sites[num_sites];\n    tsk_mutation_t other_mutations[num_mutations];\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 4);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 1.0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 7);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), num_sites);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), num_mutations);\n\n    for (j = 0; j < num_sites; j++) {\n        ret = tsk_treeseq_get_site(&ts, (tsk_id_t) j, other_sites + j);\n        CU_ASSERT_EQUAL(ret, 0);\n    }\n    for (j = 0; j < num_mutations; j++) {\n        ret = tsk_treeseq_get_mutation(&ts, (tsk_id_t) j, other_mutations + j);\n        CU_ASSERT_EQUAL(ret, 0);\n    }\n    CU_ASSERT_EQUAL(other_sites[0].position, 
0.125);\n    CU_ASSERT_NSTRING_EQUAL(other_sites[0].ancestral_state, \"0\", 1);\n    CU_ASSERT_EQUAL(other_sites[1].position, 0.25);\n    CU_ASSERT_NSTRING_EQUAL(other_sites[1].ancestral_state, \"0\", 1);\n    CU_ASSERT_EQUAL(other_sites[2].position, 0.5);\n    CU_ASSERT_NSTRING_EQUAL(other_sites[2].ancestral_state, \"0\", 1);\n\n    CU_ASSERT_EQUAL(other_mutations[0].id, 0);\n    CU_ASSERT_EQUAL(other_mutations[0].node, 2);\n    CU_ASSERT_NSTRING_EQUAL(other_mutations[0].derived_state, \"1\", 1);\n    CU_ASSERT_NSTRING_EQUAL(other_mutations[0].inherited_state, \"0\", 1);\n    CU_ASSERT_EQUAL(other_mutations[1].id, 1);\n    CU_ASSERT_EQUAL(other_mutations[1].node, 4);\n    CU_ASSERT_NSTRING_EQUAL(other_mutations[1].derived_state, \"1\", 1);\n    CU_ASSERT_NSTRING_EQUAL(other_mutations[1].inherited_state, \"0\", 1);\n    CU_ASSERT_EQUAL(other_mutations[2].id, 2);\n    CU_ASSERT_EQUAL(other_mutations[2].node, 0);\n    CU_ASSERT_NSTRING_EQUAL(other_mutations[2].derived_state, \"0\", 1);\n    CU_ASSERT_NSTRING_EQUAL(other_mutations[2].inherited_state, \"1\", 1);\n    CU_ASSERT_EQUAL(other_mutations[3].id, 3);\n    CU_ASSERT_EQUAL(other_mutations[3].node, 0);\n    CU_ASSERT_NSTRING_EQUAL(other_mutations[3].derived_state, \"1\", 1);\n    CU_ASSERT_NSTRING_EQUAL(other_mutations[3].inherited_state, \"0\", 1);\n    CU_ASSERT_EQUAL(other_mutations[4].id, 4);\n    CU_ASSERT_EQUAL(other_mutations[4].node, 1);\n    CU_ASSERT_NSTRING_EQUAL(other_mutations[4].derived_state, \"1\", 1);\n    CU_ASSERT_NSTRING_EQUAL(other_mutations[4].inherited_state, \"0\", 1);\n    CU_ASSERT_EQUAL(other_mutations[5].id, 5);\n    CU_ASSERT_EQUAL(other_mutations[5].node, 2);\n    CU_ASSERT_NSTRING_EQUAL(other_mutations[5].derived_state, \"1\", 1);\n    CU_ASSERT_NSTRING_EQUAL(other_mutations[5].inherited_state, \"0\", 1);\n    CU_ASSERT_EQUAL(other_mutations[6].id, 6);\n    CU_ASSERT_EQUAL(other_mutations[6].node, 3);\n    CU_ASSERT_NSTRING_EQUAL(other_mutations[6].derived_state, \"1\", 1);\n    
CU_ASSERT_NSTRING_EQUAL(other_mutations[6].inherited_state, \"0\", 1);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_bad_mutations(void)\n{\n    int ret = 0;\n    const char *sites = \"0       0\\n\"\n                        \"0.1     0\\n\"\n                        \"0.2     0\\n\";\n    const char *mutations = \"0   0  1  -1  0\\n\"\n                            \"1   1  1  -1  0\\n\"\n                            \"2   4  1  -1  1\\n\"\n                            \"2   1  0  2   0\\n\"\n                            \"2   1  1  3   0\\n\"\n                            \"2   2  1  -1  0\\n\";\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    tsk_flags_t load_flags = TSK_TS_INIT_BUILD_INDEXES;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tables.sequence_length = 1;\n    parse_nodes(single_tree_ex_nodes, &tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 7);\n    parse_edges(single_tree_ex_edges, &tables.edges);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 6);\n    parse_sites(sites, &tables.sites);\n    parse_mutations(mutations, &tables.mutations);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.num_rows, 3);\n    CU_ASSERT_EQUAL_FATAL(tables.mutations.num_rows, 6);\n    tables.sequence_length = 1.0;\n\n    /* Check to make sure we have legal mutations */\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 6);\n    tsk_treeseq_free(&ts);\n\n    /* negative coordinate */\n    tables.sites.position[0] = -1.0;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_SITE_POSITION);\n    tsk_treeseq_free(&ts);\n    tables.sites.position[0] = 0.0;\n\n    /* non finite coordinates */\n    tables.sites.position[0] = NAN;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, 
TSK_ERR_BAD_SITE_POSITION);\n    tsk_treeseq_free(&ts);\n    tables.sites.position[0] = 0.0;\n\n    tables.sites.position[0] = INFINITY;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_SITE_POSITION);\n    tsk_treeseq_free(&ts);\n    tables.sites.position[0] = 0.0;\n\n    /* coordinate == sequence length */\n    tables.sites.position[2] = 1.0;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_SITE_POSITION);\n    tsk_treeseq_free(&ts);\n    tables.sites.position[2] = 0.2;\n\n    /* coordinate > sequence length */\n    tables.sites.position[2] = 1.1;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_SITE_POSITION);\n    tsk_treeseq_free(&ts);\n    tables.sites.position[2] = 0.2;\n\n    /* Duplicate positions */\n    tables.sites.position[0] = 0.1;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_DUPLICATE_SITE_POSITION);\n    tsk_treeseq_free(&ts);\n    tables.sites.position[0] = 0.0;\n\n    /* Unsorted positions */\n    tables.sites.position[0] = 0.3;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_UNSORTED_SITES);\n    tsk_treeseq_free(&ts);\n    tables.sites.position[0] = 0.0;\n\n    /* site < 0 */\n    tables.mutations.site[0] = -2;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);\n    tsk_treeseq_free(&ts);\n    tables.mutations.site[0] = 0;\n\n    /* site == num_sites */\n    tables.mutations.site[0] = 3;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);\n    tsk_treeseq_free(&ts);\n    tables.mutations.site[0] = 0;\n\n    /* node = NULL */\n    tables.mutations.node[0] = TSK_NULL;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    tsk_treeseq_free(&ts);\n    
tables.mutations.node[0] = 0;\n\n    /* node >= num_nodes */\n    tables.mutations.node[0] = 7;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    tsk_treeseq_free(&ts);\n    tables.mutations.node[0] = 0;\n\n    /* parent < -1 */\n    tables.mutations.parent[0] = -2;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);\n    tsk_treeseq_free(&ts);\n    tables.mutations.parent[0] = TSK_NULL;\n\n    /* parent >= num_mutations */\n    tables.mutations.parent[0] = 7;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);\n    tsk_treeseq_free(&ts);\n    tables.mutations.parent[0] = TSK_NULL;\n\n    /* parent on a different site */\n    tables.mutations.parent[1] = 0;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_MUTATION_PARENT_DIFFERENT_SITE);\n    tsk_treeseq_free(&ts);\n    tables.mutations.parent[1] = TSK_NULL;\n\n    /* parent is the same mutation */\n    tables.mutations.parent[0] = 0;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_MUTATION_PARENT_EQUAL);\n    tsk_treeseq_free(&ts);\n    tables.mutations.parent[0] = TSK_NULL;\n\n    /* parent_id > mutation id */\n    tables.mutations.parent[3] = 4;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_MUTATION_PARENT_AFTER_CHILD);\n    tsk_treeseq_free(&ts);\n    tables.mutations.parent[3] = 2;\n\n    /* time < node time */\n    tables.mutations.time[2] = 0;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_MUTATION_TIME_YOUNGER_THAN_NODE);\n    tsk_treeseq_free(&ts);\n    tables.mutations.time[2] = 1;\n\n    /* time > parent mutation */\n    tables.mutations.time[4] = 0.5;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, 
TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_MUTATION);\n    tsk_treeseq_free(&ts);\n    tables.mutations.time[4] = 0;\n\n    /* time > parent node */\n    tables.mutations.time[0] = 1.5;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_NODE);\n    tsk_treeseq_free(&ts);\n    tables.mutations.time[0] = 0;\n\n    /* Check to make sure we've maintained legal mutations */\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 6);\n    tsk_treeseq_free(&ts);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_single_tree_iter(void)\n{\n    int ret;\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\"\n                        \"0  2   0\\n\"\n                        \"0  3   0\\n\";\n    const char *edges = \"0  6   4   0,1\\n\"\n                        \"0  6   5   2,3\\n\"\n                        \"0  6   6   4,5\\n\";\n    tsk_id_t parents[] = { 4, 4, 5, 5, 6, 6, TSK_NULL };\n    tsk_treeseq_t ts;\n    tsk_tree_t tree;\n    tsk_id_t u, v, w;\n    tsk_size_t num_samples;\n    tsk_size_t num_nodes = 7;\n\n    tsk_treeseq_from_text(&ts, 6, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    verify_edge_array_trees(&ts);\n    ret = tsk_tree_init(&tree, &ts, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n\n    ret = tsk_tree_first(&tree);\n    CU_ASSERT_EQUAL(ret, TSK_TREE_OK);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), num_nodes);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);\n    CU_ASSERT_EQUAL(tree.num_children[4], 2);\n    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 1);\n    tsk_tree_print_state(&tree, _devnull);\n\n    for (u = 0; u < (tsk_id_t) num_nodes; u++) {\n        ret = 
tsk_tree_get_parent(&tree, u, &v);\n        CU_ASSERT_EQUAL(ret, 0);\n        CU_ASSERT_EQUAL(v, parents[u]);\n    }\n    ret = tsk_tree_get_num_samples(&tree, 0, &num_samples);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(num_samples, 1);\n    ret = tsk_tree_get_num_samples(&tree, 4, &num_samples);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(num_samples, 2);\n    ret = tsk_tree_get_num_samples(&tree, 6, &num_samples);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(num_samples, 4);\n    ret = tsk_tree_get_mrca(&tree, 0, 1, &w);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(w, 4);\n    ret = tsk_tree_get_mrca(&tree, 0, 2, &w);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(w, 6);\n\n    ret = tsk_tree_next(&tree);\n    CU_ASSERT_EQUAL(ret, 0);\n\n    tsk_tree_free(&tree);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_nonbinary_tree_iter(void)\n{\n    int ret;\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\"\n                        \"0  2   0\\n\"\n                        \"0  3   0\\n\";\n    const char *edges = \"0  1   7   0,1,2,3\\n\"\n                        \"0  1   8   4,5\\n\"\n                        \"0  1   9   6,7,8\\n\";\n    tsk_id_t parents[] = { 7, 7, 7, 7, 8, 8, 9, 9, 9, TSK_NULL };\n    tsk_treeseq_t ts;\n    tsk_tree_t tree;\n    tsk_id_t u, v, w;\n    tsk_size_t num_samples;\n    tsk_size_t num_nodes = 10;\n    tsk_size_t total_samples = 7;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    verify_edge_array_trees(&ts);\n    ret = tsk_tree_init(&tree, &ts, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n\n    ret = tsk_tree_first(&tree);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n    
CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), num_nodes);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);\n    tsk_tree_print_state(&tree, _devnull);\n\n    for (u = 0; u < (tsk_id_t) num_nodes; u++) {\n        ret = tsk_tree_get_parent(&tree, u, &v);\n        CU_ASSERT_EQUAL(ret, 0);\n        CU_ASSERT_EQUAL(v, parents[u]);\n    }\n    for (u = 0; u < (tsk_id_t) total_samples; u++) {\n        ret = tsk_tree_get_num_samples(&tree, u, &num_samples);\n        CU_ASSERT_EQUAL(ret, 0);\n        CU_ASSERT_EQUAL(num_samples, 1);\n        CU_ASSERT_EQUAL(tree.left_child[u], TSK_NULL);\n    }\n\n    u = 7;\n    ret = tsk_tree_get_num_samples(&tree, u, &num_samples);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(num_samples, 4);\n    CU_ASSERT_EQUAL(tree.right_child[u], 3);\n    CU_ASSERT_EQUAL(tree.left_sib[3], 2);\n    CU_ASSERT_EQUAL(tree.left_sib[2], 1);\n    CU_ASSERT_EQUAL(tree.left_sib[1], 0);\n    CU_ASSERT_EQUAL(tree.left_sib[0], TSK_NULL);\n    CU_ASSERT_EQUAL(tree.num_children[u], 4);\n\n    u = 8;\n    ret = tsk_tree_get_num_samples(&tree, u, &num_samples);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(num_samples, 2);\n    CU_ASSERT_EQUAL(tree.right_child[u], 5);\n    CU_ASSERT_EQUAL(tree.left_sib[5], 4);\n    CU_ASSERT_EQUAL(tree.left_sib[4], TSK_NULL);\n    CU_ASSERT_EQUAL(tree.num_children[u], 2);\n\n    u = 9;\n    ret = tsk_tree_get_num_samples(&tree, u, &num_samples);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(num_samples, 7);\n    CU_ASSERT_EQUAL(tree.right_child[u], 8);\n    CU_ASSERT_EQUAL(tree.left_sib[8], 7);\n    CU_ASSERT_EQUAL(tree.left_sib[7], 6);\n    CU_ASSERT_EQUAL(tree.left_sib[6], TSK_NULL);\n    CU_ASSERT_EQUAL(tree.num_children[u], 3);\n\n    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 1);\n    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&tree), 9);\n\n    ret = tsk_tree_get_mrca(&tree, 0, 1, &w);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(w, 7);\n    ret = tsk_tree_get_mrca(&tree, 0, 4, &w);\n    
CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(w, 9);\n\n    ret = tsk_tree_next(&tree);\n    CU_ASSERT_EQUAL(ret, 0);\n\n    tsk_tree_free(&tree);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_general_samples_iter(void)\n{\n    int ret;\n    const char *nodes = \"0  3   0\\n\"\n                        \"0  2   0\\n\"\n                        \"0  1   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\";\n    const char *edges = \"0  6   2   3,4\\n\"\n                        \"0  6   1   5,6\\n\"\n                        \"0  6   0   1,2\\n\";\n    tsk_id_t parents[] = { TSK_NULL, 0, 0, 2, 2, 1, 1 };\n    const tsk_id_t *samples;\n    tsk_treeseq_t ts;\n    tsk_tree_t tree;\n    tsk_id_t u, v, w;\n    tsk_size_t num_samples;\n    tsk_size_t num_nodes = 7;\n\n    tsk_treeseq_from_text(&ts, 6, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    samples = tsk_treeseq_get_samples(&ts);\n    CU_ASSERT_EQUAL(samples[0], 3);\n    CU_ASSERT_EQUAL(samples[1], 4);\n    CU_ASSERT_EQUAL(samples[2], 5);\n    CU_ASSERT_EQUAL(samples[3], 6);\n    verify_edge_array_trees(&ts);\n\n    ret = tsk_tree_init(&tree, &ts, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_tree_first(&tree);\n    CU_ASSERT_EQUAL(ret, TSK_TREE_OK);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), num_nodes);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);\n    tsk_tree_print_state(&tree, _devnull);\n\n    for (u = 0; u < (tsk_id_t) num_nodes; u++) {\n        ret = tsk_tree_get_parent(&tree, u, &v);\n        CU_ASSERT_EQUAL(ret, 0);\n        CU_ASSERT_EQUAL(v, parents[u]);\n    }\n    ret = tsk_tree_get_num_samples(&tree, 3, &num_samples);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(num_samples, 1);\n    ret = tsk_tree_get_num_samples(&tree, 2, &num_samples);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(num_samples, 2);\n    ret = 
tsk_tree_get_num_samples(&tree, 0, &num_samples);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(num_samples, 4);\n    ret = tsk_tree_get_mrca(&tree, 3, 4, &w);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(w, 2);\n    ret = tsk_tree_get_mrca(&tree, 3, 6, &w);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(w, 0);\n\n    ret = tsk_tree_next(&tree);\n    CU_ASSERT_EQUAL(ret, 0);\n\n    tsk_tree_free(&tree);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_iter_times(void)\n{\n    int ret = 0;\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  2   0\\n\"\n                        \"1  3   0\\n\"\n                        \"0  1   0\\n\"\n                        \"0  4   0\\n\"\n                        \"0  5   0\\n\";\n    const char *edges = \"0  6   4   0,1\\n\"\n                        \"0  6   5   2,3\\n\"\n                        \"0  6   6   4,5\\n\";\n    tsk_id_t parents[] = { 4, 4, 5, 5, 6, 6, TSK_NULL };\n    double times[] = { 0.0, 0.0, 2.0, 3.0, 1.0, 4.0, 5.0 };\n    double t;\n    tsk_treeseq_t ts;\n    tsk_tree_t tree;\n    tsk_id_t u, v;\n    uint32_t num_nodes = 7;\n\n    tsk_treeseq_from_text(&ts, 6, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    ret = tsk_tree_init(&tree, &ts, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_tree_first(&tree);\n    CU_ASSERT_EQUAL(ret, TSK_TREE_OK);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), num_nodes);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);\n    tsk_tree_print_state(&tree, _devnull);\n\n    for (u = 0; u < (tsk_id_t) num_nodes; u++) {\n        ret = tsk_tree_get_parent(&tree, u, &v);\n        CU_ASSERT_EQUAL(ret, 0);\n        CU_ASSERT_EQUAL(v, parents[u]);\n        ret = tsk_tree_get_time(&tree, u, &t);\n        CU_ASSERT_EQUAL(ret, 0);\n        CU_ASSERT_EQUAL(t, times[u]);\n    }\n    ret = tsk_tree_next(&tree);\n    CU_ASSERT_EQUAL(ret, 0);\n\n    tsk_tree_free(&tree);\n    
tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_iter_depths(void)\n{\n    int ret = 0;\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\"\n                        \"0  2   0\\n\"\n                        \"0  3   0\\n\";\n    const char *edges = \"0  6   4   0,1\\n\"\n                        \"0  6   5   2,3\\n\"\n                        \"0  6   6   4,5\\n\";\n    int depths[] = { 2, 2, 2, 2, 1, 1, 0 };\n    int depth;\n    tsk_treeseq_t ts;\n    tsk_tree_t tree;\n    tsk_id_t u;\n    uint32_t num_nodes = 7;\n\n    tsk_treeseq_from_text(&ts, 6, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    ret = tsk_tree_init(&tree, &ts, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_tree_first(&tree);\n    CU_ASSERT_EQUAL(ret, TSK_TREE_OK);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), num_nodes);\n\n    for (u = 0; u < (tsk_id_t) num_nodes; u++) {\n        ret = tsk_tree_get_depth(&tree, u, &depth);\n        CU_ASSERT_EQUAL(ret, 0);\n        CU_ASSERT_EQUAL(depth, depths[u]);\n    }\n\n    ret = tsk_tree_get_depth(&tree, (tsk_id_t) num_nodes + 1, &depth);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    ret = tsk_tree_get_depth(&tree, TSK_NULL, &depth);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    ret = tsk_tree_next(&tree);\n    CU_ASSERT_EQUAL(ret, 0);\n\n    tsk_tree_free(&tree);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_simplify(void)\n{\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    tsk_id_t samples[] = { 0, 1 };\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);\n    verify_simplify(&ts);\n    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = 
tsk_table_collection_simplify(&tables, samples, 2, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tables.nodes.num_rows, 3);\n    CU_ASSERT_EQUAL(tables.edges.num_rows, 2);\n\n    /* Zero samples gives us the empty table collection */\n    ret = tsk_table_collection_simplify(&tables, samples, 0, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tables.nodes.num_rows, 0);\n    CU_ASSERT_EQUAL(tables.edges.num_rows, 0);\n\n    /* Make sure we detect unsorted edges */\n    ret = tsk_treeseq_copy_tables(&ts, &tables, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    unsort_edges(&tables.edges, 0);\n    ret = tsk_table_collection_simplify(&tables, samples, 2, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGES_NOT_SORTED_CHILD);\n\n    /* detect bad parents */\n    ret = tsk_treeseq_copy_tables(&ts, &tables, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.edges.parent[0] = -1;\n    ret = tsk_table_collection_simplify(&tables, samples, 2, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NULL_PARENT);\n\n    /* detect bad children */\n    ret = tsk_treeseq_copy_tables(&ts, &tables, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.edges.child[0] = -1;\n    ret = tsk_table_collection_simplify(&tables, samples, 2, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NULL_CHILD);\n\n    /* detect loops */\n    ret = tsk_treeseq_copy_tables(&ts, &tables, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.edges.child[0] = tables.edges.parent[0];\n    ret = tsk_table_collection_simplify(&tables, samples, 2, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NODE_TIME_ORDERING);\n\n    tsk_treeseq_free(&ts);\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_single_tree_simplify_debug(void)\n{\n    tsk_treeseq_t ts, simplified;\n    tsk_id_t samples[] = { 0, 1 };\n    int ret;\n    FILE *tmp = fopen(_tmp_file_name, \"w\");\n\n    CU_ASSERT_FATAL(tmp != NULL);\n    
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);\n\n    tsk_set_debug_stream(tmp);\n    ret = tsk_treeseq_simplify(&ts, samples, 2, TSK_DEBUG, &simplified, NULL);\n    tsk_set_debug_stream(stdout);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(ftell(tmp) > 0);\n\n    fclose(tmp);\n    tsk_treeseq_free(&ts);\n    tsk_treeseq_free(&simplified);\n}\n\nstatic void\ntest_single_tree_simplify_keep_input_roots(void)\n{\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    tsk_id_t samples[] = { 0, 1 };\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);\n    verify_simplify(&ts);\n    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_table_collection_simplify(\n        &tables, samples, 2, TSK_SIMPLIFY_KEEP_INPUT_ROOTS, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tables.nodes.num_rows, 4);\n    CU_ASSERT_EQUAL(tables.edges.num_rows, 3);\n    CU_ASSERT_EQUAL(tables.sites.num_rows, 3);\n    CU_ASSERT_EQUAL(tables.mutations.num_rows, 4);\n\n    tsk_treeseq_free(&ts);\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_single_tree_simplify_no_sample_nodes(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t t1, t2;\n    tsk_id_t samples[] = { 0, 1, 2, 3 };\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &t2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* We zero out the sample column in t1, and run simplify. 
We should\n     * get back the same table */\n\n    tsk_memset(t1.nodes.flags, 0, sizeof(*t1.nodes.flags) * t1.nodes.num_rows);\n\n    ret = tsk_table_collection_simplify(&t1, samples, 4, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n\n    tsk_table_collection_free(&t1);\n    tsk_table_collection_free(&t2);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_simplify_null_samples(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t t1, t2;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_treeseq_copy_tables(&ts, &t2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_table_collection_simplify(&t1, NULL, 0, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n\n    tsk_table_collection_free(&t1);\n    tsk_table_collection_free(&t2);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_compute_mutation_parents(void)\n{\n    int ret = 0;\n    const char *sites = \"0       0\\n\"\n                        \"0.1     0\\n\"\n                        \"0.2     0\\n\";\n    const char *mutations = \"0   0  1  -1\\n\"\n                            \"1   1  1  -1\\n\"\n                            \"2   4  1  -1\\n\"\n                            \"2   1  0  2 \\n\"\n                            \"2   1  1  3 \\n\"\n                            \"2   2  1  -1\\n\";\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tables.sequence_length = 1;\n    parse_nodes(single_tree_ex_nodes, &tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 7);\n    parse_edges(single_tree_ex_edges, &tables.edges);\n    
CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 6);\n    parse_sites(sites, &tables.sites);\n    parse_mutations(mutations, &tables.mutations);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.num_rows, 3);\n    CU_ASSERT_EQUAL_FATAL(tables.mutations.num_rows, 6);\n    tables.sequence_length = 1.0;\n\n    ret = tsk_table_collection_build_index(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Check to make sure we have legal mutations */\n    ret = tsk_treeseq_init(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 6);\n\n    /* Compute the mutation parents */\n    verify_compute_mutation_parents(&ts);\n    tsk_treeseq_free(&ts);\n\n    /* Bad site reference */\n    tables.mutations.site[0] = -1;\n    ret = tsk_table_collection_compute_mutation_parents(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);\n    tables.mutations.site[0] = 0;\n\n    /* Bad site reference */\n    tables.mutations.site[0] = -1;\n    ret = tsk_table_collection_compute_mutation_parents(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);\n    tables.mutations.site[0] = 0;\n\n    /* mutation sites out of order */\n    tables.mutations.site[0] = 2;\n    ret = tsk_table_collection_compute_mutation_parents(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_UNSORTED_MUTATIONS);\n    tables.mutations.site[0] = 0;\n\n    /* sites out of order */\n    tables.sites.position[0] = 0.11;\n    ret = tsk_table_collection_compute_mutation_parents(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_UNSORTED_SITES);\n    tables.sites.position[0] = 0;\n\n    /* Bad node reference */\n    tables.mutations.node[0] = -1;\n    ret = tsk_table_collection_compute_mutation_parents(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    tables.mutations.node[0] = 0;\n\n    /* Bad node reference */\n    tables.mutations.node[0] = (tsk_id_t) tables.nodes.num_rows;\n   
 ret = tsk_table_collection_compute_mutation_parents(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    tables.mutations.node[0] = 0;\n\n    /* Mutations not ordered by tree */\n    tables.mutations.node[2] = 1;\n    tables.mutations.node[3] = 4;\n    ret = tsk_table_collection_compute_mutation_parents(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_MUTATION_PARENT_AFTER_CHILD);\n    tables.mutations.node[2] = 4;\n    tables.mutations.node[3] = 1;\n\n    /* Need to reset the parent field here */\n    tsk_memset(\n        tables.mutations.parent, 0xff, tables.mutations.num_rows * sizeof(tsk_id_t));\n    /* Mutations not ordered by site */\n    tables.mutations.site[3] = 1;\n    ret = tsk_table_collection_compute_mutation_parents(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_MUTATIONS);\n    tables.mutations.site[3] = 2;\n\n    /* Check to make sure we still have legal mutations */\n    ret = tsk_table_collection_compute_mutation_parents(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_treeseq_init(&ts, &tables, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 6);\n    tsk_treeseq_free(&ts);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_single_tree_compute_mutation_times(void)\n{\n    int ret = 0;\n    const char *sites = \"0       0\\n\"\n                        \"0.1     0\\n\"\n                        \"0.2     0\\n\"\n                        \"0.3     0\\n\";\n    const char *mutations = \"0   0  1  -1  3\\n\"\n                            \"1   1  1  -1  3\\n\"\n                            \"2   4  1  -1  8\\n\"\n                            \"2   1  0  2   4\\n\"\n                            \"2   2  1  -1  4\\n\"\n                            \"2   1  1  3   2\\n\"\n                            \"3   6  1  -1  10\\n\";\n    /*          6          */\n    /*          6          */\n    /*       
  / \\         */\n    /*        /   \\        */\n    /*       2     \\       */\n    /*      /       5      */\n    /*     4       / \\     */\n    /*    0 1,3,4 5   \\    */\n    /*   0   1   2     3   */\n\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tables.sequence_length = 1;\n    parse_nodes(single_tree_ex_nodes, &tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 7);\n    tables.nodes.time[4] = 6;\n    tables.nodes.time[5] = 8;\n    tables.nodes.time[6] = 10;\n    parse_edges(single_tree_ex_edges, &tables.edges);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 6);\n    parse_sites(sites, &tables.sites);\n    parse_mutations(mutations, &tables.mutations);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.num_rows, 4);\n    CU_ASSERT_EQUAL_FATAL(tables.mutations.num_rows, 7);\n    tables.sequence_length = 1.0;\n\n    ret = tsk_table_collection_build_index(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    /* Check to make sure we have legal mutations */\n    ret = tsk_treeseq_init(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 4);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 7);\n\n    /* Compute the mutation times */\n    verify_compute_mutation_times(&ts);\n\n    /* Verify consistency of individuals */\n    verify_individual_nodes(&ts);\n    tsk_treeseq_free(&ts);\n\n    /* Bad random param */\n    ret = tsk_table_collection_compute_mutation_times(&tables, (double *) 1, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n\n    /* Bad site reference */\n    tables.mutations.site[0] = -1;\n    ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);\n    tables.mutations.site[0] = 0;\n\n    /* Bad site reference */\n    tables.mutations.site[0] = -1;\n    ret = 
tsk_table_collection_compute_mutation_times(&tables, NULL, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);\n    tables.mutations.site[0] = 0;\n\n    /* mutation sites out of order */\n    tables.mutations.site[0] = 2;\n    ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_UNSORTED_MUTATIONS);\n    tables.mutations.site[0] = 0;\n\n    /* sites out of order */\n    tables.sites.position[0] = 0.11;\n    ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_UNSORTED_SITES);\n    tables.sites.position[0] = 0;\n\n    /* Bad node reference */\n    tables.mutations.node[0] = -1;\n    ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    tables.mutations.node[0] = 0;\n\n    /* Bad node reference */\n    tables.mutations.node[0] = (tsk_id_t) tables.nodes.num_rows;\n    ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    tables.mutations.node[0] = 0;\n\n    /* Mutations not ordered by site */\n    tables.mutations.site[2] = 0;\n    ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_MUTATIONS);\n    tables.mutations.site[2] = 2;\n\n    ret = tsk_treeseq_init(&ts, &tables, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 4);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 7);\n    tsk_treeseq_free(&ts);\n\n    tsk_treeseq_free(&ts);\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_single_tree_mutation_edges(void)\n{\n    int ret = 0;\n    tsk_size_t i, j, k;\n    tsk_treeseq_t ts;\n    tsk_tree_t tree;\n    tsk_mutation_t mut;\n    tsk_site_t site;\n    tsk_id_t mutation_edges[] = { 2, 4, 0, 0, 1, 2, 3 };\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, 
NULL,\n        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);\n\n    for (j = 0; j < 7; j++) {\n        ret = tsk_treeseq_get_mutation(&ts, (tsk_id_t) j, &mut);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(mut.edge, mutation_edges[j]);\n    }\n\n    ret = tsk_tree_init(&tree, &ts, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_tree_first(&tree);\n    CU_ASSERT_EQUAL(ret, TSK_TREE_OK);\n    i = 0;\n    for (j = 0; j < tree.sites_length; j++) {\n        site = tree.sites[j];\n        for (k = 0; k < site.mutations_length; k++) {\n            CU_ASSERT_EQUAL(site.mutations[k].edge, mutation_edges[i]);\n            i++;\n        }\n    }\n    CU_ASSERT_EQUAL(i, 7);\n\n    tsk_tree_free(&tree);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_is_descendant(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_tree_t tree;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_tree_init(&tree, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&tree);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n    CU_ASSERT_TRUE(tsk_tree_is_descendant(&tree, 0, 4));\n    CU_ASSERT_TRUE(tsk_tree_is_descendant(&tree, 1, 4));\n    CU_ASSERT_TRUE(tsk_tree_is_descendant(&tree, 0, 6));\n    CU_ASSERT_TRUE(tsk_tree_is_descendant(&tree, 1, 6));\n    CU_ASSERT_TRUE(tsk_tree_is_descendant(&tree, 4, 6));\n    CU_ASSERT_TRUE(tsk_tree_is_descendant(&tree, 2, 5));\n    CU_ASSERT_TRUE(tsk_tree_is_descendant(&tree, 3, 5));\n    CU_ASSERT_TRUE(tsk_tree_is_descendant(&tree, 2, 6));\n    CU_ASSERT_TRUE(tsk_tree_is_descendant(&tree, 3, 6));\n    CU_ASSERT_TRUE(tsk_tree_is_descendant(&tree, 5, 6));\n    /* Nodes are descendants of themselves. 
*/\n    CU_ASSERT_TRUE(tsk_tree_is_descendant(&tree, 0, 0));\n    CU_ASSERT_TRUE(tsk_tree_is_descendant(&tree, 1, 1));\n\n    CU_ASSERT_FALSE(tsk_tree_is_descendant(&tree, 0, 1));\n    CU_ASSERT_FALSE(tsk_tree_is_descendant(&tree, 0, 2));\n    CU_ASSERT_FALSE(tsk_tree_is_descendant(&tree, 0, 5));\n\n    /* Out of bounds nodes always return false.*/\n    CU_ASSERT_FALSE(tsk_tree_is_descendant(&tree, -1, 5));\n    CU_ASSERT_FALSE(tsk_tree_is_descendant(&tree, 100, 5));\n    CU_ASSERT_FALSE(tsk_tree_is_descendant(&tree, -1, -1));\n\n    tsk_tree_free(&tree);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_total_branch_length(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_tree_t tree;\n    double length;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_tree_init(&tree, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&tree);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_total_branch_length(&tree, TSK_NULL, &length), 0);\n    CU_ASSERT_EQUAL_FATAL(length, 9);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_total_branch_length(&tree, 7, &length), 0);\n    CU_ASSERT_EQUAL_FATAL(length, 9);\n    CU_ASSERT_EQUAL_FATAL(\n        tsk_tree_get_total_branch_length(&tree, tree.virtual_root, &length), 0);\n    CU_ASSERT_EQUAL_FATAL(length, 9);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_total_branch_length(&tree, 4, &length), 0);\n    CU_ASSERT_EQUAL_FATAL(length, 2);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_total_branch_length(&tree, 0, &length), 0);\n    CU_ASSERT_EQUAL_FATAL(length, 0);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_total_branch_length(&tree, 5, &length), 0);\n    CU_ASSERT_EQUAL_FATAL(length, 4);\n\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_total_branch_length(&tree, -2, &length),\n        TSK_ERR_NODE_OUT_OF_BOUNDS);\n    CU_ASSERT_EQUAL_FATAL(\n        tsk_tree_get_total_branch_length(&tree, 8, &length), 
TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    tsk_tree_free(&tree);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_num_lineages(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_tree_t tree;\n    tsk_size_t num_lineages;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_tree_init(&tree, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&tree);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_num_lineages(&tree, 0, &num_lineages), 0);\n    CU_ASSERT_EQUAL_FATAL(num_lineages, 4);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_num_lineages(&tree, -1, &num_lineages), 0);\n    CU_ASSERT_EQUAL_FATAL(num_lineages, 0);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_num_lineages(&tree, 1, &num_lineages), 0);\n    CU_ASSERT_EQUAL_FATAL(num_lineages, 3);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_num_lineages(&tree, 2, &num_lineages), 0);\n    CU_ASSERT_EQUAL_FATAL(num_lineages, 2);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_num_lineages(&tree, 2.999, &num_lineages), 0);\n    CU_ASSERT_EQUAL_FATAL(num_lineages, 2);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_num_lineages(&tree, 3, &num_lineages), 0);\n    CU_ASSERT_EQUAL_FATAL(num_lineages, 0);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_num_lineages(&tree, 300, &num_lineages), 0);\n    CU_ASSERT_EQUAL_FATAL(num_lineages, 0);\n\n    CU_ASSERT_EQUAL_FATAL(\n        tsk_tree_num_lineages(&tree, INFINITY, &num_lineages), TSK_ERR_TIME_NONFINITE);\n    CU_ASSERT_EQUAL_FATAL(\n        tsk_tree_num_lineages(&tree, NAN, &num_lineages), TSK_ERR_TIME_NONFINITE);\n\n    tsk_tree_free(&tree);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_map_mutations(void)\n{\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n    int32_t genotypes[] = { 0, 1, 1, 1 };\n    int ret = 0;\n    tsk_size_t num_transitions;\n    tsk_state_transition_t *transitions;\n    int32_t ancestral_state, j;\n\n    tsk_treeseq_from_text(&ts, 1, 
single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 4);\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_tree_next(&t));\n\n    ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ancestral_state, 1);\n    CU_ASSERT_EQUAL_FATAL(num_transitions, 1);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].node, 0);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].parent, TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].state, 0);\n    free(transitions);\n\n    genotypes[0] = 1;\n    ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ancestral_state, 1);\n    CU_ASSERT_EQUAL_FATAL(num_transitions, 0);\n    free(transitions);\n\n    genotypes[0] = 0;\n    genotypes[1] = 0;\n    genotypes[2] = 0;\n    genotypes[3] = 0;\n    ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);\n    CU_ASSERT_EQUAL_FATAL(num_transitions, 0);\n    free(transitions);\n\n    for (j = 1; j < 64; j++) {\n        genotypes[0] = j;\n        genotypes[1] = 0;\n        genotypes[2] = 0;\n        genotypes[3] = 0;\n        ret = tsk_tree_map_mutations(\n            &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);\n        CU_ASSERT_EQUAL_FATAL(num_transitions, 1);\n        CU_ASSERT_EQUAL_FATAL(transitions[0].node, 0);\n        CU_ASSERT_EQUAL_FATAL(transitions[0].parent, TSK_NULL);\n        CU_ASSERT_EQUAL_FATAL(transitions[0].state, j);\n        free(transitions);\n    
}\n\n    genotypes[0] = 2;\n    genotypes[1] = 1;\n    ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);\n    CU_ASSERT_EQUAL_FATAL(num_transitions, 2);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].node, 4);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].parent, TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(transitions[0].state, 1);\n    CU_ASSERT_EQUAL_FATAL(transitions[1].node, 0);\n    CU_ASSERT_EQUAL_FATAL(transitions[1].parent, 0);\n    CU_ASSERT_EQUAL_FATAL(transitions[1].state, 2);\n    free(transitions);\n\n    genotypes[0] = 1;\n    genotypes[1] = 2;\n    genotypes[2] = 3;\n    genotypes[3] = 4;\n    ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(num_transitions, 3);\n    free(transitions);\n\n    ancestral_state = 5;\n    ret = tsk_tree_map_mutations(&t, genotypes, NULL, TSK_MM_FIXED_ANCESTRAL_STATE,\n        &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(num_transitions, 4);\n    CU_ASSERT_EQUAL_FATAL(ancestral_state, 5);\n    free(transitions);\n\n    ancestral_state = -1;\n    ret = tsk_tree_map_mutations(&t, genotypes, NULL, TSK_MM_FIXED_ANCESTRAL_STATE,\n        &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_ANCESTRAL_STATE);\n\n    ancestral_state = 64;\n    ret = tsk_tree_map_mutations(&t, genotypes, NULL, TSK_MM_FIXED_ANCESTRAL_STATE,\n        &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_ANCESTRAL_STATE);\n\n    genotypes[0] = 64;\n    ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_GENOTYPE);\n\n    genotypes[0] = -2;\n 
   ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_GENOTYPE);\n\n    genotypes[0] = -1;\n    genotypes[1] = -1;\n    genotypes[2] = -1;\n    genotypes[3] = -1;\n    ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_GENOTYPES_ALL_MISSING);\n\n    tsk_tree_free(&t);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_single_tree_map_mutations_internal_samples(void)\n{\n    /* Example derived from test case provoking a segfault */\n    const char *nodes = \"0       0.00000000000000   0\\n\"\n                        \"0       0.00000000000000   0\\n\"\n                        \"1       0.00000000000000   0\\n\"\n                        \"1       0.00000000000000   0\\n\"\n                        \"1       0.00000000000000   0\\n\"\n                        \"0       0.10792116530237   0\\n\"\n                        \"1       1.00674711128465   0\\n\"\n                        \"1       1.24675560985525   0\\n\"\n                        \"0       1.78536352520779   0\\n\";\n    const char *edges = \"0.00000000      1.00000000      5       0\\n\"\n                        \"0.00000000      1.00000000      5       2\\n\"\n                        \"0.00000000      1.00000000      6       4\\n\"\n                        \"0.00000000      1.00000000      6       5\\n\"\n                        \"0.00000000      1.00000000      7       1\\n\"\n                        \"0.00000000      1.00000000      7       3\\n\"\n                        \"0.00000000      1.00000000      8       6\\n\"\n                        \"0.00000000      1.00000000      8       7\\n\";\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n    int32_t genotypes[] = { 0, 2, 2, 1, 0 };\n    int ret = 0;\n    tsk_size_t num_transitions;\n    tsk_state_transition_t *transitions;\n    int32_t 
ancestral_state;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 5);\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_tree_next(&t));\n\n    ret = tsk_tree_map_mutations(\n        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);\n    CU_ASSERT_EQUAL_FATAL(num_transitions, 4);\n    free(transitions);\n\n    tsk_treeseq_free(&ts);\n    tsk_tree_free(&t);\n}\n\nstatic void\ntest_single_tree_tracked_samples(void)\n{\n    tsk_treeseq_t ts;\n    tsk_tree_t tree;\n    tsk_id_t samples[] = { 0, 1 };\n    tsk_size_t n;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,\n        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);\n\n    ret = tsk_tree_init(&tree, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_tree_set_tracked_samples(&tree, 2, samples);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_get_num_tracked_samples(&tree, 0, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 1);\n    ret = tsk_tree_get_num_tracked_samples(&tree, 4, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 0);\n    ret = tsk_tree_get_num_tracked_samples(&tree, tree.virtual_root, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 2);\n\n    ret = tsk_tree_first(&tree);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n\n    ret = tsk_tree_get_num_tracked_samples(&tree, 0, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 1);\n    ret = tsk_tree_get_num_tracked_samples(&tree, 4, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 2);\n    ret = tsk_tree_get_num_tracked_samples(&tree, 5, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 0);\n    
ret = tsk_tree_get_num_tracked_samples(&tree, 6, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 2);\n    ret = tsk_tree_get_num_tracked_samples(&tree, tree.virtual_root, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 2);\n\n    ret = tsk_tree_next(&tree);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_get_num_tracked_samples(&tree, 0, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 1);\n    ret = tsk_tree_get_num_tracked_samples(&tree, 4, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 0);\n    ret = tsk_tree_get_num_tracked_samples(&tree, tree.virtual_root, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 2);\n\n    ret = tsk_tree_next(&tree);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n    ret = tsk_tree_get_num_tracked_samples(&tree, 0, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 1);\n    ret = tsk_tree_get_num_tracked_samples(&tree, 4, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 2);\n    ret = tsk_tree_get_num_tracked_samples(&tree, tree.virtual_root, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 2);\n\n    ret = tsk_tree_set_tracked_samples(&tree, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_get_num_tracked_samples(&tree, 0, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 0);\n    ret = tsk_tree_get_num_tracked_samples(&tree, tree.virtual_root, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 0);\n\n    tsk_treeseq_free(&ts);\n    tsk_tree_free(&tree);\n}\n\nstatic void\ntest_single_tree_tree_pos(void)\n{\n    tsk_treeseq_t ts;\n    tsk_tree_position_t tree_pos;\n    bool valid;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n\n    ret = tsk_tree_position_init(&tree_pos, &ts, 0);\n    
CU_ASSERT_EQUAL_FATAL(ret, 0);\n    valid = tsk_tree_position_next(&tree_pos);\n    CU_ASSERT_FATAL(valid);\n\n    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.left, 0);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.right, 1);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.start, 0);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.stop, 6);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.order, ts.tables->indexes.edge_insertion_order);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 0);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 0);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_removal_order);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_FORWARD);\n\n    valid = tsk_tree_position_next(&tree_pos);\n    CU_ASSERT_FATAL(!valid);\n\n    tsk_tree_position_print_state(&tree_pos, _devnull);\n\n    CU_ASSERT_EQUAL_FATAL(tree_pos.index, -1);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 0);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 6);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_removal_order);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_FORWARD);\n\n    valid = tsk_tree_position_prev(&tree_pos);\n    CU_ASSERT_FATAL(valid);\n\n    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.left, 0);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.right, 1);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.start, 5);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.stop, -1);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.order, ts.tables->indexes.edge_removal_order);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 5);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 5);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_insertion_order);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_REVERSE);\n\n    valid = tsk_tree_position_prev(&tree_pos);\n    CU_ASSERT_FATAL(!valid);\n\n    CU_ASSERT_EQUAL_FATAL(tree_pos.index, -1);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 5);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, -1);\n    
CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_insertion_order);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_REVERSE);\n\n    ret = tsk_tree_position_seek_forward(&tree_pos, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.left, 0);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.right, 1);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.start, 0);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.stop, 6);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.order, ts.tables->indexes.edge_insertion_order);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 0);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 0);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_removal_order);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_FORWARD);\n\n    valid = tsk_tree_position_next(&tree_pos);\n    CU_ASSERT_FATAL(!valid);\n\n    CU_ASSERT_EQUAL_FATAL(tree_pos.index, -1);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 0);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 6);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_removal_order);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_FORWARD);\n\n    ret = tsk_tree_position_seek_backward(&tree_pos, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.left, 0);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.right, 1);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.start, 5);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.stop, -1);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.order, ts.tables->indexes.edge_removal_order);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 5);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 5);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_insertion_order);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_REVERSE);\n\n    tsk_tree_position_free(&tree_pos);\n    tsk_treeseq_free(&ts);\n}\n\n/*=======================================================\n * Multi tree tests.\n 
*======================================================*/\n\nstatic void\ntest_simple_multi_tree(void)\n{\n    // clang-format off\n    tsk_id_t parents[] = {\n        6, 5, 8, 5, TSK_NULL, 6, 8, TSK_NULL, TSK_NULL,\n        6, 5, 4, 4, 5, 6, TSK_NULL, TSK_NULL, TSK_NULL,\n        7, 5, 4, 4, 5, 7, TSK_NULL, TSK_NULL, TSK_NULL,\n    };\n    // clang-format on\n    uint32_t num_trees = 3;\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    verify_trees(&ts, num_trees, parents);\n    verify_edge_array_trees(&ts);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_multi_tree_direction_switching_tree_pos(void)\n{\n    tsk_treeseq_t ts;\n    tsk_tree_position_t tree_pos;\n    bool valid;\n    int ret = 0;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n\n    ret = tsk_tree_position_init(&tree_pos, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    valid = tsk_tree_position_next(&tree_pos);\n    CU_ASSERT_FATAL(valid);\n\n    CU_ASSERT_EQUAL_FATAL(tree_pos.index, 0);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.left, 0);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.right, 2);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.start, 0);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.stop, 6);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.order, ts.tables->indexes.edge_insertion_order);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 0);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 0);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_removal_order);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_FORWARD);\n\n    valid = tsk_tree_position_prev(&tree_pos);\n    CU_ASSERT_FATAL(!valid);\n\n    CU_ASSERT_EQUAL_FATAL(tree_pos.index, -1);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 5);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, -1);\n    
CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_insertion_order);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_REVERSE);\n\n    valid = tsk_tree_position_prev(&tree_pos);\n    CU_ASSERT_FATAL(valid);\n\n    CU_ASSERT_EQUAL_FATAL(tree_pos.index, 2);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.left, 7);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.right, 10);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.start, 10);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.stop, 4);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.order, ts.tables->indexes.edge_removal_order);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 10);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 10);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_insertion_order);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_REVERSE);\n\n    valid = tsk_tree_position_next(&tree_pos);\n    CU_ASSERT_FATAL(!valid);\n\n    CU_ASSERT_EQUAL_FATAL(tree_pos.index, -1);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 5);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 11);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_removal_order);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_FORWARD);\n\n    ret = tsk_tree_position_seek_forward(&tree_pos, 2);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.left, 7);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.right, 10);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.start, 0);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.stop, 11);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.order, ts.tables->indexes.edge_insertion_order);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 5);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 5);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_removal_order);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_FORWARD);\n\n    ret = tsk_tree_position_seek_backward(&tree_pos, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    
CU_ASSERT_EQUAL_FATAL(tree_pos.index, 0);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.left, 0);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.right, 2);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.start, 4);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.stop, -1);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.order, ts.tables->indexes.edge_removal_order);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 10);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 5);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_insertion_order);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_REVERSE);\n\n    ret = tsk_tree_position_seek_forward(&tree_pos, 2);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_EQUAL_FATAL(tree_pos.index, 2);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.left, 7);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.right, 10);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.start, 6);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.stop, 11);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.in.order, ts.tables->indexes.edge_insertion_order);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 0);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 5);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_removal_order);\n    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_FORWARD);\n\n    tsk_tree_position_free(&tree_pos);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_unary_multi_tree(void)\n{\n    // clang-format off\n    tsk_id_t parents[] = {\n        6, 5, 7, 5, TSK_NULL, 6, 8, 8, TSK_NULL, 5,\n        6, 5, 4, 4, 5, 6, 8, TSK_NULL, TSK_NULL, 5,\n        7, 5, 4, 4, 5, 7, TSK_NULL, TSK_NULL, TSK_NULL, 5,\n    };\n    // clang-format on\n    tsk_treeseq_t ts;\n    uint32_t num_trees = 3;\n\n    tsk_treeseq_from_text(&ts, 10, unary_ex_nodes, unary_ex_edges, NULL, unary_ex_sites,\n        unary_ex_mutations, NULL, NULL, 0);\n    verify_trees(&ts, num_trees, parents);\n    verify_edge_array_trees(&ts);\n    tsk_treeseq_free(&ts);\n}\n\nstatic 
void\ntest_internal_sample_multi_tree(void)\n{\n    // clang-format off\n    tsk_id_t parents[] = {\n        7, 5, 4, 4, 5, 7, TSK_NULL, TSK_NULL, TSK_NULL,\n        4, 5, 4, 8, 5, 8, TSK_NULL, TSK_NULL, TSK_NULL,\n        6, 5, 4, 4, 5, 6, TSK_NULL, TSK_NULL, TSK_NULL,\n    };\n    // clang-format on\n    tsk_treeseq_t ts;\n    uint32_t num_trees = 3;\n\n    tsk_treeseq_from_text(&ts, 10, internal_sample_ex_nodes, internal_sample_ex_edges,\n        NULL, internal_sample_ex_sites, internal_sample_ex_mutations, NULL, NULL, 0);\n    verify_trees(&ts, num_trees, parents);\n    verify_edge_array_trees(&ts);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_internal_sample_simplified_multi_tree(void)\n{\n    int ret;\n    tsk_treeseq_t ts, simplified;\n    tsk_id_t samples[] = { 2, 3, 5 };\n    tsk_id_t node_map[9];\n    tsk_id_t z = TSK_NULL;\n    // clang-format off\n    tsk_id_t parents[] = {\n    /*  0  1  2  3  4 */\n        3, 3, z, 2, z,\n        2, 4, 4, z, z,\n        3, 3, z, 2, z,\n    };\n    // clang-format on\n    uint32_t num_trees = 3;\n\n    tsk_treeseq_from_text(&ts, 10, internal_sample_ex_nodes, internal_sample_ex_edges,\n        NULL, internal_sample_ex_sites, internal_sample_ex_mutations, NULL, NULL, 0);\n    ret = tsk_treeseq_simplify(&ts, samples, 3, 0, &simplified, node_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(node_map[2], 0);\n    CU_ASSERT_EQUAL(node_map[3], 1);\n    CU_ASSERT_EQUAL(node_map[5], 2);\n\n    verify_trees(&simplified, num_trees, parents);\n    verify_edge_array_trees(&ts);\n    tsk_treeseq_free(&simplified);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_nonbinary_multi_tree(void)\n{\n    /* We make one mutation for each tree */\n    // clang-format off\n    tsk_id_t parents[] = {\n        8, 8, 8, 8, 10, 10, 9, 10, 9, 12, 12, TSK_NULL, TSK_NULL,\n        8, 8, 8, 8, 10, 11, 9, 10, 9, 11, 12, 12, TSK_NULL,\n    };\n    // clang-format on\n\n    tsk_treeseq_t ts;\n    uint32_t num_trees = 2;\n\n    
tsk_treeseq_from_text(&ts, 100, nonbinary_ex_nodes, nonbinary_ex_edges, NULL,\n        nonbinary_ex_sites, nonbinary_ex_mutations, NULL, NULL, 0);\n    verify_trees(&ts, num_trees, parents);\n    verify_edge_array_trees(&ts);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_simplify_keep_input_roots_multi_tree(void)\n{\n\n    /*\n    0.25┊     8   ┊         ┊         ┊\n        ┊   ┏━┻━┓ ┊         ┊         ┊\n    0.20┊   ┃   ┃ ┊         ┊   7     ┊\n        ┊   ┃   ┃ ┊         ┊ ┏━┻━┓   ┊\n    0.17┊   6   ┃ ┊   6     ┊ ┃   ┃   ┊\n        ┊ ┏━┻┓  ┃ ┊ ┏━┻━┓   ┊ ┃   ┃   ┊\n    0.09┊ ┃  5  ┃ ┊ ┃   5   ┊ ┃   5   ┊\n        ┊ ┃ ┏┻┓ ┃ ┊ ┃ ┏━┻┓  ┊ ┃ ┏━┻┓  ┊\n    0.07┊ ┃ ┃ ┃ ┃ ┊ ┃ ┃  4  ┊ ┃ ┃  4  ┊\n        ┊ ┃ ┃ ┃ ┃ ┊ ┃ ┃ ┏┻┓ ┊ ┃ ┃ ┏┻┓ ┊\n    0.00┊ 0 1 3 2 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊\n      0.00      2.00      7.00      10.00\n\n    Simplifies to\n\n    0.25┊  4  ┊     ┊     ┊\n        ┊  ┃  ┊     ┊     ┊\n    0.20┊  ┃  ┊     ┊  3  ┊\n        ┊  ┃  ┊     ┊ ┏┻┓ ┊\n    0.17┊  2  ┊  2  ┊ ┃ ┃ ┊\n        ┊ ┏┻┓ ┊ ┏┻┓ ┊ ┃ ┃ ┊\n    0.00┊ 0 1 ┊ 0 1 ┊ 0 1 ┊\n      0.00  2.00  7.00  10.00\n\n    */\n    int ret = 0;\n    // clang-format off\n    tsk_id_t parents[] = {\n        2, 2, 4, -1, -1,\n        2, 2, -1, -1, -1,\n        3, 3, -1, -1, -1,\n    };\n    // clang-format on\n    uint32_t num_trees = 3;\n\n    tsk_id_t samples[] = { 0, 3 };\n    tsk_treeseq_t ts, simplified;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    ret = tsk_treeseq_simplify(\n        &ts, samples, 2, TSK_SIMPLIFY_KEEP_INPUT_ROOTS, &simplified, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    verify_trees(&simplified, num_trees, parents);\n    verify_edge_array_trees(&ts);\n\n    tsk_treeseq_free(&ts);\n    tsk_treeseq_free(&simplified);\n}\n\nstatic void\ntest_left_to_right_multi_tree(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        
\"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  0.090   0\\n\"\n                        \"0  0.170   0\\n\"\n                        \"0  0.253   0\\n\"\n                        \"0  0.071   0\\n\"\n                        \"0  0.202   0\\n\";\n    const char *edges = \"2 10 7 2,3\\n\"\n                        \"0 2  4 1\\n\"\n                        \"2 10 4 1\\n\"\n                        \"0 2  4 3\\n\"\n                        \"2 10 4 7\\n\"\n                        \"0 7  5 0,4\\n\"\n                        \"7 10 8 0,4\\n\"\n                        \"0 2  6 2,5\\n\";\n    const char *sites = \"1      0\\n\"\n                        \"4.5    0\\n\"\n                        \"8.5    0\\n\";\n    const char *mutations = \"0    2    1\\n\"\n                            \"1    0    1\\n\"\n                            \"2    4    1\\n\";\n\n    // clang-format off\n    tsk_id_t parents[] = {\n        5, 4, 6, 4, 5, 6, TSK_NULL, TSK_NULL, TSK_NULL,\n        5, 4, 7, 7, 5, TSK_NULL, TSK_NULL, 4, TSK_NULL,\n        8, 4, 7, 7, 8, TSK_NULL, TSK_NULL, 4, TSK_NULL,\n    };\n    // clang-format on\n    tsk_treeseq_t ts;\n    uint32_t num_trees = 3;\n\n    tsk_treeseq_from_text(&ts, 10, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);\n    verify_trees(&ts, num_trees, parents);\n    verify_tree_next_prev(&ts);\n    verify_edge_array_trees(&ts);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_gappy_multi_tree(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  0.090   0\\n\"\n                        \"0  0.170   0\\n\"\n                        \"0  0.253   0\\n\"\n                        \"0  0.071   0\\n\"\n                        \"0  0.202   0\\n\";\n    const char *edges = \"2 7  7 2\\n\"\n                        \"8 10 7 2\\n\"\n                        \"2 7  7 3\\n\"\n     
                   \"8 10 7 3\\n\"\n                        \"1 2  4 1\\n\"\n                        \"2 7  4 1\\n\"\n                        \"8 10 4 1\\n\"\n                        \"1 2  4 3\\n\"\n                        \"2 7  4 7\\n\"\n                        \"8 10 4 7\\n\"\n                        \"1 7  5 0,4\\n\"\n                        \"8 10 8 0,4\\n\"\n                        \"1 2  6 2,5\\n\";\n    tsk_id_t z = TSK_NULL;\n    // clang-format off\n    tsk_id_t parents[] = {\n        z, z, z, z, z, z, z, z, z,\n        5, 4, 6, 4, 5, 6, z, z, z,\n        5, 4, 7, 7, 5, z, z, 4, z,\n        z, z, z, z, z, z, z, z, z,\n        8, 4, 7, 7, 8, z, z, 4, z,\n        z, z, z, z, z, z, z, z, z,\n    };\n    // clang-format on\n    tsk_treeseq_t ts;\n    uint32_t num_trees = 6;\n\n    tsk_treeseq_from_text(&ts, 12, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    verify_trees(&ts, num_trees, parents);\n    verify_tree_next_prev(&ts);\n    verify_edge_array_trees(&ts);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_tsk_treeseq_bad_records(void)\n{\n    int ret = 0;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    uint32_t num_trees = 3;\n    // clang-format off\n    tsk_id_t parents[] = {\n        6, 5, 8, 5, TSK_NULL, 6, 8, TSK_NULL, TSK_NULL,\n        6, 5, 4, 4, 5, 6, TSK_NULL, TSK_NULL, TSK_NULL,\n        7, 5, 4, 4, 5, 7, TSK_NULL, TSK_NULL, TSK_NULL,\n    };\n    // clang-format on\n    tsk_flags_t load_flags = TSK_TS_INIT_BUILD_INDEXES;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tables.sequence_length = 10;\n    parse_nodes(paper_ex_nodes, &tables.nodes);\n    parse_edges(paper_ex_edges, &tables.edges);\n    parse_individuals(paper_ex_individuals, &tables.individuals);\n\n    /* Make sure we have a good set of records */\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ts.num_trees, 3);\n    verify_trees(&ts, 
num_trees, parents);\n    tsk_treeseq_free(&ts);\n\n    /* Left value greater than right */\n    tables.edges.left[0] = 10.0;\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_EDGE_INTERVAL);\n    tsk_treeseq_free(&ts);\n    tables.edges.left[0] = 2.0;\n\n    ret = tsk_treeseq_init(&ts, &tables, load_flags);\n    CU_ASSERT_EQUAL(ret, 0);\n    verify_trees(&ts, num_trees, parents);\n    tsk_treeseq_free(&ts);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_convenience_arrays_multi_tree(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n\n    tsk_treeseq_from_text(\n        &ts, 10, unary_ex_nodes, unary_ex_edges, NULL, NULL, NULL, NULL, NULL, 0);\n    verify_edge_array_trees(&ts);\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_tree_next(&t));\n    CU_ASSERT_EQUAL(t.num_children[8], 2);\n\n    CU_ASSERT_TRUE(tsk_tree_next(&t));\n    CU_ASSERT_EQUAL(t.num_children[8], 1);\n\n    CU_ASSERT_TRUE(tsk_tree_next(&t));\n    CU_ASSERT_EQUAL(t.num_children[8], 0);\n\n    tsk_tree_free(&t);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_multiroot_mrca(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_tree_t tree;\n    tsk_id_t mrca;\n\n    tsk_treeseq_from_text(&ts, 10, multiroot_ex_nodes, multiroot_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_tree_init(&tree, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&tree);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_mrca(&tree, 0, 0, &mrca), 0);\n    CU_ASSERT_EQUAL(mrca, 0);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_mrca(&tree, 0, 1, &mrca), 0);\n    CU_ASSERT_EQUAL(mrca, 10);\n    /* MRCA of two nodes in different subtrees is TSK_NULL */\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_mrca(&tree, 0, 2, &mrca), 0);\n    CU_ASSERT_EQUAL(mrca, TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_mrca(&tree, 2, 0, &mrca), 
0);\n    CU_ASSERT_EQUAL(mrca, TSK_NULL);\n\n    tsk_tree_free(&tree);\n    tsk_treeseq_free(&ts);\n}\n\n/*=======================================================\n * Sample sets\n *======================================================*/\n\nstatic void\ntest_simple_sample_sets(void)\n{\n    // clang-format off\n    sample_count_test_t tests[] = {\n        {0, 0, 1}, {0, 5, 2}, {0, 6, 3},\n        {1, 4, 2}, {1, 5, 3}, {1, 6, 4}};\n    // clang-format on\n    uint32_t num_tests = 6;\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, NULL, NULL,\n        paper_ex_individuals, NULL, 0);\n    verify_sample_counts(&ts, num_tests, tests, 0);\n    verify_sample_counts(&ts, num_tests, tests, TSK_SEEK_SKIP);\n    verify_sample_sets(&ts);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_nonbinary_sample_sets(void)\n{\n    // clang-format off\n    sample_count_test_t tests[] = {\n        {0, 0, 1}, {0, 8, 4}, {0, 9, 5}, {0, 10, 3}, {0, 12, 8},\n        {1, 5, 1}, {1, 8, 4}, {1, 9, 5}, {0, 10, 2}, {0, 11, 1}};\n    // clang-format on\n    uint32_t num_tests = 8;\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 100, nonbinary_ex_nodes, nonbinary_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    verify_sample_counts(&ts, num_tests, tests, 0);\n    verify_sample_counts(&ts, num_tests, tests, TSK_SEEK_SKIP);\n    verify_sample_sets(&ts);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_internal_sample_sample_sets(void)\n{\n    // clang-format off\n    sample_count_test_t tests[] = {\n        {0, 0, 1}, {0, 5, 4}, {0, 4, 2}, {0, 7, 5},\n        {1, 4, 2}, {1, 5, 4}, {1, 8, 5},\n        {2, 5, 4}, {2, 6, 5}};\n    // clang-format on\n    uint32_t num_tests = 9;\n    tsk_treeseq_t ts;\n\n    tsk_treeseq_from_text(&ts, 10, internal_sample_ex_nodes, internal_sample_ex_edges,\n        NULL, NULL, NULL, NULL, NULL, 0);\n    verify_sample_counts(&ts, num_tests, tests, 0);\n    verify_sample_counts(&ts, num_tests, 
tests, TSK_SEEK_SKIP);\n    verify_sample_sets(&ts);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_non_sample_leaf_sample_lists(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"0  0   0\\n\"\n                        \"1  2   0\\n\";\n    const char *edges = \"0 1  2 0,1\\n\";\n    const tsk_id_t left_sample[3] = { 0, -1, 1 };\n    const tsk_id_t right_sample[3] = { 0, -1, 0 };\n    const tsk_id_t next_sample[2] = { -1, 0 };\n    const tsk_id_t samples[2] = { 0, 2 };\n    const tsk_id_t sample_index_map[3] = { 0, -1, 1 };\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n    tsk_id_t i;\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n\n    ret = tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n\n    for (i = 0; i < 3; i++) {\n        CU_ASSERT_EQUAL_FATAL(left_sample[i], t.left_sample[i]);\n        CU_ASSERT_EQUAL_FATAL(right_sample[i], t.right_sample[i]);\n        CU_ASSERT_EQUAL_FATAL(sample_index_map[i], ts.sample_index_map[i]);\n    }\n    for (i = 0; i < 2; i++) {\n        CU_ASSERT_EQUAL_FATAL(next_sample[i], t.next_sample[i]);\n        CU_ASSERT_EQUAL_FATAL(samples[i], t.samples[i]);\n    }\n\n    tsk_treeseq_free(&ts);\n    tsk_tree_free(&t);\n}\n\nstatic void\ntest_virtual_root_properties(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n    int depth;\n    double time, length;\n    tsk_id_t node;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_depth(&t, t.virtual_root, &depth), 0)\n    CU_ASSERT_EQUAL_FATAL(depth, -1);\n\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_time(&t, t.virtual_root, 
&time), 0)\n    /* Workaround problems in IEEE floating point macros. We may want to\n     * add tsk_isinf (like tsk_isnan) at some point, but not worth it just\n     * for this test case */\n    CU_ASSERT_TRUE(isinf((float) time));\n\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_mrca(&t, t.virtual_root, 0, &node), 0)\n    CU_ASSERT_EQUAL(node, t.virtual_root);\n\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_mrca(&t, 0, t.virtual_root, &node), 0)\n    CU_ASSERT_EQUAL(node, t.virtual_root);\n\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_parent(&t, t.virtual_root, &node), 0)\n    CU_ASSERT_EQUAL(node, TSK_NULL);\n\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_branch_length(&t, t.virtual_root, &length), 0)\n    CU_ASSERT_EQUAL(length, 0);\n\n    /* The definition of \"descendant\" is that node v is on the path from\n     * u to a root. Since there is no parent link from roots to the\n     * virtual_root, it's consistent with this definition to return false\n     * for every node. */\n    CU_ASSERT_FALSE(tsk_tree_is_descendant(&t, 0, t.virtual_root));\n    CU_ASSERT_FALSE(\n        tsk_tree_is_descendant(&t, t.left_child[t.virtual_root], t.virtual_root));\n    CU_ASSERT_FALSE(tsk_tree_is_descendant(&t, t.virtual_root, 0));\n    /* The virtual_root *is* a descendent of itself, though. This is\n     * consistent with other nodes that are not \"in\" the tree being\n     * descendents of themselves, despite not being roots in the tree. 
*/\n    CU_ASSERT_TRUE(tsk_tree_is_descendant(&t, t.virtual_root, t.virtual_root));\n\n    CU_ASSERT_FALSE(tsk_tree_is_sample(&t, t.virtual_root));\n\n    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&t), 1);\n\n    tsk_tree_free(&t);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_no_sample_count_semantics(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n    tsk_id_t nodes;\n    tsk_size_t n;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n\n    ret = tsk_tree_init(&t, &ts, TSK_NO_SAMPLE_COUNTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n\n    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&t), 0);\n    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&t), TSK_NULL);\n    CU_ASSERT_EQUAL(tsk_tree_preorder(&t, &nodes, &n), TSK_ERR_UNSUPPORTED_OPERATION);\n    CU_ASSERT_EQUAL(tsk_tree_postorder(&t, &nodes, &n), TSK_ERR_UNSUPPORTED_OPERATION);\n    CU_ASSERT_EQUAL(tsk_tree_preorder_samples_from(&t, -1, &nodes, &n),\n        TSK_ERR_UNSUPPORTED_OPERATION);\n\n    CU_ASSERT_EQUAL(tsk_tree_preorder_from(&t, t.virtual_root, &nodes, &n),\n        TSK_ERR_UNSUPPORTED_OPERATION);\n    CU_ASSERT_EQUAL(tsk_tree_postorder_from(&t, t.virtual_root, &nodes, &n),\n        TSK_ERR_UNSUPPORTED_OPERATION);\n    CU_ASSERT_EQUAL(tsk_tree_preorder_samples_from(&t, t.virtual_root, &nodes, &n),\n        TSK_ERR_UNSUPPORTED_OPERATION);\n\n    tsk_tree_free(&t);\n    tsk_treeseq_free(&ts);\n}\n\n/*=======================================================\n * Tree traversals\n *=======================================================*/\n\nstatic void\nverify_node_lists(tsk_size_t n, tsk_id_t *l1, tsk_id_t *l2)\n{\n    tsk_size_t j;\n\n    for (j = 0; j < n; j++) {\n        /* printf(\"%d %d\\n\", l1[j], l2[j]); */\n        CU_ASSERT_EQUAL(l1[j], l2[j]);\n    }\n}\n\nstatic void\ntest_single_tree_traversal(void)\n{\n    int ret;\n    tsk_treeseq_t 
ts;\n    tsk_tree_t t;\n    tsk_size_t num_nodes = 7;\n    tsk_id_t preorder[] = { 6, 4, 0, 1, 5, 2, 3 };\n    tsk_id_t preorder_vr[] = { 7, 6, 4, 0, 1, 5, 2, 3 };\n    tsk_id_t preorder_samples[] = { 0, 1, 2, 3 };\n    tsk_id_t postorder[] = { 0, 1, 4, 2, 3, 5, 6 };\n    tsk_id_t postorder_vr[] = { 0, 1, 4, 2, 3, 5, 6, 7 };\n    tsk_id_t nodes[num_nodes + 1];\n    tsk_size_t n;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n\n    ret = tsk_tree_preorder(&t, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, num_nodes);\n    verify_node_lists(n, nodes, preorder);\n\n    ret = tsk_tree_preorder_from(&t, -1, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, num_nodes);\n    verify_node_lists(n, nodes, preorder);\n\n    ret = tsk_tree_preorder_from(&t, t.virtual_root, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, num_nodes + 1);\n    verify_node_lists(n, nodes, preorder_vr);\n\n    ret = tsk_tree_preorder_samples_from(&t, -1, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 4);\n    verify_node_lists(n, nodes, preorder_samples);\n\n    ret = tsk_tree_preorder_samples_from(&t, t.virtual_root, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 4);\n    verify_node_lists(n, nodes, preorder_samples);\n\n    ret = tsk_tree_preorder_from(&t, 5, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 3);\n    verify_node_lists(n, nodes, preorder + 4);\n\n    ret = tsk_tree_preorder_samples_from(&t, 5, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 2);\n    verify_node_lists(n, nodes, preorder_samples + 2);\n\n    ret = tsk_tree_postorder(&t, nodes, &n);\n  
  CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, num_nodes);\n    verify_node_lists(n, nodes, postorder);\n\n    ret = tsk_tree_postorder_from(&t, -1, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, num_nodes);\n    verify_node_lists(n, nodes, postorder);\n\n    ret = tsk_tree_postorder_from(&t, t.virtual_root, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, num_nodes + 1);\n    verify_node_lists(n, nodes, postorder_vr);\n\n    ret = tsk_tree_postorder_from(&t, 4, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 3);\n    verify_node_lists(n, nodes, postorder);\n\n    /* Check errors */\n    ret = tsk_tree_preorder_from(&t, -2, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    ret = tsk_tree_preorder_from(&t, 8, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    ret = tsk_tree_preorder_samples_from(&t, -2, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    ret = tsk_tree_preorder_samples_from(&t, 8, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    ret = tsk_tree_postorder_from(&t, -2, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    ret = tsk_tree_postorder_from(&t, 8, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    tsk_tree_free(&t);\n    tsk_treeseq_free(&ts);\n}\n\n/* printed out in tree order.\n0.90┊             ┊         11  ┊             ┊\n    ┊             ┊         ┏┻┓ ┊             ┊\n0.80┊         10  ┊         ┃ ┃ ┊             ┊\n    ┊         ┏┻┓ ┊         ┃ ┃ ┊             ┊\n0.40┊     9   ┃ ┃ ┊    9    ┃ ┃ ┊     9       ┊\n    ┊   ┏━┻┓  ┃ ┃ ┊  ┏━┻━┓  ┃ ┃ ┊   ┏━┻━━┓    ┊\n0.30┊   ┃  ┃  ┃ ┃ ┊  ┃   8  ┃ ┃ ┊   ┃    8    ┊\n    ┊   ┃  ┃  ┃ ┃ ┊  ┃  ┏┻┓ ┃ ┃ ┊   ┃   ┏┻┓   ┊\n0.20┊   ┃  7  ┃ ┃ ┊  7  ┃ ┃ ┃ ┃ ┊   7   ┃ ┃   ┊\n    ┊   ┃ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┃ ┃ ┊ ┏━┻┓  ┃ ┃   ┊\n0.10┊   ┃ ┃ ┃ 
┃ ┃ ┊ ┃ ┃ ┃ ┃ ┃ ┃ ┊ ┃  6  ┃ ┃   ┊\n    ┊   ┃ ┃ ┃ ┃ ┃ ┊ ┃ ┃ ┃ ┃ ┃ ┃ ┊ ┃ ┏┻┓ ┃ ┃   ┊\n0.00┊ 5 2 3 4 0 1 ┊ 3 4 1 2 0 5 ┊ 4 0 3 1 2 5 ┊\n    0             4             8            10\n*/\n\nstatic void\ntest_multiroot_tree_traversal(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n\n    tsk_tree_t t;\n    tsk_id_t preorder[] = { 5, 9, 2, 7, 3, 4, 10, 0, 1 };\n    tsk_id_t preorder_vr[] = { 12, 5, 9, 2, 7, 3, 4, 10, 0, 1 };\n    tsk_id_t preorder_samples[] = { 5, 2, 3, 4, 0, 1 };\n    tsk_id_t postorder[] = { 5, 2, 3, 4, 7, 9, 0, 1, 10 };\n    tsk_id_t postorder_vr[] = { 5, 2, 3, 4, 7, 9, 0, 1, 10, 12 };\n    tsk_id_t nodes[13];\n    tsk_size_t n;\n\n    tsk_treeseq_from_text(&ts, 10, multiroot_ex_nodes, multiroot_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n\n    ret = tsk_tree_preorder(&t, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 9);\n    verify_node_lists(n, nodes, preorder);\n\n    ret = tsk_tree_preorder_from(&t, -1, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 9);\n    verify_node_lists(n, nodes, preorder);\n\n    ret = tsk_tree_preorder_from(&t, t.virtual_root, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 10);\n    verify_node_lists(n, nodes, preorder_vr);\n\n    ret = tsk_tree_preorder_from(&t, 10, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 3);\n    verify_node_lists(n, nodes, preorder + 6);\n\n    ret = tsk_tree_preorder_samples_from(&t, -1, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 6);\n    verify_node_lists(n, nodes, preorder_samples);\n\n    ret = tsk_tree_preorder_samples_from(&t, t.virtual_root, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 6);\n    verify_node_lists(n, nodes, 
preorder_samples);\n\n    ret = tsk_tree_preorder_samples_from(&t, 5, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 1);\n    verify_node_lists(n, nodes, preorder_samples);\n\n    ret = tsk_tree_preorder_samples_from(&t, 10, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 2);\n    verify_node_lists(n, nodes, preorder_samples + 4);\n\n    ret = tsk_tree_postorder(&t, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 9);\n    verify_node_lists(n, nodes, postorder);\n\n    ret = tsk_tree_postorder_from(&t, -1, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 9);\n    verify_node_lists(n, nodes, postorder);\n\n    ret = tsk_tree_postorder_from(&t, t.virtual_root, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 10);\n    verify_node_lists(n, nodes, postorder_vr);\n\n    ret = tsk_tree_postorder_from(&t, 10, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 3);\n    verify_node_lists(n, nodes, postorder + 6);\n\n    /* Nodes that aren't \"in\" the tree have singleton traversal lists and\n     * connect to no samples */\n\n    ret = tsk_tree_preorder_from(&t, 11, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 1);\n    CU_ASSERT_EQUAL_FATAL(nodes[0], 11);\n\n    ret = tsk_tree_postorder_from(&t, 11, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 1);\n    CU_ASSERT_EQUAL_FATAL(nodes[0], 11);\n\n    ret = tsk_tree_preorder_samples_from(&t, 11, nodes, &n);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(n, 0);\n\n    tsk_tree_free(&t);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\nverify_seek_multi_tree(tsk_flags_t seek_options)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n    double breakpoints[] = { 0, 2, 7, 10 };\n    tsk_id_t num_trees = 3;\n    tsk_id_t j, k;\n\n    tsk_treeseq_from_text(&ts, 10, 
paper_ex_nodes, paper_ex_edges, NULL, NULL, NULL,\n        paper_ex_individuals, NULL, 0);\n\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    for (j = 0; j < num_trees; j++) {\n        ret = tsk_tree_seek(&t, breakpoints[j], seek_options);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL_FATAL(t.index, j);\n        ret = tsk_tree_seek_index(&t, j, seek_options);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL_FATAL(t.index, j);\n        for (k = 0; k < num_trees; k++) {\n            ret = tsk_tree_seek(&t, breakpoints[k], seek_options);\n            CU_ASSERT_EQUAL_FATAL(ret, 0);\n            CU_ASSERT_EQUAL_FATAL(t.index, k);\n            ret = tsk_tree_seek_index(&t, k, seek_options);\n            CU_ASSERT_EQUAL_FATAL(ret, 0);\n            CU_ASSERT_EQUAL_FATAL(t.index, k);\n        }\n    }\n\n    ret = tsk_tree_seek(&t, 1.99999, seek_options);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(t.index, 0);\n    ret = tsk_tree_seek(&t, 6.99999, seek_options);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(t.index, 1);\n    ret = tsk_tree_seek(&t, 9.99999, seek_options);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(t.index, 2);\n\n    tsk_tree_free(&t);\n\n    /* Seek to all positions from a new tree. 
*/\n    for (j = 0; j < num_trees; j++) {\n        ret = tsk_tree_init(&t, &ts, 0);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_tree_seek(&t, breakpoints[j], seek_options);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL_FATAL(t.index, j);\n        tsk_tree_free(&t);\n    }\n\n    /* Seek to all positions from a non-new tree in the null state*/\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    for (j = 0; j < num_trees; j++) {\n        ret = tsk_tree_seek(&t, 0, seek_options);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_tree_prev(&t);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL_FATAL(t.index, -1);\n        ret = tsk_tree_seek(&t, breakpoints[j], seek_options);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL_FATAL(t.index, j);\n    }\n    tsk_tree_free(&t);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_seek_multi_tree(void)\n{\n    verify_seek_multi_tree(0);\n    verify_seek_multi_tree(TSK_SEEK_SKIP);\n}\n\nstatic void\ntest_seek_errors(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, NULL, NULL,\n        paper_ex_individuals, NULL, 0);\n\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_seek(&t, -1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SEEK_OUT_OF_BOUNDS);\n    ret = tsk_tree_seek(&t, 10, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SEEK_OUT_OF_BOUNDS);\n    ret = tsk_tree_seek(&t, 11, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SEEK_OUT_OF_BOUNDS);\n    ret = tsk_tree_seek_index(&t, (tsk_id_t) ts.num_trees, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SEEK_OUT_OF_BOUNDS);\n    ret = tsk_tree_seek_index(&t, -1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SEEK_OUT_OF_BOUNDS);\n\n    tsk_tree_free(&t);\n    tsk_treeseq_free(&ts);\n}\n\n/*=======================================================\n * KC Distance 
tests.\n *=======================================================*/\n\nstatic void\ntest_isolated_node_kc(void)\n{\n    const char *single_leaf = \"1 0 0\";\n    const char *single_internal = \"0 0 0\";\n    const char *edges = \"\";\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n    int ret;\n    double result = 0;\n\n    tsk_treeseq_from_text(&ts, 1, single_leaf, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_kc_distance(&ts, &ts, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(result, 0);\n    ret = tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n    ret = tsk_tree_kc_distance(&t, &t, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(result, 0);\n    tsk_treeseq_free(&ts);\n    tsk_tree_free(&t);\n\n    tsk_treeseq_from_text(\n        &ts, 1, single_internal, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_kc_distance(&ts, &ts, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MULTIPLE_ROOTS);\n    ret = tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_left_root(&t), TSK_NULL);\n    ret = tsk_tree_kc_distance(&t, &t, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MULTIPLE_ROOTS);\n    tsk_treeseq_free(&ts);\n    tsk_tree_free(&t);\n}\n\nstatic void\ntest_single_tree_kc(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_tree_t t, other_t;\n    double result = 0;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_kc_distance(&ts, &ts, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(result, 0);\n    ret = tsk_treeseq_kc_distance(&ts, &ts, 1, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    
CU_ASSERT_EQUAL_FATAL(result, 0);\n\n    ret = tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n    ret = tsk_tree_init(&other_t, &ts, TSK_SAMPLE_LISTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&other_t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n    ret = tsk_tree_copy(&t, &other_t, TSK_NO_INIT);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    check_trees_identical(&t, &other_t);\n\n    ret = tsk_tree_kc_distance(&t, &other_t, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(result, 0);\n    ret = tsk_tree_kc_distance(&t, &other_t, 1, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(result, 0);\n\n    tsk_treeseq_free(&ts);\n    tsk_tree_free(&t);\n    tsk_tree_free(&other_t);\n}\n\nstatic void\ntest_two_trees_kc(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  2   0\\n\"\n                        \"0  3   0\\n\";\n    const char *nodes_other = \"1  0   0\\n\"\n                              \"1  0   0\\n\"\n                              \"1  0   0\\n\"\n                              \"0  4   0\\n\"\n                              \"0  6   0\\n\";\n    const char *edges = \"0 1  3 0,1\\n\"\n                        \"0 1  4 2,3\\n\";\n    int ret;\n    tsk_treeseq_t ts, other_ts;\n    tsk_tree_t t, other_t;\n    double result = 0;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    ret = tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n    tsk_treeseq_from_text(\n        &other_ts, 1, nodes_other, edges, NULL, NULL, NULL, NULL, NULL, 0);\n\n    ret = tsk_treeseq_kc_distance(&ts, &other_ts, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n  
  CU_ASSERT_EQUAL_FATAL(result, 0);\n    ret = tsk_treeseq_kc_distance(&ts, &other_ts, 1, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_DOUBLE_EQUAL_FATAL(result, 4.243, 1e-2);\n\n    ret = tsk_tree_init(&other_t, &other_ts, TSK_SAMPLE_LISTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&other_t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n\n    ret = tsk_tree_kc_distance(&t, &other_t, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(result, 0);\n    ret = tsk_tree_kc_distance(&t, &other_t, 1, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_DOUBLE_EQUAL_FATAL(result, 4.243, 1e-2);\n\n    tsk_treeseq_free(&ts);\n    tsk_treeseq_free(&other_ts);\n    tsk_tree_free(&t);\n    tsk_tree_free(&other_t);\n}\n\nstatic void\ntest_empty_tree_kc(void)\n{\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    tsk_tree_t t;\n    tsk_id_t v;\n    int ret;\n    double result = 0;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SEQUENCE_LENGTH);\n    tsk_treeseq_free(&ts);\n    tables.sequence_length = NAN;\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SEQUENCE_LENGTH);\n    tsk_treeseq_free(&ts);\n    tables.sequence_length = INFINITY;\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SEQUENCE_LENGTH);\n    tsk_treeseq_free(&ts);\n    tables.sequence_length = 1.0;\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    verify_empty_tree_sequence(&ts, 1.0);\n\n    ret = tsk_treeseq_kc_distance(&ts, &ts, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MULTIPLE_ROOTS);\n\n    ret = tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 
0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_left_root(&t), TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(t.interval.left, 0);\n    CU_ASSERT_EQUAL_FATAL(t.interval.right, 1);\n    CU_ASSERT_EQUAL_FATAL(t.parent[0], TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(t.left_child[0], TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(t.right_child[0], TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(t.left_sib[0], TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(t.right_sib[0], TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_parent(&t, 1, &v), TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    ret = tsk_tree_kc_distance(&t, &t, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MULTIPLE_ROOTS);\n\n    tsk_tree_free(&t);\n    tsk_treeseq_free(&ts);\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_nonbinary_tree_kc(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\";\n    const char *edges = \"0  1   4   0,1,2,3\\n\";\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n    int ret;\n    double result = 0;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n\n    tsk_treeseq_kc_distance(&ts, &ts, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(result, 0);\n\n    ret = tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n    tsk_tree_kc_distance(&t, &t, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(result, 0);\n    tsk_treeseq_free(&ts);\n    tsk_tree_free(&t);\n}\n\nstatic void\ntest_nonzero_samples_kc(void)\n{\n    const char *nodes = \"0  0   0\\n\" /* unused node at the start */\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\";\n    const char *edges = \"0  1   3   1,2\\n\";\n    tsk_treeseq_t ts;\n    
tsk_tree_t t;\n    int ret;\n    double result = 0;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n\n    ret = tsk_treeseq_kc_distance(&ts, &ts, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(result, 0);\n\n    ret = tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n    ret = tsk_tree_kc_distance(&t, &t, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(result, 0);\n    tsk_treeseq_free(&ts);\n    tsk_tree_free(&t);\n}\n\nstatic void\ntest_internal_samples_kc(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  1   0\";\n    const char *edges = \"0  1   2   0,1\\n\";\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n    int ret;\n    double result = 0;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n\n    /* Permitted in tree sequences */\n    ret = tsk_treeseq_kc_distance(&ts, &ts, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(result, 0.0);\n\n    ret = tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n    ret = tsk_tree_kc_distance(&t, &t, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_treeseq_free(&ts);\n    tsk_tree_free(&t);\n}\n\nstatic void\ntest_non_sample_leaf_kc(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"0  0   0\\n\"\n                        \"0  1   0\\n\";\n    const char *edges = \"0 1  2 0,1\\n\";\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n    int ret;\n    double result = 0;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n\n    ret = tsk_treeseq_kc_distance(&ts, &ts, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    
CU_ASSERT_EQUAL_FATAL(result, 0.0);\n\n    ret = tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n    ret = tsk_tree_kc_distance(&t, &t, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(result, 0.0);\n\n    tsk_treeseq_free(&ts);\n    tsk_tree_free(&t);\n}\n\nstatic void\ntest_unequal_sample_size_kc(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  2   0\\n\"\n                        \"0  3   0\\n\";\n    const char *nodes_other = \"1  0   0\\n\"\n                              \"1  0   0\\n\"\n                              \"0  1   0\\n\";\n    const char *edges = \"0 1  3 0,1\\n\"\n                        \"0 1  4 2,3\\n\";\n    const char *edges_other = \"0 1  2 0,1\\n\";\n    int ret;\n    tsk_treeseq_t ts, other_ts;\n    tsk_tree_t t, other_t;\n    double result = 0;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    tsk_treeseq_from_text(\n        &other_ts, 1, nodes_other, edges_other, NULL, NULL, NULL, NULL, NULL, 0);\n\n    ret = tsk_treeseq_kc_distance(&ts, &other_ts, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SAMPLE_SIZE_MISMATCH);\n\n    ret = tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n\n    ret = tsk_tree_init(&other_t, &other_ts, TSK_SAMPLE_LISTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&other_t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n\n    ret = tsk_tree_kc_distance(&t, &other_t, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SAMPLE_SIZE_MISMATCH);\n    tsk_treeseq_free(&ts);\n    tsk_treeseq_free(&other_ts);\n    tsk_tree_free(&t);\n    tsk_tree_free(&other_t);\n}\n\nstatic 
void\ntest_unequal_samples_kc(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  2   0\\n\"\n                        \"0  3   0\\n\";\n    const char *nodes_other = \"0  0   0\\n\" /* Unused node at the start */\n                              \"1  0   0\\n\"\n                              \"1  0   0\\n\"\n                              \"1  0   0\\n\"\n                              \"0  2   0\\n\"\n                              \"0  3   0\\n\";\n    const char *edges = \"0 1  3 0,1\\n\"\n                        \"0 1  4 2,3\\n\";\n    const char *edges_other = \"0 1  4 1,2\\n\"\n                              \"0 1  5 3,4\\n\";\n    int ret;\n    tsk_treeseq_t ts, other_ts;\n    tsk_tree_t t, other_t;\n    double result = 0;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    tsk_treeseq_from_text(\n        &other_ts, 1, nodes_other, edges_other, NULL, NULL, NULL, NULL, NULL, 0);\n\n    ret = tsk_treeseq_kc_distance(&ts, &other_ts, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SAMPLES_NOT_EQUAL);\n\n    ret = tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n\n    ret = tsk_tree_init(&other_t, &other_ts, TSK_SAMPLE_LISTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&other_t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n\n    ret = tsk_tree_kc_distance(&t, &other_t, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SAMPLES_NOT_EQUAL);\n\n    tsk_treeseq_free(&ts);\n    tsk_treeseq_free(&other_ts);\n    tsk_tree_free(&t);\n    tsk_tree_free(&other_t);\n}\n\nstatic void\ntest_unary_nodes_kc(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\"\n                        \"0  2   0\";\n    const char *edges = \"0  
1   2   0,1\\n\"\n                        \"0  1   3   2\";\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n    int ret;\n    double result = 0;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    ret = tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n    ret = tsk_tree_kc_distance(&t, &t, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNARY_NODES);\n\n    tsk_treeseq_free(&ts);\n    tsk_tree_free(&t);\n}\n\nstatic void\ntest_no_sample_lists_kc(void)\n{\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n    int ret = 0;\n    double result = 0;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n    ret = tsk_tree_kc_distance(&t, &t, 9, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NO_SAMPLE_LISTS);\n\n    tsk_treeseq_free(&ts);\n    tsk_tree_free(&t);\n}\n\nstatic void\ntest_unequal_sequence_lengths_kc(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  2   0\\n\"\n                        \"0  3   0\\n\";\n    const char *edges_1 = \"0 1  3 0,1\\n\"\n                          \"0 1  4 2,3\\n\";\n    const char *edges_2 = \"0 2  3 0,1\\n\"\n                          \"0 2  4 2,3\\n\";\n\n    tsk_treeseq_t ts, other;\n    int ret;\n    double result = 0;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges_1, NULL, NULL, NULL, NULL, NULL, 0);\n    tsk_treeseq_from_text(&other, 2, nodes, edges_2, NULL, NULL, NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_kc_distance(&ts, &other, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SEQUENCE_LENGTH_MISMATCH);\n\n    tsk_treeseq_free(&ts);\n    
tsk_treeseq_free(&other);\n}\n\nstatic void\ntest_different_number_trees_kc(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\\n\"\n                        \"0  2   0\\n\"\n                        \"0  3   0\\n\"\n                        \"0  4   0\\n\"\n                        \"0  5   0\\n\";\n    const char *edges = \"0 10  5 0,1\\n\"\n                        \"0 10  6 3,4\\n\"\n                        \"5 10  7 2,5\\n\"\n                        \"0 5   8 2\\n\"\n                        \"0 10  8 6\\n\"\n                        \"5 10  8 7\\n\"\n                        \"0 5   9 5,8\\n\";\n\n    const char *other_nodes = \"1  0   0\\n\"\n                              \"1  0   0\\n\"\n                              \"1  0   0\\n\"\n                              \"1  0   0\\n\"\n                              \"1  0   0\\n\"\n                              \"0  1   0\\n\"\n                              \"0  2   0\\n\"\n                              \"0  3   0\\n\"\n                              \"0  4   0\\n\";\n    const char *other_edges = \"0 10  5 0,1\\n\"\n                              \"0 10  6 2,3\\n\"\n                              \"0 10  7 4,5\\n\"\n                              \"0 10  8 6,7\\n\";\n    tsk_treeseq_t ts, other;\n    double result, expected;\n    int ret = 0;\n\n    tsk_treeseq_from_text(&ts, 10, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    tsk_treeseq_from_text(\n        &other, 10, other_nodes, other_edges, NULL, NULL, NULL, NULL, NULL, 0);\n    ret = tsk_treeseq_kc_distance(&ts, &other, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    expected = (sqrt(8.0) * 5.0 + sqrt(6.0) * 5.0) / 10.0;\n    CU_ASSERT_DOUBLE_EQUAL_FATAL(result, expected, 1e-2);\n\n    tsk_treeseq_free(&ts);\n    tsk_treeseq_free(&other);\n}\n\nstatic 
void\ntest_offset_trees_with_errors_kc(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  2   0\\n\"\n                        \"0  3   0\\n\"\n                        \"0  4   0\\n\";\n    const char *edges = \"0 10  4 0,1\\n\"\n                        \"0 10  5 2,3\\n\"\n                        \"0 10  6 4,5\\n\";\n    tsk_treeseq_t ts, other;\n    double result;\n    int ret = 0;\n\n    tsk_treeseq_from_text(\n        &ts, 10, unary_ex_nodes, unary_ex_edges, NULL, NULL, NULL, NULL, NULL, 0);\n    tsk_treeseq_from_text(&other, 10, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 10);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&other), 10);\n\n    ret = tsk_treeseq_kc_distance(&ts, &other, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNARY_NODES);\n\n    ret = tsk_treeseq_kc_distance(&other, &ts, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNARY_NODES);\n\n    tsk_treeseq_free(&ts);\n    tsk_treeseq_free(&other);\n}\n\n/*=======================================================\n * Miscellaneous tests.\n *======================================================*/\n\nstatic void\ntest_genealogical_nearest_neighbours_errors(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    const tsk_id_t *reference_sets[2];\n    tsk_id_t reference_set_0[4], reference_set_1[4];\n    tsk_id_t focal[] = { 0, 1, 2, 3 };\n    tsk_size_t reference_set_size[2];\n    tsk_size_t num_focal = 4;\n    double *A = tsk_malloc(2 * num_focal * sizeof(double));\n    CU_ASSERT_FATAL(A != NULL);\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 4);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);\n\n    ret = 
tsk_treeseq_genealogical_nearest_neighbours(\n        &ts, focal, num_focal, reference_sets, reference_set_size, 0, 0, A);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_treeseq_genealogical_nearest_neighbours(\n        &ts, focal, num_focal, reference_sets, reference_set_size, INT16_MAX, 0, A);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n\n    /* Overlapping sample sets */\n    reference_sets[0] = focal;\n    reference_set_size[0] = 1;\n    reference_sets[1] = focal;\n    reference_set_size[1] = num_focal;\n    ret = tsk_treeseq_genealogical_nearest_neighbours(\n        &ts, focal, num_focal, reference_sets, reference_set_size, 2, 0, A);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);\n\n    /* bad values in the sample sets */\n    reference_set_0[0] = 0;\n    reference_set_0[1] = 1;\n    reference_set_1[0] = 2;\n    reference_set_1[1] = 3;\n    reference_set_size[0] = 2;\n    reference_set_size[1] = 2;\n    reference_sets[0] = reference_set_0;\n    reference_sets[1] = reference_set_1;\n    ret = tsk_treeseq_genealogical_nearest_neighbours(\n        &ts, focal, num_focal, reference_sets, reference_set_size, 2, 0, A);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    reference_set_0[0] = -1;\n    ret = tsk_treeseq_genealogical_nearest_neighbours(\n        &ts, focal, num_focal, reference_sets, reference_set_size, 2, 0, A);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    reference_set_0[0] = (tsk_id_t) tsk_treeseq_get_num_nodes(&ts);\n    ret = tsk_treeseq_genealogical_nearest_neighbours(\n        &ts, focal, num_focal, reference_sets, reference_set_size, 2, 0, A);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    reference_set_0[0] = (tsk_id_t) tsk_treeseq_get_num_nodes(&ts) + 1;\n    ret = tsk_treeseq_genealogical_nearest_neighbours(\n        &ts, focal, num_focal, reference_sets, reference_set_size, 2, 0, A);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    /* 
Duplicate values in the reference sets */\n    reference_set_0[0] = 1;\n    ret = tsk_treeseq_genealogical_nearest_neighbours(\n        &ts, focal, num_focal, reference_sets, reference_set_size, 2, 0, A);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);\n    reference_set_0[0] = 3;\n    ret = tsk_treeseq_genealogical_nearest_neighbours(\n        &ts, focal, num_focal, reference_sets, reference_set_size, 2, 0, A);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);\n\n    /* Bad sample ID */\n    reference_sets[0] = focal;\n    reference_set_size[0] = 1;\n    reference_sets[1] = focal + 1;\n    reference_set_size[1] = num_focal - 1;\n    focal[0] = -1;\n    ret = tsk_treeseq_genealogical_nearest_neighbours(\n        &ts, focal, num_focal, reference_sets, reference_set_size, 2, 0, A);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    focal[0] = (tsk_id_t) tsk_treeseq_get_num_nodes(&ts);\n    ret = tsk_treeseq_genealogical_nearest_neighbours(\n        &ts, focal, num_focal, reference_sets, reference_set_size, 2, 0, A);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    focal[0] = (tsk_id_t) tsk_treeseq_get_num_nodes(&ts) + 100;\n    ret = tsk_treeseq_genealogical_nearest_neighbours(\n        &ts, focal, num_focal, reference_sets, reference_set_size, 2, 0, A);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n\n    tsk_treeseq_free(&ts);\n    free(A);\n}\n\nstatic void\ntest_single_tree_balance(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n    tsk_size_t sackin, colless;\n    double b1, b2;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n\n    /* Balanced binary tree with 4 leaves */\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_sackin_index(&t, &sackin), 0);\n    
CU_ASSERT_EQUAL(sackin, 8);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_colless_index(&t, &colless), 0);\n    CU_ASSERT_EQUAL(colless, 0);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_b1_index(&t, &b1), 0);\n    CU_ASSERT_DOUBLE_EQUAL(b1, 2, 1e-8);\n    /* Test different bases for b2_index to high-precision */\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_b2_index(&t, 10, &b2), 0);\n    CU_ASSERT_DOUBLE_EQUAL(b2, 0.6020599913279623, 1e-14);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_b2_index(&t, 2, &b2), 0);\n    CU_ASSERT_DOUBLE_EQUAL_FATAL(b2, 2, 1e-14);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_b2_index(&t, 3, &b2), 0);\n    CU_ASSERT_DOUBLE_EQUAL_FATAL(b2, 1.2618595071429148, 1e-14);\n\n    tsk_treeseq_free(&ts);\n    tsk_tree_free(&t);\n}\n\nstatic void\ntest_multiroot_balance(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n    tsk_size_t sackin;\n    double b1;\n\n    tsk_treeseq_from_text(&ts, 10, multiroot_ex_nodes, multiroot_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n\n    /* 0.80┊         10 */\n    /*     ┊         ┏┻┓ */\n    /* 0.40┊     9   ┃ ┃ */\n    /*     ┊   ┏━┻┓  ┃ ┃ */\n    /* 0.30┊   ┃  ┃  ┃ ┃ */\n    /*     ┊   ┃  ┃  ┃ ┃ */\n    /* 0.20┊   ┃  7  ┃ ┃ */\n    /*     ┊   ┃ ┏┻┓ ┃ ┃ */\n    /* 0.10┊   ┃ ┃ ┃ ┃ ┃ */\n    /*     ┊   ┃ ┃ ┃ ┃ ┃ */\n    /* 0.00┊ 5 2 3 4 0 1 */\n\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_sackin_index(&t, &sackin), 0);\n    CU_ASSERT_EQUAL(sackin, 7);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_colless_index(&t, NULL), TSK_ERR_UNDEFINED_MULTIROOT);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_b1_index(&t, &b1), 0);\n    CU_ASSERT_DOUBLE_EQUAL(b1, 1.0, 1e-8);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_b2_index(&t, 10, NULL), TSK_ERR_UNDEFINED_MULTIROOT);\n\n    tsk_treeseq_free(&ts);\n    tsk_tree_free(&t);\n}\n\nstatic void\ntest_nonbinary_balance(void)\n{\n    int ret;\n    const char *nodes = \"1  0   0\\n\"\n   
                     \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"1  0   0\\n\"\n                        \"0  1   0\";\n    const char *edges = \"0  1   4   0,1,2,3\\n\";\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n    tsk_size_t sackin, colless;\n    double b1, b2;\n\n    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n\n    /* Star tree with 4 leaves */\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_sackin_index(&t, &sackin), 0);\n    CU_ASSERT_EQUAL(sackin, 4);\n    CU_ASSERT_EQUAL_FATAL(\n        tsk_tree_colless_index(&t, &colless), TSK_ERR_UNDEFINED_NONBINARY);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_b1_index(&t, &b1), 0);\n    CU_ASSERT_DOUBLE_EQUAL_FATAL(b1, 0, 1e-8);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_b2_index(&t, 10, &b2), 0);\n    CU_ASSERT_DOUBLE_EQUAL_FATAL(b2, 0.6020599913279623, 1e-8);\n\n    tsk_treeseq_free(&ts);\n    tsk_tree_free(&t);\n}\n\nstatic void\ntest_empty_tree_balance(void)\n{\n    int ret;\n    tsk_table_collection_t tables;\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n    tsk_size_t sackin, colless;\n    double b1, b2;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1.0;\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_sackin_index(&t, &sackin), 0);\n    CU_ASSERT_EQUAL(sackin, 0);\n    /* Technically wrong here because we have 0 roots, but not worth worrying about */\n    CU_ASSERT_EQUAL_FATAL(\n        tsk_tree_colless_index(&t, &colless), TSK_ERR_UNDEFINED_MULTIROOT);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_b1_index(&t, &b1), 0);\n    
CU_ASSERT_EQUAL(b1, 0);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_b2_index(&t, 10, &b2), TSK_ERR_UNDEFINED_MULTIROOT);\n\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n    tsk_tree_free(&t);\n}\n\nstatic void\ntest_b2_bad_base(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_tree_t t;\n    double result;\n    double bad_base[] = { -2, -1, 1 };\n    size_t j;\n\n    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,\n        NULL, NULL, NULL, 0);\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n\n    for (j = 0; j < sizeof(bad_base) / sizeof(*bad_base); j++) {\n        ret = tsk_tree_b2_index(&t, bad_base[j], &result);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_FALSE(tsk_isfinite(result));\n    }\n    CU_ASSERT_FATAL(j > 0);\n\n    /* this one is peculiar, in that base 0 seems to give a finite answer */\n    ret = tsk_tree_b2_index(&t, 0, &result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(result, 0);\n\n    tsk_treeseq_free(&ts);\n    tsk_tree_free(&t);\n}\n\nstatic void\ntest_tree_errors(void)\n{\n    int ret;\n    tsk_size_t j;\n    tsk_id_t num_nodes = 9;\n    tsk_id_t u;\n    tsk_node_t node;\n    tsk_treeseq_t ts, other_ts;\n    tsk_tree_t t, other_t;\n    tsk_id_t bad_nodes[] = { num_nodes + 1, num_nodes + 2, -1 };\n    tsk_id_t tracked_samples[] = { 0, 0, 0 };\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, NULL, NULL,\n        paper_ex_individuals, NULL, 0);\n\n    ret = tsk_tree_init(&t, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n\n    /* Out-of-bounds queries */\n    for (j = 0; j < sizeof(bad_nodes) / sizeof(tsk_id_t); j++) {\n        u = bad_nodes[j];\n      
  ret = tsk_tree_get_parent(&t, u, NULL);\n        CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n        ret = tsk_tree_get_time(&t, u, NULL);\n        CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n        ret = tsk_tree_get_branch_length(&t, u, NULL);\n        CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n        ret = tsk_tree_get_mrca(&t, u, 0, NULL);\n        CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n        ret = tsk_tree_get_mrca(&t, 0, u, NULL);\n        CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n        ret = tsk_tree_get_num_samples(&t, u, NULL);\n        CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n        ret = tsk_tree_get_num_tracked_samples(&t, u, NULL);\n        CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n        /* Also check tree sequence methods */\n        ret = tsk_treeseq_get_node(&ts, (tsk_id_t) u, &node);\n        CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n        CU_ASSERT(!tsk_treeseq_is_sample(&ts, u));\n        CU_ASSERT(!tsk_tree_is_sample(&t, u));\n    }\n\n    tracked_samples[0] = 0;\n    tracked_samples[1] = (tsk_id_t) tsk_treeseq_get_num_samples(&ts);\n    ret = tsk_tree_set_tracked_samples(&t, 2, tracked_samples);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_SAMPLES);\n    tracked_samples[1] = (tsk_id_t) tsk_treeseq_get_num_nodes(&ts);\n    ret = tsk_tree_set_tracked_samples(&t, 2, tracked_samples);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    tracked_samples[1] = 0;\n    ret = tsk_tree_set_tracked_samples(&t, 2, tracked_samples);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_DUPLICATE_SAMPLE);\n\n    tsk_treeseq_from_text(&other_ts, 10, paper_ex_nodes, paper_ex_edges, NULL, NULL,\n        NULL, paper_ex_individuals, NULL, 0);\n\n    ret = tsk_tree_init(&other_t, &other_ts, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_tree_copy(&t, &other_t, TSK_NO_INIT);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);\n    tsk_tree_free(&t);\n    tsk_tree_free(&other_t);\n\n    ret = 
tsk_tree_init(&t, &other_ts, TSK_NO_SAMPLE_COUNTS);\n    CU_ASSERT_EQUAL(ret, 0);\n    ret = tsk_tree_copy(&t, &other_t, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_UNSUPPORTED_OPERATION);\n    tsk_tree_free(&other_t);\n    ret = tsk_tree_copy(&t, &other_t, TSK_SAMPLE_LISTS);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_UNSUPPORTED_OPERATION);\n    tsk_tree_free(&other_t);\n\n    tsk_tree_free(&t);\n    tsk_treeseq_free(&other_ts);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_treeseq_row_access_errors(void)\n{\n    int ret;\n    tsk_table_collection_t tables;\n    tsk_treeseq_t ts;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1;\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_treeseq_get_individual(&ts, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n    ret = tsk_treeseq_get_node(&ts, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);\n    ret = tsk_treeseq_get_edge(&ts, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);\n    ret = tsk_treeseq_get_migration(&ts, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATION_OUT_OF_BOUNDS);\n    ret = tsk_treeseq_get_site(&ts, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);\n    ret = tsk_treeseq_get_mutation(&ts, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);\n    ret = tsk_treeseq_get_population(&ts, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n    ret = tsk_treeseq_get_provenance(&ts, 0, NULL);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_PROVENANCE_OUT_OF_BOUNDS);\n\n    tsk_treeseq_free(&ts);\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_treeseq_get_individuals_population_errors(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_table_collection_t tables;\n    tsk_treeseq_t ts;\n    tsk_id_t output[2];\n\n    
ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1;\n\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 1.25, 0, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 1.25, TSK_NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret_id = tsk_treeseq_get_individuals_population(&ts, output);\n    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_INDIVIDUAL_POPULATION_MISMATCH);\n\n    tsk_treeseq_free(&ts);\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_treeseq_get_individuals_population(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    int j;\n    tsk_table_collection_t tables;\n    tsk_treeseq_t ts;\n    tsk_id_t output[4];\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1;\n\n    for (j = 0; j < 2; j++) {\n        ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n        CU_ASSERT_EQUAL_FATAL(ret_id, (tsk_id_t) j);\n    }\n    for (j = 0; j < 4; j++) {\n        ret_id = tsk_individual_table_add_row(\n            &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0);\n        CU_ASSERT_EQUAL_FATAL(ret_id, (tsk_id_t) j);\n    }\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 1.25, 0, 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0.0, TSK_NULL, 0, NULL, 0);\n    
CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 3.0, 1, 3, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 2);\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0.0, TSK_NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 3);\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 1.25, 0, 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 4);\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_treeseq_get_individuals_population(&ts, output);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_EQUAL_FATAL(output[0], TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(output[1], 0);\n    CU_ASSERT_EQUAL_FATAL(output[2], TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(output[3], 1);\n\n    tsk_treeseq_free(&ts);\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_treeseq_get_individuals_time_errors(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_table_collection_t tables;\n    tsk_treeseq_t ts;\n    double output[2];\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1;\n\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_individual_table_add_row(\n        &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 1.2, 0, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0.8, 0, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_treeseq_get_individuals_time(&ts, output);\n    CU_ASSERT_EQUAL_FATAL(ret, 
TSK_ERR_INDIVIDUAL_TIME_MISMATCH);\n\n    tsk_treeseq_free(&ts);\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_treeseq_get_individuals_time(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    int j;\n    tsk_table_collection_t tables;\n    tsk_treeseq_t ts;\n    double output[4];\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1;\n\n    for (j = 0; j < 2; j++) {\n        ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n        CU_ASSERT_EQUAL_FATAL(ret_id, j);\n    }\n    for (j = 0; j < 4; j++) {\n        ret_id = tsk_individual_table_add_row(\n            &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0);\n        CU_ASSERT_EQUAL_FATAL(ret_id, j);\n    }\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 1.25, 0, 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 3.25, 0, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 3.0, 1, 3, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 2);\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 3.25, 0, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 3);\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 1.25, 0, 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 4);\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_treeseq_get_individuals_time(&ts, output);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_EQUAL_FATAL(output[0], 3.25);\n    CU_ASSERT_EQUAL_FATAL(output[1], 1.25);\n    CU_ASSERT_FATAL(tsk_is_unknown_time(output[2]));\n    CU_ASSERT_EQUAL_FATAL(output[3], 3.0);\n\n    tsk_treeseq_free(&ts);\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_tree_copy_flags(void)\n{\n    int iret, ret;\n    tsk_size_t j;\n    tsk_treeseq_t ts;\n    tsk_tree_t t, other_t;\n    tsk_flags_t options[] = { 0, 
TSK_NO_SAMPLE_COUNTS, TSK_SAMPLE_LISTS,\n        TSK_NO_SAMPLE_COUNTS | TSK_SAMPLE_LISTS };\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, NULL, NULL,\n        paper_ex_individuals, NULL, 0);\n\n    for (j = 0; j < sizeof(options) / sizeof(*options); j++) {\n        ret = tsk_tree_init(&t, &ts, options[j]);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_tree_init(&other_t, &ts, options[j]);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        ret = tsk_tree_copy(&t, &other_t, TSK_NO_INIT);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        check_trees_identical(&t, &other_t);\n        tsk_tree_free(&other_t);\n\n        while ((iret = tsk_tree_next(&t)) == TSK_TREE_OK) {\n            ret = tsk_tree_copy(&t, &other_t, options[j]);\n            CU_ASSERT_EQUAL_FATAL(ret, 0);\n            check_trees_identical(&t, &other_t);\n            tsk_tree_free(&other_t);\n        }\n        CU_ASSERT_EQUAL_FATAL(iret, 0);\n\n        ret = tsk_tree_first(&t);\n        CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n        ret = tsk_tree_copy(&t, &other_t, options[j]);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        while (true) {\n            CU_ASSERT_EQUAL_FATAL(ret, 0);\n            check_trees_identical(&t, &other_t);\n            CU_ASSERT_EQUAL_FATAL(tsk_tree_next(&t), tsk_tree_next(&other_t));\n            if (t.index == -1) {\n                break;\n            }\n        }\n\n        ret = tsk_tree_last(&t);\n        CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n        ret = tsk_tree_copy(&t, &other_t, TSK_NO_INIT | options[j]);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        while (true) {\n            CU_ASSERT_EQUAL_FATAL(ret, 0);\n            check_trees_identical(&t, &other_t);\n            CU_ASSERT_EQUAL_FATAL(tsk_tree_prev(&t), tsk_tree_prev(&other_t));\n            if (t.index == -1) {\n                break;\n            }\n        }\n\n        tsk_tree_free(&other_t);\n        tsk_tree_free(&t);\n    }\n    
tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_deduplicate_sites(void)\n{\n    int ret;\n    // Modified from paper_ex\n    const char *tidy_sites = \"1      0\\n\"\n                             \"4.5    0\\n\"\n                             \"8.5    0\\n\";\n    const char *tidy_mutations = \"0      2   1\\n\"\n                                 \"0      1   2\\n\"\n                                 \"0      6   3\\n\"\n                                 \"0      3   4\\n\"\n                                 \"1      0   1\\n\"\n                                 \"1      2   2\\n\"\n                                 \"1      4   3\\n\"\n                                 \"1      5   4\\n\"\n                                 \"2      5   1\\n\"\n                                 \"2      7   2\\n\"\n                                 \"2      1   3\\n\"\n                                 \"2      0   4\\n\";\n    const char *messy_sites = \"1      0\\n\"\n                              \"1      0\\n\"\n                              \"1      0\\n\"\n                              \"1      0\\n\"\n                              \"4.5    0\\n\"\n                              \"4.5    0\\n\"\n                              \"4.5    0\\n\"\n                              \"4.5    0\\n\"\n                              \"8.5    0\\n\"\n                              \"8.5    0\\n\"\n                              \"8.5    0\\n\"\n                              \"8.5    0\\n\";\n    const char *messy_mutations = \"0      2   1\\n\"\n                                  \"1      1   2\\n\"\n                                  \"2      6   3\\n\"\n                                  \"3      3   4\\n\"\n                                  \"4      0   1\\n\"\n                                  \"5      2   2\\n\"\n                                  \"6      4   3\\n\"\n                                  \"7      5   4\\n\"\n                                  \"8      5   1\\n\"\n                                
  \"9      7   2\\n\"\n                                  \"10     1   3\\n\"\n                                  \"11     0   4\\n\";\n    tsk_table_collection_t tidy, messy;\n\n    ret = tsk_table_collection_init(&tidy, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_init(&messy, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    messy.sequence_length = 10;\n    tidy.sequence_length = 10;\n    parse_individuals(paper_ex_individuals, &tidy.individuals);\n    parse_nodes(paper_ex_nodes, &tidy.nodes);\n    parse_sites(tidy_sites, &tidy.sites);\n    parse_mutations(tidy_mutations, &tidy.mutations);\n    // test cleaning doesn't mess up the tidy one\n    parse_individuals(paper_ex_individuals, &messy.individuals);\n    parse_nodes(paper_ex_nodes, &messy.nodes);\n    parse_sites(tidy_sites, &messy.sites);\n    parse_mutations(tidy_mutations, &messy.mutations);\n\n    ret = tsk_table_collection_deduplicate_sites(&messy, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_site_table_equals(&tidy.sites, &messy.sites, 0));\n    CU_ASSERT_TRUE(tsk_mutation_table_equals(&tidy.mutations, &messy.mutations, 0));\n\n    tsk_site_table_clear(&messy.sites);\n    tsk_mutation_table_clear(&messy.mutations);\n\n    // test with the actual messy one\n    parse_sites(messy_sites, &messy.sites);\n    parse_mutations(messy_mutations, &messy.mutations);\n\n    ret = tsk_table_collection_deduplicate_sites(&messy, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_site_table_equals(&tidy.sites, &messy.sites, 0));\n    CU_ASSERT_TRUE(tsk_mutation_table_equals(&tidy.mutations, &messy.mutations, 0));\n\n    tsk_table_collection_free(&tidy);\n    tsk_table_collection_free(&messy);\n}\n\nstatic void\ntest_deduplicate_sites_errors(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_table_collection_t tables;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tables.sequence_length = 10;\n    ret_id = 
tsk_site_table_add_row(&tables.sites, 2, \"A\", 1, \"m\", 1);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_site_table_add_row(&tables.sites, 2, \"TT\", 2, \"MM\", 2);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    ret_id = tsk_mutation_table_add_row(&tables.mutations, 0, 0, -1, 0, \"T\", 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    /* Negative position */\n    tables.sites.position[0] = -1;\n    ret = tsk_table_collection_deduplicate_sites(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_SITE_POSITION);\n    tables.sites.position[0] = 2;\n\n    /* unsorted position */\n    tables.sites.position[1] = 0.5;\n    ret = tsk_table_collection_deduplicate_sites(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_UNSORTED_SITES);\n    tables.sites.position[1] = 2;\n\n    /* negative site ID */\n    tables.mutations.site[0] = -1;\n    ret = tsk_table_collection_deduplicate_sites(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);\n    tables.mutations.site[0] = 0;\n\n    /* site ID out of bounds */\n    tables.mutations.site[0] = 2;\n    ret = tsk_table_collection_deduplicate_sites(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);\n    tables.mutations.site[0] = 0;\n\n    /* Bad offset in metadata */\n    tables.sites.metadata_offset[0] = 2;\n    ret = tsk_table_collection_deduplicate_sites(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);\n    tables.sites.metadata_offset[0] = 0;\n\n    /* Bad length in metadata */\n    tables.sites.metadata_offset[2] = 100;\n    ret = tsk_table_collection_deduplicate_sites(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);\n    tables.sites.metadata_offset[2] = 3;\n\n    /* Bad offset in ancestral_state */\n    tables.sites.ancestral_state_offset[0] = 2;\n    ret = tsk_table_collection_deduplicate_sites(&tables, 0);\n    CU_ASSERT_EQUAL(ret, 
TSK_ERR_BAD_OFFSET);\n    tables.sites.ancestral_state_offset[0] = 0;\n\n    /* Bad length in ancestral_state */\n    tables.sites.ancestral_state_offset[2] = 100;\n    ret = tsk_table_collection_deduplicate_sites(&tables, 0);\n    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);\n    tables.sites.ancestral_state_offset[2] = 3;\n\n    ret = tsk_table_collection_deduplicate_sites(&tables, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_deduplicate_sites_zero_rows(void)\n{\n\n    int ret;\n    tsk_table_collection_t tables;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tables.sequence_length = 1;\n    ret = tsk_table_collection_deduplicate_sites(&tables, 0);\n    CU_ASSERT_EQUAL(ret, 0);\n    CU_ASSERT_EQUAL(tables.sites.num_rows, 0)\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_deduplicate_sites_multichar(void)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_table_collection_t tables;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tables.sequence_length = 10;\n    ret_id = tsk_site_table_add_row(&tables.sites, 0, \"AA\", 1, \"M\", 1);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_site_table_add_row(&tables.sites, 0, \"0\", 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    ret_id = tsk_site_table_add_row(&tables.sites, 1, \"BBBBB\", 5, \"NNNNN\", 5);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 2);\n    ret_id = tsk_site_table_add_row(&tables.sites, 1, \"0\", 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 3);\n\n    ret = tsk_table_collection_deduplicate_sites(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.num_rows, 2);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.position[0], 0);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.position[1], 1);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.ancestral_state[0], 'A');\n    
CU_ASSERT_EQUAL_FATAL(tables.sites.ancestral_state_offset[1], 1);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.metadata[0], 'M');\n    CU_ASSERT_EQUAL_FATAL(tables.sites.metadata_offset[1], 1);\n\n    CU_ASSERT_NSTRING_EQUAL(tables.sites.ancestral_state + 1, \"BBBBB\", 5);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.ancestral_state_offset[2], 6);\n    CU_ASSERT_NSTRING_EQUAL(tables.sites.metadata + 1, \"NNNNN\", 5);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.metadata_offset[2], 6);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_empty_tree_sequence(void)\n{\n    tsk_treeseq_t ts;\n    tsk_table_collection_t tables;\n    tsk_tree_t t;\n    tsk_id_t v;\n    int ret;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SEQUENCE_LENGTH);\n    tsk_treeseq_free(&ts);\n    tables.sequence_length = 1.0;\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    verify_empty_tree_sequence(&ts, 1.0);\n\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_left_root(&t), TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(t.interval.left, 0);\n    CU_ASSERT_EQUAL_FATAL(t.interval.right, 1);\n    CU_ASSERT_EQUAL_FATAL(t.num_edges, 0);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_parent(&t, 0, &v), 0);\n    CU_ASSERT_EQUAL_FATAL(v, TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_parent(&t, 1, &v), TSK_ERR_NODE_OUT_OF_BOUNDS);\n    tsk_tree_free(&t);\n\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_last(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_left_root(&t), TSK_NULL);\n    CU_ASSERT_EQUAL_FATAL(t.interval.left, 0);\n    
CU_ASSERT_EQUAL_FATAL(t.interval.right, 1);\n    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_parent(&t, 1, &v), TSK_ERR_NODE_OUT_OF_BOUNDS);\n    tsk_tree_free(&t);\n\n    tsk_treeseq_free(&ts);\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_zero_edges(void)\n{\n    const char *nodes = \"1  0   0\\n\"\n                        \"1  0   0\\n\";\n    const char *edges = \"\";\n    const char *sites = \"0.1  0\\n\"\n                        \"0.2  0\\n\";\n    const char *mutations = \"0    0     1\\n\"\n                            \"1    1     1\\n\";\n    tsk_treeseq_t ts, tss;\n    tsk_tree_t t;\n    tsk_id_t samples, node_map;\n    const tsk_id_t z = TSK_NULL;\n    tsk_id_t parents[] = {\n        z,\n        z,\n    };\n    int ret;\n\n    tsk_treeseq_from_text(&ts, 2, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 2.0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);\n    tsk_treeseq_print_state(&ts, _devnull);\n\n    verify_trees(&ts, 1, parents);\n\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_first(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);\n    CU_ASSERT_EQUAL(t.interval.left, 0);\n    CU_ASSERT_EQUAL(t.interval.right, 2);\n    CU_ASSERT_EQUAL(t.num_edges, 0);\n    CU_ASSERT_EQUAL(t.parent[0], TSK_NULL);\n    CU_ASSERT_EQUAL(t.parent[1], TSK_NULL);\n    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&t), 0);\n    CU_ASSERT_EQUAL(t.left_sib[0], TSK_NULL);\n    CU_ASSERT_EQUAL(t.right_sib[0], 1);\n    tsk_tree_print_state(&t, _devnull);\n    tsk_tree_free(&t);\n\n    ret = tsk_tree_init(&t, &ts, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_tree_last(&t);\n    CU_ASSERT_EQUAL_FATAL(ret, 
TSK_TREE_OK);\n    CU_ASSERT_EQUAL(t.interval.left, 0);\n    CU_ASSERT_EQUAL(t.interval.right, 2);\n    CU_ASSERT_EQUAL(t.parent[0], TSK_NULL);\n    CU_ASSERT_EQUAL(t.parent[1], TSK_NULL);\n    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&t), 0);\n    CU_ASSERT_EQUAL(t.left_sib[0], TSK_NULL);\n    CU_ASSERT_EQUAL(t.right_sib[0], 1);\n    tsk_tree_print_state(&t, _devnull);\n    tsk_tree_free(&t);\n\n    /* We give pointers to samples and node_map here as they must be non null */\n    ret = tsk_treeseq_simplify(&ts, &samples, 0, 0, &tss, &node_map);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&tss), 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&tss), 2.0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&tss), 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&tss), 2);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&tss), 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&tss), 1);\n    tsk_treeseq_print_state(&ts, _devnull);\n\n    tsk_treeseq_free(&ts);\n    tsk_treeseq_free(&tss);\n}\n\nstatic void\ntest_tree_sequence_metadata(void)\n{\n    int ret;\n    tsk_table_collection_t tc;\n    tsk_treeseq_t ts;\n\n    char example_metadata[100] = \"An example of metadata with unicode 🎄🌳🌴🌲🎋\";\n    char example_metadata_schema[100]\n        = \"An example of metadata schema with unicode 🎄🌳🌴🌲🎋\";\n    char example_time_units[100] = \"An example of time units ⏰\";\n    /* Pair each length with its own buffer: schema with schema, units with units. */\n    tsk_size_t example_metadata_length = (tsk_size_t) strlen(example_metadata);\n    tsk_size_t example_metadata_schema_length\n        = (tsk_size_t) strlen(example_metadata_schema);\n    tsk_size_t example_time_units_length = (tsk_size_t) strlen(example_time_units);\n\n    ret = tsk_table_collection_init(&tc, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tc.sequence_length = 1.0;\n    ret = tsk_table_collection_build_index(&tc, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_set_metadata(\n        &tc, example_metadata, 
example_metadata_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_set_metadata_schema(\n        &tc, example_metadata_schema, example_metadata_schema_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_set_time_units(\n        &tc, example_time_units, example_time_units_length);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_treeseq_init(&ts, &tc, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_EQUAL(tsk_treeseq_get_metadata_length(&ts), example_metadata_length);\n    CU_ASSERT_EQUAL(\n        tsk_treeseq_get_metadata_schema_length(&ts), example_metadata_schema_length);\n    CU_ASSERT_EQUAL(tsk_memcmp(tsk_treeseq_get_metadata(&ts), example_metadata,\n                        example_metadata_length),\n        0);\n    CU_ASSERT_EQUAL(tsk_memcmp(tsk_treeseq_get_metadata_schema(&ts),\n                        example_metadata_schema, example_metadata_schema_length),\n        0);\n\n    CU_ASSERT_EQUAL(tsk_treeseq_get_time_units_length(&ts), example_time_units_length);\n    CU_ASSERT_EQUAL(tsk_memcmp(tsk_treeseq_get_time_units(&ts), example_time_units,\n                        example_time_units_length),\n        0);\n\n    tsk_treeseq_free(&ts);\n    tsk_table_collection_free(&tc);\n}\n\nstatic int\ndummy_stat(tsk_size_t K, const double *X, tsk_size_t M, double *Y, void *params)\n{\n    tsk_size_t k;\n    CU_ASSERT_FATAL(M == K);\n    CU_ASSERT_FATAL(params == NULL);\n\n    for (k = 0; k < K; k++) {\n        Y[k] = X[k];\n    }\n    return 0;\n}\n\nstatic void\ntest_time_uncalibrated(void)\n{\n    int ret;\n    tsk_table_collection_t tables;\n    tsk_treeseq_t ts;\n    tsk_treeseq_t ts2;\n    tsk_size_t sample_set_sizes[] = { 2, 2 };\n    tsk_id_t samples[] = { 0, 1, 2, 3 };\n    tsk_size_t num_samples;\n    double result[100];\n    double *W;\n    double *sigma;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1;\n\n    ret = 
tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ts.time_uncalibrated, false);\n    tsk_treeseq_free(&ts);\n\n    ret = tsk_table_collection_set_time_units(\n        &tables, TSK_TIME_UNITS_UNCALIBRATED, strlen(TSK_TIME_UNITS_UNCALIBRATED));\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ts.time_uncalibrated, true);\n    tsk_treeseq_free(&ts);\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    ret = tsk_table_collection_set_time_units(\n        ts.tables, TSK_TIME_UNITS_UNCALIBRATED, strlen(TSK_TIME_UNITS_UNCALIBRATED));\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_treeseq_init(&ts2, ts.tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_treeseq_allele_frequency_spectrum(\n        &ts2, 2, sample_set_sizes, samples, 0, NULL, 0, NULL, TSK_STAT_SITE, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_treeseq_allele_frequency_spectrum(\n        &ts2, 2, sample_set_sizes, samples, 0, NULL, 0, NULL, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TIME_UNCALIBRATED);\n    ret = tsk_treeseq_allele_frequency_spectrum(&ts2, 2, sample_set_sizes, samples, 0,\n        NULL, 0, NULL, TSK_STAT_BRANCH | TSK_STAT_ALLOW_TIME_UNCALIBRATED, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    sigma = tsk_calloc(tsk_treeseq_get_num_nodes(&ts2), sizeof(double));\n    num_samples = tsk_treeseq_get_num_samples(&ts2);\n    W = tsk_calloc(num_samples, sizeof(double));\n\n    ret = tsk_treeseq_general_stat(&ts2, 1, W, 1, dummy_stat, NULL,\n        tsk_treeseq_get_num_trees(&ts2), tsk_treeseq_get_breakpoints(&ts2),\n        TSK_STAT_SITE, sigma);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_treeseq_general_stat(&ts2, 1, W, 1, dummy_stat, NULL,\n 
       tsk_treeseq_get_num_trees(&ts2), tsk_treeseq_get_breakpoints(&ts2),\n        TSK_STAT_BRANCH, sigma);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TIME_UNCALIBRATED);\n    ret = tsk_treeseq_general_stat(&ts2, 1, W, 1, dummy_stat, NULL,\n        tsk_treeseq_get_num_trees(&ts2), tsk_treeseq_get_breakpoints(&ts2),\n        TSK_STAT_BRANCH | TSK_STAT_ALLOW_TIME_UNCALIBRATED, sigma);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_treeseq_divergence_matrix(\n        &ts2, 0, NULL, NULL, 0, NULL, TSK_STAT_BRANCH, result);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TIME_UNCALIBRATED);\n    ret = tsk_treeseq_divergence_matrix(&ts2, 0, NULL, NULL, 0, NULL,\n        TSK_STAT_BRANCH | TSK_STAT_ALLOW_TIME_UNCALIBRATED, result);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_safe_free(W);\n    tsk_safe_free(sigma);\n    tsk_treeseq_free(&ts);\n    tsk_treeseq_free(&ts2);\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_reference_sequence(void)\n{\n    int ret;\n    tsk_table_collection_t tables;\n    tsk_treeseq_t ts;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1;\n\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_FALSE(tsk_treeseq_has_reference_sequence(&ts));\n    tsk_treeseq_free(&ts);\n\n    ret = tsk_reference_sequence_set_data(&tables.reference_sequence, \"abc\", 3);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_treeseq_has_reference_sequence(&ts));\n    tsk_treeseq_free(&ts);\n\n    tsk_table_collection_free(&tables);\n}\n\nstatic void\ntest_split_edges_no_populations(void)\n{\n    int ret;\n    tsk_treeseq_t ts, split_ts;\n    tsk_table_collection_t tables;\n    tsk_id_t new_nodes[] = { 9, 10, 11 };\n    tsk_size_t num_new_nodes = 3;\n    const char *metadata = \"some metadata\";\n    
tsk_size_t j;\n    tsk_node_t node;\n    double time = 0.09;\n    tsk_id_t ret_id;\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n\n    ret_id = tsk_table_collection_copy(ts.tables, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    tsk_treeseq_free(&ts);\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    /* Check the status of this call itself, not the stale ret_id from above. */\n    ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret_id = tsk_treeseq_init(&ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    /* NOTE: haven't worked out the exact IDs on the branches here, just\n     * for illustration.\n\n    0.25┊     8   ┊         ┊         ┊\n        ┊   ┏━┻━┓ ┊         ┊         ┊\n    0.20┊   ┃   ┃ ┊         ┊   7     ┊\n        ┊   ┃   ┃ ┊         ┊ ┏━┻━┓   ┊\n    0.17┊   6   ┃ ┊   6     ┊ ┃   ┃   ┊\n        ┊ ┏━┻┓  ┃ ┊ ┏━┻━┓   ┊ ┃   ┃   ┊\n    0.09┊ 9  5  10┊ 9   5   ┊ 11  5   ┊\n        ┊ ┃ ┏┻┓ ┃ ┊ ┃ ┏━┻┓  ┊ ┃ ┏━┻┓  ┊\n    0.07┊ ┃ ┃ ┃ ┃ ┊ ┃ ┃  4  ┊ ┃ ┃  4  ┊\n        ┊ ┃ ┃ ┃ ┃ ┊ ┃ ┃ ┏┻┓ ┊ ┃ ┃ ┏┻┓ ┊\n    0.00┊ 0 1 3 2 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊\n      0.00      2.00      7.00      10.00\n    */\n    ret = tsk_treeseq_split_edges(\n        &ts, time, 1234, 0, metadata, strlen(metadata), 0, &split_ts);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&split_ts), 3);\n    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&split_ts), 12);\n\n    for (j = 0; j < num_new_nodes; j++) {\n        ret = tsk_treeseq_get_node(&split_ts, new_nodes[j], &node);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(node.time, time);\n        CU_ASSERT_EQUAL(node.flags, 1234);\n        CU_ASSERT_EQUAL(node.individual, TSK_NULL);\n        CU_ASSERT_EQUAL(node.population, 0);\n        CU_ASSERT_EQUAL(node.metadata_length, strlen(metadata));\n        
CU_ASSERT_EQUAL(strncmp(node.metadata, metadata, strlen(metadata)), 0);\n    }\n    tsk_treeseq_free(&split_ts);\n\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_split_edges_populations(void)\n{\n    int ret;\n    tsk_treeseq_t ts, split_ts;\n    tsk_table_collection_t tables;\n    double time = 0.5;\n    tsk_node_t node;\n    tsk_id_t valid_pops[] = { -1, 0, 1 };\n    tsk_id_t num_valid_pops = 3;\n    tsk_id_t j, population, ret_id;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1;\n\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, 0, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 1, 1, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0, 1, 1, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    for (j = 0; j < num_valid_pops; j++) {\n        population = valid_pops[j];\n        ret = tsk_treeseq_split_edges(&ts, time, 0, population, NULL, 0, 0, &split_ts);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&split_ts), 1);\n        CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&split_ts), 3);\n        ret = tsk_treeseq_get_node(&split_ts, 2, &node);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        CU_ASSERT_EQUAL(node.population, population);\n        tsk_treeseq_free(&split_ts);\n    }\n\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_split_edges_errors(void)\n{\n    int ret;\n    tsk_treeseq_t ts, 
split_ts;\n    tsk_table_collection_t tables;\n    double time = 0.5;\n    tsk_id_t invalid_pops[] = { -2, 2, 3 };\n    tsk_id_t num_invalid_pops = 3;\n    tsk_id_t j, population, ret_id;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = 1;\n\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, 0, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 1, 1, TSK_NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 1);\n    ret_id = tsk_edge_table_add_row(&tables.edges, 0, 1, 1, 0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_treeseq_split_edges(\n        &ts, TSK_UNKNOWN_TIME, 0, TSK_NULL, NULL, 0, 0, &split_ts);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TIME_NONFINITE);\n\n    for (j = 0; j < num_invalid_pops; j++) {\n        population = invalid_pops[j];\n        ret = tsk_treeseq_split_edges(&ts, time, 0, population, NULL, 0, 0, &split_ts);\n        CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n        tsk_treeseq_free(&split_ts);\n    }\n    tsk_treeseq_free(&ts);\n\n    ret_id\n        = tsk_migration_table_add_row(&tables.migrations, 0, 1, 0, 0, 1, 1.0, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_treeseq_split_edges(&ts, time, 0, population, NULL, 0, 0, &split_ts);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATIONS_NOT_SUPPORTED);\n    tsk_treeseq_free(&split_ts);\n\n    tsk_table_collection_free(&tables);\n    tsk_treeseq_free(&ts);\n}\n\nstatic 
void\ntest_extend_haplotypes_simple(void)\n{\n    int ret;\n    tsk_treeseq_t ts, ets;\n    const char *nodes = \"1  0       -1   -1\\n\"\n                        \"1  0       -1   -1\\n\"\n                        \"0  2.0     -1   -1\\n\";\n    const char *edges = \"0  10   2   0\\n\"\n                        \"0  10   2   1\\n\";\n    const char *sites = \"0.0  0\\n\"\n                        \"1.0  0\\n\";\n    const char *mutations = \"0    0     1  -1  0.5\\n\"\n                            \"1    1     1  -1  0.5\\n\";\n\n    tsk_treeseq_from_text(&ts, 10, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);\n    ret = tsk_treeseq_extend_haplotypes(&ts, 10, 0, &ets);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE_FATAL(tsk_table_collection_equals(ts.tables, ets.tables, 0));\n    tsk_treeseq_free(&ts);\n\n    tsk_treeseq_free(&ets);\n}\n\nstatic void\ntest_extend_haplotypes_errors(void)\n{\n    int ret;\n    tsk_treeseq_t ts, ets;\n    const char *nodes = \"1  0       -1   -1\\n\"\n                        \"1  0       -1   -1\\n\"\n                        \"0  2.0     -1   -1\\n\";\n    const char *edges = \"0  10   2   0\\n\"\n                        \"0  10   2   1\\n\";\n    const char *sites = \"0.0  0\\n\"\n                        \"1.0  0\\n\";\n    const char *mutations = \"0    0     1  -1   0.5\\n\"\n                            \"1    1     1  -1   0.5\\n\";\n    const char *mutations_no_time = \"0    0     1  -1\\n\"\n                                    \"1    1     1  -1\\n\";\n    // left, right, node source, dest, time\n    const char *migrations = \"0  10  0  0  1  0.5\\n\"\n                             \"0  10  0  1  0  1.5\\n\";\n\n    tsk_treeseq_from_text(&ts, 10, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);\n    ret = tsk_treeseq_extend_haplotypes(&ts, -2, 0, &ets);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EXTEND_EDGES_BAD_MAXITER);\n    tsk_treeseq_free(&ts);\n\n    tsk_treeseq_from_text(\n        &ts, 10, nodes, edges, 
migrations, sites, mutations, NULL, NULL, 0);\n    ret = tsk_treeseq_extend_haplotypes(&ts, 10, 0, &ets);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATIONS_NOT_SUPPORTED);\n    tsk_treeseq_free(&ts);\n\n    tsk_treeseq_from_text(\n        &ts, 10, nodes, edges, NULL, sites, mutations_no_time, NULL, NULL, 0);\n    ret = tsk_treeseq_extend_haplotypes(&ts, 10, 0, &ets);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME);\n    tsk_treeseq_free(&ts);\n\n    tsk_treeseq_free(&ets);\n}\n\nstatic void\nassert_equal_except_edges_and_mutation_nodes(\n    const tsk_treeseq_t *ts1, const tsk_treeseq_t *ts2)\n{\n    tsk_table_collection_t t1, t2;\n    int ret;\n\n    ret = tsk_table_collection_copy(ts1->tables, &t1, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_table_collection_copy(ts2->tables, &t2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tsk_memset(t1.mutations.node, 0, t1.mutations.num_rows * sizeof(*t1.mutations.node));\n    tsk_memset(t2.mutations.node, 0, t2.mutations.num_rows * sizeof(*t2.mutations.node));\n\n    tsk_edge_table_clear(&t1.edges);\n    tsk_edge_table_clear(&t2.edges);\n\n    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));\n\n    tsk_table_collection_free(&t1);\n    tsk_table_collection_free(&t2);\n}\n\nstatic void\ntest_extend_haplotypes(void)\n{\n    int ret = 0;\n    int max_iter = 10;\n    tsk_treeseq_t ts, ets;\n    FILE *tmp = fopen(_tmp_file_name, \"w\");\n\n    /* 7 and 8 should be extended to the whole sequence;\n     * also 5 to the second tree (where x's are)\n\n       6          6      6         6\n     +-+-+      +-+-+  +-+-+     +-+-+\n     |   |      7   x  x   8     x   x\n     |   |     ++-+ |  | +-++    |   |\n     4   5     4  | x  4 |  5    4   5\n    +++ +++   +++ | |  | | +++  +++ +++\n    0 1 2 3   0 1 2 3  0 1 2 3  0 1 2 3\n    */\n\n    const char *nodes = \"1  0       -1   -1\\n\"\n                        \"1  0       -1   -1\\n\"\n                        \"1  0       -1   
-1\\n\"\n                        \"1  0       -1   -1\\n\"\n                        \"0  1.0     -1   -1\\n\"\n                        \"0  1.0     -1   -1\\n\"\n                        \"0  3.0     -1   -1\\n\"\n                        \"0  2.0     -1   -1\\n\"\n                        \"0  2.0     -1   -1\\n\";\n    // l, r, p, c\n    const char *edges = \"0  10   4  0\\n\"\n                        \"0   5   4  1\\n\"\n                        \"7  10   4  1\\n\"\n                        \"0   2   5  2\\n\"\n                        \"5  10   5  2\\n\"\n                        \"0   2   5  3\\n\"\n                        \"5  10   5  3\\n\"\n                        \"2   5   7  2\\n\"\n                        \"2   5   7  4\\n\"\n                        \"5   7   8  1\\n\"\n                        \"5   7   8  5\\n\"\n                        \"2   5   6  3\\n\"\n                        \"0   2   6  4\\n\"\n                        \"5  10   6  4\\n\"\n                        \"0   2   6  5\\n\"\n                        \"7  10   6  5\\n\"\n                        \"2   5   6  7\\n\"\n                        \"5   7   6  8\\n\";\n    const char *sites = \"0.0  0\\n\"\n                        \"9.0  0\\n\";\n    const char *mutations = \"0    4     1  -1  2.5\\n\"\n                            \"0    4     2   0  1.5\\n\"\n                            \"1    6     3  -1  3.5\\n\"\n                            \"1    5     1   2  2.5\\n\"\n                            \"1    5     2   3  1.5\\n\";\n\n    tsk_treeseq_from_text(&ts, 10, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);\n\n    for (max_iter = 1; max_iter < 10; max_iter++) {\n        ret = tsk_treeseq_extend_haplotypes(&ts, max_iter, 0, &ets);\n        CU_ASSERT_EQUAL_FATAL(ret, 0);\n        assert_equal_except_edges_and_mutation_nodes(&ts, &ets);\n        CU_ASSERT_TRUE(ets.tables->edges.num_rows >= 12);\n        tsk_treeseq_free(&ets);\n    }\n\n    ret = tsk_treeseq_extend_haplotypes(&ts, max_iter, 0, 
&ets);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL_FATAL(ets.tables->nodes.num_rows, 9);\n    CU_ASSERT_EQUAL_FATAL(ets.tables->edges.num_rows, 12);\n    assert_equal_except_edges_and_mutation_nodes(&ts, &ets);\n    tsk_treeseq_free(&ets);\n\n    tsk_set_debug_stream(tmp);\n    ret = tsk_treeseq_extend_haplotypes(&ts, max_iter, TSK_DEBUG, &ets);\n    tsk_set_debug_stream(stdout);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(ftell(tmp) > 0);\n    tsk_treeseq_free(&ets);\n\n    fclose(tmp);\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_extend_haplotypes_conflicting_times(void)\n{\n    int ret;\n    int max_iter = 10;\n    tsk_treeseq_t ts, ets;\n    /*\n      3.00┊ 3 ┊ 4 ┊\n          ┊ ┃ ┊ ┃ ┊\n      2.00┊ ┃ ┊ 2 ┊\n          ┊ ┃ ┊ ┃ ┊\n      1.00┊ 1 ┊ ┃ ┊\n          ┊ ┃ ┊ ┃ ┊\n      0.00┊ 0 ┊ 0 ┊\n          0   2   4\n    */\n\n    const char *nodes = \"1  0.0     -1   -1\\n\"\n                        \"0  1.0     -1   -1\\n\"\n                        \"0  2.0     -1   -1\\n\"\n                        \"0  3.0     -1   -1\\n\"\n                        \"0  3.0     -1   -1\\n\";\n    // l, r, p, c\n    const char *edges = \"0.0   2.0   1  0\\n\"\n                        \"2.0   4.0   2  0\\n\"\n                        \"0.0   2.0   3  1\\n\"\n                        \"2.0   4.0   4  2\\n\";\n\n    tsk_treeseq_from_text(&ts, 4, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ts.tables->edges.num_rows, 4);\n\n    ret = tsk_treeseq_extend_haplotypes(&ts, max_iter, 0, &ets);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, ets.tables, 0));\n    tsk_treeseq_free(&ets);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_extend_haplotypes_new_edge(void)\n{\n    int ret;\n    int max_iter = 10;\n    tsk_treeseq_t ts, ets, ref_ts;\n    /*  This is an example where new edges are added\n     *  on both forwards and back passes\n      4.00┊   ┊ 4 ┊ 4 ┊ 4 ┊\n          ┊   ┊ ┃ 
┊ ┃ ┊ ┃ ┊\n      3.00┊ 2 ┊ ┃ ┊ 2 ┊ 2 ┊\n          ┊ ┃ ┊ ┃ ┊ ┃ ┊ ┃ ┊\n      2.00┊ ┃ ┊ 3 ┊ ┃ ┊ 3 ┊\n          ┊ ┃ ┊ ┃ ┊ ┃ ┊ ┃ ┊\n      1.00┊ 1 ┊ ┃ ┊ ┃ ┊ ┃ ┊\n          ┊ ┃ ┊ ┃ ┊ ┃ ┊ ┃ ┊\n      0.00┊ 0 ┊ 0 ┊ 0 ┊ 0 ┊\n          0   2   4   6   8\n    */\n\n    const char *nodes = \"1  0.0     -1   -1\\n\"\n                        \"0  1.0     -1   -1\\n\"\n                        \"0  3.0     -1   -1\\n\"\n                        \"0  2.0     -1   -1\\n\"\n                        \"0  4.0     -1   -1\\n\";\n    // l, r, p, c\n    const char *edges = \"0.0   2.0   1  0\\n\"\n                        \"2.0   4.0   3  0\\n\"\n                        \"6.0   8.0   3  0\\n\"\n                        \"4.0   5.0   2  0\\n\"\n                        \"5.0   6.0   2  0\\n\"\n                        \"0.0   2.0   2  1\\n\"\n                        \"6.0   7.0   2  3\\n\"\n                        \"7.0   8.0   2  3\\n\"\n                        \"4.0   8.0   4  2\\n\"\n                        \"2.0   4.0   4  3\\n\";\n    const char *ext_edges = \"0.0   8.0   1  0\\n\"\n                            \"0.0   8.0   3  1\\n\"\n                            \"0.0   8.0   2  3\\n\"\n                            \"2.0   8.0   4  2\\n\";\n    const char *sites = \"3.0  0\\n\";\n    // s, n , ds, t\n    const char *mutations = \"0    4     5  -1  4.5\\n\"\n                            \"0    3     4   0  3.5\\n\"\n                            \"0    3     3   1  2.5\\n\"\n                            \"0    0     2   2  1.5\\n\"\n                            \"0    0     1   3  0.5\\n\";\n    const char *ext_mutations = \"0    4     5  -1  4.5\\n\"\n                                \"0    2     4   0  3.5\\n\"\n                                \"0    3     3   1  2.5\\n\"\n                                \"0    1     2   2  1.5\\n\"\n                                \"0    0     1   3  0.5\\n\";\n\n    tsk_treeseq_from_text(&ts, 8, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);\n    
CU_ASSERT_EQUAL_FATAL(ts.tables->edges.num_rows, 10);\n    tsk_treeseq_from_text(\n        &ref_ts, 8, nodes, ext_edges, NULL, sites, ext_mutations, NULL, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(ref_ts.tables->edges.num_rows, 4);\n\n    ret = tsk_treeseq_extend_haplotypes(&ts, max_iter, 0, &ets);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    assert_equal_except_edges_and_mutation_nodes(&ts, &ets);\n    CU_ASSERT_TRUE(tsk_table_collection_equals(ets.tables, ref_ts.tables, 0));\n    tsk_treeseq_free(&ets);\n\n    tsk_treeseq_free(&ts);\n    tsk_treeseq_free(&ref_ts);\n}\n\nstatic void\ntest_init_take_ownership_no_edge_metadata(void)\n{\n    int ret;\n    tsk_treeseq_t ts;\n    tsk_table_collection_t *tables = tsk_malloc(sizeof(tsk_table_collection_t));\n\n    CU_ASSERT_NOT_EQUAL_FATAL(tables, NULL);\n\n    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,\n        paper_ex_mutations, paper_ex_individuals, NULL, 0);\n    ret = tsk_treeseq_copy_tables(&ts, tables, TSK_TC_NO_EDGE_METADATA);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_treeseq_free(&ts);\n\n    ret = tsk_treeseq_init(&ts, tables, TSK_TAKE_OWNERSHIP);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_CANT_TAKE_OWNERSHIP_NO_EDGE_METADATA);\n\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_init_compute_mutation_parents(void)\n{\n    int ret;\n    tsk_table_collection_t *tables, *tables2;\n    tsk_treeseq_t ts;\n    const char *sites = \"0       0\\n\";\n    /* Make a mutation on a parallel branch the parent*/\n    const char *bad_mutations = \"0   0  1  -1\\n\"\n                                \"0   1  1  0\\n\";\n\n    tables = tsk_malloc(sizeof(tsk_table_collection_t));\n    CU_ASSERT_NOT_EQUAL_FATAL(tables, NULL);\n    tables2 = tsk_malloc(sizeof(tsk_table_collection_t));\n    CU_ASSERT_NOT_EQUAL_FATAL(tables2, NULL);\n\n    CU_ASSERT_FATAL(tables != NULL);\n    ret = tsk_table_collection_init(tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tables->sequence_length = 1;\n    
parse_nodes(single_tree_ex_nodes, &tables->nodes);\n    CU_ASSERT_EQUAL_FATAL(tables->nodes.num_rows, 7);\n    parse_edges(single_tree_ex_edges, &tables->edges);\n    CU_ASSERT_EQUAL_FATAL(tables->edges.num_rows, 6);\n    parse_sites(sites, &tables->sites);\n    CU_ASSERT_EQUAL_FATAL(tables->sites.num_rows, 1);\n    parse_mutations(bad_mutations, &tables->mutations);\n    CU_ASSERT_EQUAL_FATAL(tables->mutations.num_rows, 2);\n    tables->sequence_length = 1.0;\n    ret = tsk_table_collection_copy(tables, tables2, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    ret = tsk_treeseq_init(&ts, tables, TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_MUTATION_PARENT);\n    tsk_treeseq_free(&ts);\n\n    ret = tsk_treeseq_init(\n        &ts, tables, TSK_TS_INIT_BUILD_INDEXES | TSK_TS_INIT_COMPUTE_MUTATION_PARENTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_treeseq_free(&ts);\n\n    /* When we use take ownership, the check of parents shouldn't overwrite them*/\n    ret = tsk_treeseq_init(&ts, tables, TSK_TAKE_OWNERSHIP | TSK_TS_INIT_BUILD_INDEXES);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_MUTATION_PARENT);\n    CU_ASSERT_EQUAL(tables->mutations.parent[0], -1);\n    CU_ASSERT_EQUAL(tables->mutations.parent[1], 0);\n    tsk_treeseq_free(&ts);\n\n    /* When we use take ownership and compute, the tables are overwritten*/\n    ret = tsk_treeseq_init(&ts, tables2,\n        TSK_TAKE_OWNERSHIP | TSK_TS_INIT_BUILD_INDEXES\n            | TSK_TS_INIT_COMPUTE_MUTATION_PARENTS);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    CU_ASSERT_EQUAL(tables2->mutations.parent[0], -1);\n    CU_ASSERT_EQUAL(tables2->mutations.parent[1], -1);\n\n    /* Don't need to free tables as we took ownership */\n    tsk_treeseq_free(&ts);\n}\n\nstatic void\ntest_init_compute_mutation_parents_errors(void)\n{\n    int ret;\n    tsk_id_t row_ret;\n    tsk_table_collection_t tables;\n    tsk_treeseq_t ts;\n    const char *sites = \"0.5       0\\n\"\n                        \"0         
0\\n\";\n\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    tables.sequence_length = 1;\n    parse_nodes(single_tree_ex_nodes, &tables.nodes);\n    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 7);\n    parse_edges(single_tree_ex_edges, &tables.edges);\n    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 6);\n    parse_sites(sites, &tables.sites);\n    CU_ASSERT_EQUAL_FATAL(tables.sites.num_rows, 2);\n    tables.sequence_length = 1.0;\n\n    ret = tsk_treeseq_init(\n        &ts, &tables, TSK_TS_INIT_BUILD_INDEXES | TSK_TS_INIT_COMPUTE_MUTATION_PARENTS);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_SITES);\n    tsk_treeseq_free(&ts);\n\n    tsk_site_table_clear(&tables.sites);\n    row_ret = tsk_site_table_add_row(&tables.sites, 0.5, \"A\", 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(row_ret, 0);\n    row_ret = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, \"A\", 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(row_ret, 0);\n    row_ret = tsk_mutation_table_add_row(\n        &tables.mutations, 0, 4, TSK_NULL, TSK_UNKNOWN_TIME, \"A\", 1, NULL, 0);\n    CU_ASSERT_EQUAL_FATAL(row_ret, 1);\n\n    ret = tsk_treeseq_init(\n        &ts, &tables, TSK_TS_INIT_BUILD_INDEXES | TSK_TS_INIT_COMPUTE_MUTATION_PARENTS);\n    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_PARENT_AFTER_CHILD);\n    tsk_treeseq_free(&ts);\n\n    tsk_table_collection_free(&tables);\n}\n\nint\nmain(int argc, char **argv)\n{\n    CU_TestInfo tests[] = {\n        /* simplest example tests */\n        { \"test_simplest_discrete_genome\", test_simplest_discrete_genome },\n        { \"test_simplest_discrete_time\", test_simplest_discrete_time },\n        { \"test_simplest_min_time\", test_simplest_min_time },\n        { \"test_simplest_max_time\", test_simplest_max_time },\n        { \"test_simplest_records\", test_simplest_records },\n        { \"test_simplest_nonbinary_records\", test_simplest_nonbinary_records },\n        { 
\"test_simplest_unary_records\", test_simplest_unary_records },\n        { \"test_simplest_unary_with_individuals\", test_simplest_unary_with_individuals },\n        { \"test_simplest_non_sample_leaf_records\",\n            test_simplest_non_sample_leaf_records },\n        { \"test_simplest_degenerate_multiple_root_records\",\n            test_simplest_degenerate_multiple_root_records },\n        { \"test_simplest_multiple_root_records\", test_simplest_multiple_root_records },\n        { \"test_simplest_zero_root_tree\", test_simplest_zero_root_tree },\n        { \"test_simplest_multi_root_tree\", test_simplest_multi_root_tree },\n        { \"test_simplest_tree_mrca\", test_simplest_tree_mrca },\n        { \"test_simplest_root_mutations\", test_simplest_root_mutations },\n        { \"test_simplest_back_mutations\", test_simplest_back_mutations },\n        { \"test_simplest_general_samples\", test_simplest_general_samples },\n        { \"test_simplest_holey_tree_sequence\", test_simplest_holey_tree_sequence },\n        { \"test_simplest_holey_tsk_treeseq_zero_roots\",\n            test_simplest_holey_tsk_treeseq_zero_roots },\n        { \"test_simplest_holey_tsk_treeseq_mutation_parents\",\n            test_simplest_holey_tsk_treeseq_mutation_parents },\n        { \"test_simplest_initial_gap_tree_sequence\",\n            test_simplest_initial_gap_tree_sequence },\n        { \"test_simplest_initial_gap_zero_roots\", test_simplest_initial_gap_zero_roots },\n        { \"test_simplest_initial_gap_tsk_treeseq_mutation_parents\",\n            test_simplest_initial_gap_tsk_treeseq_mutation_parents },\n        { \"test_simplest_final_gap_tree_sequence\",\n            test_simplest_final_gap_tree_sequence },\n        { \"test_simplest_final_gap_tsk_treeseq_mutation_parents\",\n            test_simplest_final_gap_tsk_treeseq_mutation_parents },\n        { \"test_simplest_individuals\", test_simplest_individuals },\n        { \"test_simplest_bad_individuals\", 
test_simplest_bad_individuals },\n        { \"test_simplest_bad_edges\", test_simplest_bad_edges },\n        { \"test_simplest_bad_indexes\", test_simplest_bad_indexes },\n        { \"test_simplest_bad_migrations\", test_simplest_bad_migrations },\n        { \"test_simplest_migration_simplify\", test_simplest_migration_simplify },\n        { \"test_simplest_overlapping_parents\", test_simplest_overlapping_parents },\n        { \"test_simplest_contradictory_children\", test_simplest_contradictory_children },\n        { \"test_simplest_overlapping_edges_simplify\",\n            test_simplest_overlapping_edges_simplify },\n        { \"test_simplest_overlapping_unary_edges_simplify\",\n            test_simplest_overlapping_unary_edges_simplify },\n        { \"test_simplest_overlapping_unary_edges_internal_samples_simplify\",\n            test_simplest_overlapping_unary_edges_internal_samples_simplify },\n        { \"test_simplest_reduce_site_topology\", test_simplest_reduce_site_topology },\n        { \"test_simplest_simplify_defragment\", test_simplest_simplify_defragment },\n        { \"test_simplest_population_filter\", test_simplest_population_filter },\n        { \"test_simplest_individual_filter\", test_simplest_individual_filter },\n        { \"test_simplest_no_node_filter\", test_simplest_no_node_filter },\n        { \"test_simplest_no_update_flags\", test_simplest_no_update_flags },\n        { \"test_simplest_map_mutations\", test_simplest_map_mutations },\n        { \"test_simplest_nonbinary_map_mutations\",\n            test_simplest_nonbinary_map_mutations },\n        { \"test_simplest_unary_map_mutations\", test_simplest_unary_map_mutations },\n        { \"test_simplest_non_sample_leaf_map_mutations\",\n            test_simplest_non_sample_leaf_map_mutations },\n        { \"test_simplest_internal_sample_map_mutations\",\n            test_simplest_internal_sample_map_mutations },\n        { \"test_simplest_multiple_root_map_mutations\",\n            
test_simplest_multiple_root_map_mutations },\n        { \"test_simplest_chained_map_mutations\", test_simplest_chained_map_mutations },\n        { \"test_simplest_mutation_edges\", test_simplest_mutation_edges },\n\n        /* Single tree tests */\n        { \"test_single_tree_good_records\", test_single_tree_good_records },\n        { \"test_single_nonbinary_tree_good_records\",\n            test_single_nonbinary_tree_good_records },\n        { \"test_single_tree_bad_records\", test_single_tree_bad_records },\n        { \"test_single_tree_good_mutations\", test_single_tree_good_mutations },\n        { \"test_single_tree_bad_mutations\", test_single_tree_bad_mutations },\n        { \"test_single_tree_iter\", test_single_tree_iter },\n        { \"test_single_tree_general_samples_iter\",\n            test_single_tree_general_samples_iter },\n        { \"test_single_nonbinary_tree_iter\", test_single_nonbinary_tree_iter },\n        { \"test_single_tree_iter_times\", test_single_tree_iter_times },\n        { \"test_single_tree_iter_depths\", test_single_tree_iter_depths },\n        { \"test_single_tree_simplify\", test_single_tree_simplify },\n        { \"test_single_tree_simplify_debug\", test_single_tree_simplify_debug },\n        { \"test_single_tree_simplify_keep_input_roots\",\n            test_single_tree_simplify_keep_input_roots },\n        { \"test_single_tree_simplify_no_sample_nodes\",\n            test_single_tree_simplify_no_sample_nodes },\n        { \"test_single_tree_simplify_null_samples\",\n            test_single_tree_simplify_null_samples },\n        { \"test_single_tree_compute_mutation_parents\",\n            test_single_tree_compute_mutation_parents },\n        { \"test_single_tree_compute_mutation_times\",\n            test_single_tree_compute_mutation_times },\n        { \"test_single_tree_mutation_edges\", test_single_tree_mutation_edges },\n        { \"test_single_tree_is_descendant\", test_single_tree_is_descendant },\n        { 
\"test_single_tree_total_branch_length\", test_single_tree_total_branch_length },\n        { \"test_single_tree_num_lineages\", test_single_tree_num_lineages },\n        { \"test_single_tree_map_mutations\", test_single_tree_map_mutations },\n        { \"test_single_tree_map_mutations_internal_samples\",\n            test_single_tree_map_mutations_internal_samples },\n        { \"test_single_tree_tracked_samples\", test_single_tree_tracked_samples },\n        { \"test_single_tree_tree_pos\", test_single_tree_tree_pos },\n\n        /* Multi tree tests */\n        { \"test_simple_multi_tree\", test_simple_multi_tree },\n        { \"test_multi_tree_direction_switching_tree_pos\",\n            test_multi_tree_direction_switching_tree_pos },\n        { \"test_nonbinary_multi_tree\", test_nonbinary_multi_tree },\n        { \"test_unary_multi_tree\", test_unary_multi_tree },\n        { \"test_internal_sample_multi_tree\", test_internal_sample_multi_tree },\n        { \"test_internal_sample_simplified_multi_tree\",\n            test_internal_sample_simplified_multi_tree },\n        { \"test_simplify_keep_input_roots_multi_tree\",\n            test_simplify_keep_input_roots_multi_tree },\n        { \"test_left_to_right_multi_tree\", test_left_to_right_multi_tree },\n        { \"test_gappy_multi_tree\", test_gappy_multi_tree },\n        { \"test_convenience_arrays_multi_tree\", test_convenience_arrays_multi_tree },\n\n        { \"test_tsk_treeseq_bad_records\", test_tsk_treeseq_bad_records },\n\n        /* multiroot tests */\n        { \"test_multiroot_mrca\", test_multiroot_mrca },\n\n        /* Sample sets */\n        { \"test_simple_sample_sets\", test_simple_sample_sets },\n        { \"test_nonbinary_sample_sets\", test_nonbinary_sample_sets },\n        { \"test_internal_sample_sample_sets\", test_internal_sample_sample_sets },\n        { \"test_non_sample_leaf_sample_lists\", test_non_sample_leaf_sample_lists },\n\n        { \"test_no_sample_count_semantics\", 
test_no_sample_count_semantics },\n        { \"test_virtual_root_properties\", test_virtual_root_properties },\n\n        /* tree traversal orders */\n        { \"test_single_tree_traversal\", test_single_tree_traversal },\n        { \"test_multiroot_tree_traversal\", test_multiroot_tree_traversal },\n\n        /* Seek */\n        { \"test_seek_multi_tree\", test_seek_multi_tree },\n        { \"test_seek_errors\", test_seek_errors },\n\n        /* KC distance tests */\n        { \"test_single_tree_kc\", test_single_tree_kc },\n        { \"test_isolated_node_kc\", test_isolated_node_kc },\n        { \"test_two_trees_kc\", test_two_trees_kc },\n        { \"test_empty_tree_kc\", test_empty_tree_kc },\n        { \"test_nonbinary_tree_kc\", test_nonbinary_tree_kc },\n        { \"test_nonzero_samples_kc\", test_nonzero_samples_kc },\n        { \"test_internal_samples_kc\", test_internal_samples_kc },\n        { \"test_non_sample_leaf_kc\", test_non_sample_leaf_kc },\n        { \"test_unequal_sample_size_kc\", test_unequal_sample_size_kc },\n        { \"test_unequal_samples_kc\", test_unequal_samples_kc },\n        { \"test_unary_nodes_kc\", test_unary_nodes_kc },\n        { \"test_no_sample_lists_kc\", test_no_sample_lists_kc },\n        { \"test_unequal_sequence_lengths_kc\", test_unequal_sequence_lengths_kc },\n        { \"test_different_number_trees_kc\", test_different_number_trees_kc },\n        { \"test_offset_trees_with_errors_kc\", test_offset_trees_with_errors_kc },\n\n        /* Tree balance/imbalance index tests */\n        { \"test_single_tree_balance\", test_single_tree_balance },\n        { \"test_multiroot_balance\", test_multiroot_balance },\n        { \"test_nonbinary_balance\", test_nonbinary_balance },\n        { \"test_empty_tree_balance\", test_empty_tree_balance },\n        { \"test_b2_bad_base\", test_b2_bad_base },\n\n        /* Misc */\n        { \"test_tree_errors\", test_tree_errors },\n        { \"test_treeseq_row_access_errors\", 
test_treeseq_row_access_errors },\n        { \"test_treeseq_get_individuals_population_errors\",\n            test_treeseq_get_individuals_population_errors },\n        { \"test_treeseq_get_individuals_population\",\n            test_treeseq_get_individuals_population },\n        { \"test_treeseq_get_individuals_time_errors\",\n            test_treeseq_get_individuals_time_errors },\n        { \"test_treeseq_get_individuals_time\", test_treeseq_get_individuals_time },\n        { \"test_tree_copy_flags\", test_tree_copy_flags },\n        { \"test_genealogical_nearest_neighbours_errors\",\n            test_genealogical_nearest_neighbours_errors },\n        { \"test_deduplicate_sites\", test_deduplicate_sites },\n        { \"test_deduplicate_sites_errors\", test_deduplicate_sites_errors },\n        { \"test_deduplicate_sites_zero_rows\", test_deduplicate_sites_zero_rows },\n        { \"test_deduplicate_sites_multichar\", test_deduplicate_sites_multichar },\n        { \"test_empty_tree_sequence\", test_empty_tree_sequence },\n        { \"test_zero_edges\", test_zero_edges },\n        { \"test_tree_sequence_metadata\", test_tree_sequence_metadata },\n        { \"test_time_uncalibrated\", test_time_uncalibrated },\n        { \"test_reference_sequence\", test_reference_sequence },\n        { \"test_split_edges_no_populations\", test_split_edges_no_populations },\n        { \"test_split_edges_populations\", test_split_edges_populations },\n        { \"test_split_edges_errors\", test_split_edges_errors },\n        { \"test_extend_haplotypes_simple\", test_extend_haplotypes_simple },\n        { \"test_extend_haplotypes_errors\", test_extend_haplotypes_errors },\n        { \"test_extend_haplotypes\", test_extend_haplotypes },\n        { \"test_extend_haplotypes_new_edge\", test_extend_haplotypes_new_edge },\n        { \"test_extend_haplotypes_conflicting_times\",\n            test_extend_haplotypes_conflicting_times },\n        { 
\"test_init_take_ownership_no_edge_metadata\",\n            test_init_take_ownership_no_edge_metadata },\n        { \"test_init_compute_mutation_parents\", test_init_compute_mutation_parents },\n        { \"test_init_compute_mutation_parents_errors\",\n            test_init_compute_mutation_parents_errors },\n        { NULL, NULL },\n    };\n\n    return test_main(tests, argc, argv);\n}\n"
  },
  {
    "path": "c/tests/testlib.c",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2024 Tskit Developers\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n#include \"testlib.h\"\n\nchar *_tmp_file_name;\nFILE *_devnull;\n\n/* Simple single tree example. 
*/\nconst char *single_tree_ex_nodes = /*          6          */\n    \"1  0   -1   -1\\n\"             /*         / \\         */\n    \"1  0   -1   -1\\n\"             /*        /   \\        */\n    \"1  0   -1   -1\\n\"             /*       /     \\       */\n    \"1  0   -1   -1\\n\"             /*      /       5      */\n    \"0  1   -1   -1\\n\"             /*     4       / \\     */\n    \"0  2   -1   -1\\n\"             /*    / \\     /   \\    */\n    \"0  3   -1   -1\\n\";            /*   0   1   2     3   */\nconst char *single_tree_ex_edges = \"0  1   4   0,1\\n\"\n                                   \"0  1   5   2,3\\n\"\n                                   \"0  1   6   4,5\\n\";\nconst char *single_tree_ex_sites = \"0.125  0\\n\"\n                                   \"0.25   0\\n\"\n                                   \"0.5    0\\n\";\n/* site, node, derived_state, [parent, time] */\nconst char *single_tree_ex_mutations\n    = \"0    2     1   -1\\n\"\n      \"1    4     1   -1\\n\"\n      \"1    0     0   1\\n\"  /* Back mutation over 0 */\n      \"2    0     1   -1\\n\" /* recurrent mutations over samples */\n      \"2    1     1   -1\\n\"\n      \"2    2     1   -1\\n\"\n      \"2    3     1   -1\\n\";\n\n/*** Example from the PLOS paper ***/\n/*\n0.25┊     8   ┊         ┊         ┊\n    ┊   ┏━┻━┓ ┊         ┊         ┊\n0.20┊   ┃   ┃ ┊         ┊   7     ┊\n    ┊   ┃   ┃ ┊         ┊ ┏━┻━┓   ┊\n0.17┊   6   ┃ ┊   6     ┊ ┃   ┃   ┊\n    ┊ ┏━┻┓  ┃ ┊ ┏━┻━┓   ┊ ┃   ┃   ┊\n0.09┊ ┃  5  ┃ ┊ ┃   5   ┊ ┃   5   ┊\n    ┊ ┃ ┏┻┓ ┃ ┊ ┃ ┏━┻┓  ┊ ┃ ┏━┻┓  ┊\n0.07┊ ┃ ┃ ┃ ┃ ┊ ┃ ┃  4  ┊ ┃ ┃  4  ┊\n    ┊ ┃ ┃ ┃ ┃ ┊ ┃ ┃ ┏┻┓ ┊ ┃ ┃ ┏┻┓ ┊\n0.00┊ 0 1 3 2 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊\n  0.00      2.00      7.00      10.00\n*/\nconst char *paper_ex_nodes = \"1  0       -1   0\\n\"\n                             \"1  0       -1   0\\n\"\n                             \"1  0       -1   1\\n\"\n                             \"1  0       -1   1\\n\"\n                             \"0  0.071   -1   
-1\\n\"\n                             \"0  0.090   -1   -1\\n\"\n                             \"0  0.170   -1   -1\\n\"\n                             \"0  0.202   -1   -1\\n\"\n                             \"0  0.253   -1   -1\\n\";\nconst char *paper_ex_edges = \"2 10 4 2\\n\"\n                             \"2 10 4 3\\n\"\n                             \"0 10 5 1\\n\"\n                             \"0 2  5 3\\n\"\n                             \"2 10 5 4\\n\"\n                             \"0 7  6 0,5\\n\"\n                             \"7 10 7 0,5\\n\"\n                             \"0 2  8 2,6\\n\";\n/* We make one mutation for each tree */\nconst char *paper_ex_sites = \"1      0\\n\"\n                             \"4.5    0\\n\"\n                             \"8.5    0\\n\";\nconst char *paper_ex_mutations = \"0      2   1\\n\"\n                                 \"1      0   1\\n\"\n                                 \"2      5   1\\n\";\n/* Two (diploid) individuals */\nconst char *paper_ex_individuals = \"0      0.2,1.5    -1,-1\\n\"\n                                   \"0      0.0,0.0    -1,-1\\n\";\n\n/*** An example of a nonbinary tree sequence ***/\n/*\n0.41┊         12      ┊          12     ┊\n    ┊       ┏━━┻━━┓   ┊         ┏━┻━━┓  ┊\n0.28┊       ┃     ┃   ┊        11    ┃  ┊\n    ┊       ┃     ┃   ┊       ┏━┻━┓  ┃  ┊\n0.13┊       ┃    10   ┊       ┃   ┃ 10  ┊\n    ┊       ┃   ┏━╋━┓ ┊       ┃   ┃ ┏┻┓ ┊\n0.07┊       9   ┃ ┃ ┃ ┊       9   ┃ ┃ ┃ ┊\n    ┊    ┏━━┻━┓ ┃ ┃ ┃ ┊    ┏━━┻━┓ ┃ ┃ ┃ ┊\n0.01┊    8    ┃ ┃ ┃ ┃ ┊    8    ┃ ┃ ┃ ┃ ┊\n    ┊ ┏━┳┻┳━┓ ┃ ┃ ┃ ┃ ┊ ┏━┳┻┳━┓ ┃ ┃ ┃ ┃ ┊\n0.00┊ 0 1 2 3 6 4 5 7 ┊ 0 1 2 3 6 5 4 7 ┊\n    0                17                100\n\n*/\nconst char *nonbinary_ex_nodes = \"1  0       0   -1\\n\"\n                                 \"1  0       0   -1\\n\"\n                                 \"1  0       0   -1\\n\"\n                                 \"1  0       0   -1\\n\"\n                                 \"1  0       0   -1\\n\"\n    
                             \"1  0       0   -1\\n\"\n                                 \"1  0       0   -1\\n\"\n                                 \"1  0       0   -1\\n\"\n                                 \"0  0.01    0   -1\\n\"\n                                 \"0  0.068   0   -1\\n\"\n                                 \"0  0.130   0   -1\\n\"\n                                 \"0  0.279   0   -1\\n\"\n                                 \"0  0.405   0   -1\\n\";\nconst char *nonbinary_ex_edges = \"0\t100\t8\t0,1,2,3\\n\"\n                                 \"0\t100\t9\t6,8\\n\"\n                                 \"0  100 10  4\\n\"\n                                 \"0  17  10  5\\n\"\n                                 \"0  100 10  7\\n\"\n                                 \"17\t100\t11\t5,9\\n\"\n                                 \"0\t17\t12\t9\\n\"\n                                 \"0  100 12  10\\n\"\n                                 \"17\t100\t12\t11\";\nconst char *nonbinary_ex_sites = \"1  0\\n\"\n                                 \"18 0\\n\";\nconst char *nonbinary_ex_mutations = \"0    2   1\\n\"\n                                     \"1    11  1\";\n\n/*** An example of a tree sequence with unary nodes\n *   and also a non-sample leaf (node 9). 
***/\n/*\n0.25┊      8    ┊   8       ┊           ┊         ┊\n    ┊   ┏━━┻━━┓ ┊   ┃       ┊           ┊         ┊\n0.20┊   ┃     7 ┊   ┃       ┊   7       ┊         ┊\n    ┊   ┃     ┃ ┊   ┃       ┊ ┏━┻━━┓    ┊         ┊\n0.17┊   6     ┃ ┊   6       ┊ ┃    ┃    ┊         ┊\n    ┊ ┏━┻━┓   ┃ ┊ ┏━┻━━┓    ┊ ┃    ┃    ┊         ┊\n0.09┊ ┃   5   ┃ ┊ ┃    5    ┊ ┃    5    ┊         ┊\n    ┊ ┃ ┏━╋━┓ ┃ ┊ ┃ ┏━━╋━━┓ ┊ ┃ ┏━━╋━━┓ ┊         ┊\n0.07┊ ┃ ┃ ┃ ┃ ┃ ┊ ┃ ┃  4  ┃ ┊ ┃ ┃  4  ┃ ┊         ┊\n    ┊ ┃ ┃ ┃ ┃ ┃ ┊ ┃ ┃ ┏┻┓ ┃ ┊ ┃ ┃ ┏┻┓ ┃ ┊         ┊\n0.00┊ 0 1 3 9 2 ┊ 0 1 2 3 9 ┊ 0 1 2 3 9 ┊ 0 1 2 3 ┊\n    0           2           7          10        100\n*/\nconst char *unary_ex_nodes = \"1  0       0  -1\\n\"\n                             \"1  0       0  -1\\n\"\n                             \"1  0       0  -1\\n\"\n                             \"1  0       0  -1\\n\"\n                             \"0  0.071   0  -1\\n\"\n                             \"0  0.090   0  -1\\n\"\n                             \"0  0.170   0  -1\\n\"\n                             \"0  0.202   0  -1\\n\"\n                             \"0  0.253   0  -1\\n\"\n                             \"0  0       0  -1\\n\";\nconst char *unary_ex_edges = \"2 10 4 2,3\\n\"\n                             \"0 10 5 1\\n\"\n                             \"0 2  5 3\\n\"\n                             \"2 10 5 4\\n\"\n                             \"0 10 5 9\\n\"\n                             \"0 7  6 0,5\\n\"\n                             \"7 10 7 0\\n\"\n                             \"0 2  7 2\\n\"\n                             \"7 10 7 5\\n\"\n                             \"0 7  8 6\\n\"\n                             \"0 2  8 7\\n\";\n\n/* We make one mutation for each tree, over unary nodes if they exist */\nconst char *unary_ex_sites = \"1.0    0\\n\"\n                             \"4.5    0\\n\"\n                             \"8.5    0\\n\";\nconst char *unary_ex_mutations = \"0    2   1\\n\"\n                              
   \"1    6   1\\n\"\n                                 \"1    9   0\\n\"\n                                 \"2    5   1\\n\";\n\n/* An example of a simple tree sequence with multiple marginal trees. */\n\n/* Simple single tree example. */\nconst char *multiple_tree_ex_nodes = /*                                    */\n    \"1  0   -1   -1\\n\"               /*         6        |                */\n    \"1  0   -1   -1\\n\"               /*        / \\       |                 */\n    \"1  0   -1   -1\\n\"               /*       /   \\      |     5           */\n    \"0  1   -1   -1\\n\"               /*      4     \\     |    / \\          */\n    \"0  2   -1   -1\\n\"               /*     / \\     \\    |   /   3         */\n    \"0  3   -1   -1\\n\"               /*    /   \\     \\   |  /   / \\        */\n    \"0  4   -1   -1\\n\";              /*   0     1     2  | 0   1   2       */\n                                     /* |----------------|---------------| */\n                                     /* 0                1               2 */\n\nconst char *multiple_tree_ex_edges = \"0.75  1.0   3   1,2\\n\"\n                                     \"0.0  0.75   4   0,1\\n\"\n                                     \"0.75  1.0   5   0,3\\n\"\n                                     \"0.0  0.75   6   2,4\\n\";\n\n/* Odd topology -- different roots. 
*/\n\nconst char *odd_tree1_ex_nodes = /*        |       |   5    */\n    \"1  0   -1  -1\\n\"            /*        |   4   |   |    */\n    \"1  0   -1  -1\\n\"            /*    3   |   |   |   |    */\n    \"0  1   -1  -1\\n\"            /*    |   |   |   |   |    */\n    \"0  2   -1   -1\\n\"           /*    2   |   2   |   2    */\n    \"0  3   -1   -1\\n\"           /*   / \\  |  / \\  |  / \\   */\n    \"0  4   -1   -1\\n\";          /*  0   1 | 0   1 | 0   1  */\n                                 /* |------|-------|------| */\n                                 /* 0.0    0.2     0.7   1.0*/\n\nconst char *odd_tree1_ex_edges = \"0.0   1.0 2   0,1\\n\"\n                                 \"0.0   0.2 3   2\\n\"\n                                 \"0.2   0.7 4   2\\n\"\n                                 \"0.7   1.0 4   2\\n\";\n\n/* An example where some samples descend from other samples, and multiple roots */\n\nconst char *multi_root_tree_ex_nodes = \"1  0   -1  -1\\n\" /*  4     5 */\n                                       \"1  0   -1  -1\\n\" /*  |     | */\n                                       \"1  1   -1  -1\\n\" /*  2     3 */\n                                       \"1  1   -1  -1\\n\" /*  |     | */\n                                       \"0  2   -1  -1\\n\" /*  0     1 */\n                                       \"0  2   -1  -1\\n\";\n\nconst char *multi_root_tree_ex_edges = \"0   1   2   0\\n\"\n                                       \"0   1   3   1\\n\"\n                                       \"0   1   4   2\\n\"\n                                       \"0   1   5   3\\n\";\n\n/* Examples of tree sequences where samples have different paths to the same ancestor. 
*/\n\nconst char *multi_path_tree_ex_nodes = /*       5        |             */\n    \"1  0   -1  -1\\n\"                  /*      / \\       |             */\n    \"1  0   -1  -1\\n\"                  /*     /   4      |     4       */\n    \"1  0   -1  -1\\n\"                  /*    /   / \\     |    / \\      */\n    \"0  1   -1  -1\\n\"                  /*   /   /   \\    |   3   \\     */\n    \"0  2   -1  -1\\n\"                  /*  /   /     \\   |  / \\   \\    */\n    \"0  3   -1  -1\\n\";                 /* 0   2       1  | 0   2   1   */\n                                       /*----------------|------------ */\n                                       /*0.0            0.2         1.0*/\n\nconst char *multi_path_tree_ex_edges = \"0.2 1.0 3   0\\n\"\n                                       \"0.2 1.0 3   2\\n\"\n                                       \"0.0 1.0 4   1\\n\"\n                                       \"0.0 0.2 4   2\\n\"\n                                       \"0.2 1.0 4   3\\n\"\n                                       \"0.0 0.2 5   0\\n\"\n                                       \"0.0 0.2 5   4\\n\";\n\nconst char *multi_path_tree_ex2_nodes = \"1  0   -1  -1\\n\"\n                                        \"1  0   -1  -1\\n\"\n                                        \"0  1   -1  -1\\n\"\n                                        \"0  2   -1  -1\\n\"\n                                        \"0  3   -1  -1\\n\";\n\nconst char *multi_path_tree_ex2_edges = \"0.6 1.0 2   1\\n\"\n                                        \"0.0 1.0 3   0\\n\"\n                                        \"0.0 0.6 4   1\\n\"\n                                        \"0.6 1.0 4   2\\n\"\n                                        \"0.0 1.0 4   3\\n\";\n\n/* An example of a tree sequence with internally sampled nodes. 
*/\n\n/*\n1.20┊         ┊     8   ┊         ┊\n    ┊         ┊   ┏━┻━┓ ┊         ┊\n1.00┊   7     ┊   ┃   ┃ ┊         ┊\n    ┊ ┏━┻━┓   ┊   ┃   ┃ ┊         ┊\n0.70┊ ┃   ┃   ┊   ┃   ┃ ┊   6     ┊\n    ┊ ┃   ┃   ┊   ┃   ┃ ┊ ┏━┻━┓   ┊\n0.50┊ ┃   5   ┊   5   ┃ ┊ ┃   5   ┊\n    ┊ ┃ ┏━┻┓  ┊  ┏┻━┓ ┃ ┊ ┃ ┏━┻┓  ┊\n0.40┊ ┃ ┃  4  ┊  4  ┃ ┃ ┊ ┃ ┃  4  ┊\n    ┊ ┃ ┃ ┏┻┓ ┊ ┏┻┓ ┃ ┃ ┊ ┃ ┃ ┏┻┓ ┊\n0.20┊ ┃ ┃ ┃ 3 ┊ ┃ ┃ ┃ 3 ┊ ┃ ┃ ┃ 3 ┊\n    ┊ ┃ ┃ ┃   ┊ ┃ ┃ ┃   ┊ ┃ ┃ ┃   ┊\n0.10┊ ┃ 1 2   ┊ ┃ 2 1   ┊ ┃ 1 2   ┊\n    ┊ ┃       ┊ ┃       ┊ ┃       ┊\n0.00┊ 0       ┊ 0       ┊ 0       ┊\n  0.00      2.00      8.00      10.00\n*/\n\nconst char *internal_sample_ex_nodes = \"1  0.0   0   -1\\n\"\n                                       \"1  0.1   0   -1\\n\"\n                                       \"1  0.1   0   -1\\n\"\n                                       \"1  0.2   0   -1\\n\"\n                                       \"0  0.4   0   -1\\n\"\n                                       \"1  0.5   0   -1\\n\"\n                                       \"0  0.7   0   -1\\n\"\n                                       \"0  1.0   0   -1\\n\"\n                                       \"0  1.2   0   -1\\n\";\nconst char *internal_sample_ex_edges = \"2 8  4 0\\n\"\n                                       \"0 10 4 2\\n\"\n                                       \"0 2  4 3\\n\"\n                                       \"8 10 4 3\\n\"\n                                       \"0 10 5 1,4\\n\"\n                                       \"8 10 6 0,5\\n\"\n                                       \"0 2  7 0,5\\n\"\n                                       \"2 8  8 3,5\\n\";\n/* We make one mutation for each tree, some above the internal node */\nconst char *internal_sample_ex_sites = \"1.0    0\\n\"\n                                       \"4.5    0\\n\"\n                                       \"8.5    0\\n\";\nconst char *internal_sample_ex_mutations = \"0    2   1\\n\"\n                                           \"1    5   
1\\n\"\n                                           \"2    5   1\\n\";\n\n/*** An example of a tree sequence with multiple roots. ***/\n/*\n0.90┊             ┊         11  ┊             ┊\n    ┊             ┊         ┏┻┓ ┊             ┊\n0.80┊         10  ┊         ┃ ┃ ┊             ┊\n    ┊         ┏┻┓ ┊         ┃ ┃ ┊             ┊\n0.40┊     9   ┃ ┃ ┊    9    ┃ ┃ ┊     9       ┊\n    ┊   ┏━┻┓  ┃ ┃ ┊  ┏━┻━┓  ┃ ┃ ┊   ┏━┻━━┓    ┊\n0.30┊   ┃  ┃  ┃ ┃ ┊  ┃   8  ┃ ┃ ┊   ┃    8    ┊\n    ┊   ┃  ┃  ┃ ┃ ┊  ┃  ┏┻┓ ┃ ┃ ┊   ┃   ┏┻┓   ┊\n0.20┊   ┃  7  ┃ ┃ ┊  7  ┃ ┃ ┃ ┃ ┊   7   ┃ ┃   ┊\n    ┊   ┃ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┃ ┃ ┊ ┏━┻┓  ┃ ┃   ┊\n0.10┊   ┃ ┃ ┃ ┃ ┃ ┊ ┃ ┃ ┃ ┃ ┃ ┃ ┊ ┃  6  ┃ ┃   ┊\n    ┊   ┃ ┃ ┃ ┃ ┃ ┊ ┃ ┃ ┃ ┃ ┃ ┃ ┊ ┃ ┏┻┓ ┃ ┃   ┊\n0.00┊ 5 2 3 4 0 1 ┊ 3 4 1 2 0 5 ┊ 4 0 3 1 2 5 ┊\n    0             4             8            10\n*/\nconst char *multiroot_ex_nodes = \"1  0.0  0  -1\\n\"\n                                 \"1  0.0  0  -1\\n\"\n                                 \"1  0.0  0  -1\\n\"\n                                 \"1  0.0  0  -1\\n\"\n                                 \"1  0.0  0  -1\\n\"\n                                 \"1  0.0  0  -1\\n\"\n                                 \"0  0.1  0  -1\\n\"\n                                 \"0  0.2  0  -1\\n\"\n                                 \"0  0.3  0  -1\\n\"\n                                 \"0  0.4  0  -1\\n\"\n                                 \"0  0.8  0  -1\\n\"\n                                 \"0  0.9  0  -1\\n\";\nconst char *multiroot_ex_edges = \"8  10  6   0,3\\n\"\n                                 \"0  8   7   3\\n\"\n                                 \"0  10  7   4\\n\"\n                                 \"8  10  7   6\\n\"\n                                 \"4  10  8   1,2\\n\"\n                                 \"0  4   9   2\\n\"\n                                 \"0  10  9   7\\n\"\n                                 \"4  10  9   8\\n\"\n                                 \"0  4   10  0,1\\n\"\n                  
               \"4  8   11  0,5\\n\";\n\n/* We make one mutation over each root node */\nconst char *multiroot_ex_sites = \"1.0    0\\n\"\n                                 \"2.0    0\\n\"\n                                 \"3.0    0\\n\"\n                                 \"5.0    0\\n\"\n                                 \"6.0    0\\n\"\n                                 \"8.0    0\\n\"\n                                 \"9.0    0\\n\";\nconst char *multiroot_ex_mutations = \"0    10  1\\n\"\n                                     \"1    9   1\\n\"\n                                     \"2    5   1\\n\"\n                                     \"3    11  1\\n\"\n                                     \"4    9   1\\n\"\n                                     \"5    9   1\\n\"\n                                     \"6    5   1\\n\";\n\n/*** An example of a empty tree sequence. ***/\nconst char *empty_ex_nodes = \"1  0.0  0  -1\\n\"\n                             \"1  0.0  0  -1\\n\"\n                             \"1  0.0  0  -1\\n\"\n                             \"1  0.0  0  -1\\n\"\n                             \"1  0.0  0  -1\\n\"\n                             \"1  0.0  0  -1\\n\";\nconst char *empty_ex_edges = \"\";\n\n/*** An example of a tree sequence with missing marginal trees. 
***/\n/*\n     |     4     | |     4     |\n     |    / \\    | |    / \\    |\n     |   3   \\   | |   /   3   |\n     |  / \\   \\  | |  /   / \\  |\n     | 0   1   2 | | 0   1   2 |\n   |-|-----------|-|-----------|-|\n   0 1           2 3           4 5\n*/\nconst char *missing_ex_nodes = \"1  0.0  0  -1\\n\"\n                               \"1  0.0  0  -1\\n\"\n                               \"1  0.0  0  -1\\n\"\n                               \"0  1.0  0  -1\\n\"\n                               \"0  2.0  0  -1\\n\";\n\nconst char *missing_ex_edges = \"1.0  2.0  3  0\\n\"\n                               \"1.0  2.0  3  1\\n\"\n                               \"3.0  4.0  3  1\\n\"\n                               \"3.0  4.0  3  2\\n\"\n                               \"3.0  4.0  4  0\\n\"\n                               \"1.0  2.0  4  2\\n\"\n                               \"1.0  2.0  4  3\\n\"\n                               \"3.0  4.0  4  3\\n\";\n\n/* Simple utilities to parse text so we can write declaritive\n * tests. 
This is not intended as a robust general input mechanism.\n */\n\nvoid\nparse_nodes(const char *text, tsk_node_table_t *node_table)\n{\n    tsk_id_t ret_id;\n    size_t c, k;\n    size_t MAX_LINE = 1024;\n    char line[MAX_LINE];\n    const char *whitespace = \" \\t\";\n    char *p;\n    double time;\n    int flags, population, individual;\n    char *name;\n\n    c = 0;\n    while (text[c] != '\\0') {\n        /* Fill in the line */\n        k = 0;\n        while (text[c] != '\\n' && text[c] != '\\0') {\n            CU_ASSERT_FATAL(k < MAX_LINE - 1);\n            line[k] = text[c];\n            c++;\n            k++;\n        }\n        if (text[c] == '\\n') {\n            c++;\n        }\n        line[k] = '\\0';\n        p = strtok(line, whitespace);\n        CU_ASSERT_FATAL(p != NULL);\n        flags = atoi(p);\n        p = strtok(NULL, whitespace);\n        CU_ASSERT_FATAL(p != NULL);\n        time = atof(p);\n        p = strtok(NULL, whitespace);\n        CU_ASSERT_FATAL(p != NULL);\n        population = atoi(p);\n        p = strtok(NULL, whitespace);\n        if (p == NULL) {\n            individual = -1;\n        } else {\n            individual = atoi(p);\n            p = strtok(NULL, whitespace);\n        }\n        if (p == NULL) {\n            name = \"\";\n        } else {\n            name = p;\n        }\n        ret_id = tsk_node_table_add_row(\n            node_table, flags, time, population, individual, name, strlen(name));\n        CU_ASSERT_FATAL(ret_id >= 0);\n    }\n}\n\nvoid\nparse_edges(const char *text, tsk_edge_table_t *edge_table)\n{\n    tsk_id_t ret_id;\n    size_t c, k;\n    size_t MAX_LINE = 1024;\n    char line[MAX_LINE], sub_line[MAX_LINE];\n    const char *whitespace = \" \\t\";\n    char *p, *q;\n    double left, right;\n    tsk_id_t parent, child;\n    uint32_t num_children;\n\n    c = 0;\n    while (text[c] != '\\0') {\n        /* Fill in the line */\n        k = 0;\n        while (text[c] != '\\n' && text[c] != '\\0') {\n        
    CU_ASSERT_FATAL(k < MAX_LINE - 1);\n            line[k] = text[c];\n            c++;\n            k++;\n        }\n        if (text[c] == '\\n') {\n            c++;\n        }\n        line[k] = '\\0';\n        p = strtok(line, whitespace);\n        CU_ASSERT_FATAL(p != NULL);\n        left = atof(p);\n        p = strtok(NULL, whitespace);\n        CU_ASSERT_FATAL(p != NULL);\n        right = atof(p);\n        p = strtok(NULL, whitespace);\n        CU_ASSERT_FATAL(p != NULL);\n        parent = atoi(p);\n        num_children = 0;\n        p = strtok(NULL, whitespace);\n        CU_ASSERT_FATAL(p != NULL);\n\n        num_children = 1;\n        q = p;\n        while (*q != '\\0') {\n            if (*q == ',') {\n                num_children++;\n            }\n            q++;\n        }\n        CU_ASSERT_FATAL(num_children >= 1);\n        strncpy(sub_line, p, MAX_LINE);\n        q = strtok(sub_line, \",\");\n        for (k = 0; k < num_children; k++) {\n            CU_ASSERT_FATAL(q != NULL);\n            child = atoi(q);\n            ret_id = tsk_edge_table_add_row(\n                edge_table, left, right, parent, child, NULL, 0);\n            CU_ASSERT_FATAL(ret_id >= 0);\n            q = strtok(NULL, \",\");\n        }\n        CU_ASSERT_FATAL(q == NULL);\n    }\n}\n\nvoid\nparse_migrations(const char *text, tsk_migration_table_t *migration_table)\n{\n    tsk_id_t ret_id;\n    size_t c, k;\n    size_t MAX_LINE = 1024;\n    char line[MAX_LINE];\n    const char *whitespace = \" \\t\";\n    char *p;\n    double left, right, time;\n    int node, source, dest;\n    char *metadata;\n\n    c = 0;\n    while (text[c] != '\\0') {\n        /* Fill in the line */\n        k = 0;\n        while (text[c] != '\\n' && text[c] != '\\0') {\n            CU_ASSERT_FATAL(k < MAX_LINE - 1);\n            line[k] = text[c];\n            c++;\n            k++;\n        }\n        if (text[c] == '\\n') {\n            c++;\n        }\n        line[k] = '\\0';\n        p = strtok(line, 
whitespace);\n        CU_ASSERT_FATAL(p != NULL);\n        left = atof(p);\n        p = strtok(NULL, whitespace);\n        CU_ASSERT_FATAL(p != NULL);\n        right = atof(p);\n        p = strtok(NULL, whitespace);\n        CU_ASSERT_FATAL(p != NULL);\n        node = atoi(p);\n        p = strtok(NULL, whitespace);\n        CU_ASSERT_FATAL(p != NULL);\n        source = atoi(p);\n        p = strtok(NULL, whitespace);\n        CU_ASSERT_FATAL(p != NULL);\n        dest = atoi(p);\n        p = strtok(NULL, whitespace);\n        CU_ASSERT_FATAL(p != NULL);\n        time = atof(p);\n        p = strtok(NULL, whitespace);\n        if (p == NULL) {\n            metadata = \"\";\n        } else {\n            metadata = p;\n        }\n        ret_id = tsk_migration_table_add_row(migration_table, left, right, node, source,\n            dest, time, metadata, strlen(metadata));\n        CU_ASSERT_FATAL(ret_id >= 0);\n    }\n}\n\nvoid\nparse_sites(const char *text, tsk_site_table_t *site_table)\n{\n    tsk_id_t ret_id;\n    size_t c, k;\n    size_t MAX_LINE = 1024;\n    char line[MAX_LINE];\n    double position;\n    char ancestral_state[MAX_LINE];\n    const char *whitespace = \" \\t\";\n    char *p;\n\n    c = 0;\n    while (text[c] != '\\0') {\n        /* Fill in the line */\n        k = 0;\n        while (text[c] != '\\n' && text[c] != '\\0') {\n            CU_ASSERT_FATAL(k < MAX_LINE - 1);\n            line[k] = text[c];\n            c++;\n            k++;\n        }\n        if (text[c] == '\\n') {\n            c++;\n        }\n        line[k] = '\\0';\n        p = strtok(line, whitespace);\n        CU_ASSERT_FATAL(p != NULL);\n        position = atof(p);\n        p = strtok(NULL, whitespace);\n        CU_ASSERT_FATAL(p != NULL);\n        strncpy(ancestral_state, p, MAX_LINE);\n        ret_id = tsk_site_table_add_row(\n            site_table, position, ancestral_state, strlen(ancestral_state), NULL, 0);\n        CU_ASSERT_FATAL(ret_id >= 0);\n    
}\n}\n\nvoid\nparse_mutations(const char *text, tsk_mutation_table_t *mutation_table)\n{\n    tsk_id_t ret_id;\n    size_t c, k;\n    size_t MAX_LINE = 1024;\n    char line[MAX_LINE];\n    const char *whitespace = \" \\t\";\n    char *p;\n    tsk_id_t node, site, parent;\n    double time;\n    char derived_state[MAX_LINE];\n\n    /* site, node, derived_state, [parent, time] */\n    c = 0;\n    while (text[c] != '\\0') {\n        /* Fill in the line */\n        k = 0;\n        while (text[c] != '\\n' && text[c] != '\\0') {\n            CU_ASSERT_FATAL(k < MAX_LINE - 1);\n            line[k] = text[c];\n            c++;\n            k++;\n        }\n        if (text[c] == '\\n') {\n            c++;\n        }\n        line[k] = '\\0';\n        p = strtok(line, whitespace);\n        site = atoi(p);\n        CU_ASSERT_FATAL(p != NULL);\n        p = strtok(NULL, whitespace);\n        CU_ASSERT_FATAL(p != NULL);\n        node = atoi(p);\n        p = strtok(NULL, whitespace);\n        CU_ASSERT_FATAL(p != NULL);\n        strncpy(derived_state, p, MAX_LINE);\n        parent = TSK_NULL;\n        p = strtok(NULL, whitespace);\n        if (p != NULL) {\n            parent = atoi(p);\n        }\n        time = TSK_UNKNOWN_TIME;\n        p = strtok(NULL, whitespace);\n        if (p != NULL) {\n            time = atof(p);\n        }\n        ret_id = tsk_mutation_table_add_row(mutation_table, site, node, parent, time,\n            derived_state, strlen(derived_state), NULL, 0);\n        CU_ASSERT_FATAL(ret_id >= 0);\n    }\n}\n\nvoid\nparse_individuals(const char *text, tsk_individual_table_t *individual_table)\n{\n    tsk_id_t ret_id;\n    size_t c, k;\n    size_t MAX_LINE = 1024;\n    char line[MAX_LINE];\n    char sub_line[MAX_LINE];\n    const char *whitespace = \" \\t\";\n    char *p, *q;\n    char *p_cont, *q_cont; // re-entrant pointers for strtok_r\n    double location[MAX_LINE];\n    int location_len;\n    tsk_id_t parents[MAX_LINE];\n    int parents_len;\n    int 
flags;\n    char *name;\n\n    c = 0;\n    while (text[c] != '\\0') {\n        /* Fill in the line */\n        k = 0;\n        while (text[c] != '\\n' && text[c] != '\\0') {\n            CU_ASSERT_FATAL(k < MAX_LINE - 1);\n            line[k] = text[c];\n            c++;\n            k++;\n        }\n        if (text[c] == '\\n') {\n            c++;\n        }\n        line[k] = '\\0';\n        p = strtok_r(line, whitespace, &p_cont);\n        CU_ASSERT_FATAL(p != NULL);\n        flags = atoi(p);\n\n        p = strtok_r(NULL, whitespace, &p_cont);\n        CU_ASSERT_FATAL(p != NULL);\n        // the locations are comma-separated\n        location_len = 1;\n        q = p;\n        while (*q != '\\0') {\n            if (*q == ',') {\n                location_len++;\n            }\n            q++;\n        }\n        CU_ASSERT_FATAL(location_len >= 1);\n        strncpy(sub_line, p, MAX_LINE);\n        q = strtok_r(sub_line, \",\", &q_cont);\n        for (k = 0; k < location_len; k++) {\n            CU_ASSERT_FATAL(q != NULL);\n            location[k] = atof(q);\n            q = strtok_r(NULL, \",\", &q_cont);\n        }\n        CU_ASSERT_FATAL(q == NULL);\n\n        /* parents and name are optional */\n        p = strtok_r(NULL, whitespace, &p_cont);\n        parents_len = 0;\n        name = \"\";\n        if (p != NULL) {\n            // the parents are comma-separated\n            parents_len = 1;\n            q = p;\n            while (*q != '\\0') {\n                if (*q == ',') {\n                    parents_len++;\n                }\n                q++;\n            }\n            CU_ASSERT_FATAL(parents_len >= 1);\n            strncpy(sub_line, p, MAX_LINE);\n            q = strtok_r(sub_line, \",\", &q_cont);\n            for (k = 0; k < parents_len; k++) {\n                CU_ASSERT_FATAL(q != NULL);\n                parents[k] = atoi(q);\n                q = strtok_r(NULL, \",\", &q_cont);\n            }\n            CU_ASSERT_FATAL(q == NULL);\n        
    p = strtok_r(NULL, whitespace, &p_cont);\n            if (p != NULL) {\n                name = p;\n            }\n        }\n        ret_id = tsk_individual_table_add_row(individual_table, flags, location,\n            location_len, parents, parents_len, name, strlen(name));\n        CU_ASSERT_FATAL(ret_id >= 0);\n    }\n}\n\nvoid\ntsk_treeseq_from_text(tsk_treeseq_t *ts, double sequence_length, const char *nodes,\n    const char *edges, const char *migrations, const char *sites, const char *mutations,\n    const char *individuals, const char *provenance, tsk_flags_t tc_options)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_table_collection_t tables;\n    tsk_id_t max_population_id;\n    tsk_size_t j;\n    tsk_flags_t ts_flags;\n    bool all_parents_null;\n\n    CU_ASSERT_FATAL(ts != NULL);\n    CU_ASSERT_FATAL(nodes != NULL);\n    CU_ASSERT_FATAL(edges != NULL);\n    /* Not supporting provenance here for now */\n    CU_ASSERT_FATAL(provenance == NULL);\n\n    ret = tsk_table_collection_init(&tables, tc_options);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tables.sequence_length = sequence_length;\n    parse_nodes(nodes, &tables.nodes);\n    parse_edges(edges, &tables.edges);\n    if (sites != NULL) {\n        parse_sites(sites, &tables.sites);\n    }\n    if (mutations != NULL) {\n        parse_mutations(mutations, &tables.mutations);\n    }\n    if (individuals != NULL) {\n        parse_individuals(individuals, &tables.individuals);\n    }\n    if (migrations != NULL) {\n        parse_migrations(migrations, &tables.migrations);\n    }\n    /* We need to add in populations if they are referenced */\n    max_population_id = -1;\n    for (j = 0; j < tables.nodes.num_rows; j++) {\n        max_population_id = TSK_MAX(max_population_id, tables.nodes.population[j]);\n    }\n    for (j = 0; j < tables.migrations.num_rows; j++) {\n        max_population_id = TSK_MAX(max_population_id, tables.migrations.source[j]);\n        max_population_id = TSK_MAX(max_population_id, 
tables.migrations.dest[j]);\n    }\n    if (max_population_id >= 0) {\n        for (j = 0; j <= (tsk_size_t) max_population_id; j++) {\n            ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);\n            CU_ASSERT_EQUAL_FATAL(ret_id, j);\n        }\n    }\n\n    /* If all mutation.parent are TSK_NULL, use TSK_TS_COMPUTE_MUTATION_PARENTS flag too\n     */\n    ts_flags = TSK_TS_INIT_BUILD_INDEXES;\n    all_parents_null = true;\n    for (j = 0; j < tables.mutations.num_rows; j++) {\n        if (tables.mutations.parent[j] != TSK_NULL) {\n            all_parents_null = false;\n            break;\n        }\n    }\n    if (all_parents_null) {\n        ts_flags |= TSK_TS_INIT_COMPUTE_MUTATION_PARENTS;\n    }\n\n    ret = tsk_treeseq_init(ts, &tables, ts_flags);\n    /* tsk_treeseq_print_state(ts, stdout); */\n    if (ret != 0) {\n        printf(\"\\nret = %s\\n\", tsk_strerror(ret));\n    }\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_table_collection_free(&tables);\n}\n\n/* Returns a tree sequence consisting of a single tree with n samples. 
This\n * is a full example of the data model, with values included for all fields.\n */\ntsk_treeseq_t *\ncaterpillar_tree(tsk_size_t n, tsk_size_t num_sites, tsk_size_t num_mutations)\n{\n    int ret;\n    tsk_id_t ret_id;\n    tsk_treeseq_t *ts = tsk_malloc(sizeof(tsk_treeseq_t));\n    tsk_table_collection_t tables;\n    tsk_id_t j, k, last_node, u;\n    int state, m;\n    double position[2];\n    tsk_id_t parents[2] = { -1, -1 };\n    const char *states[] = { \"0\", \"1\" };\n    const char *metadata[] = { \"This\", \"is\", \"some\", \"metadata\" };\n    const int num_metadatas = sizeof(metadata) / sizeof(*metadata);\n    const char *metadata_schema = \"mock metadata schema\";\n    const char *ts_metadata = \"This is a caterpillar tree\";\n    const char *ts_metadata_schema = \"The metadata is an example\";\n    const char *prov_timestamp = \"a timestamp, should be ISO8601\";\n    const char *prov_record = \"Produced by caterpillar_tree for testing purposes\";\n\n    CU_ASSERT_FATAL(ts != NULL);\n    ret = tsk_table_collection_init(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n\n    CU_ASSERT_FATAL(num_sites > 0 && num_mutations < n - 1);\n\n    tables.sequence_length = 1.0;\n\n    tsk_table_collection_set_metadata(&tables, ts_metadata, strlen(ts_metadata));\n    tsk_table_collection_set_metadata_schema(\n        &tables, ts_metadata_schema, strlen(ts_metadata_schema));\n    tsk_reference_sequence_set_metadata_schema(\n        &tables.reference_sequence, ts_metadata_schema, strlen(ts_metadata_schema));\n    tsk_reference_sequence_set_metadata(\n        &tables.reference_sequence, ts_metadata, strlen(ts_metadata));\n    tsk_reference_sequence_set_data(&tables.reference_sequence, \"A\", 1);\n    tsk_reference_sequence_set_url(&tables.reference_sequence, \"B\", 1);\n\n    tsk_population_table_set_metadata_schema(\n        &tables.populations, metadata_schema, strlen(metadata_schema));\n    tsk_individual_table_set_metadata_schema(\n        &tables.individuals, 
metadata_schema, strlen(metadata_schema));\n    tsk_node_table_set_metadata_schema(\n        &tables.nodes, metadata_schema, strlen(metadata_schema));\n    tsk_edge_table_set_metadata_schema(\n        &tables.edges, metadata_schema, strlen(metadata_schema));\n    tsk_site_table_set_metadata_schema(\n        &tables.sites, metadata_schema, strlen(metadata_schema));\n    tsk_mutation_table_set_metadata_schema(\n        &tables.mutations, metadata_schema, strlen(metadata_schema));\n    tsk_migration_table_set_metadata_schema(\n        &tables.migrations, metadata_schema, strlen(metadata_schema));\n\n    for (j = 0; j < (tsk_id_t) n; j++) {\n        position[0] = j;\n        position[1] = j;\n        m = j % num_metadatas;\n        ret_id = tsk_population_table_add_row(\n            &tables.populations, metadata[m], strlen(metadata[m]));\n        CU_ASSERT_EQUAL_FATAL(ret_id, j);\n        ret_id = tsk_individual_table_add_row(&tables.individuals, 0, position, 2,\n            parents, 2, metadata[m], strlen(metadata[m]));\n        CU_ASSERT_EQUAL_FATAL(ret_id, j);\n        ret_id = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0, j, j,\n            metadata[m], strlen(metadata[m]));\n        CU_ASSERT_EQUAL_FATAL(ret_id, j);\n    }\n    last_node = 0;\n    for (j = 0; j < n - 1; j++) {\n        m = j % num_metadatas;\n        ret_id = tsk_node_table_add_row(\n            &tables.nodes, 0, j + 1, j % n, TSK_NULL, metadata[m], strlen(metadata[m]));\n        CU_ASSERT_FATAL(ret_id >= 0);\n        u = ret_id;\n        ret_id = tsk_edge_table_add_row(\n            &tables.edges, 0, 1, u, last_node, metadata[m], strlen(metadata[m]));\n        CU_ASSERT_FATAL(ret_id >= 0);\n        ret_id = tsk_edge_table_add_row(\n            &tables.edges, 0, 1, u, j + 1, metadata[m], strlen(metadata[m]));\n        CU_ASSERT_FATAL(ret_id >= 0);\n        last_node = u;\n    }\n    for (j = 0; j < num_sites; j++) {\n        m = j % num_metadatas;\n        ret_id = 
tsk_site_table_add_row(&tables.sites, (j + 1) / (double) n, states[0],\n            strlen(states[0]), metadata[m], strlen(metadata[m]));\n        CU_ASSERT_FATAL(ret_id >= 0);\n        u = 2 * n - 3;\n        state = 0;\n        for (k = 0; k < num_mutations; k++) {\n            m = k % num_metadatas;\n            state = (state + 1) % 2;\n            ret_id = tsk_mutation_table_add_row(&tables.mutations, j, u, TSK_NULL,\n                tables.nodes.time[u], states[state], strlen(states[state]), metadata[m],\n                strlen(metadata[m]));\n            CU_ASSERT_FATAL(ret_id >= 0);\n            u--;\n        }\n    }\n    ret_id = tsk_provenance_table_add_row(&tables.provenances, prov_timestamp,\n        strlen(prov_timestamp), prov_record, strlen(prov_record));\n    CU_ASSERT_EQUAL_FATAL(ret_id, 0);\n\n    /* TODO make these consistent with the caterpillar tree topology. */\n    for (j = 0; j < n - 1; j++) {\n        m = j % num_metadatas;\n        ret_id = tsk_migration_table_add_row(&tables.migrations, 0, 1, j, j, j + 1,\n            j + 1.5, metadata[m], strlen(metadata[m]));\n        CU_ASSERT_FATAL(ret_id >= 0);\n    }\n\n    ret = tsk_table_collection_sort(&tables, 0, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_build_index(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_table_collection_compute_mutation_parents(&tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    ret = tsk_treeseq_init(ts, &tables, 0);\n    CU_ASSERT_EQUAL_FATAL(ret, 0);\n    tsk_table_collection_free(&tables);\n    return ts;\n}\n\nvoid\nunsort_edges(tsk_edge_table_t *edges, size_t start)\n{\n    size_t j, k;\n    size_t n = edges->num_rows - start;\n    tsk_edge_t *buff = tsk_malloc(n * sizeof(tsk_edge_t));\n    CU_ASSERT_FATAL(buff != NULL);\n\n    for (j = 0; j < n; j++) {\n        k = start + j;\n        buff[j].left = edges->left[k];\n        buff[j].right = edges->right[k];\n        buff[j].parent = edges->parent[k];\n        
buff[j].child = edges->child[k];\n    }\n    for (j = 0; j < n; j++) {\n        k = start + j;\n        edges->left[k] = buff[n - j - 1].left;\n        edges->right[k] = buff[n - j - 1].right;\n        edges->parent[k] = buff[n - j - 1].parent;\n        edges->child[k] = buff[n - j - 1].child;\n    }\n    free(buff);\n}\n\nstatic int\ntskit_suite_init(void)\n{\n    int fd = -1;\n    static char template[] = \"/tmp/tsk_c_test_XXXXXX\";\n\n    _tmp_file_name = NULL;\n    _devnull = NULL;\n\n    _tmp_file_name = tsk_malloc(sizeof(template));\n    if (_tmp_file_name == NULL) {\n        return CUE_NOMEMORY;\n    }\n    strcpy(_tmp_file_name, template);\n    fd = mkstemp(_tmp_file_name);\n    if (fd == -1) {\n        return CUE_SINIT_FAILED;\n    }\n    close(fd);\n    _devnull = fopen(\"/dev/null\", \"w\");\n    if (_devnull == NULL) {\n        return CUE_SINIT_FAILED;\n    }\n    return CUE_SUCCESS;\n}\n\nstatic int\ntskit_suite_cleanup(void)\n{\n    if (_tmp_file_name != NULL) {\n        unlink(_tmp_file_name);\n        free(_tmp_file_name);\n    }\n    if (_devnull != NULL) {\n        fclose(_devnull);\n    }\n    return CUE_SUCCESS;\n}\n\nstatic void\nhandle_cunit_error(void)\n{\n    fprintf(stderr, \"CUnit error occured: %d: %s\\n\", CU_get_error(), CU_get_error_msg());\n    exit(EXIT_FAILURE);\n}\n\nint\ntest_main(CU_TestInfo *tests, int argc, char **argv)\n{\n    int ret;\n    CU_pTest test;\n    CU_pSuite suite;\n    CU_SuiteInfo suites[] = {\n        {\n            .pName = \"tskit\",\n            .pInitFunc = tskit_suite_init,\n            .pCleanupFunc = tskit_suite_cleanup,\n            .pTests = tests,\n        },\n        CU_SUITE_INFO_NULL,\n    };\n    if (CUE_SUCCESS != CU_initialize_registry()) {\n        handle_cunit_error();\n    }\n    if (CUE_SUCCESS != CU_register_suites(suites)) {\n        handle_cunit_error();\n    }\n    CU_basic_set_mode(CU_BRM_VERBOSE);\n\n    if (argc == 1) {\n        CU_basic_run_tests();\n    } else if (argc == 2) {\n      
  suite = CU_get_suite_by_name(\"tskit\", CU_get_registry());\n        if (suite == NULL) {\n            printf(\"Suite not found\\n\");\n            return EXIT_FAILURE;\n        }\n        test = CU_get_test_by_name(argv[1], suite);\n        if (test == NULL) {\n            printf(\"Test '%s' not found\\n\", argv[1]);\n            return EXIT_FAILURE;\n        }\n        CU_basic_run_test(suite, test);\n    } else {\n        printf(\"usage: %s <test_name>\\n\", argv[0]);\n        return EXIT_FAILURE;\n    }\n\n    ret = EXIT_SUCCESS;\n    if (CU_get_number_of_tests_failed() != 0) {\n        printf(\"Test failed!\\n\");\n        ret = EXIT_FAILURE;\n    }\n    CU_cleanup_registry();\n    return ret;\n}\n"
  },
  {
    "path": "c/tests/testlib.h",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2024 Tskit Developers\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n#ifndef __TESTLIB_H__\n#define __TESTLIB_H__\n\n#define _GNU_SOURCE\n#include <stdio.h>\n#include <unistd.h>\n#include <stdlib.h>\n\n#include <CUnit/Basic.h>\n#include <tskit/trees.h>\n\n/* Global variables used in the test suite */\n\nextern char *_tmp_file_name;\nextern FILE *_devnull;\n\nint test_main(CU_TestInfo *tests, int argc, char **argv);\n\nvoid tsk_treeseq_from_text(tsk_treeseq_t *ts, double sequence_length, const char *nodes,\n    const char *edges, const char *migrations, const char *sites, const char *mutations,\n    const char *individuals, const char *provenance, tsk_flags_t tc_options);\ntsk_treeseq_t *caterpillar_tree(\n    tsk_size_t num_samples, tsk_size_t num_sites, tsk_size_t num_mutations);\n\nvoid parse_nodes(const char *text, tsk_node_table_t *node_table);\nvoid 
parse_edges(const char *text, tsk_edge_table_t *edge_table);\nvoid parse_sites(const char *text, tsk_site_table_t *site_table);\nvoid parse_mutations(const char *text, tsk_mutation_table_t *mutation_table);\nvoid parse_individuals(const char *text, tsk_individual_table_t *individual_table);\n\nvoid unsort_edges(tsk_edge_table_t *edges, size_t start);\n\n/* Use a macro so we can get line numbers at roughly the right place */\n#define assert_arrays_almost_equal(len, a, b)                                           \\\n    {                                                                                   \\\n        do {                                                                            \\\n            tsk_size_t _j;                                                              \\\n            for (_j = 0; _j < len; _j++) {                                              \\\n                CU_ASSERT_DOUBLE_EQUAL(a[_j], b[_j], 1e-9);                             \\\n            }                                                                           \\\n        } while (0);                                                                    \\\n    }\n\n#define assert_arrays_equal(len, a, b)                                                  \\\n    {                                                                                   \\\n        do {                                                                            \\\n            tsk_size_t _j;                                                              \\\n            for (_j = 0; _j < len; _j++) {                                              \\\n                CU_ASSERT_EQUAL(a[_j], b[_j]);                                          \\\n            }                                                                           \\\n        } while (0);                                                                    \\\n    }\n\n/* Array equality if the arrays contain NaN values\n   NB: the float cast for 
NaNs is for mingw, which complains without */\n#define assert_arrays_almost_equal_nan(len, a, b)                                       \\\n    {                                                                                   \\\n        do {                                                                            \\\n            tsk_size_t _j;                                                              \\\n            for (_j = 0; _j < len; _j++) {                                              \\\n                if (isnan((float) a[_j]) || isnan((float) b[_j])) {                     \\\n                    CU_ASSERT_EQUAL_FATAL(isnan((float) a[_j]), isnan((float) b[_j]));  \\\n                } else {                                                                \\\n                    CU_ASSERT_DOUBLE_EQUAL(a[_j], b[_j], 1e-9);                         \\\n                }                                                                       \\\n            }                                                                           \\\n        } while (0);                                                                    \\\n    }\n\nextern const char *single_tree_ex_nodes;\nextern const char *single_tree_ex_edges;\nextern const char *single_tree_ex_sites;\nextern const char *single_tree_ex_mutations;\n\nextern const char *multiple_tree_ex_nodes;\nextern const char *multiple_tree_ex_edges;\n\nextern const char *odd_tree1_ex_nodes;\nextern const char *odd_tree1_ex_edges;\n\nextern const char *multi_root_tree_ex_nodes;\nextern const char *multi_root_tree_ex_edges;\n\nextern const char *multi_path_tree_ex_nodes;\nextern const char *multi_path_tree_ex_edges;\n\nextern const char *nonbinary_ex_nodes;\nextern const char *nonbinary_ex_edges;\nextern const char *nonbinary_ex_sites;\nextern const char *nonbinary_ex_mutations;\n\nextern const char *unary_ex_nodes;\nextern const char *unary_ex_edges;\nextern const char *unary_ex_sites;\nextern const char 
*unary_ex_mutations;\n\nextern const char *internal_sample_ex_nodes;\nextern const char *internal_sample_ex_edges;\nextern const char *internal_sample_ex_sites;\nextern const char *internal_sample_ex_mutations;\n\nextern const char *multiroot_ex_nodes;\nextern const char *multiroot_ex_edges;\nextern const char *multiroot_ex_sites;\nextern const char *multiroot_ex_mutations;\n\nextern const char *empty_ex_nodes;\nextern const char *empty_ex_edges;\n\nextern const char *paper_ex_nodes;\nextern const char *paper_ex_edges;\nextern const char *paper_ex_sites;\nextern const char *paper_ex_mutations;\nextern const char *paper_ex_individuals;\n\nextern const char *missing_ex_nodes;\nextern const char *missing_ex_edges;\n\n#endif\n"
  },
  {
    "path": "c/tskit/convert.c",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2018-2025 Tskit Developers\n * Copyright (c) 2015-2017 University of Oxford\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n#include <stdio.h>\n#include <string.h>\n#include <stdbool.h>\n#include <stdlib.h>\n#include <math.h>\n\n#include <tskit/convert.h>\n\n/* ======================================================== *\n * Newick output.\n * ======================================================== */\n\n/* This infrastructure is left-over from an earlier more complex version\n * of this algorithm that worked over a tree sequence and cached the newick\n * subtrees, updating according to diffs. It's unclear whether this complexity\n * was of any real-world use, since newick output for large trees is pretty\n * pointless. 
*/\n\ntypedef struct {\n    unsigned int precision;\n    tsk_flags_t options;\n    char *newick;\n    tsk_id_t *traversal_stack;\n    const tsk_tree_t *tree;\n} tsk_newick_converter_t;\n\nstatic int\ntsk_newick_converter_run(\n    tsk_newick_converter_t *self, tsk_id_t root, size_t buffer_size, char *buffer)\n{\n    int ret = TSK_ERR_GENERIC;\n    const tsk_tree_t *tree = self->tree;\n    tsk_id_t *stack = self->traversal_stack;\n    const double *time = self->tree->tree_sequence->tables->nodes.time;\n    const tsk_flags_t *flags = self->tree->tree_sequence->tables->nodes.flags;\n    int stack_top = 0;\n    int label;\n    size_t s = 0;\n    int r;\n    tsk_id_t u, v, w, root_parent;\n    double branch_length;\n    bool ms_labels = self->options & TSK_NEWICK_LEGACY_MS_LABELS;\n    const char *label_format = ms_labels ? \"%d\" : \"n%d\";\n\n    if (root < 0 || root >= (tsk_id_t) self->tree->num_nodes) {\n        ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);\n        goto out;\n    }\n    if (buffer == NULL) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    root_parent = tree->parent[root];\n    stack[0] = root;\n    u = root_parent;\n    while (stack_top >= 0) {\n        v = stack[stack_top];\n        if (tree->left_child[v] != TSK_NULL && v != u) {\n            if (s >= buffer_size) {\n                ret = tsk_trace_error(TSK_ERR_BUFFER_OVERFLOW);\n                goto out;\n            }\n            buffer[s] = '(';\n            s++;\n            for (w = tree->right_child[v]; w != TSK_NULL; w = tree->left_sib[w]) {\n                stack_top++;\n                stack[stack_top] = w;\n            }\n        } else {\n            u = tree->parent[v];\n            stack_top--;\n            label = -1;\n            if (ms_labels) {\n                if (tree->left_child[v] == TSK_NULL) {\n                    label = (int) v + 1;\n                }\n            } else if (flags[v] & TSK_NODE_IS_SAMPLE) {\n                
label = (int) v;\n            }\n            if (label != -1) {\n                if (s >= buffer_size) {\n                    ret = tsk_trace_error(TSK_ERR_BUFFER_OVERFLOW);\n                    goto out;\n                }\n                r = snprintf(buffer + s, buffer_size - s, label_format, label);\n                if (r < 0) {\n                    ret = tsk_trace_error(TSK_ERR_IO);\n                    goto out;\n                }\n                s += (size_t) r;\n                if (s >= buffer_size) {\n                    ret = tsk_trace_error(TSK_ERR_BUFFER_OVERFLOW);\n                    goto out;\n                }\n            }\n            if (u != root_parent) {\n                branch_length = (time[u] - time[v]);\n                r = snprintf(buffer + s, buffer_size - s, \":%.*f\", (int) self->precision,\n                    branch_length);\n                if (r < 0) {\n                    ret = tsk_trace_error(TSK_ERR_IO);\n                    goto out;\n                }\n                s += (size_t) r;\n                if (s >= buffer_size) {\n                    ret = tsk_trace_error(TSK_ERR_BUFFER_OVERFLOW);\n                    goto out;\n                }\n                if (v == tree->right_child[u]) {\n                    buffer[s] = ')';\n                } else {\n                    buffer[s] = ',';\n                }\n                s++;\n            }\n        }\n    }\n    if ((s + 1) >= buffer_size) {\n        ret = tsk_trace_error(TSK_ERR_BUFFER_OVERFLOW);\n        goto out;\n    }\n    buffer[s] = ';';\n    buffer[s + 1] = '\\0';\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic int\ntsk_newick_converter_init(tsk_newick_converter_t *self, const tsk_tree_t *tree,\n    unsigned int precision, tsk_flags_t options)\n{\n    int ret = 0;\n\n    tsk_memset(self, 0, sizeof(tsk_newick_converter_t));\n    self->precision = precision;\n    self->options = options;\n    self->tree = tree;\n    self->traversal_stack\n        = 
tsk_malloc(tsk_tree_get_size_bound(tree) * sizeof(*self->traversal_stack));\n    if (self->traversal_stack == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\nout:\n    return ret;\n}\n\nstatic int\ntsk_newick_converter_free(tsk_newick_converter_t *self)\n{\n    tsk_safe_free(self->traversal_stack);\n    return 0;\n}\n\nint\ntsk_convert_newick(const tsk_tree_t *tree, tsk_id_t root, unsigned int precision,\n    tsk_flags_t options, size_t buffer_size, char *buffer)\n{\n    int ret = 0;\n    tsk_newick_converter_t nc;\n\n    ret = tsk_newick_converter_init(&nc, tree, precision, options);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_newick_converter_run(&nc, root, buffer_size, buffer);\nout:\n    tsk_newick_converter_free(&nc);\n    return ret;\n}\n"
  },
  {
    "path": "c/tskit/convert.h",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2018-2021 Tskit Developers\n * Copyright (c) 2015-2017 University of Oxford\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n#ifndef TSK_CONVERT_H\n#define TSK_CONVERT_H\n\n#ifdef __cplusplus\nextern \"C\" {\n#endif\n\n#include <tskit/trees.h>\n\n#define TSK_NEWICK_LEGACY_MS_LABELS (1 << 0)\n\nint tsk_convert_newick(const tsk_tree_t *tree, tsk_id_t root, unsigned int precision,\n    tsk_flags_t options, size_t buffer_size, char *buffer);\n\n#ifdef __cplusplus\n}\n#endif\n#endif\n"
  },
  {
    "path": "c/tskit/core.c",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2025 Tskit Developers\n * Copyright (c) 2015-2018 University of Oxford\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <errno.h>\n#include <math.h>\n\n#include <kastore.h>\n#include <tskit/core.h>\n\n#define UUID_NUM_BYTES 16\n\n#if defined(_WIN32)\n\n#include <windows.h>\n#include <wincrypt.h>\n\nstatic int TSK_WARN_UNUSED\nget_random_bytes(uint8_t *buf)\n{\n    /* Based on CPython's code in bootstrap_hash.c */\n    int ret = 0;\n    HCRYPTPROV hCryptProv = (HCRYPTPROV) NULL;\n\n    if (!CryptAcquireContext(\n            &hCryptProv, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) {\n        ret = tsk_trace_error(TSK_ERR_GENERATE_UUID);\n        goto out;\n    }\n    if (!CryptGenRandom(hCryptProv, (DWORD) UUID_NUM_BYTES, buf)) {\n        ret = tsk_trace_error(TSK_ERR_GENERATE_UUID);\n        
goto out;\n    }\n    if (!CryptReleaseContext(hCryptProv, 0)) {\n        hCryptProv = (HCRYPTPROV) NULL;\n        ret = tsk_trace_error(TSK_ERR_GENERATE_UUID);\n        goto out;\n    }\n    hCryptProv = (HCRYPTPROV) NULL;\nout:\n    if (hCryptProv != (HCRYPTPROV) NULL) {\n        CryptReleaseContext(hCryptProv, 0);\n    }\n    return ret;\n}\n\n#else\n\n/* Assuming the existence of /dev/urandom on Unix platforms */\nstatic int TSK_WARN_UNUSED\nget_random_bytes(uint8_t *buf)\n{\n    int ret = 0;\n    FILE *f = fopen(\"/dev/urandom\", \"r\");\n\n    if (f == NULL) {\n        ret = tsk_trace_error(TSK_ERR_GENERATE_UUID);\n        goto out;\n    }\n    if (fread(buf, UUID_NUM_BYTES, 1, f) != 1) {\n        ret = tsk_trace_error(TSK_ERR_GENERATE_UUID);\n        goto out;\n    }\n    if (fclose(f) != 0) {\n        ret = tsk_trace_error(TSK_ERR_GENERATE_UUID);\n        goto out;\n    }\nout:\n    return ret;\n}\n\n#endif\n\n/* Generate a new UUID4 using a system-generated source of randomness.\n * Note that this function writes a NULL terminator to the end of this\n * string, so that the total length of the buffer must be 37 bytes.\n */\nint\ntsk_generate_uuid(char *dest, int TSK_UNUSED(flags))\n{\n    int ret = 0;\n    uint8_t buf[UUID_NUM_BYTES];\n    const char *pattern\n        = \"%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x\";\n\n    ret = get_random_bytes(buf);\n    if (ret != 0) {\n        goto out;\n    }\n    if (snprintf(dest, TSK_UUID_SIZE + 1, pattern, buf[0], buf[1], buf[2], buf[3],\n            buf[4], buf[5], buf[6], buf[7], buf[8], buf[9], buf[10], buf[11], buf[12],\n            buf[13], buf[14], buf[15])\n        < 0) {\n        ret = tsk_trace_error(TSK_ERR_GENERATE_UUID);\n        goto out;\n    }\nout:\n    return ret;\n}\n\nstatic const char *\ntsk_strerror_internal(int err)\n{\n    const char *ret = \"Unknown error\";\n\n    switch (err) {\n        case 0:\n            ret = \"Normal exit condition. 
This is not an error!\";\n            break;\n\n        /* General errors */\n        case TSK_ERR_GENERIC:\n            ret = \"Generic error; please file a bug report. (TSK_ERR_GENERIC)\";\n            break;\n        case TSK_ERR_NO_MEMORY:\n            ret = \"Out of memory. (TSK_ERR_NO_MEMORY)\";\n            break;\n        case TSK_ERR_IO:\n            if (errno != 0) {\n                ret = strerror(errno);\n            } else {\n                ret = \"Unspecified IO error\";\n            }\n            break;\n        case TSK_ERR_BAD_PARAM_VALUE:\n            ret = \"Bad parameter value provided. (TSK_ERR_BAD_PARAM_VALUE)\";\n            break;\n        case TSK_ERR_BUFFER_OVERFLOW:\n            ret = \"Supplied buffer is too small. (TSK_ERR_BUFFER_OVERFLOW)\";\n            break;\n        case TSK_ERR_UNSUPPORTED_OPERATION:\n            ret = \"Operation cannot be performed in current configuration. \"\n                  \"(TSK_ERR_UNSUPPORTED_OPERATION)\";\n            break;\n        case TSK_ERR_GENERATE_UUID:\n            ret = \"Error generating UUID. (TSK_ERR_GENERATE_UUID)\";\n            break;\n        case TSK_ERR_EOF:\n            ret = \"End of file. (TSK_ERR_EOF)\";\n            break;\n\n        /* File format errors */\n        case TSK_ERR_FILE_FORMAT:\n            ret = \"File format error. (TSK_ERR_FILE_FORMAT)\";\n            break;\n        case TSK_ERR_FILE_VERSION_TOO_OLD:\n            ret = \"tskit file version too old. Please upgrade using the \"\n                  \"'tskit upgrade' command from tskit version<0.6.2. \"\n                  \"(TSK_ERR_FILE_VERSION_TOO_OLD)\";\n            break;\n        case TSK_ERR_FILE_VERSION_TOO_NEW:\n            ret = \"tskit file version is too new for this instance. \"\n                  \"Please upgrade tskit to the latest version. 
\"\n                  \"(TSK_ERR_FILE_VERSION_TOO_NEW)\";\n            break;\n        case TSK_ERR_REQUIRED_COL_NOT_FOUND:\n            ret = \"A required column was not found in the file. \"\n                  \"(TSK_ERR_REQUIRED_COL_NOT_FOUND)\";\n            break;\n        case TSK_ERR_BOTH_COLUMNS_REQUIRED:\n            ret = \"Both columns in a related pair must be provided. \"\n                  \"(TSK_ERR_BOTH_COLUMNS_REQUIRED)\";\n            break;\n        case TSK_ERR_BAD_COLUMN_TYPE:\n            ret = \"An incompatible type for a column was found in the file. \"\n                  \"(TSK_ERR_BAD_COLUMN_TYPE)\";\n            break;\n\n        /* Out of bounds errors */\n        case TSK_ERR_BAD_OFFSET:\n            ret = \"Bad offset provided in input array. (TSK_ERR_BAD_OFFSET)\";\n            break;\n        case TSK_ERR_NODE_OUT_OF_BOUNDS:\n            ret = \"Node out of bounds. (TSK_ERR_NODE_OUT_OF_BOUNDS)\";\n            break;\n        case TSK_ERR_EDGE_OUT_OF_BOUNDS:\n            ret = \"Edge out of bounds. (TSK_ERR_EDGE_OUT_OF_BOUNDS)\";\n            break;\n        case TSK_ERR_POPULATION_OUT_OF_BOUNDS:\n            ret = \"Population out of bounds. (TSK_ERR_POPULATION_OUT_OF_BOUNDS)\";\n            break;\n        case TSK_ERR_SITE_OUT_OF_BOUNDS:\n            ret = \"Site out of bounds. (TSK_ERR_SITE_OUT_OF_BOUNDS)\";\n            break;\n        case TSK_ERR_MUTATION_OUT_OF_BOUNDS:\n            ret = \"Mutation out of bounds. (TSK_ERR_MUTATION_OUT_OF_BOUNDS)\";\n            break;\n        case TSK_ERR_MIGRATION_OUT_OF_BOUNDS:\n            ret = \"Migration out of bounds. (TSK_ERR_MIGRATION_OUT_OF_BOUNDS)\";\n            break;\n        case TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS:\n            ret = \"Individual out of bounds. (TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS)\";\n            break;\n        case TSK_ERR_PROVENANCE_OUT_OF_BOUNDS:\n            ret = \"Provenance out of bounds. 
(TSK_ERR_PROVENANCE_OUT_OF_BOUNDS)\";\n            break;\n        case TSK_ERR_TIME_NONFINITE:\n            ret = \"Times must be finite. (TSK_ERR_TIME_NONFINITE)\";\n            break;\n        case TSK_ERR_GENOME_COORDS_NONFINITE:\n            ret = \"Genome coordinates must be finite numbers. \"\n                  \"(TSK_ERR_GENOME_COORDS_NONFINITE)\";\n            break;\n        case TSK_ERR_SEEK_OUT_OF_BOUNDS:\n            ret = \"Tree seek position out of bounds. (TSK_ERR_SEEK_OUT_OF_BOUNDS)\";\n            break;\n        case TSK_ERR_KEEP_ROWS_MAP_TO_DELETED:\n            ret = \"One of the kept rows in the table refers to a deleted row. \"\n                  \"(TSK_ERR_KEEP_ROWS_MAP_TO_DELETED)\";\n            break;\n        case TSK_ERR_POSITION_OUT_OF_BOUNDS:\n            ret = \"Position out of bounds. (TSK_ERR_POSITION_OUT_OF_BOUNDS)\";\n            break;\n\n        /* Edge errors */\n        case TSK_ERR_NULL_PARENT:\n            ret = \"Edge parent is null. (TSK_ERR_NULL_PARENT)\";\n            break;\n        case TSK_ERR_NULL_CHILD:\n            ret = \"Edge child is null. (TSK_ERR_NULL_CHILD)\";\n            break;\n        case TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME:\n            ret = \"Edges must be listed in (time[parent], child, left) order;\"\n                  \" time[parent] order violated. (TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME)\";\n            break;\n        case TSK_ERR_EDGES_NONCONTIGUOUS_PARENTS:\n            ret = \"All edges for a given parent must be contiguous. \"\n                  \"(TSK_ERR_EDGES_NONCONTIGUOUS_PARENTS)\";\n            break;\n        case TSK_ERR_EDGES_NOT_SORTED_CHILD:\n            ret = \"Edges must be listed in (time[parent], child, left) order;\"\n                  \" child order violated. 
(TSK_ERR_EDGES_NOT_SORTED_CHILD)\";\n            break;\n        case TSK_ERR_EDGES_NOT_SORTED_LEFT:\n            ret = \"Edges must be listed in (time[parent], child, left) order;\"\n                  \" left order violated. (TSK_ERR_EDGES_NOT_SORTED_LEFT)\";\n            break;\n        case TSK_ERR_BAD_NODE_TIME_ORDERING:\n            ret = \"time[parent] must be greater than time[child]. \"\n                  \"(TSK_ERR_BAD_NODE_TIME_ORDERING)\";\n            break;\n        case TSK_ERR_BAD_EDGE_INTERVAL:\n            ret = \"Bad edge interval where right <= left. (TSK_ERR_BAD_EDGE_INTERVAL)\";\n            break;\n        case TSK_ERR_DUPLICATE_EDGES:\n            ret = \"Duplicate edges provided. (TSK_ERR_DUPLICATE_EDGES)\";\n            break;\n        case TSK_ERR_RIGHT_GREATER_SEQ_LENGTH:\n            ret = \"Right coordinate > sequence length. \"\n                  \"(TSK_ERR_RIGHT_GREATER_SEQ_LENGTH)\";\n            break;\n        case TSK_ERR_LEFT_LESS_ZERO:\n            ret = \"Left coordinate must be >= 0. (TSK_ERR_LEFT_LESS_ZERO)\";\n            break;\n        case TSK_ERR_BAD_EDGES_CONTRADICTORY_CHILDREN:\n            ret = \"Bad edges: contradictory children for a given parent over \"\n                  \"an interval, or indexes need to be rebuilt. \"\n                  \"(TSK_ERR_BAD_EDGES_CONTRADICTORY_CHILDREN)\";\n            break;\n        case TSK_ERR_CANT_PROCESS_EDGES_WITH_METADATA:\n            ret = \"Can't squash, flush, simplify or link ancestors with edges that have \"\n                  \"non-empty metadata. Removing the metadata from the edges will allow \"\n                  \"these operations to proceed. For example using \"\n                  \"tables.edges.drop_metadata() in the tskit Python API. 
\"\n                  \"(TSK_ERR_CANT_PROCESS_EDGES_WITH_METADATA)\";\n            break;\n\n        /* Site errors */\n        case TSK_ERR_UNSORTED_SITES:\n            ret = \"Sites must be provided in strictly increasing position order. \"\n                  \"(TSK_ERR_UNSORTED_SITES)\";\n            break;\n        case TSK_ERR_DUPLICATE_SITE_POSITION:\n            ret = \"Duplicate site positions. (TSK_ERR_DUPLICATE_SITE_POSITION)\";\n            break;\n        case TSK_ERR_BAD_SITE_POSITION:\n            ret = \"Site positions must be between 0 and sequence_length. \"\n                  \"(TSK_ERR_BAD_SITE_POSITION)\";\n            break;\n\n        /* Mutation errors */\n        case TSK_ERR_MUTATION_PARENT_DIFFERENT_SITE:\n            ret = \"Specified parent mutation is at a different site. \"\n                  \"(TSK_ERR_MUTATION_PARENT_DIFFERENT_SITE)\";\n            break;\n        case TSK_ERR_MUTATION_PARENT_EQUAL:\n            ret = \"Parent mutation refers to itself. (TSK_ERR_MUTATION_PARENT_EQUAL)\";\n            break;\n        case TSK_ERR_MUTATION_PARENT_AFTER_CHILD:\n            ret = \"Parent mutation ID must be < current ID. \"\n                  \"(TSK_ERR_MUTATION_PARENT_AFTER_CHILD)\";\n            break;\n        case TSK_ERR_MUTATION_PARENT_INCONSISTENT:\n            ret = \"Mutation parent references form a loop. \"\n                  \"(TSK_ERR_MUTATION_PARENT_INCONSISTENT)\";\n            break;\n        case TSK_ERR_UNSORTED_MUTATIONS:\n            ret = \"Mutations must be provided in non-decreasing site order and \"\n                  \"non-increasing time order within each site. \"\n                  \"(TSK_ERR_UNSORTED_MUTATIONS)\";\n            break;\n        case TSK_ERR_MUTATION_TIME_YOUNGER_THAN_NODE:\n            ret = \"A mutation's time must be >= the node time, or be marked as \"\n                  \"'unknown'. 
(TSK_ERR_MUTATION_TIME_YOUNGER_THAN_NODE)\";\n            break;\n        case TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_MUTATION:\n            ret = \"A mutation's time must be <= the parent mutation time (if known), or \"\n                  \"be marked as 'unknown'. \"\n                  \"(TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_MUTATION)\";\n            break;\n        case TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_NODE:\n            ret = \"A mutation's time must be < the parent node of the edge on which it \"\n                  \"occurs, or be marked as 'unknown'. \"\n                  \"(TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_NODE)\";\n            break;\n        case TSK_ERR_MUTATION_TIME_HAS_BOTH_KNOWN_AND_UNKNOWN:\n            ret = \"Mutation times must either be all marked 'unknown', or all be known \"\n                  \"values for any single site. \"\n                  \"(TSK_ERR_MUTATION_TIME_HAS_BOTH_KNOWN_AND_UNKNOWN)\";\n            break;\n        case TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME:\n            ret = \"Some mutation times are marked 'unknown' for a method that requires \"\n                  \"no unknown times. (Use compute_mutation_times to add times?) \"\n                  \"(TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME)\";\n            break;\n\n        case TSK_ERR_BAD_MUTATION_PARENT:\n            ret = \"A mutation's parent is not consistent with the topology of the tree. \"\n                  \"Use compute_mutation_parents to set the parents correctly.\"\n                  \"(TSK_ERR_BAD_MUTATION_PARENT)\";\n            break;\n\n        /* Migration errors */\n        case TSK_ERR_UNSORTED_MIGRATIONS:\n            ret = \"Migrations must be sorted by time. (TSK_ERR_UNSORTED_MIGRATIONS)\";\n            break;\n\n        /* Sample errors */\n        case TSK_ERR_DUPLICATE_SAMPLE:\n            ret = \"Duplicate sample value. 
(TSK_ERR_DUPLICATE_SAMPLE)\";\n            break;\n        case TSK_ERR_BAD_SAMPLES:\n            ret = \"The nodes provided are not samples. (TSK_ERR_BAD_SAMPLES)\";\n            break;\n\n        /* Table errors */\n        case TSK_ERR_BAD_TABLE_POSITION:\n            ret = \"Bad table position provided to truncate/reset. \"\n                  \"(TSK_ERR_BAD_TABLE_POSITION)\";\n            break;\n        case TSK_ERR_BAD_SEQUENCE_LENGTH:\n            ret = \"Sequence length must be > 0. (TSK_ERR_BAD_SEQUENCE_LENGTH)\";\n            break;\n        case TSK_ERR_TABLES_NOT_INDEXED:\n            ret = \"Table collection must be indexed. (TSK_ERR_TABLES_NOT_INDEXED)\";\n            break;\n        case TSK_ERR_TABLES_BAD_INDEXES:\n            ret = \"Table collection indexes inconsistent: do they need to be rebuilt? \"\n                  \"(TSK_ERR_TABLES_BAD_INDEXES)\";\n            break;\n        case TSK_ERR_TABLE_OVERFLOW:\n            ret = \"Table too large; cannot allocate more than 2**31 rows. This error \"\n                  \"is often caused by a lack of simplification when simulating. \"\n                  \"(TSK_ERR_TABLE_OVERFLOW)\";\n            break;\n        case TSK_ERR_COLUMN_OVERFLOW:\n            ret = \"Table column too large; cannot be more than 2**64 bytes. \"\n                  \"(TSK_ERR_COLUMN_OVERFLOW)\";\n            break;\n        case TSK_ERR_TREE_OVERFLOW:\n            ret = \"Too many trees; cannot be more than 2**31. (TSK_ERR_TREE_OVERFLOW)\";\n            break;\n        case TSK_ERR_METADATA_DISABLED:\n            ret = \"Metadata is disabled for this table, so cannot be set. \"\n                  \"(TSK_ERR_METADATA_DISABLED)\";\n            break;\n\n        /* Limitations */\n        case TSK_ERR_ONLY_INFINITE_SITES:\n            ret = \"Only infinite sites mutations are supported for this operation, \"\n                  \"i.e. at most a single mutation per site. 
\"\n                  \"(TSK_ERR_ONLY_INFINITE_SITES)\";\n            break;\n        case TSK_ERR_SIMPLIFY_MIGRATIONS_NOT_SUPPORTED:\n            ret = \"Migrations not currently supported by simplify. \"\n                  \"(TSK_ERR_SIMPLIFY_MIGRATIONS_NOT_SUPPORTED)\";\n            break;\n        case TSK_ERR_SORT_MIGRATIONS_NOT_SUPPORTED:\n            ret = \"Migrations not currently supported by sort. \"\n                  \"(TSK_ERR_SORT_MIGRATIONS_NOT_SUPPORTED)\";\n            break;\n        case TSK_ERR_SORT_OFFSET_NOT_SUPPORTED:\n            ret = \"Sort offsets for sites and mutations must be either 0 \"\n                  \"or the length of the respective tables. Intermediate values \"\n                  \"are not supported. (TSK_ERR_SORT_OFFSET_NOT_SUPPORTED)\";\n            break;\n        case TSK_ERR_NONBINARY_MUTATIONS_UNSUPPORTED:\n            ret = \"Only binary mutations are supported for this operation. \"\n                  \"(TSK_ERR_NONBINARY_MUTATIONS_UNSUPPORTED)\";\n            break;\n        case TSK_ERR_MIGRATIONS_NOT_SUPPORTED:\n            ret = \"Migrations not currently supported by this operation. \"\n                  \"(TSK_ERR_MIGRATIONS_NOT_SUPPORTED)\";\n            break;\n        case TSK_ERR_CANNOT_EXTEND_FROM_SELF:\n            ret = \"Tables can only be extended using rows from a different table. \"\n                  \"(TSK_ERR_CANNOT_EXTEND_FROM_SELF)\";\n            break;\n        case TSK_ERR_SILENT_MUTATIONS_NOT_SUPPORTED:\n            ret = \"Silent mutations not supported by this operation. \"\n                  \"(TSK_ERR_SILENT_MUTATIONS_NOT_SUPPORTED)\";\n            break;\n        case TSK_ERR_VARIANT_CANT_DECODE_COPY:\n            ret = \"Can't decode a copy of a variant. (TSK_ERR_VARIANT_CANT_DECODE_COPY)\";\n            break;\n        case TSK_ERR_CANT_TAKE_OWNERSHIP_NO_EDGE_METADATA:\n            ret = \"A tree sequence can't take ownership of tables with \"\n                  \"TSK_NO_EDGE_METADATA. 
(TSK_ERR_CANT_TAKE_OWNERSHIP_NO_EDGE_METADATA)\";\n            break;\n        case TSK_ERR_UNDEFINED_NONBINARY:\n            ret = \"Operation undefined for nonbinary trees. \"\n                  \"(TSK_ERR_UNDEFINED_NONBINARY)\";\n            break;\n        case TSK_ERR_UNDEFINED_MULTIROOT:\n            ret = \"Operation undefined for trees that are not singly-rooted. \"\n                  \"(TSK_ERR_UNDEFINED_MULTIROOT)\";\n            break;\n\n        /* Stats errors */\n        case TSK_ERR_BAD_NUM_WINDOWS:\n            ret = \"Must have at least one window, [0, L]. (TSK_ERR_BAD_NUM_WINDOWS)\";\n            break;\n        case TSK_ERR_BAD_WINDOWS:\n            ret = \"Windows must be increasing list [0, ..., L]. (TSK_ERR_BAD_WINDOWS)\";\n            break;\n        case TSK_ERR_MULTIPLE_STAT_MODES:\n            ret = \"Cannot specify more than one stats mode. \"\n                  \"(TSK_ERR_MULTIPLE_STAT_MODES)\";\n            break;\n        case TSK_ERR_BAD_STATE_DIMS:\n            ret = \"Must have state dimension >= 1. (TSK_ERR_BAD_STATE_DIMS)\";\n            break;\n        case TSK_ERR_BAD_RESULT_DIMS:\n            ret = \"Must have result dimension >= 1. (TSK_ERR_BAD_RESULT_DIMS)\";\n            break;\n        case TSK_ERR_INSUFFICIENT_SAMPLE_SETS:\n            ret = \"Insufficient sample sets provided. \"\n                  \"(TSK_ERR_INSUFFICIENT_SAMPLE_SETS)\";\n            break;\n        case TSK_ERR_INSUFFICIENT_INDEX_TUPLES:\n            ret = \"Insufficient sample set index tuples provided. \"\n                  \"(TSK_ERR_INSUFFICIENT_INDEX_TUPLES)\";\n            break;\n        case TSK_ERR_BAD_SAMPLE_SET_INDEX:\n            ret = \"Sample set index out of bounds. (TSK_ERR_BAD_SAMPLE_SET_INDEX)\";\n            break;\n        case TSK_ERR_EMPTY_SAMPLE_SET:\n            ret = \"Samples cannot be empty. 
(TSK_ERR_EMPTY_SAMPLE_SET)\";\n            break;\n        case TSK_ERR_UNSUPPORTED_STAT_MODE:\n            ret = \"Requested statistics mode not supported for this method. \"\n                  \"(TSK_ERR_UNSUPPORTED_STAT_MODE)\";\n            break;\n        case TSK_ERR_TIME_UNCALIBRATED:\n            ret = \"Statistics using branch lengths cannot be calculated when time_units \"\n                  \"is 'uncalibrated'. (TSK_ERR_TIME_UNCALIBRATED)\";\n            break;\n        case TSK_ERR_STAT_POLARISED_UNSUPPORTED:\n            ret = \"The TSK_STAT_POLARISED option is not supported by this statistic. \"\n                  \"(TSK_ERR_STAT_POLARISED_UNSUPPORTED)\";\n            break;\n        case TSK_ERR_STAT_SPAN_NORMALISE_UNSUPPORTED:\n            ret = \"The TSK_STAT_SPAN_NORMALISE option is not supported by this \"\n                  \"statistic. \"\n                  \"(TSK_ERR_STAT_SPAN_NORMALISE_UNSUPPORTED)\";\n            break;\n        case TSK_ERR_INSUFFICIENT_WEIGHTS:\n            ret = \"Insufficient weights provided (at least 1 required). \"\n                  \"(TSK_ERR_INSUFFICIENT_WEIGHTS)\";\n            break;\n\n        /* Pair coalescence errors */\n        case TSK_ERR_BAD_NODE_BIN_MAP:\n            ret = \"Node-to-bin map contains values less than TSK_NULL. \"\n                  \"(TSK_ERR_BAD_NODE_BIN_MAP)\";\n            break;\n        case TSK_ERR_BAD_NODE_BIN_MAP_DIM:\n            ret = \"Maximum index in node-to-bin map is greater than the \"\n                  \"output dimension. (TSK_ERR_BAD_NODE_BIN_MAP_DIM)\";\n            break;\n        case TSK_ERR_BAD_QUANTILES:\n            ret = \"Quantiles must be between 0 and 1 (inclusive) \"\n                  \"and strictly increasing. (TSK_ERR_BAD_QUANTILES)\";\n            break;\n        case TSK_ERR_UNSORTED_TIMES:\n            ret = \"Times must be strictly increasing. 
(TSK_ERR_UNSORTED_TIMES)\";\n            break;\n        case TSK_ERR_BAD_TIME_WINDOWS_DIM:\n            ret = \"Must have at least one time window. (TSK_ERR_BAD_TIME_WINDOWS_DIM)\";\n            break;\n        case TSK_ERR_BAD_SAMPLE_PAIR_TIMES:\n            ret = \"All sample times must be equal to the start of first time window. \"\n                  \"(TSK_ERR_BAD_SAMPLE_PAIR_TIMES)\";\n            break;\n        case TSK_ERR_BAD_TIME_WINDOWS:\n            ret = \"Time windows must start at zero and be strictly increasing. \"\n                  \"(TSK_ERR_BAD_TIME_WINDOWS)\";\n            break;\n        case TSK_ERR_BAD_TIME_WINDOWS_END:\n            ret = \"Time windows must end at infinity for this method. \"\n                  \"(TSK_ERR_BAD_TIME_WINDOWS_END)\";\n            break;\n        case TSK_ERR_BAD_NODE_TIME_WINDOW:\n            ret = \"Node time does not fall within assigned time window. \"\n                  \"(TSK_ERR_BAD_NODE_TIME_WINDOW)\";\n            break;\n\n        /* Two locus errors */\n        case TSK_ERR_STAT_UNSORTED_POSITIONS:\n            ret = \"The provided positions are not sorted in strictly increasing \"\n                  \"order. (TSK_ERR_STAT_UNSORTED_POSITIONS)\";\n            break;\n        case TSK_ERR_STAT_DUPLICATE_POSITIONS:\n            ret = \"The provided positions contain duplicates. \"\n                  \"(TSK_ERR_STAT_DUPLICATE_POSITIONS)\";\n            break;\n        case TSK_ERR_STAT_UNSORTED_SITES:\n            ret = \"The provided sites are not sorted in strictly increasing position \"\n                  \"order. (TSK_ERR_STAT_UNSORTED_SITES)\";\n            break;\n        case TSK_ERR_STAT_DUPLICATE_SITES:\n            ret = \"The provided sites contain duplicated entries. 
\"\n                  \"(TSK_ERR_STAT_DUPLICATE_SITES)\";\n            break;\n\n        /* Mutation mapping errors */\n        case TSK_ERR_GENOTYPES_ALL_MISSING:\n            ret = \"Must provide at least one non-missing genotype. \"\n                  \"(TSK_ERR_GENOTYPES_ALL_MISSING)\";\n            break;\n        case TSK_ERR_BAD_GENOTYPE:\n            ret = \"Bad genotype value provided. (TSK_ERR_BAD_GENOTYPE)\";\n            break;\n        case TSK_ERR_BAD_ANCESTRAL_STATE:\n            ret = \"Bad ancestral state specified. (TSK_ERR_BAD_ANCESTRAL_STATE)\";\n            break;\n\n        /* Genotype decoding errors */\n        case TSK_ERR_MUST_IMPUTE_NON_SAMPLES:\n            ret = \"Cannot generate genotypes for non-samples when isolated nodes are \"\n                  \"considered as missing. (TSK_ERR_MUST_IMPUTE_NON_SAMPLES)\";\n            break;\n        case TSK_ERR_ALLELE_NOT_FOUND:\n            ret = \"An allele was not found in the user-specified allele map. \"\n                  \"(TSK_ERR_ALLELE_NOT_FOUND)\";\n            break;\n        case TSK_ERR_TOO_MANY_ALLELES:\n            ret = \"Cannot have more than 2147483647 alleles (TSK_ERR_TOO_MANY_ALLELES)\";\n            break;\n        case TSK_ERR_ZERO_ALLELES:\n            ret = \"Must have at least one allele when specifying an allele map. \"\n                  \"(TSK_ERR_ZERO_ALLELES)\";\n            break;\n        case TSK_ERR_BAD_ALLELE_LENGTH:\n            ret = \"Alleles used when decoding alignments must have length one. \"\n                  \"(TSK_ERR_BAD_ALLELE_LENGTH)\";\n            break;\n        case TSK_ERR_MISSING_CHAR_COLLISION:\n            ret = \"Alleles used when decoding alignments must not match the missing \"\n                  \"data character. (TSK_ERR_MISSING_CHAR_COLLISION)\";\n            break;\n\n        /* Distance metric errors */\n        case TSK_ERR_SAMPLE_SIZE_MISMATCH:\n            ret = \"Cannot compare trees with different numbers of samples. 
\"\n                  \"(TSK_ERR_SAMPLE_SIZE_MISMATCH)\";\n            break;\n        case TSK_ERR_SAMPLES_NOT_EQUAL:\n            ret = \"Samples must be identical in trees to compare. \"\n                  \"(TSK_ERR_SAMPLES_NOT_EQUAL)\";\n            break;\n        case TSK_ERR_MULTIPLE_ROOTS:\n            ret = \"Trees with multiple roots not supported. (TSK_ERR_MULTIPLE_ROOTS)\";\n            break;\n        case TSK_ERR_UNARY_NODES:\n            ret = \"Unsimplified trees with unary nodes are not supported. \"\n                  \"(TSK_ERR_UNARY_NODES)\";\n            break;\n        case TSK_ERR_SEQUENCE_LENGTH_MISMATCH:\n            ret = \"Sequence lengths must be identical to compare. \"\n                  \"(TSK_ERR_SEQUENCE_LENGTH_MISMATCH)\";\n            break;\n        case TSK_ERR_NO_SAMPLE_LISTS:\n            ret = \"The sample_lists option must be enabled on the tree to perform this \"\n                  \"operation. Pass the option to the constructor or method that created \"\n                  \"the tree. (TSK_ERR_NO_SAMPLE_LISTS)\";\n            break;\n\n        /* Haplotype matching errors */\n        case TSK_ERR_NULL_VITERBI_MATRIX:\n            ret = \"Viterbi matrix has not filled. (TSK_ERR_NULL_VITERBI_MATRIX)\";\n            break;\n        case TSK_ERR_MATCH_IMPOSSIBLE:\n            ret = \"No matching haplotype exists with current parameters. \"\n                  \"(TSK_ERR_MATCH_IMPOSSIBLE)\";\n            break;\n        case TSK_ERR_BAD_COMPRESSED_MATRIX_NODE:\n            ret = \"The compressed matrix contains a node that subtends no samples. \"\n                  \"(TSK_ERR_BAD_COMPRESSED_MATRIX_NODE)\";\n            break;\n        case TSK_ERR_TOO_MANY_VALUES:\n            ret = \"Too many values to compress. 
(TSK_ERR_TOO_MANY_VALUES)\";\n            break;\n\n        /* Union errors */\n        case TSK_ERR_UNION_BAD_MAP:\n            ret = \"Node map contains an entry of a node not present in this table \"\n                  \"collection. (TSK_ERR_UNION_BAD_MAP)\";\n            break;\n        case TSK_ERR_UNION_DIFF_HISTORIES:\n            // histories could be equivalent, because subset does not reorder\n            // edges (if not sorted) or mutations.\n            ret = \"Shared portions of the tree sequences are not equal. \"\n                  \"(TSK_ERR_UNION_DIFF_HISTORIES)\";\n            break;\n\n        /* IBD errors */\n        case TSK_ERR_SAME_NODES_IN_PAIR:\n            ret = \"Both nodes in the sample pair are the same. \"\n                  \"(TSK_ERR_SAME_NODES_IN_PAIR)\";\n            break;\n\n        case TSK_ERR_IBD_PAIRS_NOT_STORED:\n            ret = \"The sample pairs are not stored by default in ibd_segments. Please \"\n                  \"add the TSK_IBD_STORE_PAIRS option flag if per-pair statistics are \"\n                  \"required. (TSK_ERR_IBD_PAIRS_NOT_STORED)\";\n            break;\n\n        case TSK_ERR_IBD_SEGMENTS_NOT_STORED:\n            ret = \"All segments are not stored by default in ibd_segments. Please \"\n                  \"add the TSK_IBD_STORE_SEGMENTS option flag if they are required. \"\n                  \"(TSK_ERR_IBD_SEGMENTS_NOT_STORED)\";\n            break;\n\n        /* Simplify errors */\n        case TSK_ERR_KEEP_UNARY_MUTUALLY_EXCLUSIVE:\n            ret = \"You cannot specify both TSK_SIMPLIFY_KEEP_UNARY and \"\n                  \"TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVDUALS. 
\"\n                  \"(TSK_ERR_KEEP_UNARY_MUTUALLY_EXCLUSIVE)\";\n            break;\n\n        /* Individual errors */\n        case TSK_ERR_UNSORTED_INDIVIDUALS:\n            ret = \"Individuals must be provided in an order where children are after \"\n                  \"their parent individuals (TSK_ERR_UNSORTED_INDIVIDUALS)\";\n            break;\n\n        case TSK_ERR_INDIVIDUAL_SELF_PARENT:\n            ret = \"Individuals cannot be their own parents. \"\n                  \"(TSK_ERR_INDIVIDUAL_SELF_PARENT)\";\n            break;\n\n        case TSK_ERR_INDIVIDUAL_PARENT_CYCLE:\n            ret = \"Individuals cannot be their own ancestor. \"\n                  \"(TSK_ERR_INDIVIDUAL_PARENT_CYCLE)\";\n            break;\n\n        case TSK_ERR_INDIVIDUAL_POPULATION_MISMATCH:\n            ret = \"Individual populations cannot be returned \"\n                  \"if an individual has nodes from more than one population. \"\n                  \"(TSK_ERR_INDIVIDUAL_POPULATION_MISMATCH)\";\n            break;\n\n        case TSK_ERR_INDIVIDUAL_TIME_MISMATCH:\n            ret = \"Individual times cannot be returned \"\n                  \"if an individual has nodes from more than one time. \"\n                  \"(TSK_ERR_INDIVIDUAL_TIME_MISMATCH)\";\n            break;\n\n        case TSK_ERR_EXTEND_EDGES_BAD_MAXITER:\n            ret = \"Maximum number of iterations must be positive. \"\n                  \"(TSK_ERR_EXTEND_EDGES_BAD_MAXITER)\";\n            break;\n    }\n    return ret;\n}\n\nint\ntsk_set_kas_error(int err)\n{\n    if (err == KAS_ERR_IO) {\n        /* If we've detected an IO error, report it as TSK_ERR_IO so that we have\n         * a consistent error code covering these situtations */\n        return TSK_ERR_IO;\n    } else {\n        /* Flip this bit. 
As the error is negative, this sets the bit to 0 */\n        return err ^ (1 << TSK_KAS_ERR_BIT);\n    }\n}\n\nbool\ntsk_is_kas_error(int err)\n{\n    return !(err & (1 << TSK_KAS_ERR_BIT));\n}\n\nint\ntsk_get_kas_error(int err)\n{\n    return err ^ (1 << TSK_KAS_ERR_BIT);\n}\n\nconst char *\ntsk_strerror(int err)\n{\n    if (err != 0 && tsk_is_kas_error(err)) {\n        return kas_strerror(tsk_get_kas_error(err));\n    } else {\n        return tsk_strerror_internal(err);\n    }\n}\n\nvoid\n__tsk_safe_free(void **ptr)\n{\n    if (ptr != NULL) {\n        if (*ptr != NULL) {\n            free(*ptr);\n            *ptr = NULL;\n        }\n    }\n}\n\n/* Block allocator. Simple allocator when we lots of chunks of memory\n * and don't need to free them individually.\n */\n\nvoid\ntsk_blkalloc_print_state(tsk_blkalloc_t *self, FILE *out)\n{\n    fprintf(out, \"Block allocator%p::\\n\", (void *) self);\n    fprintf(out, \"\\ttop = %lld\\n\", (long long) self->top);\n    fprintf(out, \"\\tchunk_size = %lld\\n\", (long long) self->chunk_size);\n    fprintf(out, \"\\tnum_chunks = %lld\\n\", (long long) self->num_chunks);\n    fprintf(out, \"\\ttotal_allocated = %lld\\n\", (long long) self->total_allocated);\n    fprintf(out, \"\\ttotal_size = %lld\\n\", (long long) self->total_size);\n}\n\nint TSK_WARN_UNUSED\ntsk_blkalloc_reset(tsk_blkalloc_t *self)\n{\n    int ret = 0;\n\n    self->top = 0;\n    self->current_chunk = 0;\n    self->total_allocated = 0;\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_blkalloc_init(tsk_blkalloc_t *self, size_t chunk_size)\n{\n    int ret = 0;\n\n    tsk_memset(self, 0, sizeof(tsk_blkalloc_t));\n    if (chunk_size < 1) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    self->chunk_size = chunk_size;\n    self->top = 0;\n    self->current_chunk = 0;\n    self->total_allocated = 0;\n    self->total_size = 0;\n    self->num_chunks = 0;\n    self->mem_chunks = malloc(sizeof(char *));\n    if (self->mem_chunks 
== NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    self->mem_chunks[0] = malloc(chunk_size);\n    if (self->mem_chunks[0] == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    self->num_chunks = 1;\n    self->total_size = chunk_size + sizeof(void *);\nout:\n    return ret;\n}\n\nvoid *TSK_WARN_UNUSED\ntsk_blkalloc_get(tsk_blkalloc_t *self, size_t size)\n{\n    void *ret = NULL;\n    void *p;\n\n    if (size > self->chunk_size) {\n        goto out;\n    }\n    if ((self->top + size) > self->chunk_size) {\n        if (self->current_chunk == (self->num_chunks - 1)) {\n            p = realloc(self->mem_chunks, (self->num_chunks + 1) * sizeof(void *));\n            if (p == NULL) {\n                goto out;\n            }\n            self->mem_chunks = p;\n            p = malloc(self->chunk_size);\n            if (p == NULL) {\n                goto out;\n            }\n            self->mem_chunks[self->num_chunks] = p;\n            self->num_chunks++;\n            self->total_size += self->chunk_size + sizeof(void *);\n        }\n        self->current_chunk++;\n        self->top = 0;\n    }\n    ret = self->mem_chunks[self->current_chunk] + self->top;\n    self->top += size;\n    self->total_allocated += size;\nout:\n    return ret;\n}\n\nvoid\ntsk_blkalloc_free(tsk_blkalloc_t *self)\n{\n    size_t j;\n\n    for (j = 0; j < self->num_chunks; j++) {\n        if (self->mem_chunks[j] != NULL) {\n            free(self->mem_chunks[j]);\n        }\n    }\n    if (self->mem_chunks != NULL) {\n        free(self->mem_chunks);\n    }\n}\n\n/* Mirrors the semantics of numpy's searchsorted function. Uses binary\n * search to find the index of the closest value in the array. 
*/\ntsk_size_t\ntsk_search_sorted(const double *restrict array, tsk_size_t size, double value)\n{\n    int64_t upper = (int64_t) size;\n    int64_t lower = 0;\n    int64_t offset = 0;\n    int64_t mid;\n\n    if (upper == 0) {\n        return 0;\n    }\n\n    while (upper - lower > 1) {\n        mid = (upper + lower) / 2;\n        if (value >= array[mid]) {\n            lower = mid;\n        } else {\n            upper = mid;\n        }\n    }\n    offset = (int64_t) (array[lower] < value);\n    return (tsk_size_t) (lower + offset);\n}\n\n/* Rounds the specified double to the closest multiple of 10**-num_digits. If\n * num_digits > 22, return value without changes. This is intended for use with\n * small positive numbers; behaviour with large inputs has not been considered.\n *\n * Based on double_round from the Python standard library\n * https://github.com/python/cpython/blob/master/Objects/floatobject.c#L985\n */\ndouble\ntsk_round(double x, unsigned int ndigits)\n{\n    double pow1, y, z;\n\n    z = x;\n    if (ndigits < 22) {\n        pow1 = pow(10.0, (double) ndigits);\n        y = x * pow1;\n        z = round(y);\n        if (fabs(y - z) == 0.5) {\n            /* halfway between two integers; use round-half-even */\n            z = 2.0 * round(y / 2.0);\n        }\n        z = z / pow1;\n    }\n    return z;\n}\n\n/* As NANs are not equal, use this function to check for equality to TSK_UNKNOWN_TIME */\nbool\ntsk_is_unknown_time(double val)\n{\n    union {\n        uint64_t i;\n        double f;\n    } nan_union;\n    nan_union.f = val;\n    return nan_union.i == TSK_UNKNOWN_TIME_HEX;\n}\n\n/* Work around a bug which seems to show up on various mixtures of\n * compiler and libc versions, where isfinite and isnan result in\n * spurious warnings about casting down to float. 
The original issue\n * is here:\n * https://github.com/tskit-dev/tskit/issues/721\n *\n * The simplest approach seems to be to use the builtins where they\n * are available (clang and gcc), and to use the library macro\n * otherwise. There would be no disadvantage to using the builtin\n * version, so there's no real harm in this approach.\n */\n\nbool\ntsk_isnan(double val)\n{\n#if defined(__GNUC__)\n    return __builtin_isnan(val);\n#else\n    return isnan(val);\n#endif\n}\n\nbool\ntsk_isfinite(double val)\n{\n#if defined(__GNUC__)\n    return __builtin_isfinite(val);\n#else\n    return isfinite(val);\n#endif\n}\n\nvoid *\ntsk_malloc(tsk_size_t size)\n{\n    /* Avoid malloc(0) as it's not portable */\n    if (size == 0) {\n        size = 1;\n    }\n#if TSK_MAX_SIZE > SIZE_MAX\n    if (size > SIZE_MAX) {\n        return NULL;\n    }\n#endif\n    return malloc((size_t) size);\n}\n\nvoid *\ntsk_realloc(void *ptr, tsk_size_t size)\n{\n    /* We shouldn't ever realloc to a zero size in tskit */\n    tsk_bug_assert(size > 0);\n    return realloc(ptr, (size_t) size);\n}\n\n/* We keep the size argument here as a size_t because we'd have to\n * cast the outputs of sizeof() otherwise, which would lead to\n * less readable code. 
We need to be careful to use calloc within\n * the library accordingly, so that size can't overflow on 32 bit.\n */\nvoid *\ntsk_calloc(tsk_size_t n, size_t size)\n{\n    /* Avoid calloc(0) as it's not portable */\n    if (n == 0) {\n        n = 1;\n    }\n#if TSK_MAX_SIZE > SIZE_MAX\n    if (n > SIZE_MAX) {\n        return NULL;\n    }\n#endif\n    return calloc((size_t) n, size);\n}\n\nvoid *\ntsk_memset(void *ptr, int fill, tsk_size_t size)\n{\n    return memset(ptr, fill, (size_t) size);\n}\n\nvoid *\ntsk_memcpy(void *dest, const void *src, tsk_size_t size)\n{\n    return memcpy(dest, src, (size_t) size);\n}\n\nvoid *\ntsk_memmove(void *dest, const void *src, tsk_size_t size)\n{\n    return memmove(dest, src, (size_t) size);\n}\n\nint\ntsk_memcmp(const void *s1, const void *s2, tsk_size_t size)\n{\n    return memcmp(s1, s2, (size_t) size);\n}\n\n/* We can't initialise the stream to its real default value because\n * of limitations on static initialisers. To work around this, we initialise\n * it to NULL and then set the value to the required standard stream\n * when called. */\n\nFILE *_tsk_debug_stream = NULL;\n\nvoid\ntsk_set_debug_stream(FILE *f)\n{\n    _tsk_debug_stream = f;\n}\n\nFILE *\ntsk_get_debug_stream(void)\n{\n    if (_tsk_debug_stream == NULL) {\n        _tsk_debug_stream = TSK_DEFAULT_DEBUG_STREAM;\n    }\n    return _tsk_debug_stream;\n}\n\n/* AVL Tree implementation. This is based directly on Knuth's implementation\n * in TAOCP. 
See the python/tests/test_avl_tree.py for more information,\n * and equivalent code annotated with the original algorithm listing.\n */\n\nstatic void\ntsk_avl_tree_int_print_node(tsk_avl_node_int_t *node, int depth, FILE *out)\n{\n    int d;\n\n    if (node == NULL) {\n        return;\n    }\n    for (d = 0; d < depth; d++) {\n        fprintf(out, \"  \");\n    }\n    fprintf(out, \"key=%d balance=%d\\n\", (int) node->key, node->balance);\n    tsk_avl_tree_int_print_node(node->llink, depth + 1, out);\n    tsk_avl_tree_int_print_node(node->rlink, depth + 1, out);\n}\nvoid\ntsk_avl_tree_int_print_state(tsk_avl_tree_int_t *self, FILE *out)\n{\n    fprintf(out, \"AVL tree: size=%d height=%d\\n\", (int) self->size, (int) self->height);\n    tsk_avl_tree_int_print_node(self->head.rlink, 0, out);\n}\n\nint\ntsk_avl_tree_int_init(tsk_avl_tree_int_t *self)\n{\n    memset(self, 0, sizeof(*self));\n    return 0;\n}\n\nint\ntsk_avl_tree_int_free(tsk_avl_tree_int_t *TSK_UNUSED(self))\n{\n    return 0;\n}\n\ntsk_avl_node_int_t *\ntsk_avl_tree_int_get_root(const tsk_avl_tree_int_t *self)\n{\n    return self->head.rlink;\n}\n\ntsk_avl_node_int_t *\ntsk_avl_tree_int_search(const tsk_avl_tree_int_t *self, int64_t key)\n{\n    tsk_avl_node_int_t *P = self->head.rlink;\n\n    while (P != NULL) {\n        if (key == P->key) {\n            break;\n        } else if (key < P->key) {\n            P = P->llink;\n        } else {\n            P = P->rlink;\n        }\n    }\n    return P;\n}\n\nstatic int\ntsk_avl_tree_int_insert_empty(tsk_avl_tree_int_t *self, tsk_avl_node_int_t *node)\n{\n    self->head.rlink = node;\n    self->size = 1;\n    self->height = 1;\n    node->llink = NULL;\n    node->rlink = NULL;\n    node->balance = 0;\n    return 0;\n}\n\n#define get_link(a, P) ((a) == -1 ? 
(P)->llink : (P)->rlink)\n#define set_link(a, P, val)                                                             \\\n    do {                                                                                \\\n        if ((a) == -1) {                                                                \\\n            (P)->llink = val;                                                           \\\n        } else {                                                                        \\\n            (P)->rlink = val;                                                           \\\n        }                                                                               \\\n    } while (0);\n\nstatic int\ntsk_avl_tree_int_insert_non_empty(tsk_avl_tree_int_t *self, tsk_avl_node_int_t *node)\n{\n    const int64_t K = node->key;\n    tsk_avl_node_int_t *T = &self->head;\n    tsk_avl_node_int_t *S = T->rlink;\n    tsk_avl_node_int_t *P = T->rlink;\n    tsk_avl_node_int_t *Q, *R;\n    int a;\n\n    while (true) {\n        if (K == P->key) {\n            /* TODO figure out what the most useful semantics are here. Just\n             * returning 1 as a non-zero value for now. 
*/\n            return 1;\n        } else if (K < P->key) {\n            Q = P->llink;\n            if (Q == NULL) {\n                Q = node;\n                P->llink = Q;\n                break;\n            }\n        } else {\n            Q = P->rlink;\n            if (Q == NULL) {\n                Q = node;\n                P->rlink = Q;\n                break;\n            }\n        }\n        if (Q->balance != 0) {\n            T = P;\n            S = Q;\n        }\n        P = Q;\n    }\n\n    self->size++;\n    Q->llink = NULL;\n    Q->rlink = NULL;\n    Q->balance = 0;\n\n    if (K < S->key) {\n        a = -1;\n    } else {\n        a = 1;\n    }\n    P = get_link(a, S);\n    R = P;\n    while (P != Q) {\n        if (K < P->key) {\n            P->balance = -1;\n            P = P->llink;\n        } else if (K > P->key) {\n            P->balance = 1;\n            P = P->rlink;\n        }\n    }\n\n    if (S->balance == 0) {\n        S->balance = a;\n        self->height++;\n    } else if (S->balance == -a) {\n        S->balance = 0;\n    } else {\n        if (R->balance == a) {\n            P = R;\n            set_link(a, S, get_link(-a, R));\n            set_link(-a, R, S);\n            S->balance = 0;\n            R->balance = 0;\n        } else if (R->balance == -a) {\n            P = get_link(-a, R);\n            set_link(-a, R, get_link(a, P));\n            set_link(a, P, R);\n            set_link(a, S, get_link(-a, P));\n            set_link(-a, P, S);\n            if (P->balance == a) {\n                S->balance = -a;\n                R->balance = 0;\n            } else if (P->balance == 0) {\n                S->balance = 0;\n                R->balance = 0;\n            } else {\n                S->balance = 0;\n                R->balance = a;\n            }\n            P->balance = 0;\n        }\n        if (S == T->rlink) {\n            T->rlink = P;\n        } else {\n            T->llink = P;\n        }\n    }\n    return 
0;\n}\n\nint\ntsk_avl_tree_int_insert(tsk_avl_tree_int_t *self, tsk_avl_node_int_t *node)\n{\n    int ret = 0;\n\n    if (self->size == 0) {\n        ret = tsk_avl_tree_int_insert_empty(self, node);\n    } else {\n        ret = tsk_avl_tree_int_insert_non_empty(self, node);\n    }\n    return ret;\n}\n\n/* An inorder traversal of the nodes in an AVL tree (or any binary search tree)\n * yields the keys in sorted order. The recursive implementation is safe here\n * because this is an AVL tree and it is strictly balanced, the depth is very\n * limited. Using GCC's __builtin_frame_address it looks like the size of a stack\n * frame for this function is 48 bytes. Assuming a stack size of 1MiB, this\n * would give us a maximum tree depth of 21845 - so, we're pretty safe.\n */\nstatic int\nordered_nodes_traverse(tsk_avl_node_int_t *node, int index, tsk_avl_node_int_t **out)\n{\n    if (node == NULL) {\n        return index;\n    }\n    index = ordered_nodes_traverse(node->llink, index, out);\n    out[index] = node;\n    return ordered_nodes_traverse(node->rlink, index + 1, out);\n}\n\nint\ntsk_avl_tree_int_ordered_nodes(const tsk_avl_tree_int_t *self, tsk_avl_node_int_t **out)\n{\n    ordered_nodes_traverse(self->head.rlink, 0, out);\n    return 0;\n}\n\n// Bit Array implementation. Allows us to store unsigned integers in a compact manner.\n// Currently implemented as an array of 32-bit unsigned integers.\n\nint\ntsk_bitset_init(tsk_bitset_t *self, tsk_size_t num_bits, tsk_size_t length)\n{\n    int ret = 0;\n\n    self->row_len = (num_bits / TSK_BITSET_BITS) + (num_bits % TSK_BITSET_BITS ? 
1 : 0);\n    self->len = length;\n    self->data = tsk_calloc(self->row_len * length, sizeof(*self->data));\n    if (self->data == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\nout:\n    return ret;\n}\n\n#define BITSET_DATA_ROW(bs, row) ((bs)->data + (row) * (bs)->row_len)\n\nvoid\ntsk_bitset_intersect(const tsk_bitset_t *self, tsk_size_t self_row,\n    const tsk_bitset_t *other, tsk_size_t other_row, tsk_bitset_t *out)\n{\n    const tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, self_row);\n    const tsk_bitset_val_t *restrict other_d = BITSET_DATA_ROW(other, other_row);\n    tsk_bitset_val_t *restrict out_d = out->data;\n    for (tsk_size_t i = 0; i < self->row_len; i++) {\n        out_d[i] = self_d[i] & other_d[i];\n    }\n}\n\nvoid\ntsk_bitset_subtract(tsk_bitset_t *self, tsk_size_t self_row, const tsk_bitset_t *other,\n    tsk_size_t other_row)\n{\n    tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, self_row);\n    const tsk_bitset_val_t *restrict other_d = BITSET_DATA_ROW(other, other_row);\n    for (tsk_size_t i = 0; i < self->row_len; i++) {\n        self_d[i] &= ~(other_d[i]);\n    }\n}\n\nvoid\ntsk_bitset_union(tsk_bitset_t *self, tsk_size_t self_row, const tsk_bitset_t *other,\n    tsk_size_t other_row)\n{\n    tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, self_row);\n    const tsk_bitset_val_t *restrict other_d = BITSET_DATA_ROW(other, other_row);\n    for (tsk_size_t i = 0; i < self->row_len; i++) {\n        self_d[i] |= other_d[i];\n    }\n}\n\nvoid\ntsk_bitset_set_bit(tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit)\n{\n    tsk_bitset_val_t i = (bit / TSK_BITSET_BITS);\n    *(BITSET_DATA_ROW(self, row) + i) |= (tsk_bitset_val_t) 1\n                                         << (bit - (TSK_BITSET_BITS * i));\n}\n\nbool\ntsk_bitset_contains(const tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit)\n{\n    tsk_bitset_val_t i = (bit / TSK_BITSET_BITS);\n    
return *(BITSET_DATA_ROW(self, row) + i)\n           & ((tsk_bitset_val_t) 1 << (bit - (TSK_BITSET_BITS * i)));\n}\n\nstatic inline uint32_t\npopcount(tsk_bitset_val_t v)\n{\n    // Utilizes 12 operations per chunk. NB this only works on 32 bit integers.\n    // Taken from:\n    //   https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel\n    // There's a nice breakdown of this algorithm here:\n    //   https://stackoverflow.com/a/109025\n    //\n    // The gcc/clang compiler flag will -mpopcnt will convert this code to a\n    // popcnt instruction (most if not all modern CPUs will support this). The\n    // popcnt instruction will yield some speed improvements, which depend on\n    // the tree sequence.\n    //\n    // NB: 32bit counting is typically faster than 64bit counting for this task.\n    //     (at least on x86-64)\n\n    v = v - ((v >> 1) & 0x55555555);\n    v = (v & 0x33333333) + ((v >> 2) & 0x33333333);\n    return (((v + (v >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;\n}\n\ntsk_size_t\ntsk_bitset_count(const tsk_bitset_t *self, tsk_size_t row)\n{\n    tsk_size_t i = 0;\n    tsk_size_t count = 0;\n    const tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, row);\n\n    for (i = 0; i < self->row_len; i++) {\n        count += popcount(self_d[i]);\n    }\n    return count;\n}\n\nvoid\ntsk_bitset_get_items(\n    const tsk_bitset_t *self, tsk_size_t row, tsk_id_t *items, tsk_size_t *n_items)\n{\n    // Get the items stored in the row of a bitset.\n    // Uses a de Bruijn sequence lookup table to determine the lowest bit set.\n    // See the wikipedia article for more info: https://w.wiki/BYiF\n\n    tsk_size_t i, n, off;\n    tsk_bitset_val_t v, lsb; // least significant bit\n    static const tsk_id_t lookup[32] = { 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25,\n        17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };\n    const tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, row);\n\n    n = 0;\n    
for (i = 0; i < self->row_len; i++) {\n        v = self_d[i];\n        off = i * TSK_BITSET_BITS;\n        if (v == 0) {\n            continue;\n        }\n        while ((lsb = v & -v)) {\n            items[n] = lookup[(lsb * 0x077cb531U) >> 27] + (tsk_id_t) off;\n            n++;\n            v ^= lsb;\n        }\n    }\n    *n_items = n;\n}\n\nvoid\ntsk_bitset_free(tsk_bitset_t *self)\n{\n    tsk_safe_free(self->data);\n}\n"
  },
  {
    "path": "c/tskit/core.h",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2025 Tskit Developers\n * Copyright (c) 2015-2018 University of Oxford\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n/**\n * @file core.h\n * @brief Core utilities used in all of tskit.\n */\n#ifndef __TSK_CORE_H__\n#define __TSK_CORE_H__\n\n#ifdef __cplusplus\nextern \"C\" {\n#endif\n\n#include <math.h>\n#include <stdbool.h>\n#include <stdint.h>\n#include <stdio.h>\n#include <limits.h>\n\n#ifdef __GNUC__\n#define TSK_WARN_UNUSED __attribute__((warn_unused_result))\n#define TSK_UNUSED(x)   TSK_UNUSED_##x __attribute__((__unused__))\n#else\n#define TSK_WARN_UNUSED\n#define TSK_UNUSED(x) TSK_UNUSED_##x\n/* Don't bother with restrict for MSVC */\n#define restrict\n#endif\n\n/* We assume CHAR_BIT == 8 when loading strings from 8-bit byte arrays */\n#if CHAR_BIT != 8\n#error CHAR_BIT MUST EQUAL 8\n#endif\n\n/* This sets up TSK_DBL_DECIMAL_DIG, which can then be used as 
a\n * precision specifier when writing out doubles, if you want sufficient\n * decimal digits to be written to guarantee a lossless round-trip\n * after being read back in.  Usage:\n *\n *     printf(\"%.*g\", TSK_DBL_DECIMAL_DIG, foo);\n *\n * See https://stackoverflow.com/a/19897395/2752221\n */\n#ifdef DBL_DECIMAL_DIG\n#define TSK_DBL_DECIMAL_DIG (DBL_DECIMAL_DIG)\n#else\n#define TSK_DBL_DECIMAL_DIG (DBL_DIG + 3)\n#endif\n\n/**\n@brief Tskit Object IDs.\n\n@rst\nAll objects in tskit are referred to by integer IDs corresponding to the\nrow they occupy in the relevant table. The ``tsk_id_t`` type should be used\nwhen manipulating these ID values. The reserved value :c:macro:`TSK_NULL` (-1) defines\nmissing data.\n@endrst\n*/\n#ifdef _TSK_BIG_TABLES\n/* Allow tables to have more than 2^31 rows. This is an EXPERIMENTAL feature\n * and is not supported in any way. This typedef is only included for\n * future-proofing purposes, so that we can be sure that we don't make any\n * design decisions that are incompatible with big tables by building the\n * library in 64 bit mode in CI. 
See the discussion here for more background:\n\n * https://github.com/tskit-dev/tskit/issues/343\n *\n * If you need big tables, please open an issue on GitHub to discuss, or comment\n * on the thread above.\n */\ntypedef int64_t tsk_id_t;\n#define TSK_MAX_ID          INT64_MAX - 1\n#define TSK_ID_STORAGE_TYPE KAS_INT64\n#else\ntypedef int32_t tsk_id_t;\n#define TSK_MAX_ID          INT32_MAX - 1\n#define TSK_ID_STORAGE_TYPE KAS_INT32\n#endif\n\n/**\n@brief Tskit sizes.\n\n@rst\nThe ``tsk_size_t`` type is an unsigned integer used for any size or count value.\n@endrst\n*/\ntypedef uint64_t tsk_size_t;\n#define TSK_MAX_SIZE          UINT64_MAX\n#define TSK_SIZE_STORAGE_TYPE KAS_UINT64\n\n/**\n@brief Container for bitwise flags.\n\n@rst\nBitwise flags are used in tskit as a column type and also as a way to\nspecify options to API functions.\n@endrst\n*/\ntypedef uint32_t tsk_flags_t;\n#define TSK_FLAGS_STORAGE_TYPE KAS_UINT32\n\n/**\n@brief Boolean type.\n\n@rst\nFixed-size (1 byte) boolean values.\n@endrst\n*/\ntypedef uint8_t tsk_bool_t;\n\n// clang-format off\n/**\n@defgroup API_VERSION_GROUP API version macros.\n@{\n*/\n/**\nThe library major version. Incremented when breaking changes to the API or ABI are\nintroduced. This includes any changes to the signatures of functions and the\nsizes and types of externally visible structs.\n*/\n#define TSK_VERSION_MAJOR   1\n/**\nThe library minor version. Incremented when non-breaking backward-compatible changes\nto the API or ABI are introduced, i.e., the addition of a new function.\n*/\n#define TSK_VERSION_MINOR   3\n/**\nThe library patch version. Incremented when any changes not relevant to the\nto the API or ABI are introduced, i.e., internal refactors of bugfixes.\n*/\n#define TSK_VERSION_PATCH   1\n/** @} */\n\n/*\nWe define a specific NAN value for default mutation time which indicates\nthe time is unknown. We use a specific value so that if mutation time is set to\na NAN from a computation we can reject it. 
This specific value is a non-signalling\nNAN with the last six fraction bytes set to the ascii of \"tskit!\"\n*/\n#define TSK_UNKNOWN_TIME_HEX 0x7FF874736B697421ULL\nstatic inline double\n__tsk_nan_f(void)\n{\n    const union {\n        uint64_t i;\n        double f;\n    } nan_union = { .i = TSK_UNKNOWN_TIME_HEX };\n    return nan_union.f;\n}\n\n/**\n@defgroup GENERIC_CONSTANTS General options flags used in some functions.\n@{\n*/\n/**\nUsed in node flags to indicate that a node is a sample node.\n*/\n#define TSK_NODE_IS_SAMPLE 1u\n\n/**\nNull value used for cases such as absent id references.\n*/\n#define TSK_NULL ((tsk_id_t) -1)\n\n/**\nValue used for missing data in genotype arrays.\n*/\n#define TSK_MISSING_DATA    (-1)\n\n/**\nValue to indicate that a time is unknown. Note that this value is a non-signalling NAN\nwhose representation differs from the NAN generated by computations such as divide by zeros.\n*/\n#define TSK_UNKNOWN_TIME __tsk_nan_f()\n\n/** @} */\n\n#define TSK_TIME_UNITS_UNKNOWN \"unknown\"\n#define TSK_TIME_UNITS_UNCALIBRATED \"uncalibrated\"\n\n\n#define TSK_FILE_FORMAT_NAME          \"tskit.trees\"\n#define TSK_FILE_FORMAT_NAME_LENGTH   11\n#define TSK_FILE_FORMAT_VERSION_MAJOR 12\n#define TSK_FILE_FORMAT_VERSION_MINOR 7\n\n/**\n@defgroup GENERIC_FUNCTION_OPTIONS General options flags used in some functions.\n@{\n*/\n\n/* Place the common options at the top of the space; this way we can start\noptions for individual functions at the bottom without worrying about\nclashing with the common options\n*/\n\n/** Turn on debugging output. Not supported by all functions. */\n#define TSK_DEBUG (1u << 31)\n\n/** Do not initialise the parameter object. */\n#define TSK_NO_INIT (1u << 30)\n\n/**\nDo not run integrity checks before performing an operation.\nThis performance optimisation should not be used unless the calling code can\nguarantee reference integrity within the table collection. 
References\nto rows not in the table or bad offsets will result in undefined\nbehaviour.\n*/\n#define TSK_NO_CHECK_INTEGRITY (1u << 29)\n\n/**\nInstead of taking a copy of input objects, the function should take ownership\nof them and manage their lifecycle. The caller specifying this flag should no\nlonger modify or free the object or objects passed. See individual functions\nusing this flag for what object it applies to.\n*/\n#define TSK_TAKE_OWNERSHIP (1u << 28)\n\n/** @} */\n\n\n/**\n@defgroup GENERAL_ERROR_GROUP General errors.\n@{\n*/\n\n/**\nGeneric error thrown when no other message can be generated.\n*/\n#define TSK_ERR_GENERIC                                             -1\n/**\nMemory could not be allocated.\n*/\n#define TSK_ERR_NO_MEMORY                                           -2\n/**\nAn IO error occurred.\n*/\n#define TSK_ERR_IO                                                  -3\n#define TSK_ERR_BAD_PARAM_VALUE                                     -4\n#define TSK_ERR_BUFFER_OVERFLOW                                     -5\n#define TSK_ERR_UNSUPPORTED_OPERATION                               -6\n#define TSK_ERR_GENERATE_UUID                                       -7\n/**\nThe file stream ended after reading zero bytes.\n*/\n#define TSK_ERR_EOF                                                 -8\n/** @} */\n\n/**\n@defgroup FILE_FORMAT_ERROR_GROUP File format errors.\n@{\n*/\n\n/**\nA file could not be read because it is in the wrong format\n*/\n#define TSK_ERR_FILE_FORMAT                                         -100\n/**\nThe file is in tskit format, but the version is too old for the\nlibrary to read. The file should be upgraded to the latest version\nusing the ``tskit upgrade`` command line utility from tskit version<0.6.2.\n*/\n#define TSK_ERR_FILE_VERSION_TOO_OLD                                -101\n/**\nThe file is in tskit format, but the version is too new for the\nlibrary to read. 
To read the file you must upgrade the version\nof tskit.\n*/\n#define TSK_ERR_FILE_VERSION_TOO_NEW                                -102\n\n/**\nA column that is a required member of a table was not found in\nthe file.\n*/\n#define TSK_ERR_REQUIRED_COL_NOT_FOUND                              -103\n\n/**\nOne of a pair of columns that must be specified together was\nnot found in the file.\n*/\n#define TSK_ERR_BOTH_COLUMNS_REQUIRED                               -104\n\n/**\nAn unsupported type was provided for a column in the file.\n*/\n#define TSK_ERR_BAD_COLUMN_TYPE                                     -105\n/** @} */\n\n/**\n@defgroup OOB_ERROR_GROUP Out of bounds errors.\n@{\n*/\n/**\nA bad value was provided for a ragged column offset, values should\nstart at zero and be monotonically increasing.\n*/\n#define TSK_ERR_BAD_OFFSET                                          -200\n/**\nA position to seek to was less than zero or greater than the length\nof the genome\n*/\n#define TSK_ERR_SEEK_OUT_OF_BOUNDS                                  -201\n/**\nA node id was less than zero or greater than the final index\n*/\n#define TSK_ERR_NODE_OUT_OF_BOUNDS                                  -202\n/**\nAn edge id was less than zero or greater than the final index\n*/\n#define TSK_ERR_EDGE_OUT_OF_BOUNDS                                  -203\n/**\nA population id was less than zero or greater than the final index\n*/\n#define TSK_ERR_POPULATION_OUT_OF_BOUNDS                            -204\n/**\nA site id was less than zero or greater than the final index\n*/\n#define TSK_ERR_SITE_OUT_OF_BOUNDS                                  -205\n/**\nA mutation id was less than zero or greater than the final index\n*/\n#define TSK_ERR_MUTATION_OUT_OF_BOUNDS                              -206\n/**\nAn individual id was less than zero or greater than the final index\n*/\n#define TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS                            -207\n/**\nA migration id was less than zero or greater than the 
final index\n*/\n#define TSK_ERR_MIGRATION_OUT_OF_BOUNDS                             -208\n/**\nA provenance id was less than zero or greater than the final index\n*/\n#define TSK_ERR_PROVENANCE_OUT_OF_BOUNDS                            -209\n/**\nA time value was non-finite (NaN counts as finite)\n*/\n#define TSK_ERR_TIME_NONFINITE                                      -210\n/**\nA genomic position was non-finite\n*/\n#define TSK_ERR_GENOME_COORDS_NONFINITE                             -211\n/**\nOne of the rows in the retained table refers to a row that has been\ndeleted.\n*/\n#define TSK_ERR_KEEP_ROWS_MAP_TO_DELETED                            -212\n/**\nA genomic position was less than zero or greater than or equal to the sequence\nlength\n*/\n#define TSK_ERR_POSITION_OUT_OF_BOUNDS                              -213\n\n/** @} */\n\n/**\n@defgroup EDGE_ERROR_GROUP Edge errors.\n@{\n*/\n/**\nA parent node of an edge was TSK_NULL.\n*/\n#define TSK_ERR_NULL_PARENT                                         -300\n/**\nA child node of an edge was TSK_NULL.\n*/\n#define TSK_ERR_NULL_CHILD                                          -301\n/**\nThe edge table was not sorted by the time of each edge's parent\nnodes. Sort order is (time[parent], child, left).\n*/\n#define TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME                        -302\n/**\nA parent node had edges that were non-contiguous.\n*/\n#define TSK_ERR_EDGES_NONCONTIGUOUS_PARENTS                         -303\n/**\nThe edge table was not sorted by the id of the child node of each edge.\nSort order is (time[parent], child, left).\n*/\n#define TSK_ERR_EDGES_NOT_SORTED_CHILD                              -304\n/**\nThe edge table was not sorted by the left coordinate of each edge.\nSort order is (time[parent], child, left).\n*/\n#define TSK_ERR_EDGES_NOT_SORTED_LEFT                               -305\n/**\nAn edge had a child node that was older than the parent. 
Parent times must\nbe greater than the child time.\n*/\n#define TSK_ERR_BAD_NODE_TIME_ORDERING                              -306\n/**\nAn edge had a genomic interval where right was less than or equal to left.\n*/\n#define TSK_ERR_BAD_EDGE_INTERVAL                                   -307\n/**\nAn edge was duplicated.\n*/\n#define TSK_ERR_DUPLICATE_EDGES                                     -308\n/**\nAn edge had a right coord greater than the genomic length.\n*/\n#define TSK_ERR_RIGHT_GREATER_SEQ_LENGTH                            -309\n/**\nAn edge had a left coord less than zero.\n*/\n#define TSK_ERR_LEFT_LESS_ZERO                                      -310\n/**\nA parent node had edges that were contradictory over an interval.\n*/\n#define TSK_ERR_BAD_EDGES_CONTRADICTORY_CHILDREN                    -311\n/**\nA method that doesn't support edge metadata was attempted on an edge\ntable containing metadata.\n*/\n#define TSK_ERR_CANT_PROCESS_EDGES_WITH_METADATA                    -312\n/** @} */\n\n/**\n@defgroup SITE_ERROR_GROUP Site errors.\n@{\n*/\n/**\nThe site table was not in order of increasing genomic position.\n*/\n#define TSK_ERR_UNSORTED_SITES                                      -400\n/**\nThe site table had more than one site at a single genomic position.\n*/\n#define TSK_ERR_DUPLICATE_SITE_POSITION                             -401\n/**\nA site had a position that was less than zero or greater than the sequence\nlength.\n*/\n#define TSK_ERR_BAD_SITE_POSITION                                   -402\n/** @} */\n\n/**\n@defgroup MUTATION_ERROR_GROUP Mutation errors.\n@{\n*/\n/**\nA mutation had a parent mutation that was at a different site.\n*/\n#define TSK_ERR_MUTATION_PARENT_DIFFERENT_SITE                      -500\n/**\nA mutation had a parent mutation that was itself.\n*/\n#define TSK_ERR_MUTATION_PARENT_EQUAL                               -501\n/**\nA mutation had a parent mutation that had a greater id.\n*/\n#define TSK_ERR_MUTATION_PARENT_AFTER_CHILD      
                   -502\n/**\nTwo or more mutation parent references formed a loop\n*/\n#define TSK_ERR_MUTATION_PARENT_INCONSISTENT                        -503\n/**\nThe mutation table was not in the order of non-decreasing site id and\nnon-increasing time within each site.\n*/\n#define TSK_ERR_UNSORTED_MUTATIONS                                  -504\n/* 505 was the now unused TSK_ERR_NON_SINGLE_CHAR_MUTATION */\n/**\nA mutation's time was younger (not >=) the time of its node\nand wasn't TSK_UNKNOWN_TIME.\n*/\n#define TSK_ERR_MUTATION_TIME_YOUNGER_THAN_NODE                     -506\n/**\nA mutation's time was older (not <=) than the time of its parent\nmutation, and wasn't TSK_UNKNOWN_TIME.\n*/\n#define TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_MUTATION            -507\n/**\nA mutation's time was older (not <) than the time of the parent node of\nthe edge on which it occurs, and wasn't TSK_UNKNOWN_TIME.\n*/\n#define TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_NODE                -508\n/**\nA single site had a mixture of known mutation times and TSK_UNKNOWN_TIME\n*/\n#define TSK_ERR_MUTATION_TIME_HAS_BOTH_KNOWN_AND_UNKNOWN            -509\n/**\nSome mutations have TSK_UNKNOWN_TIME in an algorithm where that's\ndisallowed (use compute_mutation_times?).\n*/\n#define TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME                    -510\n\n/**\nA mutation's parent was not consistent with the topology of the tree.\n */\n#define TSK_ERR_BAD_MUTATION_PARENT                                 -511\n\n/** @} */\n\n/**\n@defgroup MIGRATION_ERROR_GROUP Migration errors.\n@{\n*/\n/**\nThe migration table was not sorted by time.\n*/\n#define TSK_ERR_UNSORTED_MIGRATIONS                                 -550\n/** @} */\n\n/**\n@defgroup SAMPLE_ERROR_GROUP Sample errors.\n@{\n*/\n/**\nA duplicate sample was specified.\n*/\n#define TSK_ERR_DUPLICATE_SAMPLE                                    -600\n/**\nA sample id that was not valid was specified.\n*/\n#define TSK_ERR_BAD_SAMPLES                  
                       -601\n/** @} */\n\n/**\n@defgroup TABLE_ERROR_GROUP Table errors.\n@{\n*/\n/**\nAn invalid table position was specifed.\n*/\n#define TSK_ERR_BAD_TABLE_POSITION                                  -700\n/**\nA sequence length equal to or less than zero was specified.\n*/\n#define TSK_ERR_BAD_SEQUENCE_LENGTH                                 -701\n/**\nThe table collection was not indexed.\n*/\n#define TSK_ERR_TABLES_NOT_INDEXED                                  -702\n/**\nTables cannot be larger than 2**31 rows.\n*/\n#define TSK_ERR_TABLE_OVERFLOW                                      -703\n/**\nRagged array columns cannot be larger than 2**64 bytes.\n*/\n#define TSK_ERR_COLUMN_OVERFLOW                                     -704\n/**\nThe table collection contains more than 2**31 trees.\n*/\n#define TSK_ERR_TREE_OVERFLOW                                       -705\n/**\nMetadata was attempted to be set on a table where it is disabled.\n*/\n#define TSK_ERR_METADATA_DISABLED                                   -706\n/**\nThere was an error with the table's indexes.\n*/\n#define TSK_ERR_TABLES_BAD_INDEXES                                  -707\n/** @} */\n\n/**\n@defgroup LIMITATION_ERROR_GROUP Limitation errors.\n@{\n*/\n/**\nAn operation was attempted that only supports infinite sites, i.e.\nat most a single mutation per site.\n*/\n#define TSK_ERR_ONLY_INFINITE_SITES                                 -800\n/**\nSimplification was attempted with migrations present, which are not\nsupported.\n*/\n#define TSK_ERR_SIMPLIFY_MIGRATIONS_NOT_SUPPORTED                   -801\n/**\nSorting was attempted on migrations, which is not supported.\n*/\n#define TSK_ERR_SORT_MIGRATIONS_NOT_SUPPORTED                       -802\n/**\nAn invalid sort offset was specified, for sites and mutations this must\nbe either 0 or the table length.\n*/\n#define TSK_ERR_SORT_OFFSET_NOT_SUPPORTED                           -803\n/**\nAn operation was attempted that only supports binary 
mutations.\n*/\n#define TSK_ERR_NONBINARY_MUTATIONS_UNSUPPORTED                     -804\n/**\nAn operation was attempted that doesn't support migrations, with a\nnon-empty migration table.\n*/\n#define TSK_ERR_MIGRATIONS_NOT_SUPPORTED                            -805\n/**\nA table attempted to extend from itself.\n*/\n#define TSK_ERR_CANNOT_EXTEND_FROM_SELF                             -806\n/**\nAn operation was attempted that doesn't support silent mutations, i.e.\na mutation that doesn't change the allelic state.\n*/\n#define TSK_ERR_SILENT_MUTATIONS_NOT_SUPPORTED                      -807\n/**\nA copy of a variant cannot be decoded.\n*/\n#define TSK_ERR_VARIANT_CANT_DECODE_COPY                            -808\n/**\nA tree sequence cannot take ownership of a table collection where\nTSK_NO_EDGE_METADATA.\n*/\n#define TSK_ERR_CANT_TAKE_OWNERSHIP_NO_EDGE_METADATA                -809\n/**\nOperation is undefined for nonbinary trees\n*/\n#define TSK_ERR_UNDEFINED_NONBINARY                                 -810\n/**\nOperation is undefined for trees with multiple roots.\n*/\n#define TSK_ERR_UNDEFINED_MULTIROOT                                 -811\n\n/** @} */\n\n/**\n@defgroup STATS_ERROR_GROUP Stats errors.\n@{\n*/\n/**\nZero windows were specified, at least one window must be specified.\n*/\n#define TSK_ERR_BAD_NUM_WINDOWS                                     -900\n/**\nThe window specification was not an increasing list of positions between\n0 and the sequence length.\n*/\n#define TSK_ERR_BAD_WINDOWS                                         -901\n/**\nMore than one stat mode was specified.\n*/\n#define TSK_ERR_MULTIPLE_STAT_MODES                                 -902\n/**\nThe state dimension was not >=1.\n*/\n#define TSK_ERR_BAD_STATE_DIMS                                      -903\n/**\nThe result dimension was not >=1.\n*/\n#define TSK_ERR_BAD_RESULT_DIMS                                     -904\n/**\nInsufficient sample sets were provided.\n*/\n#define 
TSK_ERR_INSUFFICIENT_SAMPLE_SETS                            -905\n/**\nInsufficient sample set index tuples were provided.\n*/\n#define TSK_ERR_INSUFFICIENT_INDEX_TUPLES                           -906\n/**\nThe sample set index was out of bounds.\n*/\n#define TSK_ERR_BAD_SAMPLE_SET_INDEX                                -907\n/**\nThe sample set index was empty.\n*/\n#define TSK_ERR_EMPTY_SAMPLE_SET                                    -908\n/**\nA stat mode was attempted that is not supported by the operation.\n*/\n#define TSK_ERR_UNSUPPORTED_STAT_MODE                               -909\n/**\nStatistics based on branch lengths were attempted when the ``time_units``\nwere ``uncalibrated``.\n*/\n#define TSK_ERR_TIME_UNCALIBRATED                                   -910\n/**\nThe TSK_STAT_POLARISED option was passed to a statistic that does\nnot support it.\n*/\n#define TSK_ERR_STAT_POLARISED_UNSUPPORTED                          -911\n/**\nThe TSK_STAT_SPAN_NORMALISE option was passed to a statistic that does\nnot support it.\n*/\n#define TSK_ERR_STAT_SPAN_NORMALISE_UNSUPPORTED                     -912\n/**\nInsufficient weights were provided.\n*/\n#define TSK_ERR_INSUFFICIENT_WEIGHTS                                -913\n/**\nThe node bin map contains a value less than TSK_NULL.\n*/\n#define TSK_ERR_BAD_NODE_BIN_MAP                                    -914\n/**\nMaximum index in node bin map is greater than output dimension.\n*/\n#define TSK_ERR_BAD_NODE_BIN_MAP_DIM                                -915\n/**\nThe vector of quantiles is out of bounds or in nonascending order.\n*/\n#define TSK_ERR_BAD_QUANTILES                                       -916\n/**\nTimes are not in ascending order\n*/\n#define TSK_ERR_UNSORTED_TIMES                                      -917\n/*\nThe provided positions are not provided in strictly increasing order\n*/\n#define TSK_ERR_STAT_UNSORTED_POSITIONS                             -918\n/**\nThe provided positions are not unique\n*/\n#define 
TSK_ERR_STAT_DUPLICATE_POSITIONS                            -919\n/**\nThe provided sites are not provided in strictly increasing position order\n*/\n#define TSK_ERR_STAT_UNSORTED_SITES                                 -920\n/**\nThe provided sites are not unique\n*/\n#define TSK_ERR_STAT_DUPLICATE_SITES                                -921\n/**\nThe number of time windows is zero\n*/\n#define TSK_ERR_BAD_TIME_WINDOWS_DIM                                -922\n/**\nSample times do not all equal the start of the first time window\n*/\n#define TSK_ERR_BAD_SAMPLE_PAIR_TIMES                               -923\n/**\nTime windows are not strictly increasing\n*/\n#define TSK_ERR_BAD_TIME_WINDOWS                                    -924\n/**\nTime windows do not end at infinity\n*/\n#define TSK_ERR_BAD_TIME_WINDOWS_END                                -925\n/**\nNode time does not fall within assigned time window\n*/\n#define TSK_ERR_BAD_NODE_TIME_WINDOW                                -926\n/** @} */\n\n/**\n@defgroup MAPPING_ERROR_GROUP Mutation mapping errors.\n@{\n*/\n/**\nOnly missing genotypes were specified, at least one non-missing is\nrequired.\n*/\n#define TSK_ERR_GENOTYPES_ALL_MISSING                              -1000\n/**\nA genotype value was greater than the maximum allowed (64) or less\nthan TSK_MISSING_DATA (-1).\n*/\n#define TSK_ERR_BAD_GENOTYPE                                       -1001\n/**\nAn ancestral genotype value was greater than the maximum allowed (64) or less\nthan 0.\n*/\n#define TSK_ERR_BAD_ANCESTRAL_STATE                                -1002\n/** @} */\n\n/**\n@defgroup GENOTYPE_ERROR_GROUP Genotype decoding errors.\n@{\n*/\n/**\nGenotypes were requested for non-samples at the same time\nas asking that isolated nodes be marked as missing. 
This is not\nsupported.\n*/\n#define TSK_ERR_MUST_IMPUTE_NON_SAMPLES                            -1100\n/**\nA user-specified allele map was used, but didn't contain an allele\nfound in the tree sequence.\n*/\n#define TSK_ERR_ALLELE_NOT_FOUND                                   -1101\n/**\nMore than 2147483647 alleles were specified.\n*/\n#define TSK_ERR_TOO_MANY_ALLELES                                   -1102\n/**\nA user-specified allele map was used, but it contained zero alleles.\n*/\n#define TSK_ERR_ZERO_ALLELES                                       -1103\n/**\nAn allele used when decoding alignments had length other than one.\n*/\n#define TSK_ERR_BAD_ALLELE_LENGTH                                  -1104\n/**\nAn allele used when decoding alignments matched the missing data character.\n*/\n#define TSK_ERR_MISSING_CHAR_COLLISION                             -1105\n/** @} */\n\n/**\n@defgroup DISTANCE_ERROR_GROUP Distance metric errors.\n@{\n*/\n/**\nTrees with different numbers of samples were specified.\n*/\n#define TSK_ERR_SAMPLE_SIZE_MISMATCH                               -1200\n/**\nTrees with nonidentical samples were specified.\n*/\n#define TSK_ERR_SAMPLES_NOT_EQUAL                                  -1201\n/**\nA tree with multiple roots was specified.\n*/\n#define TSK_ERR_MULTIPLE_ROOTS                                     -1202\n/**\nA tree with unary nodes was specified.\n*/\n#define TSK_ERR_UNARY_NODES                                        -1203\n/**\nTrees were specifed that had unequal sequence lengths.\n*/\n#define TSK_ERR_SEQUENCE_LENGTH_MISMATCH                           -1204\n/**\nA tree was specifed that did not have the sample lists option\nenabled (TSK_SAMPLE_LISTS).\n*/\n#define TSK_ERR_NO_SAMPLE_LISTS                                    -1205\n/** @} */\n\n/**\n@defgroup HAPLOTYPE_ERROR_GROUP Haplotype matching errors.\n@{\n*/\n/**\nThe Viterbi matrix has not filled (it has zero transitions).\n*/\n#define TSK_ERR_NULL_VITERBI_MATRIX               
                 -1300\n/**\nThere was no matching haplotype.\n*/\n#define TSK_ERR_MATCH_IMPOSSIBLE                                   -1301\n/**\nThe compressed matrix has a node that has no samples in it's descendants.\n*/\n#define TSK_ERR_BAD_COMPRESSED_MATRIX_NODE                         -1302\n/**\nThere are too many values to compress.\n*/\n#define TSK_ERR_TOO_MANY_VALUES                                    -1303\n/** @} */\n\n/**\n@defgroup UNION_ERROR_GROUP Union errors.\n@{\n*/\n/**\nA node map was specified that contained a node not present in the\nspecified table collection.\n*/\n#define TSK_ERR_UNION_BAD_MAP                                      -1400\n/**\nThe shared portions of the specified tree sequences are not equal.\nNote that this may be the case if the table collections were not\nfully sorted before union was called.\n*/\n#define TSK_ERR_UNION_DIFF_HISTORIES                               -1401\n/** @} */\n\n/**\n@defgroup IBD_ERROR_GROUP IBD errors.\n@{\n*/\n/**\nBoth nodes in a sample pair are the same node.\n*/\n#define TSK_ERR_SAME_NODES_IN_PAIR                                 -1500\n/**\nPer-pair statistics were requested without TSK_IBD_STORE_PAIRS being\nspecified.\n*/\n#define TSK_ERR_IBD_PAIRS_NOT_STORED                               -1501\n/**\nSegments were requested without TSK_IBD_STORE_SEGMENTS being specified.\n*/\n#define TSK_ERR_IBD_SEGMENTS_NOT_STORED                            -1502\n/** @} */\n\n/**\n@defgroup SIMPLIFY_ERROR_GROUP Simplify errors.\n@{\n*/\n/**\nBoth TSK_SIMPLIFY_KEEP_UNARY and TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS\nwere specified. 
Only one can be used.\n*/\n#define TSK_ERR_KEEP_UNARY_MUTUALLY_EXCLUSIVE                      -1600\n/** @} */\n\n/**\n@defgroup INDIVIDUAL_ERROR_GROUP Individual errors.\n@{\n*/\n/**\nIndividuals were provided in an order where parents were after their\nchildren.\n*/\n#define TSK_ERR_UNSORTED_INDIVIDUALS                               -1700\n/**\nAn individual was its own parent.\n*/\n#define TSK_ERR_INDIVIDUAL_SELF_PARENT                             -1701\n/**\nAn individual was its own ancestor in a cycle of references.\n*/\n#define TSK_ERR_INDIVIDUAL_PARENT_CYCLE                            -1702\n/**\nAn individual had nodes from more than one population\n(and only one was requested).\n*/\n#define TSK_ERR_INDIVIDUAL_POPULATION_MISMATCH                     -1703\n/**\nAn individual had nodes from more than one time\n(and only one was requested).\n*/\n#define TSK_ERR_INDIVIDUAL_TIME_MISMATCH                           -1704\n/** @} */\n\n/**\n@defgroup EXTEND_EDGES_ERROR_GROUP Extend edges errors.\n@{\n*/\n/**\nMaximum iteration number (max_iter) must be positive.\n*/\n#define TSK_ERR_EXTEND_EDGES_BAD_MAXITER                          -1800\n/** @} */\n// clang-format on\n\n/* This bit is 0 for any errors originating from kastore */\n#define TSK_KAS_ERR_BIT 14\n\nint tsk_set_kas_error(int err);\nbool tsk_is_kas_error(int err);\nint tsk_get_kas_error(int err);\n\n/**\n@brief Return a description of the specified error.\n\nThe memory for the returned string is handled by the library and should\nnot be freed by client code.\n\n@param err A tskit error code.\n@return A description of the error.\n*/\nconst char *tsk_strerror(int err);\n\n/* Redefine this macro in downstream builds if stdout is not the\n * appropriate stream to emit debug information when the TSK_DEBUG\n * flag is passed to supporting functions (e.g. 
in R).\n */\n#define TSK_DEFAULT_DEBUG_STREAM stdout\n\n#ifdef TSK_TRACE_ERRORS\n\nstatic inline int\n_tsk_trace_error(int err, int line, const char *file)\n{\n    fprintf(stderr, \"tskit-trace-error: %d='%s' at line %d in %s\\n\", err,\n        tsk_strerror(err), line, file);\n    return err;\n}\n\n/*\nDeveloper note: this macro may be redefined as part of compilation for\nan R package, and should be treated as part of the documented API\n(no changes).\n*/\n#define tsk_trace_error(err) (_tsk_trace_error(err, __LINE__, __FILE__))\n#else\n#define tsk_trace_error(err) (err)\n#endif\n\n#ifndef TSK_BUG_ASSERT_MESSAGE\n#define TSK_BUG_ASSERT_MESSAGE                                                          \\\n    \"If you are using tskit directly please open an issue on\"                           \\\n    \" GitHub, ideally with a reproducible example.\"                                     \\\n    \" (https://github.com/tskit-dev/tskit/issues) If you are\"                           \\\n    \" using software that uses tskit, please report an issue\"                           \\\n    \" to that software's issue tracker, at least initially.\"\n#endif\n\n/**\nWe often wish to assert a condition that is unexpected, but using the normal `assert`\nmeans compiling without NDEBUG. This macro still asserts when NDEBUG is defined.\nIf you are using this macro in your own software then please set TSK_BUG_ASSERT_MESSAGE\nto point users to your issue tracker.\n*/\n/*\nDeveloper note: this macro may redefined as part of compilation for\nan R package, and should be treated as part of the documented API\n(no changes).\n*/\n#define tsk_bug_assert(condition)                                                       \\\n    do {                                                                                \\\n        if (!(condition)) {                                                             \\\n            fprintf(stderr, \"Bug detected in %s at line %d. 
%s\\n\", __FILE__, __LINE__,  \\\n                TSK_BUG_ASSERT_MESSAGE);                                                \\\n            abort();                                                                    \\\n        }                                                                               \\\n    } while (0)\n\nvoid __tsk_safe_free(void **ptr);\n#define tsk_safe_free(pointer) __tsk_safe_free((void **) &(pointer))\n\n#define TSK_MAX(a, b) ((a) > (b) ? (a) : (b))\n#define TSK_MIN(a, b) ((a) < (b) ? (a) : (b))\n\n/* This is a simple allocator that is optimised to efficiently allocate a\n * large number of small objects without large numbers of calls to malloc.\n * The allocator mallocs memory in chunks of a configurable size. When\n * responding to calls to get(), it will return a chunk of this memory.\n * This memory cannot be subsequently handed back to the allocator. However,\n * all memory allocated by the allocator can be returned at once by calling\n * reset.\n */\n\ntypedef struct {\n    size_t chunk_size; /* number of bytes per chunk */\n    size_t top;        /* the offset of the next available byte in the current chunk */\n    size_t current_chunk;   /* the index of the chunk currently being used */\n    size_t total_size;      /* the total number of bytes allocated + overhead. */\n    size_t total_allocated; /* the total number of bytes allocated. */\n    size_t num_chunks;      /* the number of memory chunks. 
*/\n    char **mem_chunks;      /* the memory chunks */\n} tsk_blkalloc_t;\n\nextern void tsk_blkalloc_print_state(tsk_blkalloc_t *self, FILE *out);\nextern int tsk_blkalloc_reset(tsk_blkalloc_t *self);\nextern int tsk_blkalloc_init(tsk_blkalloc_t *self, size_t chunk_size);\nextern void *tsk_blkalloc_get(tsk_blkalloc_t *self, size_t size);\nextern void tsk_blkalloc_free(tsk_blkalloc_t *self);\n\ntypedef struct _tsk_avl_node_int_t {\n    int64_t key;\n    void *value;\n    struct _tsk_avl_node_int_t *llink;\n    struct _tsk_avl_node_int_t *rlink;\n    /* This can only contain -1, 0, 1. We could set it to a smaller type,\n     * but there's no point because of struct padding and alignment so\n     * it's simplest to keep it as a plain int. */\n    int balance;\n} tsk_avl_node_int_t;\n\ntypedef struct {\n    tsk_avl_node_int_t head;\n    tsk_size_t size;\n    tsk_size_t height;\n} tsk_avl_tree_int_t;\n\nint tsk_avl_tree_int_init(tsk_avl_tree_int_t *self);\nint tsk_avl_tree_int_free(tsk_avl_tree_int_t *self);\nvoid tsk_avl_tree_int_print_state(tsk_avl_tree_int_t *self, FILE *out);\nint tsk_avl_tree_int_insert(tsk_avl_tree_int_t *self, tsk_avl_node_int_t *node);\ntsk_avl_node_int_t *tsk_avl_tree_int_search(const tsk_avl_tree_int_t *self, int64_t key);\nint tsk_avl_tree_int_ordered_nodes(\n    const tsk_avl_tree_int_t *self, tsk_avl_node_int_t **out);\ntsk_avl_node_int_t *tsk_avl_tree_int_get_root(const tsk_avl_tree_int_t *self);\n\ntsk_size_t tsk_search_sorted(const double *array, tsk_size_t size, double value);\n\ndouble tsk_round(double x, unsigned int ndigits);\n\n/**\n@brief Check if a number is ``TSK_UNKNOWN_TIME``\n\n@rst\nUnknown time values in tskit are represented by a particular NaN value. Since NaN values\nare not equal to each other by definition, a simple comparison like\n``mutation.time == TSK_UNKNOWN_TIME`` will fail even if the mutation's time is\nTSK_UNKNOWN_TIME. 
This function compares the underlying bit representation of a double\nvalue and returns true iff it is equal to the specific NaN value\n:c:macro:`TSK_UNKNOWN_TIME`.\n@endrst\n\n@param val The number to check\n@return true if the number is ``TSK_UNKNOWN_TIME`` else false\n*/\nbool tsk_is_unknown_time(double val);\n\n/* We define local versions of isnan and isfinite to work around some portability\n * issues. */\nbool tsk_isnan(double val);\nbool tsk_isfinite(double val);\n\n#define TSK_UUID_SIZE 36\nint tsk_generate_uuid(char *dest, int flags);\n\n/* TODO most of these can probably be macros so they compile out as no-ops.\n * Let's do the 64 bit tsk_size_t switch first though. */\nvoid *tsk_malloc(tsk_size_t size);\nvoid *tsk_realloc(void *ptr, tsk_size_t size);\nvoid *tsk_calloc(tsk_size_t n, size_t size);\nvoid *tsk_memset(void *ptr, int fill, tsk_size_t size);\nvoid *tsk_memcpy(void *dest, const void *src, tsk_size_t size);\nvoid *tsk_memmove(void *dest, const void *src, tsk_size_t size);\nint tsk_memcmp(const void *s1, const void *s2, tsk_size_t size);\n\n/* Developer debug utilities. 
These are **not** threadsafe */\nvoid tsk_set_debug_stream(FILE *f);\nFILE *tsk_get_debug_stream(void);\n\n/* Bit Array functionality */\n\n// define a 32-bit chunk size for our bitsets.\n// this means we'll be able to hold 32 distinct items in each 32 bit uint\n#define TSK_BITSET_BITS ((tsk_size_t) 32)\ntypedef uint32_t tsk_bitset_val_t;\n\ntypedef struct {\n    tsk_size_t row_len; // Number of size TSK_BITSET_BITS chunks per row\n    tsk_size_t len;     // Number of rows\n    tsk_bitset_val_t *data;\n} tsk_bitset_t;\n\nint tsk_bitset_init(tsk_bitset_t *self, tsk_size_t num_bits, tsk_size_t length);\nvoid tsk_bitset_free(tsk_bitset_t *self);\nvoid tsk_bitset_intersect(const tsk_bitset_t *self, tsk_size_t self_row,\n    const tsk_bitset_t *other, tsk_size_t other_row, tsk_bitset_t *out);\nvoid tsk_bitset_subtract(tsk_bitset_t *self, tsk_size_t self_row,\n    const tsk_bitset_t *other, tsk_size_t other_row);\nvoid tsk_bitset_union(tsk_bitset_t *self, tsk_size_t self_row, const tsk_bitset_t *other,\n    tsk_size_t other_row);\nvoid tsk_bitset_set_bit(tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit);\nbool tsk_bitset_contains(\n    const tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit);\ntsk_size_t tsk_bitset_count(const tsk_bitset_t *self, tsk_size_t row);\nvoid tsk_bitset_get_items(\n    const tsk_bitset_t *self, tsk_size_t row, tsk_id_t *items, tsk_size_t *n_items);\n\n#ifdef __cplusplus\n}\n#endif\n\n#endif\n"
  },
  {
    "path": "c/tskit/genotypes.c",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2025 Tskit Developers\n * Copyright (c) 2016-2018 University of Oxford\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n#include <stdio.h>\n#include <string.h>\n#include <stdbool.h>\n#include <stdlib.h>\n#include <math.h>\n\n#include <tskit/genotypes.h>\n\n/* ======================================================== *\n * Variant generator\n * ======================================================== */\n\nvoid\ntsk_variant_print_state(const tsk_variant_t *self, FILE *out)\n{\n    tsk_size_t j;\n\n    fprintf(out, \"tsk_variant state\\n\");\n    fprintf(out, \"user_alleles = %lld\\n\", (long long) self->user_alleles);\n    fprintf(out, \"num_alleles = %lld\\n\", (long long) self->num_alleles);\n    for (j = 0; j < self->num_alleles; j++) {\n        fprintf(out, \"\\tlen = %lld, '%.*s'\\n\", (long long) self->allele_lengths[j],\n            (int) self->allele_lengths[j], 
self->alleles[j]);\n    }\n    fprintf(out, \"num_samples = %lld\\n\", (long long) self->num_samples);\n}\n\nvoid\ntsk_vargen_print_state(const tsk_vargen_t *self, FILE *out)\n{\n    tsk_variant_print_state(&self->variant, out);\n}\n\n/* Copy the fixed allele mapping specified by the user into local\n * memory. */\nstatic int\ntsk_variant_copy_alleles(tsk_variant_t *self, const char **alleles)\n{\n    int ret = 0;\n    tsk_size_t j;\n    size_t total_len, allele_len, offset;\n\n    self->num_alleles = self->max_alleles;\n\n    total_len = 0;\n    for (j = 0; j < self->num_alleles; j++) {\n        allele_len = strlen(alleles[j]);\n        self->allele_lengths[j] = (tsk_size_t) allele_len;\n        total_len += allele_len;\n    }\n    self->user_alleles_mem = tsk_malloc(total_len * sizeof(char *));\n    if (self->user_alleles_mem == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    offset = 0;\n    for (j = 0; j < self->num_alleles; j++) {\n        strcpy(self->user_alleles_mem + offset, alleles[j]);\n        self->alleles[j] = self->user_alleles_mem + offset;\n        offset += (size_t) self->allele_lengths[j];\n    }\nout:\n    return ret;\n}\n\nstatic int\nvariant_init_samples_and_index_map(tsk_variant_t *self,\n    const tsk_treeseq_t *tree_sequence, const tsk_id_t *samples, tsk_size_t num_samples,\n    size_t num_samples_alloc, tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = 0;\n    tsk_size_t j, num_nodes;\n    tsk_id_t u;\n\n    num_nodes = tsk_treeseq_get_num_nodes(tree_sequence);\n    self->alt_samples = tsk_malloc(num_samples_alloc * sizeof(*samples));\n    self->alt_sample_index_map\n        = tsk_malloc(num_nodes * sizeof(*self->alt_sample_index_map));\n    if (self->alt_samples == NULL || self->alt_sample_index_map == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    tsk_memcpy(self->alt_samples, samples, num_samples * sizeof(*samples));\n    
tsk_memset(self->alt_sample_index_map, 0xff,\n        num_nodes * sizeof(*self->alt_sample_index_map));\n    /* Create the reverse mapping */\n    for (j = 0; j < num_samples; j++) {\n        u = samples[j];\n        if (u < 0 || u >= (tsk_id_t) num_nodes) {\n            ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);\n            goto out;\n        }\n        if (self->alt_sample_index_map[u] != TSK_NULL) {\n            ret = tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE);\n            goto out;\n        }\n        self->alt_sample_index_map[samples[j]] = (tsk_id_t) j;\n    }\nout:\n    return ret;\n}\n\nint\ntsk_variant_init(tsk_variant_t *self, const tsk_treeseq_t *tree_sequence,\n    const tsk_id_t *samples, tsk_size_t num_samples, const char **alleles,\n    tsk_flags_t options)\n{\n    int ret = 0;\n    tsk_size_t max_alleles_limit, max_alleles;\n    tsk_size_t num_samples_alloc;\n\n    tsk_memset(self, 0, sizeof(tsk_variant_t));\n\n    /* Set site id to NULL to indicate the variant is not decoded */\n    self->site.id = TSK_NULL;\n\n    self->tree_sequence = tree_sequence;\n    ret = tsk_tree_init(\n        &self->tree, tree_sequence, samples == NULL ? TSK_SAMPLE_LISTS : 0);\n    if (ret != 0) {\n        goto out;\n    }\n\n    if (samples != NULL) {\n        /* Take a copy of the samples so we don't have to manage the lifecycle*/\n        self->samples = tsk_malloc(num_samples * sizeof(*samples));\n        if (self->samples == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n        tsk_memcpy(self->samples, samples, num_samples * sizeof(*samples));\n        self->num_samples = num_samples;\n    }\n\n    self->options = options;\n\n    max_alleles_limit = INT32_MAX;\n\n    if (alleles == NULL) {\n        self->user_alleles = false;\n        max_alleles = 4; /* Arbitrary --- we'll rarely have more than this */\n    } else {\n        self->user_alleles = true;\n        /* Count the input alleles. 
The end is designated by the NULL sentinel. */\n        for (max_alleles = 0; alleles[max_alleles] != NULL; max_alleles++)\n            ;\n        if (max_alleles > max_alleles_limit) {\n            ret = tsk_trace_error(TSK_ERR_TOO_MANY_ALLELES);\n            goto out;\n        }\n        if (max_alleles == 0) {\n            ret = tsk_trace_error(TSK_ERR_ZERO_ALLELES);\n            goto out;\n        }\n    }\n    self->max_alleles = max_alleles;\n    self->alleles = tsk_calloc(max_alleles, sizeof(*self->alleles));\n    self->allele_lengths = tsk_malloc(max_alleles * sizeof(*self->allele_lengths));\n    if (self->alleles == NULL || self->allele_lengths == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    if (self->user_alleles) {\n        ret = tsk_variant_copy_alleles(self, alleles);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    if (self->samples == NULL) {\n        self->num_samples = tsk_treeseq_get_num_samples(tree_sequence);\n        self->samples = tsk_malloc(self->num_samples * sizeof(*self->samples));\n        if (self->samples == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n        tsk_memcpy(self->samples, tsk_treeseq_get_samples(tree_sequence),\n            self->num_samples * sizeof(*self->samples));\n\n        self->sample_index_map = tsk_treeseq_get_sample_index_map(tree_sequence);\n        num_samples_alloc = self->num_samples;\n    } else {\n        num_samples_alloc = self->num_samples;\n        ret = variant_init_samples_and_index_map(self, tree_sequence, self->samples,\n            self->num_samples, (size_t) num_samples_alloc, self->options);\n        if (ret != 0) {\n            goto out;\n        }\n        self->sample_index_map = self->alt_sample_index_map;\n    }\n    /* When a list of samples is given, we use the traversal based algorithm\n     * which doesn't use sample list tracking in the tree */\n    if 
(self->alt_samples != NULL) {\n        self->traversal_stack = tsk_malloc(\n            tsk_treeseq_get_num_nodes(tree_sequence) * sizeof(*self->traversal_stack));\n        if (self->traversal_stack == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n    }\n\n    self->genotypes = tsk_malloc(num_samples_alloc * sizeof(*self->genotypes));\n    if (self->genotypes == NULL || self->alleles == NULL\n        || self->allele_lengths == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\nout:\n    return ret;\n}\n\nint\ntsk_vargen_init(tsk_vargen_t *self, const tsk_treeseq_t *tree_sequence,\n    const tsk_id_t *samples, tsk_size_t num_samples, const char **alleles,\n    tsk_flags_t options)\n{\n    int ret = 0;\n\n    tsk_bug_assert(tree_sequence != NULL);\n    tsk_memset(self, 0, sizeof(tsk_vargen_t));\n\n    self->tree_sequence = tree_sequence;\n    ret = tsk_variant_init(\n        &self->variant, tree_sequence, samples, num_samples, alleles, options);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nint\ntsk_variant_free(tsk_variant_t *self)\n{\n    if (self->tree_sequence != NULL) {\n        tsk_tree_free(&self->tree);\n    }\n    tsk_safe_free(self->genotypes);\n    tsk_safe_free(self->alleles);\n    tsk_safe_free(self->allele_lengths);\n    tsk_safe_free(self->user_alleles_mem);\n    tsk_safe_free(self->samples);\n    tsk_safe_free(self->alt_samples);\n    tsk_safe_free(self->alt_sample_index_map);\n    tsk_safe_free(self->traversal_stack);\n    return 0;\n}\n\nint\ntsk_vargen_free(tsk_vargen_t *self)\n{\n    tsk_variant_free(&self->variant);\n    return 0;\n}\n\nstatic int\ntsk_variant_expand_alleles(tsk_variant_t *self)\n{\n    int ret = 0;\n    void *p;\n    tsk_size_t hard_limit = INT32_MAX;\n\n    if (self->max_alleles == hard_limit) {\n        ret = tsk_trace_error(TSK_ERR_TOO_MANY_ALLELES);\n        goto out;\n    }\n    
self->max_alleles = TSK_MIN(hard_limit, self->max_alleles * 2);\n    p = tsk_realloc(self->alleles, self->max_alleles * sizeof(*self->alleles));\n    if (p == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    self->alleles = p;\n    p = tsk_realloc(\n        self->allele_lengths, self->max_alleles * sizeof(*self->allele_lengths));\n    if (p == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    self->allele_lengths = p;\nout:\n    return ret;\n}\n\n/* The following function sets the (32 bit) genotypes of all the samples under a\n * node by walking the sample lists of the tree. It is kept as a tight loop for\n * performance reasons as this is a key function and for common alleles can entail\n * iterating over millions of samples. The compiler hints are included for the\n * same reason.\n */\nstatic int TSK_WARN_UNUSED\ntsk_variant_update_genotypes_sample_list(\n    tsk_variant_t *self, tsk_id_t node, tsk_id_t derived)\n{\n    int32_t *restrict genotypes = self->genotypes;\n    const tsk_id_t *restrict list_left = self->tree.left_sample;\n    const tsk_id_t *restrict list_right = self->tree.right_sample;\n    const tsk_id_t *restrict list_next = self->tree.next_sample;\n    tsk_id_t index, stop;\n    int ret = 0;\n\n    tsk_bug_assert(derived < INT32_MAX);\n\n    index = list_left[node];\n    if (index != TSK_NULL) {\n        stop = list_right[node];\n        while (true) {\n\n            ret += genotypes[index] == TSK_MISSING_DATA;\n            genotypes[index] = (int32_t) derived;\n            if (index == stop) {\n                break;\n            }\n            index = list_next[index];\n        }\n    }\n\n    return ret;\n}\n\n/* The following functions implement the genotype setting by traversing\n * down the tree to the samples. 
We're not so worried about performance here\n * because this should only be used when we have a very small number of samples,\n * and so we use a visit function to avoid duplicating code.\n */\n\ntypedef int (*visit_func_t)(tsk_variant_t *, tsk_id_t, tsk_id_t);\n\nstatic int TSK_WARN_UNUSED\ntsk_variant_traverse(\n    tsk_variant_t *self, tsk_id_t node, tsk_id_t derived, visit_func_t visit)\n{\n    int ret = 0;\n    tsk_id_t *restrict stack = self->traversal_stack;\n    const tsk_id_t *restrict left_child = self->tree.left_child;\n    const tsk_id_t *restrict right_sib = self->tree.right_sib;\n    const tsk_id_t *restrict sample_index_map = self->sample_index_map;\n    tsk_id_t u, v, sample_index;\n    int stack_top;\n    int no_longer_missing = 0;\n\n    stack_top = 0;\n    stack[0] = node;\n    while (stack_top >= 0) {\n        u = stack[stack_top];\n        sample_index = sample_index_map[u];\n        if (sample_index != TSK_NULL) {\n            ret = visit(self, sample_index, derived);\n            if (ret < 0) {\n                goto out;\n            }\n            no_longer_missing += ret;\n        }\n        stack_top--;\n        for (v = left_child[u]; v != TSK_NULL; v = right_sib[v]) {\n            stack_top++;\n            stack[stack_top] = v;\n        }\n    }\n    ret = no_longer_missing;\nout:\n    return ret;\n}\n\nstatic int\ntsk_variant_visit(tsk_variant_t *self, tsk_id_t sample_index, tsk_id_t derived)\n{\n    int ret = 0;\n    int32_t *restrict genotypes = self->genotypes;\n\n    tsk_bug_assert(derived < INT32_MAX);\n    tsk_bug_assert(sample_index != -1);\n\n    ret = genotypes[sample_index] == TSK_MISSING_DATA;\n    genotypes[sample_index] = (int32_t) derived;\n\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_variant_update_genotypes_traversal(\n    tsk_variant_t *self, tsk_id_t node, tsk_id_t derived)\n{\n    return tsk_variant_traverse(self, node, derived, tsk_variant_visit);\n}\n\nstatic 
tsk_size_t\ntsk_variant_mark_missing(tsk_variant_t *self)\n{\n    tsk_size_t num_missing = 0;\n    const tsk_id_t *restrict left_child = self->tree.left_child;\n    const tsk_id_t *restrict right_sib = self->tree.right_sib;\n    const tsk_id_t *restrict sample_index_map = self->sample_index_map;\n    const tsk_id_t N = self->tree.virtual_root;\n    int32_t *restrict genotypes = self->genotypes;\n    tsk_id_t root, sample_index;\n\n    for (root = left_child[N]; root != TSK_NULL; root = right_sib[root]) {\n        if (left_child[root] == TSK_NULL) {\n            sample_index = sample_index_map[root];\n            if (sample_index != TSK_NULL) {\n                genotypes[sample_index] = TSK_MISSING_DATA;\n                num_missing++;\n            }\n        }\n    }\n    return num_missing;\n}\n\n/* Mark missing for any requested node (sample or non-sample) that is isolated\n * in the current tree, i.e., has no parent and no children at this position. */\nstatic tsk_size_t\ntsk_variant_mark_missing_any(tsk_variant_t *self)\n{\n    tsk_size_t num_missing = 0;\n    int32_t *restrict genotypes = self->genotypes;\n    const tsk_id_t *restrict parent = self->tree.parent;\n    const tsk_id_t *restrict left_child = self->tree.left_child;\n    tsk_size_t j;\n\n    for (j = 0; j < self->num_samples; j++) {\n        tsk_id_t u = self->samples[j];\n        if (parent[u] == TSK_NULL && left_child[u] == TSK_NULL) {\n            genotypes[j] = TSK_MISSING_DATA;\n            num_missing++;\n        }\n    }\n    return num_missing;\n}\n\nstatic tsk_id_t\ntsk_variant_get_allele_index(tsk_variant_t *self, const char *allele, tsk_size_t length)\n{\n    tsk_id_t ret = -1;\n    tsk_size_t j;\n\n    for (j = 0; j < self->num_alleles; j++) {\n        if (length == self->allele_lengths[j]\n            && tsk_memcmp(allele, self->alleles[j], length) == 0) {\n            ret = (tsk_id_t) j;\n            break;\n        }\n    }\n    return ret;\n}\n\nint\ntsk_variant_decode(\n    
tsk_variant_t *self, tsk_id_t site_id, tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = 0;\n    tsk_id_t allele_index;\n    tsk_size_t j, num_missing;\n    int no_longer_missing;\n    tsk_mutation_t mutation;\n    bool impute_missing = !!(self->options & TSK_ISOLATED_NOT_MISSING);\n    bool by_traversal = self->alt_samples != NULL;\n    int (*update_genotypes)(tsk_variant_t *, tsk_id_t, tsk_id_t);\n    tsk_size_t (*mark_missing)(tsk_variant_t *);\n\n    if (self->tree_sequence == NULL) {\n        ret = tsk_trace_error(TSK_ERR_VARIANT_CANT_DECODE_COPY);\n        goto out;\n    }\n\n    ret = tsk_treeseq_get_site(self->tree_sequence, site_id, &self->site);\n    if (ret != 0) {\n        goto out;\n    }\n\n    ret = tsk_tree_seek(&self->tree, self->site.position, 0);\n    if (ret != 0) {\n        goto out;\n    }\n\n    /* When we have no specified samples we need sample lists to be active\n     * on the tree, as indicated by the presence of left_sample */\n    if (!by_traversal && self->tree.left_sample == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_SAMPLE_LISTS);\n        goto out;\n    }\n\n    /* For now we use a traversal method to find genotypes when we have a\n     * specified set of samples, but we should provide the option to do it\n     * via tracked_samples in the tree also. There will be a tradeoff: if\n     * we only have a small number of samples, it's probably better to\n     * do it by traversal. For large sets of samples though, it may be\n     * better to use the sample list infrastructure. */\n\n    mark_missing = tsk_variant_mark_missing;\n    update_genotypes = tsk_variant_update_genotypes_sample_list;\n    if (by_traversal) {\n        update_genotypes = tsk_variant_update_genotypes_traversal;\n        /* When decoding a user-provided list of nodes (which may include\n         * non-samples), mark isolated nodes as missing directly by checking\n         * isolation status for each requested node. 
*/\n        mark_missing = tsk_variant_mark_missing_any;\n    }\n\n    if (self->user_alleles) {\n        allele_index = tsk_variant_get_allele_index(\n            self, self->site.ancestral_state, self->site.ancestral_state_length);\n        if (allele_index == -1) {\n            ret = tsk_trace_error(TSK_ERR_ALLELE_NOT_FOUND);\n            goto out;\n        }\n    } else {\n        /* Ancestral state is always allele 0 */\n        self->alleles[0] = self->site.ancestral_state;\n        self->allele_lengths[0] = self->site.ancestral_state_length;\n        self->num_alleles = 1;\n        allele_index = 0;\n    }\n\n    /* The algorithm for generating the allelic state of every sample works by\n     * examining each mutation in order, and setting the state for all the\n     * samples under the mutation's node. For complex sites where there is\n     * more than one mutation, we depend on the ordering of mutations being\n     * correct. Specifically, any mutation that is above another mutation in\n     * the tree must be visited first. This is enforced using the mutation.parent\n     * field, where we require that a mutation's parent must appear before it\n     * in the list of mutations. This guarantees the correctness of this algorithm.\n     */\n    for (j = 0; j < self->num_samples; j++) {\n        self->genotypes[j] = (int32_t) allele_index;\n    }\n\n    /* We mark missing data *before* updating the genotypes because\n     * mutations directly over samples should not be missing */\n    num_missing = 0;\n    if (!impute_missing) {\n        num_missing = mark_missing(self);\n    }\n    for (j = 0; j < self->site.mutations_length; j++) {\n        mutation = self->site.mutations[j];\n        /* Compute the allele index for this derived state value. 
*/\n        allele_index = tsk_variant_get_allele_index(\n            self, mutation.derived_state, mutation.derived_state_length);\n        if (allele_index == -1) {\n            if (self->user_alleles) {\n                ret = tsk_trace_error(TSK_ERR_ALLELE_NOT_FOUND);\n                goto out;\n            }\n            if (self->num_alleles == self->max_alleles) {\n                ret = tsk_variant_expand_alleles(self);\n                if (ret != 0) {\n                    goto out;\n                }\n            }\n            allele_index = (tsk_id_t) self->num_alleles;\n            self->alleles[allele_index] = mutation.derived_state;\n            self->allele_lengths[allele_index] = mutation.derived_state_length;\n            self->num_alleles++;\n        }\n\n        no_longer_missing = update_genotypes(self, mutation.node, allele_index);\n        if (no_longer_missing < 0) {\n            ret = no_longer_missing;\n            goto out;\n        }\n        /* Update genotypes returns the number of missing values marked\n         * not-missing */\n        num_missing -= (tsk_size_t) no_longer_missing;\n    }\n    self->has_missing_data = num_missing > 0;\nout:\n    return ret;\n}\n\nint\ntsk_variant_restricted_copy(const tsk_variant_t *self, tsk_variant_t *other)\n{\n    int ret = 0;\n    tsk_size_t total_len, offset, j;\n\n    /* Copy everything */\n    tsk_memcpy(other, self, sizeof(*other));\n    /* Tree sequence left as NULL and zero'd tree is a way of indicating this variant is\n     * fixed and cannot be further decoded. 
*/\n    other->tree_sequence = NULL;\n    tsk_memset(&other->tree, 0, sizeof(other->tree));\n    other->traversal_stack = NULL;\n    other->samples = NULL;\n    other->sample_index_map = NULL;\n    other->alt_samples = NULL;\n    other->alt_sample_index_map = NULL;\n    other->user_alleles_mem = NULL;\n\n    total_len = 0;\n    for (j = 0; j < self->num_alleles; j++) {\n        total_len += self->allele_lengths[j];\n    }\n    other->samples = tsk_malloc(other->num_samples * sizeof(*other->samples));\n    other->genotypes = tsk_malloc(other->num_samples * sizeof(*other->genotypes));\n    other->user_alleles_mem = tsk_malloc(total_len * sizeof(*other->user_alleles_mem));\n    other->allele_lengths\n        = tsk_malloc(other->num_alleles * sizeof(*other->allele_lengths));\n    other->alleles = tsk_malloc(other->num_alleles * sizeof(*other->alleles));\n    if (other->samples == NULL || other->genotypes == NULL\n        || other->user_alleles_mem == NULL || other->allele_lengths == NULL\n        || other->alleles == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    tsk_memcpy(\n        other->samples, self->samples, other->num_samples * sizeof(*other->samples));\n    tsk_memcpy(other->genotypes, self->genotypes,\n        other->num_samples * sizeof(*other->genotypes));\n    tsk_memcpy(other->allele_lengths, self->allele_lengths,\n        other->num_alleles * sizeof(*other->allele_lengths));\n    offset = 0;\n    for (j = 0; j < other->num_alleles; j++) {\n        tsk_memcpy(other->user_alleles_mem + offset, self->alleles[j],\n            other->allele_lengths[j] * sizeof(*other->user_alleles_mem));\n        other->alleles[j] = other->user_alleles_mem + offset;\n        offset += other->allele_lengths[j];\n    }\n\nout:\n    return ret;\n}\n\nint\ntsk_vargen_next(tsk_vargen_t *self, tsk_variant_t **variant)\n{\n    int ret = 0;\n\n    if ((tsk_size_t) self->site_index < tsk_treeseq_get_num_sites(self->tree_sequence)) {\n        
ret = tsk_variant_decode(&self->variant, self->site_index, 0);\n        if (ret != 0) {\n            goto out;\n        }\n        self->site_index++;\n        *variant = &self->variant;\n        ret = 1;\n    }\nout:\n    return ret;\n}\n\nstatic int\ntsk_treeseq_decode_alignments_overlay_missing(const tsk_treeseq_t *self,\n    const tsk_id_t *nodes, tsk_size_t num_nodes, double left, double right,\n    char missing_data_character, tsk_size_t L, char *alignments_out)\n{\n    int ret = 0;\n    tsk_tree_t tree;\n    tsk_size_t i, seg_left, seg_right;\n    char *row = NULL;\n    tsk_id_t u;\n\n    tsk_memset(&tree, 0, sizeof(tree));\n\n    ret = tsk_tree_init(&tree, self, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_tree_seek(&tree, left, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    while (tree.index != -1 && tree.interval.left < right) {\n        seg_left = TSK_MAX((tsk_size_t) tree.interval.left, (tsk_size_t) left);\n        seg_right = TSK_MIN((tsk_size_t) tree.interval.right, (tsk_size_t) right);\n        if (seg_right > seg_left) {\n            for (i = 0; i < num_nodes; i++) {\n                u = nodes[i];\n                if (tree.parent[u] == TSK_NULL && tree.left_child[u] == TSK_NULL) {\n                    row = alignments_out + i * L;\n                    /* memset takes an `int`, `missing_data_character` is a `char` which\n                     * can be signed or unsigned depending on the platform, so we need to\n                     * cast. Some tools/compilers will warn if we just cast\n                     * to `unsigned char` and leave the cast to `int` as implicit, hence\n                     * the double cast. 
*/\n                    tsk_memset(row + (seg_left - (tsk_size_t) left),\n                        (int) (unsigned char) missing_data_character,\n                        seg_right - seg_left);\n                }\n            }\n        }\n        ret = tsk_tree_next(&tree);\n        if (ret < 0) {\n            goto out;\n        }\n    }\n\n    /* On success we should return 0, not TSK_TREE_OK from the last tsk_tree_next */\n    ret = 0;\nout:\n    tsk_tree_free(&tree);\n    return ret;\n}\n\nstatic int\ntsk_treeseq_decode_alignments_overlay_sites(const tsk_treeseq_t *self,\n    const tsk_id_t *nodes, tsk_size_t num_nodes, double left, double right,\n    char missing_data_character, tsk_size_t L, char *alignments_out, tsk_flags_t options)\n{\n    int ret = 0;\n    tsk_variant_t var;\n    tsk_id_t site_id;\n    tsk_site_t site;\n    char *allele_byte = NULL;\n    tsk_size_t allele_cap = 0;\n    tsk_size_t i, j;\n    char *row = NULL;\n    int32_t g;\n    char c;\n    char *tmp = NULL;\n\n    tsk_memset(&var, 0, sizeof(var));\n\n    ret = tsk_variant_init(&var, self, nodes, num_nodes, NULL, options);\n    if (ret != 0) {\n        goto out;\n    }\n    for (site_id = 0; site_id < (tsk_id_t) tsk_treeseq_get_num_sites(self); site_id++) {\n        ret = tsk_treeseq_get_site(self, site_id, &site);\n        if (ret != 0) {\n            goto out;\n        }\n        if (site.position < left) {\n            continue;\n        }\n        if (site.position >= right) {\n            break;\n        }\n        ret = tsk_variant_decode(&var, site_id, 0);\n        if (ret != 0) {\n            goto out;\n        }\n        if (var.num_alleles > 0) {\n            if (var.num_alleles > allele_cap) {\n                tmp = tsk_realloc(allele_byte, var.num_alleles * sizeof(*allele_byte));\n                if (tmp == NULL) {\n                    ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n                    goto out;\n                }\n                allele_byte = tmp;\n                
allele_cap = var.num_alleles;\n            }\n            for (j = 0; j < var.num_alleles; j++) {\n                if (var.allele_lengths[j] != 1) {\n                    ret = tsk_trace_error(TSK_ERR_BAD_ALLELE_LENGTH);\n                    goto out;\n                }\n                allele_byte[j] = var.alleles[j][0];\n                if (allele_byte[j] == missing_data_character) {\n                    ret = tsk_trace_error(TSK_ERR_MISSING_CHAR_COLLISION);\n                    goto out;\n                }\n            }\n            for (i = 0; i < num_nodes; i++) {\n                row = alignments_out + i * L;\n                g = var.genotypes[i];\n                c = missing_data_character;\n                if (g != TSK_MISSING_DATA) {\n                    tsk_bug_assert(g >= 0);\n                    tsk_bug_assert((tsk_size_t) g < var.num_alleles);\n                    c = allele_byte[g];\n                }\n                row[((tsk_size_t) site.position) - (tsk_size_t) left] = (char) c;\n            }\n        }\n    }\n\nout:\n    tsk_safe_free(allele_byte);\n    tsk_variant_free(&var);\n    return ret;\n}\n\n/* NOTE: We usually keep functions with a tsk_treeseq_t signature in trees.c.\n * tsk_treeseq_decode_alignments is implemented here instead because it\n * depends directly on tsk_variant_t and the genotype/allele machinery in\n * this file (and thus on genotypes.h). This slightly breaks that layering\n * convention but keeps the implementation close to the variant code. 
*/\nint\ntsk_treeseq_decode_alignments(const tsk_treeseq_t *self, const char *ref_seq,\n    tsk_size_t ref_seq_length, const tsk_id_t *nodes, tsk_size_t num_nodes, double left,\n    double right, char missing_data_character, char *alignments_out, tsk_flags_t options)\n{\n    int ret = 0;\n    tsk_size_t i, L;\n    char *row = NULL;\n\n    if (!tsk_treeseq_get_discrete_genome(self)) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    if (ref_seq == NULL) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    if (ref_seq_length != (tsk_size_t) tsk_treeseq_get_sequence_length(self)) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    if (trunc(left) != left || trunc(right) != right) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    if (left < 0 || right > tsk_treeseq_get_sequence_length(self)\n        || (tsk_size_t) left >= (tsk_size_t) right) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    L = (tsk_size_t) right - (tsk_size_t) left;\n    if (num_nodes == 0) {\n        return 0;\n    }\n    if (nodes == NULL || alignments_out == NULL) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    for (i = 0; i < num_nodes; i++) {\n        if (nodes[i] < 0 || nodes[i] >= (tsk_id_t) tsk_treeseq_get_num_nodes(self)) {\n            ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);\n            goto out;\n        }\n    }\n\n    /* Fill rows with the reference slice */\n    for (i = 0; i < num_nodes; i++) {\n        row = alignments_out + i * L;\n        tsk_memcpy(row, ref_seq + (tsk_size_t) left, L);\n    }\n    if (!(options & TSK_ISOLATED_NOT_MISSING)) {\n        ret = tsk_treeseq_decode_alignments_overlay_missing(self, nodes, num_nodes, left,\n            right, missing_data_character, L, alignments_out);\n        if (ret != 0) {\n            goto 
out;\n        }\n    }\n    ret = tsk_treeseq_decode_alignments_overlay_sites(self, nodes, num_nodes, left,\n        right, missing_data_character, L, alignments_out, options);\n    if (ret != 0) {\n        goto out;\n    }\n\nout:\n    return ret;\n}\n"
  },
  {
    "path": "c/tskit/genotypes.h",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2022 Tskit Developers\n * Copyright (c) 2016-2018 University of Oxford\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n#ifndef TSK_GENOTYPES_H\n#define TSK_GENOTYPES_H\n\n#ifdef __cplusplus\nextern \"C\" {\n#endif\n\n#include <tskit/trees.h>\n\n#define TSK_ISOLATED_NOT_MISSING (1 << 1)\n\n/**\n@brief A variant at a specific site.\n\n@rst\nUsed to generate the genotypes for a given set of samples at a given\nsite.\n@endrst\n*/\ntypedef struct {\n    /** @brief Unowned reference to the tree sequence of the variant */\n    const tsk_treeseq_t *tree_sequence;\n    /** @brief The site this variant is currently decoded at*/\n    tsk_site_t site;\n    tsk_tree_t tree;\n    /** @brief Array of allele strings that the genotypes of the variant refer to\n     *  These are not NULL terminated - use `allele_lengths` for example:.\n     *  `printf(\"%.*s\", (int) 
var->allele_lengths[j], var->alleles[j]);`\n     */\n    const char **alleles;\n    /** @brief Lengths of the allele strings */\n    tsk_size_t *allele_lengths;\n    /** @brief Length of the allele array */\n    tsk_size_t num_alleles;\n    tsk_size_t max_alleles;\n    /** @brief If True the genotypes of isolated nodes have been decoded to the \"missing\"\n     * genotype. If False they are set to the ancestral state (in the absence of\n     * mutations above them)*/\n    bool has_missing_data;\n    /** @brief Array of genotypes for the current site */\n    int32_t *genotypes;\n    /** @brief Number of samples */\n    tsk_size_t num_samples;\n    /** @brief Array of sample ids used*/\n    tsk_id_t *samples;\n\n    const tsk_id_t *sample_index_map;\n    bool user_alleles;\n    char *user_alleles_mem;\n    tsk_id_t *traversal_stack;\n    tsk_flags_t options;\n    tsk_id_t *alt_samples;\n    tsk_id_t *alt_sample_index_map;\n\n} tsk_variant_t;\n\n/* All vargen related structs and methods were deprecated in C API v1.0 */\ntypedef struct {\n    const tsk_treeseq_t *tree_sequence;\n    tsk_id_t site_index;\n    tsk_variant_t variant;\n} tsk_vargen_t;\n\n/**\n@defgroup VARIANT_API_GROUP Variant API for obtaining genotypes.\n@{\n*/\n\n/**\n@brief Initialises the variant by allocating the internal memory\n\n@rst\nThis must be called before any operations are performed on the variant.\nSee the :ref:`sec_c_api_overview_structure` for details on how objects\nare initialised and freed.\n@endrst\n\n@param self A pointer to an uninitialised tsk_variant_t object.\n@param tree_sequence A pointer to the tree sequence from which this variant\nwill decode genotypes. No copy is taken, so this tree sequence must persist\nfor the lifetime of the variant.\n@param samples Optional. Either `NULL` or an array of node ids of the samples that are to\nhave their genotypes decoded. A copy of this array will be taken by the variant. 
If\n`NULL` then the samples from the tree sequence will be used.\n@param num_samples The number of ids in the samples array, ignored if `samples` is `NULL`\n@param alleles Optional. Either ``NULL`` or an array of string alleles with a terminal\n``NULL`` sentinel value.\nIf specified, the genotypes will be decoded to match the index in this allele array.\nIf ``NULL`` then alleles will be automatically determined from the mutations encountered.\n@param options Variant options. Either ``0`` or ``TSK_ISOLATED_NOT_MISSING`` which\nif specified indicates that isolated sample nodes should not be decoded as the \"missing\"\nstate but as the ancestral state (or the state of any mutation above them).\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_variant_init(tsk_variant_t *self, const tsk_treeseq_t *tree_sequence,\n    const tsk_id_t *samples, tsk_size_t num_samples, const char **alleles,\n    tsk_flags_t options);\n\n/**\n@brief Copies the state of this variant to another variant\n\n@rst\nCopies the site, genotypes and alleles from this variant to another. Note that\nthe other variant should be uninitialised as this method does not free any\nmemory that the other variant owns. After copying `other` is frozen and\nthis restricts it from being further decoded at any site. `self` remains unchanged.\n@endrst\n\n@param self A pointer to an initialised and decoded tsk_variant_t object.\n@param other A pointer to an uninitialised tsk_variant_t object.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_variant_restricted_copy(const tsk_variant_t *self, tsk_variant_t *other);\n\n/**\n@brief Decode the genotypes at the given site, storing them in this variant.\n\n@rst\nDecodes the genotypes for this variant's samples, indexed to this variant's alleles,\nat the specified site.\nThis method is most efficient at decoding sites in-order, either forwards or backwards\nalong the tree sequence. 
Resulting genotypes are stored in the ``genotypes`` member of\nthis variant.\n@endrst\n\n@param self A pointer to an initialised tsk_variant_t object.\n@param site_id A valid site id for the tree sequence of this variant.\n@param options Bitwise option flags. Currently unused; should be\n    set to zero to ensure compatibility with later versions of `tskit`.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_variant_decode(tsk_variant_t *self, tsk_id_t site_id, tsk_flags_t options);\n\n/**\n@brief Free the internal memory for the specified variant.\n\n@param self A pointer to an initialised tsk_variant_t object.\n@return Always returns 0.\n*/\nint tsk_variant_free(tsk_variant_t *self);\n\n/**\n@brief Print out the state of this variant to the specified stream.\n\nThis method is intended for debugging purposes and should not be used\nin production code. The format of the output should **not** be depended\non and may change arbitrarily between versions.\n\n@param self A pointer to a tsk_variant_t object.\n@param out The stream to write the summary to.\n*/\nvoid tsk_variant_print_state(const tsk_variant_t *self, FILE *out);\n\n/** @} */\n\n/* Deprecated vargen methods (since C API v1.0) */\nint tsk_vargen_init(tsk_vargen_t *self, const tsk_treeseq_t *tree_sequence,\n    const tsk_id_t *samples, tsk_size_t num_samples, const char **alleles,\n    tsk_flags_t options);\nint tsk_vargen_next(tsk_vargen_t *self, tsk_variant_t **variant);\nint tsk_vargen_free(tsk_vargen_t *self);\nvoid tsk_vargen_print_state(const tsk_vargen_t *self, FILE *out);\n\n#ifdef __cplusplus\n}\n#endif\n#endif\n"
  },
  {
    "path": "c/tskit/haplotype_matching.c",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2025 Tskit Developers\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n#include <stdio.h>\n#include <string.h>\n#include <stdlib.h>\n#include <math.h>\n#include <float.h>\n#include <assert.h>\n\n#include <tskit/haplotype_matching.h>\n\n#define MAX_PARSIMONY_WORDS 256\n\nconst char *_zero_one_alleles[] = { \"0\", \"1\", NULL };\nconst char *_acgt_alleles[] = { \"A\", \"C\", \"G\", \"T\", NULL };\n\nstatic int\ncmp_double(const void *a, const void *b)\n{\n    const double *ia = (const double *) a;\n    const double *ib = (const double *) b;\n    return (*ia > *ib) - (*ia < *ib);\n}\n\nstatic int\ncmp_argsort(const void *a, const void *b)\n{\n    const tsk_argsort_t *ia = (const tsk_argsort_t *) a;\n    const tsk_argsort_t *ib = (const tsk_argsort_t *) b;\n    int ret = (ia->value > ib->value) - (ia->value < ib->value);\n    /* Break any ties using the index to ensure 
consistency */\n    if (ret == 0) {\n        ret = (ia->index > ib->index) - (ia->index < ib->index);\n    }\n    return ret;\n}\n\nstatic void\ntsk_ls_hmm_check_state(tsk_ls_hmm_t *self)\n{\n    tsk_id_t *T_index = self->transition_index;\n    tsk_value_transition_t *T = self->transitions;\n    tsk_id_t j;\n\n    for (j = 0; j < (tsk_id_t) self->num_transitions; j++) {\n        if (T[j].tree_node != TSK_NULL) {\n            tsk_bug_assert(T_index[T[j].tree_node] == j);\n        }\n    }\n    /* tsk_bug_assert(self->num_transitions <= self->num_samples); */\n\n    if (self->num_transitions > 0) {\n        for (j = 0; j < (tsk_id_t) self->num_nodes; j++) {\n            if (T_index[j] != TSK_NULL) {\n                tsk_bug_assert(T[T_index[j]].tree_node == j);\n            }\n            tsk_bug_assert(self->tree.parent[j] == self->parent[j]);\n        }\n    }\n}\n\nvoid\ntsk_ls_hmm_print_state(tsk_ls_hmm_t *self, FILE *out)\n{\n    tsk_size_t j, l;\n\n    fprintf(out, \"tree_sequence   = %p\\n\", (void *) self->tree_sequence);\n    fprintf(out, \"num_sites       = %lld\\n\", (long long) self->num_sites);\n    fprintf(out, \"num_samples     = %lld\\n\", (long long) self->num_samples);\n    fprintf(out, \"num_values      = %lld\\n\", (long long) self->num_values);\n    fprintf(out, \"max_values      = %lld\\n\", (long long) self->max_values);\n    fprintf(out, \"num_optimal_value_set_words = %lld\\n\",\n        (long long) self->num_optimal_value_set_words);\n\n    fprintf(out, \"sites::\\n\");\n    for (l = 0; l < self->num_sites; l++) {\n        fprintf(out, \"%lld\\t%lld\\t[\", (long long) l, (long long) self->num_alleles[l]);\n        for (j = 0; j < self->num_alleles[l]; j++) {\n            fprintf(out, \"%s,\", self->alleles[l][j]);\n        }\n        fprintf(out, \"]\\n\");\n    }\n    fprintf(out, \"transitions::%lld\\n\", (long long) self->num_transitions);\n    for (j = 0; j < self->num_transitions; j++) {\n        fprintf(out, 
\"tree_node=%lld\\tvalue=%.14f\\tvalue_index=%lld\\n\",\n            (long long) self->transitions[j].tree_node, self->transitions[j].value,\n            (long long) self->transitions[j].value_index);\n    }\n    if (self->num_transitions > 0) {\n        fprintf(out, \"tree::%lld\\n\", (long long) self->num_nodes);\n        for (j = 0; j < self->num_nodes; j++) {\n            fprintf(out, \"%lld\\tparent=%lld\\ttransition=%lld\\n\", (long long) j,\n                (long long) self->parent[j], (long long) self->transition_index[j]);\n        }\n    }\n    tsk_ls_hmm_check_state(self);\n}\n\nint TSK_WARN_UNUSED\ntsk_ls_hmm_init(tsk_ls_hmm_t *self, tsk_treeseq_t *tree_sequence,\n    double *recombination_rate, double *mutation_rate, tsk_flags_t options)\n{\n    int ret = TSK_ERR_GENERIC;\n    tsk_size_t l;\n\n    tsk_memset(self, 0, sizeof(tsk_ls_hmm_t));\n    self->tree_sequence = tree_sequence;\n    self->precision = 6; /* Seems like a safe value, but probably not ideal for perf */\n    self->num_sites = tsk_treeseq_get_num_sites(tree_sequence);\n    self->num_samples = tsk_treeseq_get_num_samples(tree_sequence);\n    self->num_alleles = tsk_malloc(self->num_sites * sizeof(*self->num_alleles));\n    self->num_nodes = tsk_treeseq_get_num_nodes(tree_sequence);\n    self->parent = tsk_malloc(self->num_nodes * sizeof(*self->parent));\n    self->allelic_state = tsk_malloc(self->num_nodes * sizeof(*self->allelic_state));\n    self->transition_index\n        = tsk_malloc(self->num_nodes * sizeof(*self->transition_index));\n    self->transition_stack\n        = tsk_malloc(self->num_nodes * sizeof(*self->transition_stack));\n    /* We can't have more than 2 * num_samples transitions, so we use this as the\n     * upper bound. Because of the implementation, we'll also have to worry about\n     * the extra mutations at the first site, which in worst case involves all\n     * mutations. 
We can definitely save some memory here if we want to.*/\n    self->max_transitions\n        = 2 * self->num_samples + tsk_treeseq_get_num_mutations(tree_sequence);\n    /* FIXME Arbitrarily doubling this after hitting problems */\n    self->max_transitions *= 2;\n    self->transitions = tsk_malloc(self->max_transitions * sizeof(*self->transitions));\n    self->transitions_copy\n        = tsk_malloc(self->max_transitions * sizeof(*self->transitions));\n    self->num_transition_samples\n        = tsk_malloc(self->max_transitions * sizeof(*self->num_transition_samples));\n    self->transition_parent\n        = tsk_malloc(self->max_transitions * sizeof(*self->transition_parent));\n    self->transition_time_order\n        = tsk_malloc(self->max_transitions * sizeof(*self->transition_time_order));\n    self->values = tsk_malloc(self->max_transitions * sizeof(*self->values));\n    self->recombination_rate\n        = tsk_malloc(self->num_sites * sizeof(*self->recombination_rate));\n    self->mutation_rate = tsk_malloc(self->num_sites * sizeof(*self->mutation_rate));\n    self->alleles = tsk_calloc(self->num_sites, sizeof(*self->alleles));\n    if (self->num_alleles == NULL || self->parent == NULL || self->allelic_state == NULL\n        || self->transition_index == NULL || self->transition_stack == NULL\n        || self->transitions == NULL || self->transitions_copy == NULL\n        || self->num_transition_samples == NULL || self->transition_parent == NULL\n        || self->transition_time_order == NULL || self->values == NULL\n        || self->recombination_rate == NULL || self->mutation_rate == NULL\n        || self->alleles == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    for (l = 0; l < self->num_sites; l++) {\n        /* TODO check these inputs */\n        self->recombination_rate[l] = recombination_rate[l];\n        self->mutation_rate[l] = mutation_rate[l];\n        if (options & TSK_ALLELES_ACGT) {\n            
self->num_alleles[l] = 4;\n            self->alleles[l] = _acgt_alleles;\n        } else {\n            /* Default to the 0/1 alleles */\n            self->num_alleles[l] = 2;\n            self->alleles[l] = _zero_one_alleles;\n        }\n    }\n    ret = tsk_tree_init(&self->tree, self->tree_sequence, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    self->num_values = 0;\n    self->max_values = 0;\n    /* Keep this as a struct variable so that we can test overflow, but this\n     * should never be set to more than MAX_PARSIMONY_WORDS as we're doing\n     * a bunch of stack allocations based on this. */\n    self->max_parsimony_words = MAX_PARSIMONY_WORDS;\n    ret = 0;\nout:\n    return ret;\n}\n\nint\ntsk_ls_hmm_set_precision(tsk_ls_hmm_t *self, unsigned int precision)\n{\n    self->precision = precision;\n    return 0;\n}\n\nint\ntsk_ls_hmm_free(tsk_ls_hmm_t *self)\n{\n    tsk_tree_free(&self->tree);\n    tsk_safe_free(self->recombination_rate);\n    tsk_safe_free(self->mutation_rate);\n    tsk_safe_free(self->recombination_rate);\n    tsk_safe_free(self->alleles);\n    tsk_safe_free(self->num_alleles);\n    tsk_safe_free(self->parent);\n    tsk_safe_free(self->allelic_state);\n    tsk_safe_free(self->transition_index);\n    tsk_safe_free(self->transition_stack);\n    tsk_safe_free(self->transitions);\n    tsk_safe_free(self->transitions_copy);\n    tsk_safe_free(self->transition_time_order);\n    tsk_safe_free(self->values);\n    tsk_safe_free(self->num_transition_samples);\n    tsk_safe_free(self->transition_parent);\n    tsk_safe_free(self->optimal_value_sets);\n    return 0;\n}\n\nstatic int\ntsk_ls_hmm_reset(tsk_ls_hmm_t *self, double value)\n{\n    int ret = 0;\n    tsk_size_t j;\n    tsk_id_t u;\n    const tsk_id_t *samples;\n    tsk_size_t N = self->num_nodes;\n\n    tsk_memset(self->parent, 0xff, N * sizeof(*self->parent));\n    tsk_memset(self->transition_index, 0xff, N * sizeof(*self->transition_index));\n    tsk_memset(self->allelic_state, 
0xff, N * sizeof(*self->allelic_state));\n    tsk_memset(self->transitions, 0, self->max_transitions * sizeof(*self->transitions));\n    tsk_memset(self->num_transition_samples, 0,\n        self->max_transitions * sizeof(*self->num_transition_samples));\n    tsk_memset(self->transition_parent, 0xff,\n        self->max_transitions * sizeof(*self->transition_parent));\n\n    samples = tsk_treeseq_get_samples(self->tree_sequence);\n    for (j = 0; j < self->num_samples; j++) {\n        u = samples[j];\n        self->transitions[j].tree_node = u;\n        self->transitions[j].value = value;\n        self->transition_index[u] = (tsk_id_t) j;\n    }\n    self->num_transitions = self->num_samples;\n    return ret;\n}\n\n/* After we have moved on to a new tree we can have transitions still associated\n * with the old roots, which are now disconnected. Remove. */\nstatic int\ntsk_ls_hmm_remove_dead_roots(tsk_ls_hmm_t *self)\n{\n    tsk_id_t *restrict T_index = self->transition_index;\n    tsk_value_transition_t *restrict T = self->transitions;\n    const tsk_id_t *restrict right_sib = self->tree.right_sib;\n    const tsk_id_t left_root = tsk_tree_get_left_root(&self->tree);\n    const tsk_id_t *restrict parent = self->parent;\n    tsk_id_t root, u;\n    tsk_size_t j;\n    const tsk_id_t root_marker = -2;\n\n    for (root = left_root; root != TSK_NULL; root = right_sib[root]) {\n        if (T_index[root] != TSK_NULL) {\n            /* Use the value_index slot as a marker. 
We don't use this between\n             * iterations, so it's safe to appropriate here */\n            T[T_index[root]].value_index = root_marker;\n        }\n    }\n    for (j = 0; j < self->num_transitions; j++) {\n        u = T[j].tree_node;\n        if (u != TSK_NULL) {\n            if (parent[u] == TSK_NULL && T[j].value_index != root_marker) {\n                T_index[u] = TSK_NULL;\n                T[j].tree_node = TSK_NULL;\n            }\n            T[j].value_index = -1;\n        }\n    }\n    return 0;\n}\n\nstatic int\ntsk_ls_hmm_update_tree(tsk_ls_hmm_t *self, int direction)\n{\n    int ret = 0;\n    tsk_id_t *restrict parent = self->parent;\n    tsk_id_t *restrict T_index = self->transition_index;\n    const tsk_id_t *restrict edges_child = self->tree_sequence->tables->edges.child;\n    const tsk_id_t *restrict edges_parent = self->tree_sequence->tables->edges.parent;\n    tsk_value_transition_t *restrict T = self->transitions;\n    tsk_id_t u, c, p, j, e;\n    tsk_value_transition_t *vt;\n    tsk_tree_position_t tree_pos;\n\n    tree_pos = self->tree.tree_pos;\n    for (j = tree_pos.out.start; j != tree_pos.out.stop; j += direction) {\n        e = tree_pos.out.order[j];\n        c = edges_child[e];\n        u = c;\n        if (T_index[u] == TSK_NULL) {\n            /* Ensure the subtree we're detaching has a transition at the root */\n            while (T_index[u] == TSK_NULL) {\n                u = parent[u];\n                tsk_bug_assert(u != TSK_NULL);\n            }\n            tsk_bug_assert(self->num_transitions < self->max_transitions);\n            T_index[c] = (tsk_id_t) self->num_transitions;\n            T[self->num_transitions].tree_node = c;\n            T[self->num_transitions].value = T[T_index[u]].value;\n            self->num_transitions++;\n        }\n        parent[c] = TSK_NULL;\n    }\n\n    for (j = tree_pos.in.start; j != tree_pos.in.stop; j += direction) {\n        e = tree_pos.in.order[j];\n        c = edges_child[e];\n   
     p = edges_parent[e];\n        parent[c] = p;\n        u = p;\n        if (parent[p] == TSK_NULL) {\n            /* Grafting onto a new root. */\n            if (T_index[p] == TSK_NULL) {\n                T_index[p] = (tsk_id_t) self->num_transitions;\n                tsk_bug_assert(self->num_transitions < self->max_transitions);\n                T[self->num_transitions].tree_node = p;\n                T[self->num_transitions].value = T[T_index[c]].value;\n                self->num_transitions++;\n            }\n        } else {\n            /* Grafting into an existing subtree. */\n            while (T_index[u] == TSK_NULL) {\n                u = parent[u];\n            }\n            tsk_bug_assert(u != TSK_NULL);\n        }\n        tsk_bug_assert(T_index[u] != -1 && T_index[c] != -1);\n        if (T[T_index[u]].value == T[T_index[c]].value) {\n            vt = &T[T_index[c]];\n            /* Mark the value transition as unusued */\n            vt->value = -1;\n            vt->tree_node = TSK_NULL;\n            T_index[c] = TSK_NULL;\n        }\n    }\n\n    ret = tsk_ls_hmm_remove_dead_roots(self);\n    return ret;\n}\n\nstatic int\ntsk_ls_hmm_get_allele_index(tsk_ls_hmm_t *self, tsk_id_t site, const char *allele_state,\n    const tsk_size_t allele_length)\n{\n    /* Note we're not doing tsk_trace_error here because it would require changing\n     * the logic of the function. 
Could be done easily enough, though */\n    int ret = TSK_ERR_ALLELE_NOT_FOUND;\n    const char **alleles = self->alleles[site];\n    const tsk_id_t num_alleles = (tsk_id_t) self->num_alleles[site];\n\n    tsk_id_t j;\n\n    for (j = 0; j < num_alleles; j++) {\n        if (strlen(alleles[j]) != allele_length) {\n            break;\n        }\n        if (strncmp(alleles[j], allele_state, (size_t) allele_length) == 0) {\n            ret = (int) j;\n            break;\n        }\n    }\n    return ret;\n}\n\nstatic int\ntsk_ls_hmm_update_probabilities(\n    tsk_ls_hmm_t *self, const tsk_site_t *site, int32_t haplotype_state)\n{\n    int ret = 0;\n    tsk_id_t root;\n    tsk_tree_t *tree = &self->tree;\n    tsk_id_t *restrict parent = self->parent;\n    tsk_id_t *restrict T_index = self->transition_index;\n    tsk_value_transition_t *restrict T = self->transitions;\n    int32_t *restrict allelic_state = self->allelic_state;\n    const tsk_id_t left_root = tsk_tree_get_left_root(tree);\n    tsk_mutation_t mut;\n    tsk_id_t j, u, v;\n    double x;\n    bool match;\n\n    /* Set the allelic states */\n    ret = tsk_ls_hmm_get_allele_index(\n        self, site->id, site->ancestral_state, site->ancestral_state_length);\n    if (ret < 0) {\n        goto out;\n    }\n    for (root = left_root; root != TSK_NULL; root = tree->right_sib[root]) {\n        allelic_state[root] = (int32_t) ret;\n    }\n\n    for (j = 0; j < (tsk_id_t) site->mutations_length; j++) {\n        mut = site->mutations[j];\n        ret = tsk_ls_hmm_get_allele_index(\n            self, site->id, mut.derived_state, mut.derived_state_length);\n        if (ret < 0) {\n            goto out;\n        }\n        u = mut.node;\n        allelic_state[u] = (int32_t) ret;\n        if (T_index[u] == TSK_NULL) {\n            while (T_index[u] == TSK_NULL) {\n                u = parent[u];\n            }\n            tsk_bug_assert(self->num_transitions < self->max_transitions);\n            T_index[mut.node] = 
(tsk_id_t) self->num_transitions;\n            T[self->num_transitions].tree_node = mut.node;\n            T[self->num_transitions].value = T[T_index[u]].value;\n            self->num_transitions++;\n        }\n    }\n\n    for (j = 0; j < (tsk_id_t) self->num_transitions; j++) {\n        u = T[j].tree_node;\n        if (u != TSK_NULL) {\n            /* Get the allelic_state at u. */\n            v = u;\n            while (allelic_state[v] == TSK_MISSING_DATA) {\n                v = parent[v];\n                tsk_bug_assert(v != -1);\n            }\n            match = haplotype_state == TSK_MISSING_DATA\n                    || haplotype_state == allelic_state[v];\n            ret = self->next_probability(self, site->id, T[j].value, match, u, &x);\n            if (ret != 0) {\n                goto out;\n            }\n            T[j].value = x;\n        }\n    }\n\n    /* Unset the allelic states */\n    for (root = left_root; root != TSK_NULL; root = tree->right_sib[root]) {\n        allelic_state[root] = TSK_MISSING_DATA;\n    }\n    for (j = 0; j < (tsk_id_t) site->mutations_length; j++) {\n        mut = site->mutations[j];\n        allelic_state[mut.node] = TSK_MISSING_DATA;\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic int\ntsk_ls_hmm_discretise_values(tsk_ls_hmm_t *self)\n{\n    int ret = 0;\n    tsk_value_transition_t *T = self->transitions;\n    double *values = self->values;\n    tsk_size_t j, k, num_values;\n\n    num_values = 0;\n    for (j = 0; j < self->num_transitions; j++) {\n        if (T[j].tree_node != TSK_NULL) {\n            values[num_values] = T[j].value;\n            num_values++;\n        }\n    }\n    tsk_bug_assert(num_values > 0);\n\n    qsort(values, (size_t) num_values, sizeof(double), cmp_double);\n\n    k = 0;\n    for (j = 1; j < num_values; j++) {\n        if (values[j] != values[k]) {\n            k++;\n            values[k] = values[j];\n        }\n    }\n    num_values = k + 1;\n    self->num_values = num_values;\n   
 for (j = 0; j < self->num_transitions; j++) {\n        if (T[j].tree_node != TSK_NULL) {\n            T[j].value_index\n                = (tsk_id_t) tsk_search_sorted(values, num_values, T[j].value);\n            tsk_bug_assert(T[j].value == self->values[T[j].value_index]);\n        }\n    }\n    return ret;\n}\n\n/*\n * TODO We also have these function in tree.c where they're used in the\n * parsimony calculations (which are slightly different). It would be good to bring\n * these together, or at least avoid having the same function in two\n * files. Keeping it as it is for now so that it can be inlined, since\n * it's perf-sensitive. */\n\nstatic inline tsk_id_t\nget_smallest_set_bit(uint64_t v)\n{\n    /* This is an inefficient implementation, there are several better\n     * approaches. On GCC we can use\n     * return (uint8_t) (__builtin_ffsll((long long) v) - 1);\n     */\n    uint64_t t = 1;\n    tsk_id_t r = 0;\n    assert(v != 0);\n\n    while ((v & t) == 0) {\n        t <<= 1;\n        r++;\n    }\n    return r;\n}\n\nstatic inline uint64_t\nset_bit(uint64_t value, uint8_t bit)\n{\n    return value | (1ULL << bit);\n}\n\nstatic inline bool\nbit_is_set(uint64_t value, uint8_t bit)\n{\n    return (value & (1ULL << bit)) != 0;\n}\n\nstatic inline tsk_id_t\nget_smallest_element(const uint64_t *restrict A, tsk_size_t u, tsk_size_t num_words)\n{\n    tsk_size_t base = u * num_words;\n    const uint64_t *restrict a = A + base;\n    tsk_id_t j = 0;\n\n    while (a[j] == 0) {\n        j++;\n        tsk_bug_assert(j < (tsk_id_t) num_words);\n    }\n    return j * 64 + get_smallest_set_bit(a[j]);\n}\n\n/* static variables are zero-initialised by default. 
*/\nstatic const uint64_t zero_block[MAX_PARSIMONY_WORDS];\n\nstatic inline bool\nall_zero(const uint64_t *restrict A, tsk_id_t u, tsk_size_t num_words)\n{\n    if (num_words == 1) {\n        return A[u] == 0;\n    } else {\n        return tsk_memcmp(\n                   zero_block, A + (tsk_size_t) u * num_words, num_words * sizeof(*A))\n               == 0;\n    }\n}\n\nstatic inline bool\nelement_in(\n    const uint64_t *restrict A, tsk_id_t u, const tsk_id_t state, tsk_size_t num_words)\n{\n    tsk_size_t index = ((tsk_size_t) u) * num_words + (tsk_size_t) (state / 64);\n    return (A[index] & (1ULL << (state % 64))) != 0;\n}\n\nstatic inline void\nset_optimal_value(\n    uint64_t *restrict A, tsk_id_t u, const tsk_size_t num_words, tsk_id_t state)\n{\n    tsk_size_t index = ((tsk_size_t) u) * num_words + (tsk_size_t) (state / 64);\n    tsk_bug_assert(((tsk_size_t) state) / 64 < num_words);\n    A[index] |= 1ULL << (state % 64);\n}\n\n/* TODO the implementation here isn't particularly optimal and the way things\n * were organised was really driven by the old Fitch parsimony algorithm\n * (which only worked on binary trees. 
In particular, we should be working\n * word-by-word where possible rather than iterating by values like we do here.\n * Needs to be reworked when we're documenting/writing up this algorithm.\n */\n\nstatic void\ncompute_optimal_value_1(uint64_t *restrict A, const tsk_id_t *restrict left_child,\n    const tsk_id_t *restrict right_sib, const tsk_id_t u, const tsk_id_t parent_state,\n    const tsk_size_t num_values)\n{\n    tsk_id_t v;\n    uint64_t child;\n    tsk_size_t value_count[64], max_value_count;\n    uint8_t j;\n\n    assert(num_values < 64);\n\n    tsk_memset(value_count, 0, num_values * sizeof(*value_count));\n    for (v = left_child[u]; v != TSK_NULL; v = right_sib[v]) {\n        child = A[v];\n        /* If the set for a given child is empty, then we know it inherits\n         * directly from the parent state and must be a singleton set. */\n        if (child == 0) {\n            child = 1ULL << parent_state;\n        }\n        for (j = 0; j < num_values; j++) {\n            value_count[j] += bit_is_set(child, j);\n        }\n    }\n    max_value_count = 0;\n    for (j = 0; j < num_values; j++) {\n        max_value_count = TSK_MAX(max_value_count, value_count[j]);\n    }\n    A[u] = 0;\n    for (j = 0; j < num_values; j++) {\n        if (value_count[j] == max_value_count) {\n            A[u] = set_bit(A[u], j);\n        }\n    }\n}\n\nstatic void\ncompute_optimal_value_general(uint64_t *restrict A, const tsk_id_t *restrict left_child,\n    const tsk_id_t *restrict right_sib, const tsk_id_t u, const tsk_id_t parent_state,\n    const tsk_size_t num_values, const tsk_size_t num_words)\n{\n    tsk_id_t v;\n    uint64_t child[MAX_PARSIMONY_WORDS];\n    uint64_t *Au;\n    tsk_size_t base, word, bit;\n    bool child_all_zero;\n    const tsk_id_t state_index = parent_state / 64;\n    const uint64_t state_word = 1ULL << (parent_state % 64);\n    tsk_size_t value_count[64 * MAX_PARSIMONY_WORDS], max_value_count;\n    tsk_size_t j;\n\n    
tsk_bug_assert(num_values < 64 * MAX_PARSIMONY_WORDS);\n    tsk_bug_assert(num_words <= MAX_PARSIMONY_WORDS);\n    for (j = 0; j < num_values; j++) {\n        value_count[j] = 0;\n    }\n\n    for (v = left_child[u]; v != TSK_NULL; v = right_sib[v]) {\n        child_all_zero = true;\n        base = ((tsk_size_t) v) * num_words;\n        for (word = 0; word < num_words; word++) {\n            child[word] = A[base + word];\n            child_all_zero = child_all_zero && (child[word] == 0);\n        }\n        /* If the set for a given child is empty, then we know it inherits\n         * directly from the parent state and must be a singleton set. */\n        if (child_all_zero) {\n            child[state_index] = state_word;\n        }\n        for (j = 0; j < num_values; j++) {\n            word = j / 64;\n            bit = j % 64;\n            assert(word < num_words);\n            value_count[j] += bit_is_set(child[word], (uint8_t) bit);\n        }\n    }\n    max_value_count = 0;\n    for (j = 0; j < num_values; j++) {\n        max_value_count = TSK_MAX(max_value_count, value_count[j]);\n    }\n\n    Au = A + ((size_t) u * num_words);\n    for (word = 0; word < num_words; word++) {\n        Au[word] = 0;\n    }\n    for (j = 0; j < num_values; j++) {\n        if (value_count[j] == max_value_count) {\n            word = j / 64;\n            bit = j % 64;\n            Au[word] = set_bit(Au[word], (uint8_t) bit);\n        }\n    }\n}\n\nstatic void\ncompute_optimal_value(uint64_t *restrict A, const tsk_id_t *restrict left_child,\n    const tsk_id_t *restrict right_sib, const tsk_id_t u, const tsk_id_t parent_state,\n    const tsk_size_t num_values, const tsk_size_t num_words)\n{\n    if (num_words == 1) {\n        compute_optimal_value_1(A, left_child, right_sib, u, parent_state, num_values);\n    } else {\n        compute_optimal_value_general(\n            A, left_child, right_sib, u, parent_state, num_values, num_words);\n    }\n}\n\nstatic 
int\ntsk_ls_hmm_setup_optimal_value_sets(tsk_ls_hmm_t *self)\n{\n    int ret = 0;\n\n    /* We expect that most of the time there will be one word per optimal_value set,\n     * but there will be times when we need more than one word. This approach\n     * lets us expand the memory if we need to, but when the number of\n     * values goes back below 64 we revert to using one word per set. We\n     * could in principle release back the memory as well, but it doesn't seem\n     * worth the bother. */\n    self->num_optimal_value_set_words = (self->num_values / 64) + 1;\n    if (self->num_optimal_value_set_words > self->max_parsimony_words) {\n        ret = tsk_trace_error(TSK_ERR_TOO_MANY_VALUES);\n        goto out;\n    }\n    if (self->num_values >= self->max_values) {\n        self->max_values = self->num_optimal_value_set_words * 64;\n        tsk_safe_free(self->optimal_value_sets);\n        self->optimal_value_sets\n            = tsk_calloc(self->num_nodes * self->num_optimal_value_set_words,\n                sizeof(*self->optimal_value_sets));\n        if (self->optimal_value_sets == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int\ntsk_ls_hmm_build_optimal_value_sets(tsk_ls_hmm_t *self)\n{\n    int ret = 0;\n    const double *restrict node_time = self->tree_sequence->tables->nodes.time;\n    const tsk_id_t *restrict left_child = self->tree.left_child;\n    const tsk_id_t *restrict right_sib = self->tree.right_sib;\n    const tsk_id_t *restrict parent = self->parent;\n    const tsk_value_transition_t *restrict T = self->transitions;\n    const tsk_id_t *restrict T_index = self->transition_index;\n    tsk_argsort_t *restrict order = self->transition_time_order;\n    const tsk_size_t num_optimal_value_set_words = self->num_optimal_value_set_words;\n    uint64_t *restrict A = self->optimal_value_sets;\n    tsk_size_t j;\n    tsk_id_t u, v, state, parent_state;\n\n    /* 
argsort the transitions by node time so we can visit them in the\n     * correct order */\n    for (j = 0; j < self->num_transitions; j++) {\n        order[j].index = j;\n        order[j].value = DBL_MAX;\n        if (T[j].tree_node != TSK_NULL) {\n            order[j].value = node_time[T[j].tree_node];\n        }\n    }\n    qsort(order, (size_t) self->num_transitions, sizeof(*order), cmp_argsort);\n\n    for (j = 0; j < self->num_transitions; j++) {\n        u = T[order[j].index].tree_node;\n        if (u != TSK_NULL) {\n            state = T[order[j].index].value_index;\n            if (left_child[u] == TSK_NULL) {\n                /* leaf node */\n                set_optimal_value(A, u, num_optimal_value_set_words, state);\n            } else {\n                compute_optimal_value(A, left_child, right_sib, u, state,\n                    self->num_values, num_optimal_value_set_words);\n            }\n            v = parent[u];\n            if (v != TSK_NULL) {\n                while (T_index[v] == TSK_NULL) {\n                    v = parent[v];\n                    tsk_bug_assert(v != TSK_NULL);\n                }\n                parent_state = T[T_index[v]].value_index;\n                v = parent[u];\n                while (T_index[v] == TSK_NULL) {\n                    compute_optimal_value(A, left_child, right_sib, v, parent_state,\n                        self->num_values, num_optimal_value_set_words);\n                    v = parent[v];\n                    tsk_bug_assert(v != TSK_NULL);\n                }\n            }\n        }\n    }\n    return ret;\n}\n\nstatic int\ntsk_ls_hmm_redistribute_transitions(tsk_ls_hmm_t *self)\n{\n    int ret = 0;\n    const tsk_id_t *restrict left_child = self->tree.left_child;\n    const tsk_id_t *restrict right_sib = self->tree.right_sib;\n    const tsk_id_t *restrict parent = self->parent;\n    tsk_id_t *restrict T_index = self->transition_index;\n    tsk_id_t *restrict T_parent = self->transition_parent;\n    
tsk_value_transition_t *restrict T = self->transitions;\n    tsk_value_transition_t *restrict T_old = self->transitions_copy;\n    tsk_transition_stack_t *stack = self->transition_stack;\n    uint64_t *restrict A = self->optimal_value_sets;\n    const tsk_size_t num_optimal_value_set_words = self->num_optimal_value_set_words;\n    tsk_transition_stack_t s, child_s;\n    tsk_id_t root, u, v;\n    int stack_top = 0;\n    tsk_size_t j, old_num_transitions;\n\n    tsk_memcpy(T_old, T, self->num_transitions * sizeof(*T));\n    old_num_transitions = self->num_transitions;\n    self->num_transitions = 0;\n\n    /* TODO refactor this to push the virtual root onto the stack rather then\n     * iterating over the roots. See the existing parsimony implementations\n     * for an example. */\n    for (root = tsk_tree_get_left_root(&self->tree); root != TSK_NULL;\n        root = right_sib[root]) {\n        stack[0].tree_node = root;\n        stack[0].old_state = T_old[T_index[root]].value_index;\n        stack[0].new_state\n            = get_smallest_element(A, (tsk_size_t) root, num_optimal_value_set_words);\n        stack[0].transition_parent = 0;\n        stack_top = 0;\n\n        tsk_bug_assert(self->num_transitions < self->max_transitions);\n        T_parent[self->num_transitions] = TSK_NULL;\n        T[self->num_transitions].tree_node = stack[0].tree_node;\n        T[self->num_transitions].value_index = stack[0].new_state;\n        self->num_transitions++;\n\n        while (stack_top >= 0) {\n            s = stack[stack_top];\n            stack_top--;\n            for (v = left_child[s.tree_node]; v != TSK_NULL; v = right_sib[v]) {\n                child_s = s;\n                child_s.tree_node = v;\n                if (T_index[v] != TSK_NULL) {\n                    child_s.old_state = T_old[T_index[v]].value_index;\n                }\n                if (!all_zero(A, v, num_optimal_value_set_words)) {\n                    if (!element_in(A, v, s.new_state, 
num_optimal_value_set_words)) {\n                        child_s.new_state = get_smallest_element(\n                            A, (tsk_size_t) v, num_optimal_value_set_words);\n                        child_s.transition_parent = (tsk_id_t) self->num_transitions;\n                        /* Add a new transition */\n                        tsk_bug_assert(self->num_transitions < self->max_transitions);\n                        T_parent[self->num_transitions] = s.transition_parent;\n                        T[self->num_transitions].tree_node = v;\n                        T[self->num_transitions].value_index = child_s.new_state;\n                        self->num_transitions++;\n                    }\n                    stack_top++;\n                    stack[stack_top] = child_s;\n                } else {\n                    /* Node that we didn't visit when moving up the tree */\n                    if (s.old_state != s.new_state) {\n                        tsk_bug_assert(self->num_transitions < self->max_transitions);\n                        T_parent[self->num_transitions] = s.transition_parent;\n                        T[self->num_transitions].tree_node = v;\n                        T[self->num_transitions].value_index = s.old_state;\n                        self->num_transitions++;\n                    }\n                }\n            }\n        }\n    }\n\n    /* Unset the old T_index pointers and optimal_value sets. 
*/\n    for (j = 0; j < old_num_transitions; j++) {\n        u = T_old[j].tree_node;\n        if (u != TSK_NULL) {\n            T_index[u] = TSK_NULL;\n            while (u != TSK_NULL && !all_zero(A, u, num_optimal_value_set_words)) {\n                tsk_memset(A + ((tsk_size_t) u) * num_optimal_value_set_words, 0,\n                    num_optimal_value_set_words * sizeof(uint64_t));\n                u = parent[u];\n            }\n        }\n    }\n    /* Set the new pointers for transition nodes and the values.*/\n    for (j = 0; j < self->num_transitions; j++) {\n        T_index[T[j].tree_node] = (tsk_id_t) j;\n        T[j].value = self->values[T[j].value_index];\n    }\n    return ret;\n}\n\nstatic int\ntsk_ls_hmm_compress(tsk_ls_hmm_t *self)\n{\n    int ret = 0;\n\n    ret = tsk_ls_hmm_discretise_values(self);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_ls_hmm_setup_optimal_value_sets(self);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_ls_hmm_build_optimal_value_sets(self);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_ls_hmm_redistribute_transitions(self);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nstatic int\ntsk_ls_hmm_process_site_forward(\n    tsk_ls_hmm_t *self, const tsk_site_t *site, int32_t haplotype_state)\n{\n    int ret = 0;\n    double x, normalisation_factor;\n    tsk_compressed_matrix_t *output = (tsk_compressed_matrix_t *) self->output;\n    tsk_value_transition_t *restrict T = self->transitions;\n    const unsigned int precision = (unsigned int) self->precision;\n    tsk_size_t j;\n\n    ret = tsk_ls_hmm_update_probabilities(self, site, haplotype_state);\n    if (ret != 0) {\n        goto out;\n    }\n    /* See notes in the Python implementation on why we don't want to compress\n     * here, but rather should be doing it after rounding. 
*/\n    ret = tsk_ls_hmm_compress(self);\n    if (ret != 0) {\n        goto out;\n    }\n    tsk_bug_assert(self->num_transitions <= self->num_samples);\n    normalisation_factor = self->compute_normalisation_factor(self);\n\n    if (normalisation_factor == 0) {\n        ret = tsk_trace_error(TSK_ERR_MATCH_IMPOSSIBLE);\n        goto out;\n    }\n    for (j = 0; j < self->num_transitions; j++) {\n        tsk_bug_assert(T[j].tree_node != TSK_NULL);\n        x = T[j].value / normalisation_factor;\n        T[j].value = tsk_round(x, precision);\n    }\n\n    ret = tsk_compressed_matrix_store_site(\n        output, site->id, normalisation_factor, (tsk_size_t) self->num_transitions, T);\nout:\n    return ret;\n}\n\nstatic int\ntsk_ls_hmm_run_forward(tsk_ls_hmm_t *self, int32_t *haplotype)\n{\n    int ret = 0;\n    int t_ret;\n    const tsk_site_t *sites;\n    tsk_size_t j, num_sites;\n    const double n = (double) self->num_samples;\n\n    ret = tsk_ls_hmm_reset(self, 1 / n);\n    if (ret != 0) {\n        goto out;\n    }\n\n    for (t_ret = tsk_tree_first(&self->tree); t_ret == TSK_TREE_OK;\n        t_ret = tsk_tree_next(&self->tree)) {\n        ret = tsk_ls_hmm_update_tree(self, TSK_DIR_FORWARD);\n        if (ret != 0) {\n            goto out;\n        }\n        /* tsk_ls_hmm_check_state(self); */\n        ret = tsk_tree_get_sites(&self->tree, &sites, &num_sites);\n        if (ret != 0) {\n            goto out;\n        }\n        for (j = 0; j < num_sites; j++) {\n            ret = tsk_ls_hmm_process_site_forward(\n                self, &sites[j], haplotype[sites[j].id]);\n            if (ret != 0) {\n                goto out;\n            }\n        }\n    }\n    /* Set to zero so we can print and check the state OK. 
*/\n    self->num_transitions = 0;\n    if (t_ret != 0) {\n        ret = t_ret;\n        goto out;\n    }\nout:\n    return ret;\n}\n\n/****************************************************************\n * Forward Algorithm\n ****************************************************************/\n\nstatic double\ntsk_ls_hmm_compute_normalisation_factor_forward(tsk_ls_hmm_t *self)\n{\n    tsk_size_t *restrict N = self->num_transition_samples;\n    tsk_value_transition_t *restrict T = self->transitions;\n    const tsk_id_t *restrict T_parent = self->transition_parent;\n    const tsk_size_t *restrict num_samples = self->tree.num_samples;\n    const tsk_id_t num_transitions = (tsk_id_t) self->num_transitions;\n    double normalisation_factor;\n    tsk_id_t j;\n\n    /* Compute the number of samples directly inheriting from each transition */\n    for (j = 0; j < num_transitions; j++) {\n        tsk_bug_assert(T[j].tree_node != TSK_NULL);\n        N[j] = num_samples[T[j].tree_node];\n    }\n    for (j = 0; j < num_transitions; j++) {\n        if (T_parent[j] != TSK_NULL) {\n            N[T_parent[j]] -= N[j];\n        }\n    }\n\n    /* Compute the normalising constant used to avoid underflow */\n    normalisation_factor = 0;\n    for (j = 0; j < num_transitions; j++) {\n        normalisation_factor += (double) N[j] * T[j].value;\n    }\n    return normalisation_factor;\n}\n\nstatic int\ntsk_ls_hmm_next_probability_forward(tsk_ls_hmm_t *self, tsk_id_t site_id, double p_last,\n    bool is_match, tsk_id_t TSK_UNUSED(node), double *result)\n{\n    const double rho = self->recombination_rate[site_id];\n    const double mu = self->mutation_rate[site_id];\n    const double n = (double) self->num_samples;\n    const double num_alleles = self->num_alleles[site_id];\n    double p_t, p_e;\n\n    p_t = p_last * (1 - rho) + rho / n;\n    p_e = mu;\n    if (is_match) {\n        p_e = 1 - (num_alleles - 1) * mu;\n    }\n    *result = p_t * p_e;\n    return 
0;\n}\n\nint\ntsk_ls_hmm_forward(tsk_ls_hmm_t *self, int32_t *haplotype,\n    tsk_compressed_matrix_t *output, tsk_flags_t options)\n{\n    int ret = 0;\n\n    if (!(options & TSK_NO_INIT)) {\n        ret = tsk_compressed_matrix_init(output, self->tree_sequence, 0, 0);\n        if (ret != 0) {\n            goto out;\n        }\n    } else {\n        if (output->tree_sequence != self->tree_sequence) {\n            ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n            goto out;\n        }\n        ret = tsk_compressed_matrix_clear(output);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n\n    self->next_probability = tsk_ls_hmm_next_probability_forward;\n    self->compute_normalisation_factor = tsk_ls_hmm_compute_normalisation_factor_forward;\n    self->output = output;\n\n    ret = tsk_ls_hmm_run_forward(self, haplotype);\nout:\n    return ret;\n}\n\n/****************************************************************\n * Backward Algorithm\n ****************************************************************/\n\nstatic int\ntsk_ls_hmm_next_probability_backward(tsk_ls_hmm_t *self, tsk_id_t site_id, double p_last,\n    bool is_match, tsk_id_t TSK_UNUSED(node), double *result)\n{\n    const double mu = self->mutation_rate[site_id];\n    const double num_alleles = self->num_alleles[site_id];\n    double p_e;\n\n    p_e = mu;\n    if (is_match) {\n        p_e = 1 - (num_alleles - 1) * mu;\n    }\n    *result = p_last * p_e;\n    return 0;\n}\n\nstatic int\ntsk_ls_hmm_process_site_backward(tsk_ls_hmm_t *self, const tsk_site_t *site,\n    const int32_t haplotype_state, const double normalisation_factor)\n{\n    int ret = 0;\n    double x, b_last_sum;\n    tsk_compressed_matrix_t *output = (tsk_compressed_matrix_t *) self->output;\n    tsk_value_transition_t *restrict T = self->transitions;\n    const unsigned int precision = (unsigned int) self->precision;\n    const double rho = self->recombination_rate[site->id];\n    const double n = (double) 
self->num_samples;\n    tsk_size_t j;\n\n    /* FIXME!!! We are calling compress twice here because we need to compress\n     * immediately before calling store_site in order to filter out -1 nodes,\n     * and also (crucially) to ensure that the value transitions are listed\n     * in preorder, which we rely on later for decoding.\n     *\n     * https://github.com/tskit-dev/tskit/issues/2803\n     */\n    ret = tsk_ls_hmm_compress(self);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_compressed_matrix_store_site(\n        output, site->id, normalisation_factor, (tsk_size_t) self->num_transitions, T);\n    if (ret != 0) {\n        goto out;\n    }\n\n    ret = tsk_ls_hmm_update_probabilities(self, site, haplotype_state);\n    if (ret != 0) {\n        goto out;\n    }\n    /* DO WE NEED THIS compress?? See above */\n    ret = tsk_ls_hmm_compress(self);\n    if (ret != 0) {\n        goto out;\n    }\n    tsk_bug_assert(self->num_transitions <= self->num_samples);\n    b_last_sum = self->compute_normalisation_factor(self);\n    for (j = 0; j < self->num_transitions; j++) {\n        tsk_bug_assert(T[j].tree_node != TSK_NULL);\n        x = rho * b_last_sum / n + (1 - rho) * T[j].value;\n        x /= normalisation_factor;\n        T[j].value = tsk_round(x, precision);\n    }\nout:\n    return ret;\n}\n\nstatic int\ntsk_ls_hmm_run_backward(\n    tsk_ls_hmm_t *self, int32_t *haplotype, const double *forward_norm)\n{\n    int ret = 0;\n    int t_ret;\n    const tsk_site_t *sites;\n    double s;\n    tsk_size_t num_sites;\n    tsk_id_t j;\n\n    ret = tsk_ls_hmm_reset(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n\n    for (t_ret = tsk_tree_last(&self->tree); t_ret == TSK_TREE_OK;\n        t_ret = tsk_tree_prev(&self->tree)) {\n        ret = tsk_ls_hmm_update_tree(self, TSK_DIR_REVERSE);\n        if (ret != 0) {\n            goto out;\n        }\n        /* tsk_ls_hmm_check_state(self); */\n        ret = tsk_tree_get_sites(&self->tree, &sites, 
&num_sites);\n        if (ret != 0) {\n            goto out;\n        }\n        for (j = (tsk_id_t) num_sites - 1; j >= 0; j--) {\n            s = forward_norm[sites[j].id];\n            if (s <= 0) {\n                /* NOTE: I'm not sure if this is the correct interpretation,\n                 * but norm values of 0 do lead to problems, and this seems\n                 * like a simple way of guarding against it. We do seem to\n                 * get norm values of 0 with impossible matches from the fwd\n                 * matrix.\n                 */\n                ret = tsk_trace_error(TSK_ERR_MATCH_IMPOSSIBLE);\n                goto out;\n            }\n            ret = tsk_ls_hmm_process_site_backward(\n                self, &sites[j], haplotype[sites[j].id], s);\n            if (ret != 0) {\n                goto out;\n            }\n        }\n    }\n    /* Set to zero so we can print and check the state OK. */\n    self->num_transitions = 0;\n    if (t_ret != 0) {\n        ret = t_ret;\n        goto out;\n    }\nout:\n    return ret;\n}\n\nint\ntsk_ls_hmm_backward(tsk_ls_hmm_t *self, int32_t *haplotype, const double *forward_norm,\n    tsk_compressed_matrix_t *output, tsk_flags_t options)\n{\n    int ret = 0;\n\n    if (!(options & TSK_NO_INIT)) {\n        ret = tsk_compressed_matrix_init(output, self->tree_sequence, 0, 0);\n        if (ret != 0) {\n            goto out;\n        }\n    } else {\n        if (output->tree_sequence != self->tree_sequence) {\n            ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n            goto out;\n        }\n        ret = tsk_compressed_matrix_clear(output);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n\n    self->next_probability = tsk_ls_hmm_next_probability_backward;\n    self->compute_normalisation_factor = tsk_ls_hmm_compute_normalisation_factor_forward;\n    self->output = output;\n\n    ret = tsk_ls_hmm_run_backward(self, haplotype, forward_norm);\nout:\n    return 
ret;\n}\n\n/****************************************************************\n * Viterbi Algorithm\n ****************************************************************/\n\nstatic double\ntsk_ls_hmm_compute_normalisation_factor_viterbi(tsk_ls_hmm_t *self)\n{\n    tsk_value_transition_t *restrict T = self->transitions;\n    const tsk_id_t num_transitions = (tsk_id_t) self->num_transitions;\n    tsk_value_transition_t max_vt;\n    tsk_id_t j;\n\n    max_vt.value = -1;\n    max_vt.tree_node = 0; /* keep compiler happy */\n    tsk_bug_assert(num_transitions > 0);\n    for (j = 0; j < num_transitions; j++) {\n        tsk_bug_assert(T[j].tree_node != TSK_NULL);\n        if (T[j].value > max_vt.value) {\n            max_vt = T[j];\n        }\n    }\n    return max_vt.value;\n}\n\nstatic int\ntsk_ls_hmm_next_probability_viterbi(tsk_ls_hmm_t *self, tsk_id_t site, double p_last,\n    bool is_match, tsk_id_t node, double *result)\n{\n    const double rho = self->recombination_rate[site];\n    const double mu = self->mutation_rate[site];\n    const double num_alleles = self->num_alleles[site];\n    const double n = (double) self->num_samples;\n    double p_recomb, p_no_recomb, p_t, p_e;\n    bool recombination_required = false;\n\n    p_no_recomb = p_last * (1 - rho + rho / n);\n    p_recomb = rho / n;\n    if (p_no_recomb > p_recomb) {\n        p_t = p_no_recomb;\n    } else {\n        p_t = p_recomb;\n        recombination_required = true;\n    }\n    p_e = mu;\n    if (is_match) {\n        p_e = 1 - (num_alleles - 1) * mu;\n    }\n    *result = p_t * p_e;\n    return tsk_viterbi_matrix_add_recombination_required(\n        self->output, site, node, recombination_required);\n}\n\nint\ntsk_ls_hmm_viterbi(tsk_ls_hmm_t *self, int32_t *haplotype, tsk_viterbi_matrix_t *output,\n    tsk_flags_t options)\n{\n    int ret = 0;\n\n    if (!(options & TSK_NO_INIT)) {\n        ret = tsk_viterbi_matrix_init(output, self->tree_sequence, 0, 0);\n        if (ret != 0) {\n            goto out;\n 
       }\n    } else {\n        if (output->matrix.tree_sequence != self->tree_sequence) {\n            ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n            goto out;\n        }\n        ret = tsk_viterbi_matrix_clear(output);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n\n    self->next_probability = tsk_ls_hmm_next_probability_viterbi;\n    self->compute_normalisation_factor = tsk_ls_hmm_compute_normalisation_factor_viterbi;\n    self->output = output;\n\n    ret = tsk_ls_hmm_run_forward(self, haplotype);\nout:\n    return ret;\n}\n\n/****************************************************************\n * Compressed matrix\n ****************************************************************/\n\nint\ntsk_compressed_matrix_init(tsk_compressed_matrix_t *self, tsk_treeseq_t *tree_sequence,\n    tsk_size_t block_size, tsk_flags_t options)\n{\n    int ret = 0;\n\n    tsk_memset(self, 0, sizeof(*self));\n    self->tree_sequence = tree_sequence;\n    self->options = options;\n    self->num_sites = tsk_treeseq_get_num_sites(tree_sequence);\n    self->num_samples = tsk_treeseq_get_num_samples(tree_sequence);\n    self->num_transitions = tsk_malloc(self->num_sites * sizeof(*self->num_transitions));\n    self->normalisation_factor\n        = tsk_malloc(self->num_sites * sizeof(*self->normalisation_factor));\n    self->values = tsk_malloc(self->num_sites * sizeof(*self->values));\n    self->nodes = tsk_malloc(self->num_sites * sizeof(*self->nodes));\n    if (self->num_transitions == NULL || self->values == NULL || self->nodes == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    if (block_size == 0) {\n        block_size = 1 << 20;\n    }\n    ret = tsk_blkalloc_init(&self->memory, (size_t) block_size);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_compressed_matrix_clear(self);\nout:\n    return ret;\n}\n\nint\ntsk_compressed_matrix_free(tsk_compressed_matrix_t *self)\n{\n    
tsk_blkalloc_free(&self->memory);\n    tsk_safe_free(self->num_transitions);\n    tsk_safe_free(self->normalisation_factor);\n    tsk_safe_free(self->values);\n    tsk_safe_free(self->nodes);\n    return 0;\n}\n\nint\ntsk_compressed_matrix_clear(tsk_compressed_matrix_t *self)\n{\n    tsk_blkalloc_reset(&self->memory);\n    tsk_memset(\n        self->num_transitions, 0, self->num_sites * sizeof(*self->num_transitions));\n    tsk_memset(self->normalisation_factor, 0,\n        self->num_sites * sizeof(*self->normalisation_factor));\n    return 0;\n}\n\nvoid\ntsk_compressed_matrix_print_state(tsk_compressed_matrix_t *self, FILE *out)\n{\n    tsk_size_t l, j;\n\n    fprintf(out, \"Compressed matrix for %p\\n\", (void *) self->tree_sequence);\n    fprintf(out, \"num_sites = %lld\\n\", (long long) self->num_sites);\n    fprintf(out, \"num_samples = %lld\\n\", (long long) self->num_samples);\n    for (l = 0; l < self->num_sites; l++) {\n        fprintf(out, \"%lld\\ts=%f\\tv=%lld [\", (long long) l,\n            self->normalisation_factor[l], (long long) self->num_transitions[l]);\n        for (j = 0; j < self->num_transitions[l]; j++) {\n            fprintf(\n                out, \"(%lld, %f)\", (long long) self->nodes[l][j], self->values[l][j]);\n            if (j < self->num_transitions[l] - 1) {\n                fprintf(out, \",\");\n            } else {\n                fprintf(out, \"]\\n\");\n            }\n        }\n    }\n    fprintf(out, \"Memory:\\n\");\n    tsk_blkalloc_print_state(&self->memory, out);\n}\n\nint\ntsk_compressed_matrix_store_site(tsk_compressed_matrix_t *self, tsk_id_t site,\n    double normalisation_factor, tsk_size_t num_transitions,\n    const tsk_value_transition_t *transitions)\n{\n    int ret = 0;\n    tsk_size_t j;\n\n    if (site < 0 || site >= (tsk_id_t) self->num_sites) {\n        ret = tsk_trace_error(TSK_ERR_SITE_OUT_OF_BOUNDS);\n        goto out;\n    }\n\n    self->num_transitions[site] = num_transitions;\n    
self->normalisation_factor[site] = normalisation_factor;\n    self->nodes[site]\n        = tsk_blkalloc_get(&self->memory, (size_t) num_transitions * sizeof(tsk_id_t));\n    self->values[site]\n        = tsk_blkalloc_get(&self->memory, (size_t) num_transitions * sizeof(double));\n    if (self->nodes[site] == NULL || self->values[site] == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    for (j = 0; j < num_transitions; j++) {\n        tsk_bug_assert(transitions[j].tree_node >= 0);\n        self->values[site][j] = transitions[j].value;\n        self->nodes[site][j] = transitions[j].tree_node;\n    }\n\nout:\n    return ret;\n}\n\nstatic int\ntsk_compressed_matrix_decode_site(tsk_compressed_matrix_t *self, const tsk_tree_t *tree,\n    const tsk_id_t site, double *values)\n{\n    int ret = 0;\n    const tsk_id_t *restrict list_left = tree->left_sample;\n    const tsk_id_t *restrict list_right = tree->right_sample;\n    const tsk_id_t *restrict list_next = tree->next_sample;\n    const tsk_id_t num_nodes = (tsk_id_t) tsk_treeseq_get_num_nodes(self->tree_sequence);\n    tsk_size_t j;\n    tsk_id_t node, index, stop;\n    double value;\n\n    for (j = 0; j < self->num_transitions[site]; j++) {\n        node = self->nodes[site][j];\n        if (node < 0 || node >= num_nodes) {\n            ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);\n            goto out;\n        }\n        value = self->values[site][j];\n        index = list_left[node];\n        if (index == TSK_NULL) {\n            /* It's an error if there are nodes that don't subtend any samples */\n            ret = tsk_trace_error(TSK_ERR_BAD_COMPRESSED_MATRIX_NODE);\n            goto out;\n        }\n        stop = list_right[node];\n        while (true) {\n            values[index] = value;\n            if (index == stop) {\n                break;\n            }\n            index = list_next[index];\n        }\n    }\nout:\n    return 
ret;\n}\n\nint\ntsk_compressed_matrix_decode(tsk_compressed_matrix_t *self, double *values)\n{\n    int ret = 0;\n    int t_ret;\n    tsk_tree_t tree;\n    tsk_size_t j, num_tree_sites;\n    const tsk_site_t *sites = NULL;\n    tsk_id_t site_id;\n    double *site_array;\n\n    ret = tsk_tree_init(&tree, self->tree_sequence, TSK_SAMPLE_LISTS);\n    if (ret != 0) {\n        goto out;\n    }\n\n    for (t_ret = tsk_tree_first(&tree); t_ret == TSK_TREE_OK;\n        t_ret = tsk_tree_next(&tree)) {\n        ret = tsk_tree_get_sites(&tree, &sites, &num_tree_sites);\n        if (ret != 0) {\n            goto out;\n        }\n        for (j = 0; j < num_tree_sites; j++) {\n            site_id = sites[j].id;\n            site_array = values + ((tsk_size_t) site_id) * self->num_samples;\n            if (self->num_transitions[site_id] == 0) {\n                tsk_memset(site_array, 0, self->num_samples * sizeof(*site_array));\n            } else {\n                ret = tsk_compressed_matrix_decode_site(\n                    self, &tree, site_id, site_array);\n                if (ret != 0) {\n                    goto out;\n                }\n            }\n        }\n    }\n    if (t_ret < 0) {\n        ret = t_ret;\n        goto out;\n    }\nout:\n    tsk_tree_free(&tree);\n    return ret;\n}\n\n/****************************************************************\n * Viterbi matrix\n ****************************************************************/\n\nstatic int\ntsk_viterbi_matrix_expand_recomb_records(tsk_viterbi_matrix_t *self)\n{\n    int ret = 0;\n    tsk_recomb_required_record *tmp = tsk_realloc(\n        self->recombination_required, self->max_recomb_records * sizeof(*tmp));\n\n    if (tmp == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    self->recombination_required = tmp;\nout:\n    return ret;\n}\n\nint\ntsk_viterbi_matrix_init(tsk_viterbi_matrix_t *self, tsk_treeseq_t *tree_sequence,\n    tsk_size_t block_size, tsk_flags_t 
options)\n{\n    int ret = 0;\n\n    tsk_memset(self, 0, sizeof(*self));\n    if (block_size == 0) {\n        block_size = 1 << 20; /* 1MiB */\n    }\n    ret = tsk_compressed_matrix_init(&self->matrix, tree_sequence, block_size, options);\n    if (ret != 0) {\n        goto out;\n    }\n\n    self->max_recomb_records\n        = TSK_MAX(1, block_size / sizeof(tsk_recomb_required_record));\n    ret = tsk_viterbi_matrix_expand_recomb_records(self);\n    if (ret != 0) {\n        goto out;\n    }\n    /* Add the sentinel at the start to simplify traceback */\n    self->recombination_required[0].site = -1;\n\n    ret = tsk_viterbi_matrix_clear(self);\nout:\n    return ret;\n}\n\nint\ntsk_viterbi_matrix_free(tsk_viterbi_matrix_t *self)\n{\n    tsk_compressed_matrix_free(&self->matrix);\n    tsk_safe_free(self->recombination_required);\n    return 0;\n}\n\nint\ntsk_viterbi_matrix_clear(tsk_viterbi_matrix_t *self)\n{\n    self->num_recomb_records = 1;\n    tsk_compressed_matrix_clear(&self->matrix);\n    return 0;\n}\n\nvoid\ntsk_viterbi_matrix_print_state(tsk_viterbi_matrix_t *self, FILE *out)\n{\n    tsk_id_t l, j;\n\n    fprintf(out, \"viterbi_matrix\\n\");\n    fprintf(out, \"num_recomb_records = %lld\\n\", (long long) self->num_recomb_records);\n    fprintf(out, \"max_recomb_records = %lld\\n\", (long long) self->max_recomb_records);\n\n    j = 1;\n    for (l = 0; l < (tsk_id_t) self->matrix.num_sites; l++) {\n        fprintf(out, \"%lld\\t[\", (long long) l);\n        while (j < (tsk_id_t) self->num_recomb_records\n               && self->recombination_required[j].site == l) {\n            fprintf(out, \"(%lld, %d) \", (long long) self->recombination_required[j].node,\n                self->recombination_required[j].required);\n            j++;\n        }\n        fprintf(out, \"]\\n\");\n    }\n    tsk_compressed_matrix_print_state(&self->matrix, out);\n}\n\nTSK_WARN_UNUSED int\ntsk_viterbi_matrix_add_recombination_required(\n    tsk_viterbi_matrix_t *self, tsk_id_t 
site, tsk_id_t node, bool required)\n{\n    int ret = 0;\n    tsk_recomb_required_record *record;\n\n    if (self->num_recomb_records == self->max_recomb_records) {\n        self->max_recomb_records *= 2;\n        ret = tsk_viterbi_matrix_expand_recomb_records(self);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    record = self->recombination_required + self->num_recomb_records;\n    record->site = site;\n    record->node = node;\n    record->required = required;\n    self->num_recomb_records++;\nout:\n    return ret;\n}\n\nstatic tsk_id_t\ntsk_viterbi_matrix_choose_sample(\n    tsk_viterbi_matrix_t *self, tsk_id_t site, tsk_tree_t *tree)\n{\n    tsk_id_t ret;\n    tsk_id_t u = TSK_NULL;\n    const tsk_flags_t *node_flags = self->matrix.tree_sequence->tables->nodes.flags;\n    const tsk_size_t num_transitions = self->matrix.num_transitions[site];\n    const tsk_id_t *transition_nodes = self->matrix.nodes[site];\n    const double *transition_values = self->matrix.values[site];\n    double max_value = -1;\n    tsk_size_t j;\n    tsk_id_t v;\n    bool found;\n\n    if (num_transitions == 0) {\n        ret = tsk_trace_error(TSK_ERR_NULL_VITERBI_MATRIX);\n        goto out;\n    }\n    for (j = 0; j < num_transitions; j++) {\n        if (max_value < transition_values[j]) {\n            u = transition_nodes[j];\n            max_value = transition_values[j];\n        }\n    }\n    tsk_bug_assert(u != TSK_NULL);\n\n    while (!(node_flags[u] & TSK_NODE_IS_SAMPLE)) {\n        found = false;\n        for (v = tree->left_child[u]; v != TSK_NULL; v = tree->right_sib[v]) {\n            /* Choose the first child that is not in the list of transition nodes */\n            for (j = 0; j < num_transitions; j++) {\n                if (transition_nodes[j] == v) {\n                    break;\n                }\n            }\n            if (j == num_transitions) {\n                u = v;\n                found = true;\n                break;\n            }\n     
   }\n        /* TODO: should remove this once we're sure this is robust */\n        tsk_bug_assert(found);\n    }\n    ret = u;\nout:\n    return ret;\n}\n\nint\ntsk_viterbi_matrix_traceback(\n    tsk_viterbi_matrix_t *self, tsk_id_t *path, tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = 0;\n    tsk_site_t site;\n    tsk_id_t u, site_id, current_node;\n    tsk_recomb_required_record *rr_record, *rr_record_tmp;\n    const tsk_id_t num_sites = (tsk_id_t) self->matrix.num_sites;\n    const tsk_id_t num_nodes\n        = (tsk_id_t) tsk_treeseq_get_num_nodes(self->matrix.tree_sequence);\n    tsk_tree_t tree;\n    tsk_id_t *recombination_tree\n        = tsk_malloc((size_t) num_nodes * sizeof(*recombination_tree));\n\n    ret = tsk_tree_init(&tree, self->matrix.tree_sequence, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    if (recombination_tree == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    /* Initialise the path and recombination_tree to contain TSK_NULL */\n    tsk_memset(path, 0xff, ((size_t) num_sites) * sizeof(*path));\n    tsk_memset(recombination_tree, 0xff, ((size_t) num_nodes) * sizeof(*path));\n\n    current_node = TSK_NULL;\n    rr_record = &self->recombination_required[self->num_recomb_records - 1];\n    ret = tsk_tree_last(&tree);\n    if (ret < 0) {\n        goto out;\n    }\n\n    for (site_id = num_sites - 1; site_id >= 0; site_id--) {\n        ret = tsk_treeseq_get_site(self->matrix.tree_sequence, site_id, &site);\n        if (ret != 0) {\n            goto out;\n        }\n        while (tree.interval.left > site.position) {\n            ret = tsk_tree_prev(&tree);\n            if (ret < 0) {\n                goto out;\n            }\n        }\n        tsk_bug_assert(tree.interval.left <= site.position);\n        tsk_bug_assert(site.position < tree.interval.right);\n\n        /* Fill in the recombination tree */\n        rr_record_tmp = rr_record;\n        while (rr_record->site == site.id) {\n     
       recombination_tree[rr_record->node] = rr_record->required;\n            rr_record--;\n        }\n        if (current_node == TSK_NULL) {\n            current_node = tsk_viterbi_matrix_choose_sample(self, site.id, &tree);\n            if (current_node < 0) {\n                ret = (int) current_node;\n                goto out;\n            }\n        }\n        path[site.id] = current_node;\n        /* Now traverse up the tree from the current node. The\n         * first marked node tells us whether we need to recombine */\n        u = current_node;\n        while (u != TSK_NULL && recombination_tree[u] == TSK_NULL) {\n            u = tree.parent[u];\n        }\n        tsk_bug_assert(u != TSK_NULL);\n        if (recombination_tree[u] == 1) {\n            /* Switch at the next site */\n            current_node = TSK_NULL;\n        }\n\n        /* Reset in the recombination tree */\n        rr_record = rr_record_tmp;\n        while (rr_record->site == site.id) {\n            recombination_tree[rr_record->node] = TSK_NULL;\n            rr_record--;\n        }\n    }\n    ret = 0;\nout:\n    tsk_tree_free(&tree);\n    tsk_safe_free(recombination_tree);\n    return ret;\n}\n"
  },
  {
    "path": "c/tskit/haplotype_matching.h",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2024 Tskit Developers\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n#ifndef TSK_HAPLOTYPE_MATCHING_H\n#define TSK_HAPLOTYPE_MATCHING_H\n\n#ifdef __cplusplus\nextern \"C\" {\n#endif\n\n#include <tskit/trees.h>\n\n/* Seems like we might use this somewhere else as well, so putting it into the middle\n * of the flags space */\n#define TSK_ALLELES_ACGT (1 << 16)\n\ntypedef struct {\n    tsk_id_t tree_node;\n    tsk_id_t value_index;\n    double value;\n} tsk_value_transition_t;\n\ntypedef struct {\n    tsk_size_t index;\n    double value;\n} tsk_argsort_t;\n\ntypedef struct {\n    tsk_id_t tree_node;\n    tsk_id_t old_state;\n    tsk_id_t new_state;\n    tsk_id_t transition_parent;\n} tsk_transition_stack_t;\n\ntypedef struct {\n    double normalisation_factor;\n    double *value;\n    tsk_id_t *node;\n    tsk_size_t num_values;\n} tsk_site_probability_t;\n\ntypedef 
struct {\n    tsk_treeseq_t *tree_sequence;\n    tsk_flags_t options;\n    tsk_size_t num_sites;\n    tsk_size_t num_samples;\n    double *normalisation_factor;\n    tsk_size_t *num_transitions;\n    double **values;\n    tsk_id_t **nodes;\n    tsk_blkalloc_t memory;\n} tsk_compressed_matrix_t;\n\ntypedef struct {\n    tsk_id_t site;\n    tsk_id_t node;\n    bool required;\n} tsk_recomb_required_record;\n\ntypedef struct {\n    tsk_compressed_matrix_t matrix;\n    tsk_recomb_required_record *recombination_required;\n    tsk_size_t num_recomb_records;\n    tsk_size_t max_recomb_records;\n} tsk_viterbi_matrix_t;\n\ntypedef struct _tsk_ls_hmm_t {\n    /* input */\n    tsk_treeseq_t *tree_sequence;\n    double *recombination_rate;\n    double *mutation_rate;\n    const char ***alleles;\n    unsigned int precision;\n    uint32_t *num_alleles;\n    tsk_size_t num_samples;\n    tsk_size_t num_sites;\n    tsk_size_t num_nodes;\n    /* state */\n    tsk_tree_t tree;\n    tsk_id_t *parent;\n    /* The probability value transitions on the tree */\n    tsk_value_transition_t *transitions;\n    tsk_value_transition_t *transitions_copy;\n    /* Stack used when distributing transitions on the tree */\n    tsk_transition_stack_t *transition_stack;\n    /* Map of node_id to index in the transitions list */\n    tsk_id_t *transition_index;\n    /* Buffer used to argsort the transitions by node time */\n    tsk_argsort_t *transition_time_order;\n    tsk_size_t num_transitions;\n    tsk_size_t max_transitions;\n    /* The distinct values in the transitions */\n    double *values;\n    tsk_size_t num_values;\n    tsk_size_t max_values;\n    tsk_size_t max_parsimony_words;\n    /* Number of machine words per node optimal value set. 
*/\n    tsk_size_t num_optimal_value_set_words;\n    uint64_t *optimal_value_sets;\n    /* The parent transition; used during compression */\n    tsk_id_t *transition_parent;\n    /* The number of samples directly subtended by a transition */\n    tsk_size_t *num_transition_samples;\n    int32_t *allelic_state;\n    /* Algorithms set these values before they are run */\n    int (*next_probability)(\n        struct _tsk_ls_hmm_t *, tsk_id_t, double, bool, tsk_id_t, double *);\n    double (*compute_normalisation_factor)(struct _tsk_ls_hmm_t *);\n    void *output;\n} tsk_ls_hmm_t;\n\n/* TODO constify these APIs */\nint tsk_ls_hmm_init(tsk_ls_hmm_t *self, tsk_treeseq_t *tree_sequence,\n    double *recombination_rate, double *mutation_rate, tsk_flags_t options);\nint tsk_ls_hmm_set_precision(tsk_ls_hmm_t *self, unsigned int precision);\nint tsk_ls_hmm_free(tsk_ls_hmm_t *self);\nvoid tsk_ls_hmm_print_state(tsk_ls_hmm_t *self, FILE *out);\nint tsk_ls_hmm_forward(tsk_ls_hmm_t *self, int32_t *haplotype,\n    tsk_compressed_matrix_t *output, tsk_flags_t options);\nint tsk_ls_hmm_backward(tsk_ls_hmm_t *self, int32_t *haplotype,\n    const double *forward_norm, tsk_compressed_matrix_t *output, tsk_flags_t options);\nint tsk_ls_hmm_viterbi(tsk_ls_hmm_t *self, int32_t *haplotype,\n    tsk_viterbi_matrix_t *output, tsk_flags_t options);\n\nint tsk_compressed_matrix_init(tsk_compressed_matrix_t *self,\n    tsk_treeseq_t *tree_sequence, tsk_size_t block_size, tsk_flags_t options);\nint tsk_compressed_matrix_free(tsk_compressed_matrix_t *self);\nint tsk_compressed_matrix_clear(tsk_compressed_matrix_t *self);\nvoid tsk_compressed_matrix_print_state(tsk_compressed_matrix_t *self, FILE *out);\nint tsk_compressed_matrix_store_site(tsk_compressed_matrix_t *self, tsk_id_t site,\n    double normalisation_factor, tsk_size_t num_transitions,\n    const tsk_value_transition_t *transitions);\nint tsk_compressed_matrix_decode(tsk_compressed_matrix_t *self, double *values);\n\nint 
tsk_viterbi_matrix_init(tsk_viterbi_matrix_t *self, tsk_treeseq_t *tree_sequence,\n    tsk_size_t block_size, tsk_flags_t options);\nint tsk_viterbi_matrix_free(tsk_viterbi_matrix_t *self);\nint tsk_viterbi_matrix_clear(tsk_viterbi_matrix_t *self);\nvoid tsk_viterbi_matrix_print_state(tsk_viterbi_matrix_t *self, FILE *out);\nint tsk_viterbi_matrix_add_recombination_required(\n    tsk_viterbi_matrix_t *self, tsk_id_t site, tsk_id_t node, bool required);\nint tsk_viterbi_matrix_traceback(\n    tsk_viterbi_matrix_t *self, tsk_id_t *path, tsk_flags_t options);\n\n#ifdef __cplusplus\n}\n#endif\n#endif\n"
  },
  {
    "path": "c/tskit/stats.c",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2018-2025 Tskit Developers\n * Copyright (c) 2016-2017 University of Oxford\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n#include <stdio.h>\n#include <string.h>\n#include <stdlib.h>\n#include <math.h>\n\n#include <tskit/stats.h>\n\nvoid\ntsk_ld_calc_print_state(const tsk_ld_calc_t *self, FILE *out)\n{\n    fprintf(out, \"tree = %p\\n\", (const void *) &self->tree);\n    fprintf(out, \"max_sites = %d\\n\", (int) self->max_sites);\n    fprintf(out, \"max_distance = %f\\n\", self->max_distance);\n}\n\nint TSK_WARN_UNUSED\ntsk_ld_calc_init(tsk_ld_calc_t *self, const tsk_treeseq_t *tree_sequence)\n{\n    int ret = 0;\n    tsk_memset(self, 0, sizeof(*self));\n\n    ret = tsk_tree_init(&self->tree, tree_sequence, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    self->tree_sequence = tree_sequence;\n    self->total_samples = 
tsk_treeseq_get_num_samples(self->tree_sequence);\n\n    self->sample_buffer = tsk_malloc(self->total_samples * sizeof(*self->sample_buffer));\n    if (self->sample_buffer == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\nout:\n    return ret;\n}\n\nint\ntsk_ld_calc_free(tsk_ld_calc_t *self)\n{\n    tsk_tree_free(&self->tree);\n    tsk_safe_free(self->sample_buffer);\n    return 0;\n}\n\nstatic int\ntsk_ld_calc_check_site(tsk_ld_calc_t *TSK_UNUSED(self), const tsk_site_t *site)\n{\n    int ret = 0;\n\n    /* These are both limitations in the current implementation, there's no\n     * fundamental reason why we can't support them */\n    if (site->mutations_length != 1) {\n        ret = tsk_trace_error(TSK_ERR_ONLY_INFINITE_SITES);\n        goto out;\n    }\n    if (site->ancestral_state_length == site->mutations[0].derived_state_length\n        && tsk_memcmp(site->ancestral_state, site->mutations[0].derived_state,\n               site->ancestral_state_length)\n               == 0) {\n        ret = tsk_trace_error(TSK_ERR_SILENT_MUTATIONS_NOT_SUPPORTED);\n        goto out;\n    }\nout:\n    return ret;\n}\n\nstatic int\ntsk_ld_calc_set_focal_samples(tsk_ld_calc_t *self)\n{\n    int ret = 0;\n    tsk_id_t focal_node = self->focal_site.mutations[0].node;\n\n    ret = tsk_tree_track_descendant_samples(&self->tree, focal_node);\n    if (ret != 0) {\n        goto out;\n    }\n    self->focal_samples = self->tree.num_tracked_samples[focal_node];\nout:\n    return ret;\n}\n\nstatic int\ntsk_ld_calc_initialise(tsk_ld_calc_t *self, tsk_id_t a)\n{\n    int ret = 0;\n\n    ret = tsk_treeseq_get_site(self->tree_sequence, a, &self->focal_site);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_ld_calc_check_site(self, &self->focal_site);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_tree_seek(&self->tree, self->focal_site.position, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = 
tsk_ld_calc_set_focal_samples(self);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nstatic int\ntsk_ld_calc_compute_r2(tsk_ld_calc_t *self, const tsk_site_t *target_site, double *r2)\n{\n    const double n = (double) self->total_samples;\n    double f_a, f_b, f_ab, D, denom;\n    tsk_id_t node;\n    int ret = tsk_ld_calc_check_site(self, target_site);\n\n    if (ret != 0) {\n        goto out;\n    }\n    node = target_site->mutations[0].node;\n    f_a = ((double) self->focal_samples) / n;\n    f_b = ((double) self->tree.num_samples[node]) / n;\n    f_ab = ((double) self->tree.num_tracked_samples[node]) / n;\n    D = f_ab - f_a * f_b;\n    denom = f_a * f_b * (1 - f_a) * (1 - f_b);\n    *r2 = (D * D) / denom;\nout:\n    return ret;\n}\n\nstatic int\ntsk_ld_calc_compute_and_append(\n    tsk_ld_calc_t *self, const tsk_site_t *target_site, bool *ret_done)\n{\n    int ret = 0;\n    double r2;\n    double distance = fabs(self->focal_site.position - target_site->position);\n    bool done = true;\n\n    if (distance <= self->max_distance && self->result_length < self->max_sites) {\n        ret = tsk_ld_calc_compute_r2(self, target_site, &r2);\n        if (ret != 0) {\n            goto out;\n        }\n        self->result[self->result_length] = r2;\n        self->result_length++;\n        done = false;\n    }\n    *ret_done = done;\nout:\n    return ret;\n}\n\nstatic int\ntsk_ld_calc_run_forward(tsk_ld_calc_t *self)\n{\n    int ret = 0;\n    tsk_size_t j;\n    bool done = false;\n\n    for (j = 0; j < self->tree.sites_length; j++) {\n        if (self->tree.sites[j].id > self->focal_site.id) {\n            ret = tsk_ld_calc_compute_and_append(self, &self->tree.sites[j], &done);\n            if (ret != 0) {\n                goto out;\n            }\n            if (done) {\n                break;\n            }\n        }\n    }\n    while (((ret = tsk_tree_next(&self->tree)) == TSK_TREE_OK) && !done) {\n        for (j = 0; j < 
self->tree.sites_length; j++) {\n            ret = tsk_ld_calc_compute_and_append(self, &self->tree.sites[j], &done);\n            if (ret != 0) {\n                goto out;\n            }\n            if (done) {\n                break;\n            }\n        }\n    }\n    if (ret < 0) {\n        goto out;\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic int\ntsk_ld_calc_run_reverse(tsk_ld_calc_t *self)\n{\n    int ret = 0;\n    tsk_id_t j;\n    bool done = false;\n\n    for (j = (tsk_id_t) self->tree.sites_length - 1; j >= 0; j--) {\n        if (self->tree.sites[j].id < self->focal_site.id) {\n            ret = tsk_ld_calc_compute_and_append(self, &self->tree.sites[j], &done);\n            if (ret != 0) {\n                goto out;\n            }\n            if (done) {\n                break;\n            }\n        }\n    }\n    while (((ret = tsk_tree_prev(&self->tree)) == TSK_TREE_OK) && !done) {\n        for (j = (tsk_id_t) self->tree.sites_length - 1; j >= 0; j--) {\n            ret = tsk_ld_calc_compute_and_append(self, &self->tree.sites[j], &done);\n            if (ret != 0) {\n                goto out;\n            }\n            if (done) {\n                break;\n            }\n        }\n    }\n    if (ret < 0) {\n        goto out;\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nint\ntsk_ld_calc_get_r2(tsk_ld_calc_t *self, tsk_id_t a, tsk_id_t b, double *r2)\n{\n    int ret = 0;\n    tsk_site_t target_site;\n\n    ret = tsk_ld_calc_initialise(self, a);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_treeseq_get_site(self->tree_sequence, b, &target_site);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_tree_seek(&self->tree, target_site.position, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_ld_calc_compute_r2(self, &target_site, r2);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nint\ntsk_ld_calc_get_r2_array(tsk_ld_calc_t *self, tsk_id_t a, int direction,\n    
tsk_size_t max_sites, double max_distance, double *r2, tsk_size_t *num_r2_values)\n{\n    int ret = tsk_ld_calc_initialise(self, a);\n\n    if (ret != 0) {\n        goto out;\n    }\n\n    self->max_sites = max_sites;\n    self->max_distance = max_distance;\n    self->result_length = 0;\n    self->result = r2;\n\n    if (direction == TSK_DIR_FORWARD) {\n        ret = tsk_ld_calc_run_forward(self);\n    } else if (direction == TSK_DIR_REVERSE) {\n        ret = tsk_ld_calc_run_reverse(self);\n    } else {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n    }\n    if (ret != 0) {\n        goto out;\n    }\n    *num_r2_values = self->result_length;\nout:\n    return ret;\n}\n"
  },
  {
    "path": "c/tskit/stats.h",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2021 Tskit Developers\n * Copyright (c) 2016-2017 University of Oxford\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n#ifndef TSK_STATS_H\n#define TSK_STATS_H\n\n#ifdef __cplusplus\nextern \"C\" {\n#endif\n\n#include <tskit/trees.h>\n\ntypedef struct {\n    const tsk_treeseq_t *tree_sequence;\n    tsk_site_t focal_site;\n    tsk_size_t total_samples;\n    tsk_size_t focal_samples;\n    double max_distance;\n    tsk_size_t max_sites;\n    tsk_tree_t tree;\n    tsk_id_t *sample_buffer;\n    double *result;\n    tsk_size_t result_length;\n} tsk_ld_calc_t;\n\nint tsk_ld_calc_init(tsk_ld_calc_t *self, const tsk_treeseq_t *tree_sequence);\nint tsk_ld_calc_free(tsk_ld_calc_t *self);\nvoid tsk_ld_calc_print_state(const tsk_ld_calc_t *self, FILE *out);\nint tsk_ld_calc_get_r2(tsk_ld_calc_t *self, tsk_id_t a, tsk_id_t b, double *r2);\nint tsk_ld_calc_get_r2_array(tsk_ld_calc_t 
*self, tsk_id_t a, int direction,\n    tsk_size_t max_sites, double max_distance, double *r2, tsk_size_t *num_r2_values);\n\n#ifdef __cplusplus\n}\n#endif\n#endif\n"
  },
  {
    "path": "c/tskit/tables.c",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2025 Tskit Developers\n * Copyright (c) 2017-2018 University of Oxford\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n#include <assert.h>\n#include <stdio.h>\n#include <stddef.h>\n#include <string.h>\n#include <stdbool.h>\n#include <stdlib.h>\n#include <float.h>\n#include <math.h>\n\n#include <tskit/tables.h>\n\n#define TABLE_SEP \"-----------------------------------------\\n\"\n\n#define TSK_COL_OPTIONAL (1 << 0)\n\ntypedef struct {\n    const char *name;\n    void **array_dest;\n    int type;\n    tsk_flags_t options;\n} read_table_col_t;\n\ntypedef struct {\n    const char *name;\n    void **data_array_dest;\n    tsk_size_t *data_len_dest;\n    int data_type;\n    tsk_size_t **offset_array_dest;\n    tsk_flags_t options;\n} read_table_ragged_col_t;\n\ntypedef struct {\n    const char *name;\n    void **array_dest;\n    tsk_size_t *len_dest;\n    int type;\n    
tsk_flags_t options;\n} read_table_property_t;\n\ntypedef struct {\n    const char *name;\n    const void *array;\n    tsk_size_t len;\n    int type;\n} write_table_col_t;\n\ntypedef struct {\n    const char *name;\n    const void *data_array;\n    tsk_size_t data_len;\n    int data_type;\n    const tsk_size_t *offset_array;\n    tsk_size_t num_rows;\n} write_table_ragged_col_t;\n\n/* Returns true if adding the specified number of rows would result in overflow.\n * Tables can support indexes from 0 to TSK_MAX_ID, and therefore could have at most\n * TSK_MAX_ID + 1 rows. However we limit to TSK_MAX_ID rows so that counts of rows\n * can fit in a tsk_id_t. */\nstatic bool\ncheck_table_overflow(tsk_size_t current_size, tsk_size_t additional_rows)\n{\n    tsk_size_t max_val = TSK_MAX_ID;\n    return additional_rows > max_val || current_size > (max_val - additional_rows);\n}\n\n/* Returns true if adding the specified number of elements would result in overflow\n * of an offset column.\n */\nstatic bool\ncheck_offset_overflow(tsk_size_t current_size, tsk_size_t additional_elements)\n{\n    tsk_size_t max_val = TSK_MAX_SIZE;\n    return additional_elements > max_val\n           || current_size > (max_val - additional_elements);\n}\n\n#define TSK_NUM_ROWS_UNSET   ((tsk_size_t) - 1)\n#define TSK_MAX_COL_NAME_LEN 64\n\nstatic int\nread_table_cols(kastore_t *store, tsk_size_t *num_rows, read_table_col_t *cols,\n    tsk_flags_t TSK_UNUSED(flags))\n{\n    int ret = 0;\n    size_t len;\n    int type;\n    read_table_col_t *col;\n\n    for (col = cols; col->name != NULL; col++) {\n        ret = kastore_containss(store, col->name);\n        if (ret < 0) {\n            ret = tsk_set_kas_error(ret);\n            goto out;\n        }\n        if (ret == 1) {\n            ret = kastore_gets(store, col->name, col->array_dest, &len, &type);\n            if (ret != 0) {\n                ret = tsk_set_kas_error(ret);\n                goto out;\n            }\n            if (*num_rows == 
TSK_NUM_ROWS_UNSET) {\n                *num_rows = (tsk_size_t) len;\n            } else {\n                if (*num_rows != (tsk_size_t) len) {\n                    ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);\n                    goto out;\n                }\n            }\n            if (type != col->type) {\n                ret = tsk_trace_error(TSK_ERR_BAD_COLUMN_TYPE);\n                goto out;\n            }\n        } else if (!(col->options & TSK_COL_OPTIONAL)) {\n            ret = tsk_trace_error(TSK_ERR_REQUIRED_COL_NOT_FOUND);\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int\ncast_offset_array(read_table_ragged_col_t *col, uint32_t *source, tsk_size_t num_rows)\n{\n    int ret = 0;\n    tsk_size_t len = num_rows + 1;\n    tsk_size_t j;\n    uint64_t *dest = tsk_malloc(len * sizeof(*dest));\n\n    if (dest == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    *col->offset_array_dest = dest;\n    for (j = 0; j < len; j++) {\n        dest[j] = source[j];\n    }\nout:\n    return ret;\n}\n\nstatic int\nread_table_ragged_cols(kastore_t *store, tsk_size_t *num_rows,\n    read_table_ragged_col_t *cols, tsk_flags_t TSK_UNUSED(flags))\n{\n    int ret = 0;\n    size_t data_len = 0; // initial value unused, just to keep the compiler happy.\n    size_t offset_len;\n    int type;\n    read_table_ragged_col_t *col;\n    char offset_col_name[TSK_MAX_COL_NAME_LEN];\n    bool data_col_present, offset_col_present;\n    void *store_offset_array = NULL;\n    tsk_size_t *offset_array;\n\n    for (col = cols; col->name != NULL; col++) {\n        ret = kastore_containss(store, col->name);\n        if (ret < 0) {\n            ret = tsk_set_kas_error(ret);\n            goto out;\n        }\n        data_col_present = false;\n        if (ret == 1) {\n            ret = kastore_gets(store, col->name, col->data_array_dest, &data_len, &type);\n            if (ret != 0) {\n                ret = 
tsk_set_kas_error(ret);\n                goto out;\n            }\n            if (type != col->data_type) {\n                ret = tsk_trace_error(TSK_ERR_BAD_COLUMN_TYPE);\n                goto out;\n            }\n            *col->data_len_dest = (tsk_size_t) data_len;\n            data_col_present = true;\n        } else if (!(col->options & TSK_COL_OPTIONAL)) {\n            ret = tsk_trace_error(TSK_ERR_REQUIRED_COL_NOT_FOUND);\n            goto out;\n        }\n\n        assert(strlen(col->name) + strlen(\"_offset\") + 2 < sizeof(offset_col_name));\n        strcpy(offset_col_name, col->name);\n        strcat(offset_col_name, \"_offset\");\n\n        ret = kastore_containss(store, offset_col_name);\n        if (ret < 0) {\n            ret = tsk_set_kas_error(ret);\n            goto out;\n        }\n        offset_col_present = ret == 1;\n        if (offset_col_present != data_col_present) {\n            ret = tsk_trace_error(TSK_ERR_BOTH_COLUMNS_REQUIRED);\n            goto out;\n        }\n        if (offset_col_present) {\n            ret = kastore_gets(\n                store, offset_col_name, &store_offset_array, &offset_len, &type);\n            if (ret != 0) {\n                ret = tsk_set_kas_error(ret);\n                goto out;\n            }\n            /* A table with zero rows will still have an offset length of 1;\n             * catching this here prevents underflows in the logic below */\n            if (offset_len == 0) {\n                ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);\n                goto out;\n            }\n            /* Some tables have only ragged columns */\n            if (*num_rows == TSK_NUM_ROWS_UNSET) {\n                *num_rows = (tsk_size_t) offset_len - 1;\n            } else {\n                if (*num_rows != (tsk_size_t) offset_len - 1) {\n                    ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);\n                    goto out;\n                }\n            }\n            if (type == KAS_UINT64) {\n   
             *col->offset_array_dest = (uint64_t *) store_offset_array;\n                store_offset_array = NULL;\n            } else if (type == KAS_UINT32) {\n                ret = cast_offset_array(col, (uint32_t *) store_offset_array, *num_rows);\n                if (ret != 0) {\n                    goto out;\n                }\n                tsk_safe_free(store_offset_array);\n                store_offset_array = NULL;\n            } else {\n                ret = tsk_trace_error(TSK_ERR_BAD_COLUMN_TYPE);\n                goto out;\n            }\n            offset_array = *col->offset_array_dest;\n            if (offset_array[*num_rows] != (tsk_size_t) data_len) {\n                ret = tsk_trace_error(TSK_ERR_BAD_OFFSET);\n                goto out;\n            }\n        }\n    }\nout:\n    tsk_safe_free(store_offset_array);\n    return ret;\n}\n\nstatic int\nread_table_properties(\n    kastore_t *store, read_table_property_t *properties, tsk_flags_t TSK_UNUSED(flags))\n{\n    int ret = 0;\n    size_t len;\n    int type;\n    read_table_property_t *property;\n\n    for (property = properties; property->name != NULL; property++) {\n        ret = kastore_containss(store, property->name);\n        if (ret < 0) {\n            ret = tsk_set_kas_error(ret);\n            goto out;\n        }\n        if (ret == 1) {\n            ret = kastore_gets(store, property->name, property->array_dest, &len, &type);\n            if (ret != 0) {\n                ret = tsk_set_kas_error(ret);\n                assert(ret != 0); /* Tell static analysers that we're handling errors */\n                goto out;\n            }\n            if (type != property->type) {\n                ret = tsk_trace_error(TSK_ERR_BAD_COLUMN_TYPE);\n                goto out;\n            }\n            *property->len_dest = (tsk_size_t) len;\n        }\n        assert(property->options & TSK_COL_OPTIONAL);\n    }\nout:\n    return ret;\n}\n\nstatic int\nread_table(kastore_t *store, tsk_size_t 
*num_rows, read_table_col_t *cols,\n    read_table_ragged_col_t *ragged_cols, read_table_property_t *properties,\n    tsk_flags_t options)\n{\n    int ret = 0;\n\n    *num_rows = TSK_NUM_ROWS_UNSET;\n    if (cols != NULL) {\n        ret = read_table_cols(store, num_rows, cols, options);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    if (ragged_cols != NULL) {\n        ret = read_table_ragged_cols(store, num_rows, ragged_cols, options);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    if (*num_rows == TSK_NUM_ROWS_UNSET) {\n        ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);\n        goto out;\n    }\n    if (properties != NULL) {\n        ret = read_table_properties(store, properties, options);\n        if (ret != 0) {\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nstatic void\nfree_read_table_mem(read_table_col_t *cols, read_table_ragged_col_t *ragged_cols,\n    read_table_property_t *properties)\n{\n    read_table_col_t *col;\n    read_table_ragged_col_t *ragged_col;\n    read_table_property_t *property;\n\n    if (cols != NULL) {\n        for (col = cols; col->name != NULL; col++) {\n            tsk_safe_free(*(col->array_dest));\n        }\n    }\n    if (ragged_cols != NULL) {\n        for (ragged_col = ragged_cols; ragged_col->name != NULL; ragged_col++) {\n            tsk_safe_free(*(ragged_col->data_array_dest));\n            tsk_safe_free(*(ragged_col->offset_array_dest));\n        }\n    }\n    if (properties != NULL) {\n        for (property = properties; property->name != NULL; property++) {\n            tsk_safe_free(*(property->array_dest));\n        }\n    }\n}\n\nstatic int\nwrite_offset_col(\n    kastore_t *store, const write_table_ragged_col_t *col, tsk_flags_t options)\n{\n    int ret = 0;\n    char offset_col_name[TSK_MAX_COL_NAME_LEN];\n    uint32_t *offset32 = NULL;\n    tsk_size_t len = col->num_rows + 1;\n    tsk_size_t j;\n    int32_t put_flags = 0;\n    int type;\n    
const void *data;\n    bool needs_64 = col->offset_array[col->num_rows] > UINT32_MAX;\n\n    assert(strlen(col->name) + strlen(\"_offset\") + 2 < sizeof(offset_col_name));\n    strcpy(offset_col_name, col->name);\n    strcat(offset_col_name, \"_offset\");\n\n    if (options & TSK_DUMP_FORCE_OFFSET_64 || needs_64) {\n        type = KAS_UINT64;\n        data = col->offset_array;\n        put_flags = KAS_BORROWS_ARRAY;\n    } else {\n        offset32 = tsk_malloc(len * sizeof(*offset32));\n        if (offset32 == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n        for (j = 0; j < len; j++) {\n            offset32[j] = (uint32_t) col->offset_array[j];\n        }\n        type = KAS_UINT32;\n        data = offset32;\n        /* We've just allocated a temp buffer, so kas can't borrow so leave put_flags=0*/\n    }\n    ret = kastore_puts(store, offset_col_name, data, (size_t) len, type, put_flags);\n    if (ret != 0) {\n        ret = tsk_set_kas_error(ret);\n        goto out;\n    }\nout:\n    tsk_safe_free(offset32);\n    return ret;\n}\n\nstatic int\nwrite_table_ragged_cols(\n    kastore_t *store, const write_table_ragged_col_t *write_cols, tsk_flags_t options)\n{\n    int ret = 0;\n    const write_table_ragged_col_t *col;\n\n    for (col = write_cols; col->name != NULL; col++) {\n        ret = kastore_puts(store, col->name, col->data_array, (size_t) col->data_len,\n            col->data_type, KAS_BORROWS_ARRAY);\n        if (ret != 0) {\n            ret = tsk_set_kas_error(ret);\n            goto out;\n        }\n        ret = write_offset_col(store, col, options);\n        if (ret != 0) {\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int\nwrite_table_cols(kastore_t *store, const write_table_col_t *write_cols,\n    tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = 0;\n    const write_table_col_t *col;\n\n    for (col = write_cols; col->name != NULL; col++) {\n        ret = 
kastore_puts(store, col->name, col->array, (size_t) col->len, col->type,\n            KAS_BORROWS_ARRAY);\n        if (ret != 0) {\n            ret = tsk_set_kas_error(ret);\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int\nwrite_table(kastore_t *store, const write_table_col_t *cols,\n    const write_table_ragged_col_t *ragged_cols, tsk_flags_t options)\n{\n    int ret = write_table_cols(store, cols, options);\n\n    if (ret != 0) {\n        goto out;\n    }\n    ret = write_table_ragged_cols(store, ragged_cols, options);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\n/* Checks that the specified list of offsets is well-formed. */\nstatic int\ncheck_offsets(\n    tsk_size_t num_rows, const tsk_size_t *offsets, tsk_size_t length, bool check_length)\n{\n    int ret = 0;\n    tsk_size_t j;\n\n    if (offsets[0] != 0) {\n        ret = tsk_trace_error(TSK_ERR_BAD_OFFSET);\n        goto out;\n    }\n    if (check_length && offsets[num_rows] != length) {\n        ret = tsk_trace_error(TSK_ERR_BAD_OFFSET);\n        goto out;\n    }\n    for (j = 0; j < num_rows; j++) {\n        if (offsets[j] > offsets[j + 1]) {\n            ret = tsk_trace_error(TSK_ERR_BAD_OFFSET);\n            goto out;\n        }\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic int\ncalculate_max_rows(tsk_size_t num_rows, tsk_size_t max_rows,\n    tsk_size_t max_rows_increment, tsk_size_t additional_rows,\n    tsk_size_t *ret_new_max_rows)\n{\n    tsk_size_t new_max_rows;\n    int ret = 0;\n\n    if (check_table_overflow(num_rows, additional_rows)) {\n        ret = tsk_trace_error(TSK_ERR_TABLE_OVERFLOW);\n        goto out;\n    }\n\n    if (num_rows + additional_rows <= max_rows) {\n        new_max_rows = max_rows;\n    } else {\n        if (max_rows_increment == 0) {\n            /* Doubling by default */\n            new_max_rows = TSK_MIN(max_rows * 2, TSK_MAX_ID + (tsk_size_t) 1);\n            /* Add some constraints to prevent 
very small allocations */\n            if (new_max_rows < 1024) {\n                new_max_rows = 1024;\n            }\n            /* Prevent allocating more than ~2 million additional rows unless needed*/\n            if (new_max_rows - max_rows > 2097152) {\n                new_max_rows = max_rows + 2097152;\n            }\n        } else {\n            /* Use user increment value */\n            if (check_table_overflow(max_rows, max_rows_increment)) {\n                ret = tsk_trace_error(TSK_ERR_TABLE_OVERFLOW);\n                goto out;\n            }\n            new_max_rows = max_rows + max_rows_increment;\n        }\n        new_max_rows = TSK_MAX(new_max_rows, num_rows + additional_rows);\n    }\n    *ret_new_max_rows = new_max_rows;\nout:\n    return ret;\n}\n\nstatic int\ncalculate_max_length(tsk_size_t current_length, tsk_size_t max_length,\n    tsk_size_t max_length_increment, tsk_size_t additional_length,\n    tsk_size_t *ret_new_max_length)\n{\n    tsk_size_t new_max_length;\n    int ret = 0;\n\n    if (check_offset_overflow(current_length, additional_length)) {\n        ret = tsk_trace_error(TSK_ERR_COLUMN_OVERFLOW);\n        goto out;\n    }\n\n    if (current_length + additional_length <= max_length) {\n        new_max_length = max_length;\n    } else {\n        if (max_length_increment == 0) {\n            /* Doubling by default */\n            new_max_length = TSK_MIN(max_length * 2, TSK_MAX_SIZE);\n            /* Add some constraints to prevent very small allocations */\n            if (new_max_length < 65536) {\n                new_max_length = 65536;\n            }\n            /* Prevent allocating more than 100MB additional unless needed*/\n            if (new_max_length - max_length > 104857600) {\n                new_max_length = max_length + 104857600;\n            }\n            new_max_length = TSK_MAX(new_max_length, current_length + additional_length);\n        } else {\n            /* Use user increment value */\n            if 
(check_offset_overflow(max_length, max_length_increment)) {\n                /* Here we could allocate to the maximum size.\n                 * Instead we are erroring out as this is much easier to test.\n                 * The cost is that (at most) the last \"max_length_increment\"-1\n                 * bytes of the possible array space can't be used. */\n                ret = tsk_trace_error(TSK_ERR_COLUMN_OVERFLOW);\n                goto out;\n            }\n            new_max_length = max_length + max_length_increment;\n        }\n        new_max_length = TSK_MAX(new_max_length, current_length + additional_length);\n    }\n    *ret_new_max_length = new_max_length;\nout:\n    return ret;\n}\n\nstatic int\nexpand_column(void **column, tsk_size_t new_max_rows, size_t element_size)\n{\n    int ret = 0;\n    void *tmp;\n\n    tmp = tsk_realloc((void **) *column, new_max_rows * element_size);\n    if (tmp == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    *column = tmp;\nout:\n    return ret;\n}\n\nstatic int\nexpand_ragged_column(tsk_size_t current_length, tsk_size_t additional_length,\n    tsk_size_t max_length_increment, tsk_size_t *max_length, void **column,\n    size_t element_size)\n{\n    int ret = 0;\n    tsk_size_t new_max_length;\n\n    ret = calculate_max_length(current_length, *max_length, max_length_increment,\n        additional_length, &new_max_length);\n    if (ret != 0) {\n        goto out;\n    }\n\n    if (new_max_length > *max_length) {\n        ret = expand_column(column, new_max_length, element_size);\n        if (ret != 0) {\n            goto out;\n        }\n        *max_length = new_max_length;\n    }\nout:\n    return ret;\n}\n\n/* TODO rename to copy_string or replace_and_copy_string */\nstatic int\nreplace_string(\n    char **str, tsk_size_t *len, const char *new_str, const tsk_size_t new_len)\n{\n    int ret = 0;\n    tsk_safe_free(*str);\n    *str = NULL;\n    *len = new_len;\n    if (new_len > 
0) {\n        *str = tsk_malloc(new_len * sizeof(char));\n        if (*str == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n        tsk_memcpy(*str, new_str, new_len * sizeof(char));\n    }\nout:\n    return ret;\n}\n\nstatic int\ntakeset_string(char **str, tsk_size_t *len, char *new_str, const tsk_size_t new_len)\n{\n    tsk_safe_free(*str);\n    *str = new_str;\n    *len = new_len;\n    return 0;\n}\n\nstatic int\nalloc_empty_ragged_column(tsk_size_t num_rows, void **data_col, tsk_size_t **offset_col)\n{\n    int ret = 0;\n\n    *data_col = tsk_malloc(1);\n    *offset_col = tsk_calloc(num_rows + 1, sizeof(tsk_size_t));\n    if (*data_col == NULL || *offset_col == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\nout:\n    return ret;\n}\n\nstatic int\ncheck_ragged_column(tsk_size_t num_rows, void *data, tsk_size_t *offset)\n{\n    int ret = 0;\n    if ((data == NULL) != (offset == NULL)) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    if (data != NULL) {\n        ret = check_offsets(num_rows, offset, 0, false);\n        if (ret != 0) {\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int\ntakeset_ragged_column(tsk_size_t num_rows, void *data, tsk_size_t *offset,\n    void **data_dest, tsk_size_t **offset_dest, tsk_size_t *length_dest)\n{\n    int ret = 0;\n    if (data == NULL) {\n        ret = alloc_empty_ragged_column(num_rows, (void *) data_dest, offset_dest);\n        if (ret != 0) {\n            goto out;\n        }\n    } else {\n        *data_dest = data;\n        *offset_dest = offset;\n    }\n    *length_dest = (*offset_dest)[num_rows];\nout:\n    return ret;\n}\n\nstatic int\ntakeset_optional_id_column(tsk_size_t num_rows, tsk_id_t *input, tsk_id_t **dest)\n{\n    int ret = 0;\n    tsk_size_t buffsize;\n    tsk_id_t *buff;\n\n    if (input == NULL) {\n        buffsize = num_rows * sizeof(*buff);\n  
      buff = tsk_malloc(buffsize);\n        if (buff == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n        *dest = buff;\n        tsk_memset(buff, 0xff, buffsize);\n    } else {\n        *dest = input;\n    }\nout:\n    return ret;\n}\n\nstatic int\nwrite_metadata_schema_header(\n    FILE *out, const char *metadata_schema, tsk_size_t metadata_schema_length)\n{\n    const char *fmt = \"#metadata_schema#\\n\"\n                      \"%.*s\\n\"\n                      \"#end#metadata_schema\\n\" TABLE_SEP;\n    return fprintf(out, fmt, (int) metadata_schema_length, metadata_schema);\n}\n\n/* Utilities for in-place subsetting columns */\n\nstatic tsk_size_t\ncount_true(tsk_size_t num_rows, const tsk_bool_t *restrict keep)\n{\n    tsk_size_t j;\n    tsk_size_t count = 0;\n\n    for (j = 0; j < num_rows; j++) {\n        if (keep[j]) {\n            count++;\n        }\n    }\n    return count;\n}\n\nstatic void\nkeep_mask_to_id_map(\n    tsk_size_t num_rows, const tsk_bool_t *restrict keep, tsk_id_t *restrict id_map)\n{\n    tsk_size_t j;\n    tsk_id_t next_id = 0;\n\n    for (j = 0; j < num_rows; j++) {\n        id_map[j] = TSK_NULL;\n        if (keep[j]) {\n            id_map[j] = next_id;\n            next_id++;\n        }\n    }\n}\n\nstatic tsk_size_t\nsubset_remap_id_column(tsk_id_t *restrict column, tsk_size_t num_rows,\n    const tsk_bool_t *restrict keep, const tsk_id_t *restrict id_map)\n{\n    tsk_size_t j, k;\n    tsk_id_t value;\n\n    k = 0;\n    for (j = 0; j < num_rows; j++) {\n        if (keep[j]) {\n            value = column[j];\n            if (value != TSK_NULL) {\n                value = id_map[value];\n            }\n            column[k] = value;\n            k++;\n        }\n    }\n    return k;\n}\n\n/* Trigger warning: C++ programmers should look away... 
This may be one of the\n * few cases where some macro funkiness is warranted, as these are exact\n * duplicates of the same function with just the type of the column\n * parameter changed. */\n\nstatic tsk_size_t\nsubset_id_column(\n    tsk_id_t *restrict column, tsk_size_t num_rows, const tsk_bool_t *restrict keep)\n{\n    tsk_size_t j, k;\n\n    k = 0;\n    for (j = 0; j < num_rows; j++) {\n        if (keep[j]) {\n            column[k] = column[j];\n            k++;\n        }\n    }\n    return k;\n}\n\nstatic tsk_size_t\nsubset_flags_column(\n    tsk_flags_t *restrict column, tsk_size_t num_rows, const tsk_bool_t *restrict keep)\n{\n    tsk_size_t j, k;\n\n    k = 0;\n    for (j = 0; j < num_rows; j++) {\n        if (keep[j]) {\n            column[k] = column[j];\n            k++;\n        }\n    }\n    return k;\n}\n\nstatic tsk_size_t\nsubset_double_column(\n    double *restrict column, tsk_size_t num_rows, const tsk_bool_t *restrict keep)\n{\n    tsk_size_t j, k;\n\n    k = 0;\n    for (j = 0; j < num_rows; j++) {\n        if (keep[j]) {\n            column[k] = column[j];\n            k++;\n        }\n    }\n    return k;\n}\n\nstatic tsk_size_t\nsubset_ragged_char_column(char *restrict data, tsk_size_t *restrict offset_col,\n    tsk_size_t num_rows, const tsk_bool_t *restrict keep)\n{\n    tsk_size_t j, k, i, offset;\n\n    k = 0;\n    offset = 0;\n    for (j = 0; j < num_rows; j++) {\n        if (keep[j]) {\n            offset_col[k] = offset;\n            /* Note: Unclear whether it's worth calling memcpy instead here?\n             * Need to be careful since the regions are overlapping */\n            for (i = offset_col[j]; i < offset_col[j + 1]; i++) {\n                data[offset] = data[i];\n                offset++;\n            }\n            k++;\n        }\n    }\n    offset_col[k] = offset;\n    return offset;\n}\n\nstatic tsk_size_t\nsubset_ragged_double_column(double *restrict data, tsk_size_t *restrict offset_col,\n    tsk_size_t num_rows, 
const tsk_bool_t *restrict keep)\n{\n    tsk_size_t j, k, i, offset;\n\n    k = 0;\n    offset = 0;\n    for (j = 0; j < num_rows; j++) {\n        if (keep[j]) {\n            offset_col[k] = offset;\n            /* Note: Unclear whether it's worth calling memcpy instead here?\n             * Need to be careful since the regions are overlapping */\n            for (i = offset_col[j]; i < offset_col[j + 1]; i++) {\n                data[offset] = data[i];\n                offset++;\n            }\n            k++;\n        }\n    }\n    offset_col[k] = offset;\n    return offset;\n}\n\nstatic tsk_size_t\nsubset_remap_ragged_id_column(tsk_id_t *restrict data, tsk_size_t *restrict offset_col,\n    tsk_size_t num_rows, const tsk_bool_t *restrict keep,\n    const tsk_id_t *restrict id_map)\n{\n    tsk_size_t j, k, i, offset;\n    tsk_id_t di;\n\n    k = 0;\n    offset = 0;\n    for (j = 0; j < num_rows; j++) {\n        if (keep[j]) {\n            offset_col[k] = offset;\n            for (i = offset_col[j]; i < offset_col[j + 1]; i++) {\n                di = data[i];\n                if (di != TSK_NULL) {\n                    di = id_map[di];\n                }\n                data[offset] = di;\n                offset++;\n            }\n            k++;\n        }\n    }\n    offset_col[k] = offset;\n    return offset;\n}\n\n/*************************\n * reference sequence\n *************************/\n\nint\ntsk_reference_sequence_init(\n    tsk_reference_sequence_t *self, tsk_flags_t TSK_UNUSED(options))\n{\n    tsk_memset(self, 0, sizeof(*self));\n    return 0;\n}\n\nint\ntsk_reference_sequence_free(tsk_reference_sequence_t *self)\n{\n    tsk_safe_free(self->data);\n    tsk_safe_free(self->url);\n    tsk_safe_free(self->metadata);\n    tsk_safe_free(self->metadata_schema);\n    return 0;\n}\n\nbool\ntsk_reference_sequence_is_null(const tsk_reference_sequence_t *self)\n{\n    return self->data_length == 0 && self->url_length == 0 && self->metadata_length == 0\n        
   && self->metadata_schema_length == 0;\n}\n\nbool\ntsk_reference_sequence_equals(const tsk_reference_sequence_t *self,\n    const tsk_reference_sequence_t *other, tsk_flags_t options)\n{\n    int ret\n        = self->data_length == other->data_length\n          && self->url_length == other->url_length\n          && tsk_memcmp(self->data, other->data, self->data_length * sizeof(char)) == 0\n          && tsk_memcmp(self->url, other->url, self->url_length * sizeof(char)) == 0;\n\n    if (!(options & TSK_CMP_IGNORE_METADATA)) {\n        ret = ret && self->metadata_length == other->metadata_length\n              && self->metadata_schema_length == other->metadata_schema_length\n              && tsk_memcmp(self->metadata, other->metadata,\n                     self->metadata_length * sizeof(char))\n                     == 0\n              && tsk_memcmp(self->metadata_schema, other->metadata_schema,\n                     self->metadata_schema_length * sizeof(char))\n                     == 0;\n    }\n    return ret;\n}\n\nint\ntsk_reference_sequence_copy(const tsk_reference_sequence_t *self,\n    tsk_reference_sequence_t *dest, tsk_flags_t options)\n{\n    int ret = 0;\n\n    if (!(options & TSK_NO_INIT)) {\n        ret = tsk_reference_sequence_init(dest, 0);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n\n    if (tsk_reference_sequence_is_null(self)) {\n        /* This is a simple way to get any input into the NULL state */\n        tsk_reference_sequence_free(dest);\n    } else {\n        ret = tsk_reference_sequence_set_data(dest, self->data, self->data_length);\n        if (ret != 0) {\n            goto out;\n        }\n        ret = tsk_reference_sequence_set_url(dest, self->url, self->url_length);\n        if (ret != 0) {\n            goto out;\n        }\n        ret = tsk_reference_sequence_set_metadata(\n            dest, self->metadata, self->metadata_length);\n        if (ret != 0) {\n            goto out;\n        }\n        ret = 
tsk_reference_sequence_set_metadata_schema(\n            dest, self->metadata_schema, self->metadata_schema_length);\n        if (ret != 0) {\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nint\ntsk_reference_sequence_set_data(\n    tsk_reference_sequence_t *self, const char *data, tsk_size_t data_length)\n{\n    return replace_string(&self->data, &self->data_length, data, data_length);\n}\n\nint\ntsk_reference_sequence_set_url(\n    tsk_reference_sequence_t *self, const char *url, tsk_size_t url_length)\n{\n    return replace_string(&self->url, &self->url_length, url, url_length);\n}\n\nint\ntsk_reference_sequence_set_metadata(\n    tsk_reference_sequence_t *self, const char *metadata, tsk_size_t metadata_length)\n{\n    return replace_string(\n        &self->metadata, &self->metadata_length, metadata, metadata_length);\n}\n\nint\ntsk_reference_sequence_set_metadata_schema(tsk_reference_sequence_t *self,\n    const char *metadata_schema, tsk_size_t metadata_schema_length)\n{\n    return replace_string(&self->metadata_schema, &self->metadata_schema_length,\n        metadata_schema, metadata_schema_length);\n}\n\nint\ntsk_reference_sequence_takeset_data(\n    tsk_reference_sequence_t *self, char *data, tsk_size_t data_length)\n{\n    return takeset_string(&self->data, &self->data_length, data, data_length);\n}\n\nint\ntsk_reference_sequence_takeset_metadata(\n    tsk_reference_sequence_t *self, char *metadata, tsk_size_t metadata_length)\n{\n    return takeset_string(\n        &self->metadata, &self->metadata_length, metadata, metadata_length);\n}\n\n/*************************\n * individual table\n *************************/\n\nstatic void\ntsk_individual_table_free_columns(tsk_individual_table_t *self)\n{\n    tsk_safe_free(self->flags);\n    tsk_safe_free(self->location);\n    tsk_safe_free(self->location_offset);\n    tsk_safe_free(self->parents);\n    tsk_safe_free(self->parents_offset);\n    tsk_safe_free(self->metadata);\n    
tsk_safe_free(self->metadata_offset);\n}\n\nint\ntsk_individual_table_free(tsk_individual_table_t *self)\n{\n    tsk_individual_table_free_columns(self);\n    tsk_safe_free(self->metadata_schema);\n    return 0;\n}\n\nstatic int\ntsk_individual_table_expand_main_columns(\n    tsk_individual_table_t *self, tsk_size_t additional_rows)\n{\n    int ret = 0;\n    tsk_size_t new_max_rows;\n\n    ret = calculate_max_rows(self->num_rows, self->max_rows, self->max_rows_increment,\n        additional_rows, &new_max_rows);\n    if (ret != 0) {\n        goto out;\n    }\n    if ((self->num_rows + additional_rows) > self->max_rows) {\n        ret = expand_column((void **) &self->flags, new_max_rows, sizeof(tsk_flags_t));\n        if (ret != 0) {\n            goto out;\n        }\n        ret = expand_column(\n            (void **) &self->location_offset, new_max_rows + 1, sizeof(tsk_size_t));\n        if (ret != 0) {\n            goto out;\n        }\n        ret = expand_column(\n            (void **) &self->parents_offset, new_max_rows + 1, sizeof(tsk_size_t));\n        if (ret != 0) {\n            goto out;\n        }\n        ret = expand_column(\n            (void **) &self->metadata_offset, new_max_rows + 1, sizeof(tsk_size_t));\n        if (ret != 0) {\n            goto out;\n        }\n        self->max_rows = new_max_rows;\n    }\nout:\n    return ret;\n}\n\nstatic int\ntsk_individual_table_expand_location(\n    tsk_individual_table_t *self, tsk_size_t additional_length)\n{\n    return expand_ragged_column(self->location_length, additional_length,\n        self->max_location_length_increment, &self->max_location_length,\n        (void **) &self->location, sizeof(*self->location));\n}\n\nstatic int\ntsk_individual_table_expand_parents(\n    tsk_individual_table_t *self, tsk_size_t additional_length)\n{\n    return expand_ragged_column(self->parents_length, additional_length,\n        self->max_parents_length_increment, &self->max_parents_length,\n        (void **) 
&self->parents, sizeof(*self->parents));\n}\n\nstatic int\ntsk_individual_table_expand_metadata(\n    tsk_individual_table_t *self, tsk_size_t additional_length)\n{\n    return expand_ragged_column(self->metadata_length, additional_length,\n        self->max_metadata_length_increment, &self->max_metadata_length,\n        (void **) &self->metadata, sizeof(*self->metadata));\n}\n\nint\ntsk_individual_table_set_max_rows_increment(\n    tsk_individual_table_t *self, tsk_size_t max_rows_increment)\n{\n    self->max_rows_increment = max_rows_increment;\n    return 0;\n}\n\nint\ntsk_individual_table_set_max_metadata_length_increment(\n    tsk_individual_table_t *self, tsk_size_t max_metadata_length_increment)\n{\n    self->max_metadata_length_increment = (tsk_size_t) max_metadata_length_increment;\n    return 0;\n}\n\nint\ntsk_individual_table_set_max_location_length_increment(\n    tsk_individual_table_t *self, tsk_size_t max_location_length_increment)\n{\n    self->max_location_length_increment = (tsk_size_t) max_location_length_increment;\n    return 0;\n}\n\nint\ntsk_individual_table_set_max_parents_length_increment(\n    tsk_individual_table_t *self, tsk_size_t max_parents_length_increment)\n{\n    self->max_parents_length_increment = (tsk_size_t) max_parents_length_increment;\n    return 0;\n}\n\nint\ntsk_individual_table_init(tsk_individual_table_t *self, tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = 0;\n\n    tsk_memset(self, 0, sizeof(tsk_individual_table_t));\n    /* Allocate space for one row initially, ensuring we always have valid pointers\n     * even if the table is empty */\n    self->max_rows_increment = 1;\n    self->max_location_length_increment = 1;\n    self->max_parents_length_increment = 1;\n    self->max_metadata_length_increment = 1;\n    ret = tsk_individual_table_expand_main_columns(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_individual_table_expand_location(self, 1);\n    if (ret != 0) {\n        goto out;\n    
}\n    self->location_offset[0] = 0;\n    ret = tsk_individual_table_expand_parents(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    self->parents_offset[0] = 0;\n    ret = tsk_individual_table_expand_metadata(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    self->metadata_offset[0] = 0;\n    self->max_rows_increment = 0;\n    self->max_location_length_increment = 0;\n    self->max_parents_length_increment = 0;\n    self->max_metadata_length_increment = 0;\n    tsk_individual_table_set_metadata_schema(self, NULL, 0);\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_individual_table_copy(const tsk_individual_table_t *self,\n    tsk_individual_table_t *dest, tsk_flags_t options)\n{\n    int ret = 0;\n\n    if (!(options & TSK_NO_INIT)) {\n        ret = tsk_individual_table_init(dest, 0);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    ret = tsk_individual_table_set_columns(dest, self->num_rows, self->flags,\n        self->location, self->location_offset, self->parents, self->parents_offset,\n        self->metadata, self->metadata_offset);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_individual_table_set_metadata_schema(\n        dest, self->metadata_schema, self->metadata_schema_length);\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_individual_table_set_columns(tsk_individual_table_t *self, tsk_size_t num_rows,\n    const tsk_flags_t *flags, const double *location, const tsk_size_t *location_offset,\n    const tsk_id_t *parents, const tsk_size_t *parents_offset, const char *metadata,\n    const tsk_size_t *metadata_offset)\n{\n    int ret;\n\n    ret = tsk_individual_table_clear(self);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_individual_table_append_columns(self, num_rows, flags, location,\n        location_offset, parents, parents_offset, metadata, metadata_offset);\nout:\n    return ret;\n}\n\nint 
TSK_WARN_UNUSED\ntsk_individual_table_takeset_columns(tsk_individual_table_t *self, tsk_size_t num_rows,\n    tsk_flags_t *flags, double *location, tsk_size_t *location_offset, tsk_id_t *parents,\n    tsk_size_t *parents_offset, char *metadata, tsk_size_t *metadata_offset)\n{\n    int ret = 0;\n\n    /* We need to check all the inputs before we start freeing or taking memory */\n    ret = check_ragged_column(num_rows, location, location_offset);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = check_ragged_column(num_rows, parents, parents_offset);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = check_ragged_column(num_rows, metadata, metadata_offset);\n    if (ret != 0) {\n        goto out;\n    }\n\n    tsk_individual_table_free_columns(self);\n    self->num_rows = num_rows;\n    self->max_rows = num_rows;\n\n    if (flags == NULL) {\n        /* Flags defaults to all zeros if not specified. The column is often\n         * unused so this is a worthwhile optimisation. 
*/\n        self->flags = tsk_calloc(num_rows, sizeof(*self->flags));\n        if (self->flags == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n    } else {\n        self->flags = flags;\n    }\n\n    ret = takeset_ragged_column(num_rows, location, location_offset,\n        (void *) &self->location, &self->location_offset, &self->location_length);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = takeset_ragged_column(num_rows, parents, parents_offset,\n        (void *) &self->parents, &self->parents_offset, &self->parents_length);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = takeset_ragged_column(num_rows, metadata, metadata_offset,\n        (void *) &self->metadata, &self->metadata_offset, &self->metadata_length);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nint\ntsk_individual_table_append_columns(tsk_individual_table_t *self, tsk_size_t num_rows,\n    const tsk_flags_t *flags, const double *location, const tsk_size_t *location_offset,\n    const tsk_id_t *parents, const tsk_size_t *parents_offset, const char *metadata,\n    const tsk_size_t *metadata_offset)\n{\n    int ret;\n    tsk_size_t j, metadata_length, location_length, parents_length;\n\n    if (flags == NULL) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    if ((location == NULL) != (location_offset == NULL)) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    if ((parents == NULL) != (parents_offset == NULL)) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    if ((metadata == NULL) != (metadata_offset == NULL)) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    ret = tsk_individual_table_expand_main_columns(self, (tsk_size_t) num_rows);\n    if (ret != 0) {\n        goto out;\n    }\n    tsk_memcpy(self->flags + self->num_rows, flags, num_rows * 
sizeof(tsk_flags_t));\n    if (location == NULL) {\n        for (j = 0; j < num_rows; j++) {\n            self->location_offset[self->num_rows + j + 1]\n                = (tsk_size_t) self->location_length;\n        }\n    } else {\n        ret = check_offsets(num_rows, location_offset, 0, false);\n        if (ret != 0) {\n            goto out;\n        }\n        for (j = 0; j < num_rows; j++) {\n            self->location_offset[self->num_rows + j]\n                = (tsk_size_t) self->location_length + location_offset[j];\n        }\n        location_length = location_offset[num_rows];\n        ret = tsk_individual_table_expand_location(self, location_length);\n        if (ret != 0) {\n            goto out;\n        }\n        tsk_memcpy(self->location + self->location_length, location,\n            location_length * sizeof(double));\n        self->location_length += location_length;\n    }\n    if (parents == NULL) {\n        for (j = 0; j < num_rows; j++) {\n            self->parents_offset[self->num_rows + j + 1]\n                = (tsk_size_t) self->parents_length;\n        }\n    } else {\n        ret = check_offsets(num_rows, parents_offset, 0, false);\n        if (ret != 0) {\n            goto out;\n        }\n        for (j = 0; j < num_rows; j++) {\n            self->parents_offset[self->num_rows + j]\n                = (tsk_size_t) self->parents_length + parents_offset[j];\n        }\n        parents_length = parents_offset[num_rows];\n        ret = tsk_individual_table_expand_parents(self, parents_length);\n        if (ret != 0) {\n            goto out;\n        }\n        tsk_memcpy(self->parents + self->parents_length, parents,\n            parents_length * sizeof(tsk_id_t));\n        self->parents_length += parents_length;\n    }\n    if (metadata == NULL) {\n        for (j = 0; j < num_rows; j++) {\n            self->metadata_offset[self->num_rows + j + 1]\n                = (tsk_size_t) self->metadata_length;\n        }\n    } else {\n        ret 
= check_offsets(num_rows, metadata_offset, 0, false);\n        if (ret != 0) {\n            goto out;\n        }\n        for (j = 0; j < num_rows; j++) {\n            self->metadata_offset[self->num_rows + j]\n                = (tsk_size_t) self->metadata_length + metadata_offset[j];\n        }\n        metadata_length = metadata_offset[num_rows];\n        ret = tsk_individual_table_expand_metadata(self, metadata_length);\n        if (ret != 0) {\n            goto out;\n        }\n        tsk_memcpy(self->metadata + self->metadata_length, metadata,\n            metadata_length * sizeof(char));\n        self->metadata_length += metadata_length;\n    }\n    self->num_rows += (tsk_size_t) num_rows;\n    self->location_offset[self->num_rows] = self->location_length;\n    self->parents_offset[self->num_rows] = self->parents_length;\n    self->metadata_offset[self->num_rows] = self->metadata_length;\nout:\n    return ret;\n}\n\nstatic tsk_id_t\ntsk_individual_table_add_row_internal(tsk_individual_table_t *self, tsk_flags_t flags,\n    const double *location, tsk_size_t location_length, const tsk_id_t *parents,\n    const tsk_size_t parents_length, const char *metadata, tsk_size_t metadata_length)\n{\n    tsk_bug_assert(self->num_rows < self->max_rows);\n    tsk_bug_assert(self->parents_length + parents_length <= self->max_parents_length);\n    tsk_bug_assert(self->metadata_length + metadata_length <= self->max_metadata_length);\n    tsk_bug_assert(self->location_length + location_length <= self->max_location_length);\n    self->flags[self->num_rows] = flags;\n    tsk_memmove(self->location + self->location_length, location,\n        location_length * sizeof(*self->location));\n    self->location_offset[self->num_rows + 1] = self->location_length + location_length;\n    self->location_length += location_length;\n    tsk_memmove(self->parents + self->parents_length, parents,\n        parents_length * sizeof(*self->parents));\n    self->parents_offset[self->num_rows + 1] = 
self->parents_length + parents_length;\n    self->parents_length += parents_length;\n    tsk_memmove(self->metadata + self->metadata_length, metadata,\n        metadata_length * sizeof(*self->metadata));\n    self->metadata_offset[self->num_rows + 1] = self->metadata_length + metadata_length;\n    self->metadata_length += metadata_length;\n    self->num_rows++;\n    return (tsk_id_t) self->num_rows - 1;\n}\n\ntsk_id_t\ntsk_individual_table_add_row(tsk_individual_table_t *self, tsk_flags_t flags,\n    const double *location, tsk_size_t location_length, const tsk_id_t *parents,\n    tsk_size_t parents_length, const char *metadata, tsk_size_t metadata_length)\n{\n    tsk_id_t ret = 0;\n\n    ret = tsk_individual_table_expand_main_columns(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_individual_table_expand_location(self, location_length);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_individual_table_expand_parents(self, parents_length);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_individual_table_expand_metadata(self, metadata_length);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_individual_table_add_row_internal(self, flags, location, location_length,\n        parents, parents_length, metadata, metadata_length);\nout:\n    return ret;\n}\n\nstatic int\ntsk_individual_table_update_row_rewrite(tsk_individual_table_t *self, tsk_id_t index,\n    tsk_flags_t flags, const double *location, tsk_size_t location_length,\n    const tsk_id_t *parents, tsk_size_t parents_length, const char *metadata,\n    tsk_size_t metadata_length)\n{\n    int ret = 0;\n    tsk_id_t j, ret_id;\n    tsk_individual_table_t copy;\n    tsk_size_t num_rows;\n    tsk_id_t *rows = NULL;\n\n    ret = tsk_individual_table_copy(self, &copy, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    rows = tsk_malloc(self->num_rows * sizeof(*rows));\n    if (rows == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n       
 goto out;\n    }\n\n    ret = tsk_individual_table_truncate(self, (tsk_size_t) index);\n    tsk_bug_assert(ret == 0);\n    ret_id = tsk_individual_table_add_row(self, flags, location, location_length,\n        parents, parents_length, metadata, metadata_length);\n    if (ret_id < 0) {\n        ret = (int) ret_id;\n        goto out;\n    }\n    num_rows = 0;\n    for (j = index + 1; j < (tsk_id_t) copy.num_rows; j++) {\n        rows[num_rows] = j;\n        num_rows++;\n    }\n    ret = tsk_individual_table_extend(self, &copy, num_rows, rows, 0);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    tsk_individual_table_free(&copy);\n    tsk_safe_free(rows);\n    return ret;\n}\n\nint\ntsk_individual_table_update_row(tsk_individual_table_t *self, tsk_id_t index,\n    tsk_flags_t flags, const double *location, tsk_size_t location_length,\n    const tsk_id_t *parents, tsk_size_t parents_length, const char *metadata,\n    tsk_size_t metadata_length)\n{\n    int ret = 0;\n    tsk_individual_t current_row;\n\n    ret = tsk_individual_table_get_row(self, index, &current_row);\n    if (ret != 0) {\n        goto out;\n    }\n    if (current_row.location_length == location_length\n        && current_row.parents_length == parents_length\n        && current_row.metadata_length == metadata_length) {\n        self->flags[index] = flags;\n        /* Note: important to use tsk_memmove here as we may be provided pointers\n         * to the column memory as input via get_row */\n        tsk_memmove(&self->location[self->location_offset[index]], location,\n            location_length * sizeof(*location));\n        tsk_memmove(&self->parents[self->parents_offset[index]], parents,\n            parents_length * sizeof(*parents));\n        tsk_memmove(&self->metadata[self->metadata_offset[index]], metadata,\n            metadata_length * sizeof(*metadata));\n    } else {\n        ret = tsk_individual_table_update_row_rewrite(self, index, flags, location,\n            location_length, 
parents, parents_length, metadata, metadata_length);\n        if (ret != 0) {\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nint\ntsk_individual_table_clear(tsk_individual_table_t *self)\n{\n    return tsk_individual_table_truncate(self, 0);\n}\n\nint\ntsk_individual_table_truncate(tsk_individual_table_t *self, tsk_size_t num_rows)\n{\n    int ret = 0;\n\n    if (num_rows > self->num_rows) {\n        ret = tsk_trace_error(TSK_ERR_BAD_TABLE_POSITION);\n        goto out;\n    }\n    self->num_rows = num_rows;\n    self->location_length = self->location_offset[num_rows];\n    self->parents_length = self->parents_offset[num_rows];\n    self->metadata_length = self->metadata_offset[num_rows];\nout:\n    return ret;\n}\n\nint\ntsk_individual_table_extend(tsk_individual_table_t *self,\n    const tsk_individual_table_t *other, tsk_size_t num_rows,\n    const tsk_id_t *row_indexes, tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = 0;\n    tsk_id_t ret_id;\n    tsk_size_t j;\n    tsk_individual_t individual;\n\n    if (self == other) {\n        ret = tsk_trace_error(TSK_ERR_CANNOT_EXTEND_FROM_SELF);\n        goto out;\n    }\n\n    /* We know how much to expand the non-ragged columns, so do it ahead of time */\n    ret = tsk_individual_table_expand_main_columns(self, num_rows);\n    if (ret != 0) {\n        goto out;\n    }\n    for (j = 0; j < num_rows; j++) {\n        ret = tsk_individual_table_get_row(\n            other, row_indexes == NULL ? 
(tsk_id_t) j : row_indexes[j], &individual);\n        if (ret != 0) {\n            goto out;\n        }\n        ret_id = tsk_individual_table_add_row(self, individual.flags,\n            individual.location, individual.location_length, individual.parents,\n            individual.parents_length, individual.metadata, individual.metadata_length);\n        if (ret_id < 0) {\n            ret = (int) ret_id;\n            goto out;\n        }\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nvoid\ntsk_individual_table_print_state(const tsk_individual_table_t *self, FILE *out)\n{\n    tsk_size_t j, k;\n\n    fprintf(out, \"\\n\" TABLE_SEP);\n    fprintf(out, \"tsk_individual_tbl: %p:\\n\", (const void *) self);\n    fprintf(out, \"num_rows          = %lld\\tmax= %lld\\tincrement = %lld)\\n\",\n        (long long) self->num_rows, (long long) self->max_rows,\n        (long long) self->max_rows_increment);\n    fprintf(out, \"metadata_length = %lld\\tmax= %lld\\tincrement = %lld)\\n\",\n        (long long) self->metadata_length, (long long) self->max_metadata_length,\n        (long long) self->max_metadata_length_increment);\n    fprintf(out, TABLE_SEP);\n    /* We duplicate the dump_text code here because we want to output\n     * the offset columns. 
*/\n    write_metadata_schema_header(\n        out, self->metadata_schema, self->metadata_schema_length);\n    fprintf(out, \"id\\tflags\\tlocation_offset\\tlocation\\t\");\n    fprintf(out, \"parents_offset\\tparents\\t\");\n    fprintf(out, \"metadata_offset\\tmetadata\\n\");\n    for (j = 0; j < self->num_rows; j++) {\n        fprintf(out, \"%lld\\t%lld\\t\", (long long) j, (long long) self->flags[j]);\n        fprintf(out, \"%lld\\t\", (long long) self->location_offset[j]);\n        for (k = self->location_offset[j]; k < self->location_offset[j + 1]; k++) {\n            fprintf(out, \"%f\", self->location[k]);\n            if (k + 1 < self->location_offset[j + 1]) {\n                fprintf(out, \",\");\n            }\n        }\n        fprintf(out, \"\\t\");\n        fprintf(out, \"%lld\\t\", (long long) self->parents_offset[j]);\n        for (k = self->parents_offset[j]; k < self->parents_offset[j + 1]; k++) {\n            fprintf(out, \"%lld\", (long long) self->parents[k]);\n            if (k + 1 < self->parents_offset[j + 1]) {\n                fprintf(out, \",\");\n            }\n        }\n        fprintf(out, \"\\t\");\n        fprintf(out, \"%lld\\t\", (long long) self->metadata_offset[j]);\n        for (k = self->metadata_offset[j]; k < self->metadata_offset[j + 1]; k++) {\n            fprintf(out, \"%c\", self->metadata[k]);\n        }\n        fprintf(out, \"\\n\");\n    }\n}\n\nstatic inline void\ntsk_individual_table_get_row_unsafe(\n    const tsk_individual_table_t *self, tsk_id_t index, tsk_individual_t *row)\n{\n    row->id = (tsk_id_t) index;\n    row->flags = self->flags[index];\n    row->location_length\n        = self->location_offset[index + 1] - self->location_offset[index];\n    row->location = self->location + self->location_offset[index];\n    row->parents_length = self->parents_offset[index + 1] - self->parents_offset[index];\n    row->parents = self->parents + self->parents_offset[index];\n    row->metadata_length\n        = 
self->metadata_offset[index + 1] - self->metadata_offset[index];\n    row->metadata = self->metadata + self->metadata_offset[index];\n    /* Also have referencing individuals here. Should this be a different struct?\n     * See also site. */\n    row->nodes_length = 0;\n    row->nodes = NULL;\n}\n\nint\ntsk_individual_table_get_row(\n    const tsk_individual_table_t *self, tsk_id_t index, tsk_individual_t *row)\n{\n    int ret = 0;\n\n    if (index < 0 || index >= (tsk_id_t) self->num_rows) {\n        ret = tsk_trace_error(TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n        goto out;\n    }\n    tsk_individual_table_get_row_unsafe(self, index, row);\nout:\n    return ret;\n}\n\nint\ntsk_individual_table_set_metadata_schema(tsk_individual_table_t *self,\n    const char *metadata_schema, tsk_size_t metadata_schema_length)\n{\n    return replace_string(&self->metadata_schema, &self->metadata_schema_length,\n        metadata_schema, metadata_schema_length);\n}\n\nint\ntsk_individual_table_dump_text(const tsk_individual_table_t *self, FILE *out)\n{\n    int ret = TSK_ERR_IO;\n    tsk_size_t j, k;\n    tsk_size_t metadata_len;\n    int err;\n\n    err = write_metadata_schema_header(\n        out, self->metadata_schema, self->metadata_schema_length);\n    if (err < 0) {\n        goto out;\n    }\n    err = fprintf(out, \"id\\tflags\\tlocation\\tparents\\tmetadata\\n\");\n    if (err < 0) {\n        goto out;\n    }\n    for (j = 0; j < self->num_rows; j++) {\n        metadata_len = self->metadata_offset[j + 1] - self->metadata_offset[j];\n        err = fprintf(out, \"%lld\\t%lld\\t\", (long long) j, (long long) self->flags[j]);\n        if (err < 0) {\n            goto out;\n        }\n        for (k = self->location_offset[j]; k < self->location_offset[j + 1]; k++) {\n            err = fprintf(out, \"%.*g\", TSK_DBL_DECIMAL_DIG, self->location[k]);\n            if (err < 0) {\n                goto out;\n            }\n            if (k + 1 < self->location_offset[j + 1]) {\n     
           err = fprintf(out, \",\");\n                if (err < 0) {\n                    goto out;\n                }\n            }\n        }\n        err = fprintf(out, \"\\t\");\n        if (err < 0) {\n            goto out;\n        }\n        for (k = self->parents_offset[j]; k < self->parents_offset[j + 1]; k++) {\n            err = fprintf(out, \"%lld\", (long long) self->parents[k]);\n            if (err < 0) {\n                goto out;\n            }\n            if (k + 1 < self->parents_offset[j + 1]) {\n                err = fprintf(out, \",\");\n                if (err < 0) {\n                    goto out;\n                }\n            }\n        }\n        err = fprintf(out, \"\\t%.*s\\n\", (int) metadata_len,\n            self->metadata + self->metadata_offset[j]);\n        if (err < 0) {\n            goto out;\n        }\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nbool\ntsk_individual_table_equals(const tsk_individual_table_t *self,\n    const tsk_individual_table_t *other, tsk_flags_t options)\n{\n    bool ret\n        = self->num_rows == other->num_rows\n          && tsk_memcmp(self->flags, other->flags, self->num_rows * sizeof(tsk_flags_t))\n                 == 0\n          && tsk_memcmp(self->location_offset, other->location_offset,\n                 (self->num_rows + 1) * sizeof(tsk_size_t))\n                 == 0\n          && tsk_memcmp(\n                 self->location, other->location, self->location_length * sizeof(double))\n                 == 0\n          && tsk_memcmp(self->parents_offset, other->parents_offset,\n                 (self->num_rows + 1) * sizeof(tsk_size_t))\n                 == 0\n          && tsk_memcmp(\n                 self->parents, other->parents, self->parents_length * sizeof(tsk_id_t))\n                 == 0;\n\n    if (!(options & TSK_CMP_IGNORE_METADATA)) {\n        ret = ret && self->metadata_length == other->metadata_length\n              && self->metadata_schema_length == 
other->metadata_schema_length\n              && tsk_memcmp(self->metadata_offset, other->metadata_offset,\n                     (self->num_rows + 1) * sizeof(tsk_size_t))\n                     == 0\n              && tsk_memcmp(self->metadata, other->metadata,\n                     self->metadata_length * sizeof(char))\n                     == 0\n              && tsk_memcmp(self->metadata_schema, other->metadata_schema,\n                     self->metadata_schema_length * sizeof(char))\n                     == 0;\n    }\n    return ret;\n}\n\nint\ntsk_individual_table_keep_rows(tsk_individual_table_t *self, const tsk_bool_t *keep,\n    tsk_flags_t TSK_UNUSED(options), tsk_id_t *ret_id_map)\n{\n    int ret = 0;\n    const tsk_size_t current_num_rows = self->num_rows;\n    tsk_size_t j, k, remaining_rows;\n    tsk_id_t pk;\n    tsk_id_t *id_map = ret_id_map;\n    tsk_id_t *restrict parents = self->parents;\n    tsk_size_t *restrict parents_offset = self->parents_offset;\n\n    if (ret_id_map == NULL) {\n        id_map = tsk_malloc(current_num_rows * sizeof(*id_map));\n        if (id_map == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n    }\n\n    keep_mask_to_id_map(current_num_rows, keep, id_map);\n\n    /* See notes in tsk_mutation_table_keep_rows for possibilities\n     * on making this more flexible */\n    for (j = 0; j < current_num_rows; j++) {\n        if (keep[j]) {\n            for (k = parents_offset[j]; k < parents_offset[j + 1]; k++) {\n                pk = parents[k];\n                if (pk != TSK_NULL) {\n                    if (pk < 0 || pk >= (tsk_id_t) current_num_rows) {\n                        ret = tsk_trace_error(TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n                        ;\n                        goto out;\n                    }\n                    if (id_map[pk] == TSK_NULL) {\n                        ret = tsk_trace_error(TSK_ERR_KEEP_ROWS_MAP_TO_DELETED);\n                        goto 
out;\n                    }\n                }\n            }\n        }\n    }\n\n    remaining_rows = subset_flags_column(self->flags, current_num_rows, keep);\n    self->parents_length = subset_remap_ragged_id_column(\n        self->parents, self->parents_offset, current_num_rows, keep, id_map);\n    self->location_length = subset_ragged_double_column(\n        self->location, self->location_offset, current_num_rows, keep);\n    if (self->metadata_length > 0) {\n        /* Implementation note: we special case metadata here because\n         * it'll make the common-case of no metadata a bit faster, and\n         * to also potentially support more general use of the\n         * TSK_TABLE_NO_METADATA option. This is done for all the tables\n         * but only commented on here. */\n        self->metadata_length = subset_ragged_char_column(\n            self->metadata, self->metadata_offset, current_num_rows, keep);\n    }\n    self->num_rows = remaining_rows;\nout:\n    if (ret_id_map == NULL) {\n        tsk_safe_free(id_map);\n    }\n    return ret;\n}\n\nstatic int\ntsk_individual_table_dump(\n    const tsk_individual_table_t *self, kastore_t *store, tsk_flags_t options)\n{\n    const write_table_col_t write_cols[] = {\n        { \"individuals/flags\", (void *) self->flags, self->num_rows,\n            TSK_FLAGS_STORAGE_TYPE },\n        { \"individuals/metadata_schema\", (void *) self->metadata_schema,\n            self->metadata_schema_length, KAS_UINT8 },\n        { .name = NULL },\n    };\n    const write_table_ragged_col_t ragged_cols[] = {\n        { \"individuals/location\", (void *) self->location, self->location_length,\n            KAS_FLOAT64, self->location_offset, self->num_rows },\n        { \"individuals/parents\", (void *) self->parents, self->parents_length,\n            TSK_ID_STORAGE_TYPE, self->parents_offset, self->num_rows },\n        { \"individuals/metadata\", (void *) self->metadata, self->metadata_length,\n            KAS_UINT8, 
self->metadata_offset, self->num_rows },\n        { .name = NULL },\n    };\n\n    return write_table(store, write_cols, ragged_cols, options);\n}\n\nstatic int\ntsk_individual_table_load(tsk_individual_table_t *self, kastore_t *store)\n{\n    int ret = 0;\n    tsk_flags_t *flags = NULL;\n    double *location = NULL;\n    tsk_size_t *location_offset = NULL;\n    tsk_id_t *parents = NULL;\n    tsk_size_t *parents_offset = NULL;\n    char *metadata = NULL;\n    tsk_size_t *metadata_offset = NULL;\n    char *metadata_schema = NULL;\n    tsk_size_t num_rows, location_length, parents_length, metadata_length,\n        metadata_schema_length;\n\n    read_table_col_t cols[] = {\n        { \"individuals/flags\", (void **) &flags, TSK_FLAGS_STORAGE_TYPE, 0 },\n        { .name = NULL },\n    };\n    read_table_ragged_col_t ragged_cols[] = {\n        { \"individuals/location\", (void **) &location, &location_length, KAS_FLOAT64,\n            &location_offset, 0 },\n        { \"individuals/parents\", (void **) &parents, &parents_length,\n            TSK_ID_STORAGE_TYPE, &parents_offset, TSK_COL_OPTIONAL },\n        { \"individuals/metadata\", (void **) &metadata, &metadata_length, KAS_UINT8,\n            &metadata_offset, 0 },\n        { .name = NULL },\n    };\n    read_table_property_t properties[] = {\n        { \"individuals/metadata_schema\", (void **) &metadata_schema,\n            &metadata_schema_length, KAS_UINT8, TSK_COL_OPTIONAL },\n        { .name = NULL },\n    };\n\n    ret = read_table(store, &num_rows, cols, ragged_cols, properties, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    if (metadata_schema != NULL) {\n        ret = tsk_individual_table_set_metadata_schema(\n            self, metadata_schema, metadata_schema_length);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    ret = tsk_individual_table_takeset_columns(self, num_rows, flags, location,\n        location_offset, parents, parents_offset, metadata, metadata_offset);\n    
if (ret != 0) {\n        goto out;\n    }\n    flags = NULL;\n    location = NULL;\n    location_offset = NULL;\n    parents = NULL;\n    parents_offset = NULL;\n    metadata = NULL;\n    metadata_offset = NULL;\n\nout:\n    free_read_table_mem(cols, ragged_cols, properties);\n    return ret;\n}\n\n/*************************\n * node table\n *************************/\n\nstatic void\ntsk_node_table_free_columns(tsk_node_table_t *self)\n{\n    tsk_safe_free(self->flags);\n    tsk_safe_free(self->time);\n    tsk_safe_free(self->population);\n    tsk_safe_free(self->individual);\n    tsk_safe_free(self->metadata);\n    tsk_safe_free(self->metadata_offset);\n}\n\nint\ntsk_node_table_free(tsk_node_table_t *self)\n{\n    tsk_node_table_free_columns(self);\n    tsk_safe_free(self->metadata_schema);\n    return 0;\n}\n\nstatic int\ntsk_node_table_expand_main_columns(tsk_node_table_t *self, tsk_size_t additional_rows)\n{\n    int ret = 0;\n    tsk_size_t new_max_rows;\n\n    ret = calculate_max_rows(self->num_rows, self->max_rows, self->max_rows_increment,\n        additional_rows, &new_max_rows);\n    if (ret != 0) {\n        goto out;\n    }\n\n    if (new_max_rows > self->max_rows) {\n        ret = expand_column((void **) &self->flags, new_max_rows, sizeof(tsk_flags_t));\n        if (ret != 0) {\n            goto out;\n        }\n        ret = expand_column((void **) &self->time, new_max_rows, sizeof(double));\n        if (ret != 0) {\n            goto out;\n        }\n        ret = expand_column((void **) &self->population, new_max_rows, sizeof(tsk_id_t));\n        if (ret != 0) {\n            goto out;\n        }\n        ret = expand_column((void **) &self->individual, new_max_rows, sizeof(tsk_id_t));\n        if (ret != 0) {\n            goto out;\n        }\n        ret = expand_column(\n            (void **) &self->metadata_offset, new_max_rows + 1, sizeof(tsk_size_t));\n        if (ret != 0) {\n            goto out;\n        }\n        self->max_rows = 
new_max_rows;\n    }\nout:\n    return ret;\n}\n\nstatic int\ntsk_node_table_expand_metadata(tsk_node_table_t *self, tsk_size_t additional_length)\n{\n    return expand_ragged_column(self->metadata_length, additional_length,\n        self->max_metadata_length_increment, &self->max_metadata_length,\n        (void **) &self->metadata, sizeof(*self->metadata));\n}\n\nint\ntsk_node_table_set_max_rows_increment(\n    tsk_node_table_t *self, tsk_size_t max_rows_increment)\n{\n    self->max_rows_increment = max_rows_increment;\n    return 0;\n}\n\nint\ntsk_node_table_set_max_metadata_length_increment(\n    tsk_node_table_t *self, tsk_size_t max_metadata_length_increment)\n{\n    self->max_metadata_length_increment = max_metadata_length_increment;\n    return 0;\n}\n\nint\ntsk_node_table_init(tsk_node_table_t *self, tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = 0;\n\n    tsk_memset(self, 0, sizeof(tsk_node_table_t));\n    /* Allocate space for one row initially, ensuring we always have valid pointers\n     * even if the table is empty */\n    self->max_rows_increment = 1;\n    self->max_metadata_length_increment = 1;\n    ret = tsk_node_table_expand_main_columns(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_node_table_expand_metadata(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    self->metadata_offset[0] = 0;\n    self->max_rows_increment = 0;\n    self->max_metadata_length_increment = 0;\n    tsk_node_table_set_metadata_schema(self, NULL, 0);\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_node_table_copy(\n    const tsk_node_table_t *self, tsk_node_table_t *dest, tsk_flags_t options)\n{\n    int ret = 0;\n\n    if (!(options & TSK_NO_INIT)) {\n        ret = tsk_node_table_init(dest, 0);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    ret = tsk_node_table_set_columns(dest, self->num_rows, self->flags, self->time,\n        self->population, self->individual, self->metadata, self->metadata_offset);\n 
   if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_node_table_set_metadata_schema(\n        dest, self->metadata_schema, self->metadata_schema_length);\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_node_table_set_columns(tsk_node_table_t *self, tsk_size_t num_rows,\n    const tsk_flags_t *flags, const double *time, const tsk_id_t *population,\n    const tsk_id_t *individual, const char *metadata, const tsk_size_t *metadata_offset)\n{\n    int ret;\n\n    ret = tsk_node_table_clear(self);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_node_table_append_columns(\n        self, num_rows, flags, time, population, individual, metadata, metadata_offset);\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_node_table_takeset_columns(tsk_node_table_t *self, tsk_size_t num_rows,\n    tsk_flags_t *flags, double *time, tsk_id_t *population, tsk_id_t *individual,\n    char *metadata, tsk_size_t *metadata_offset)\n{\n    int ret = 0;\n\n    /* We need to check all the inputs before we start freeing or taking memory */\n    if (flags == NULL || time == NULL) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    ret = check_ragged_column(num_rows, metadata, metadata_offset);\n    if (ret != 0) {\n        goto out;\n    }\n\n    tsk_node_table_free_columns(self);\n    self->num_rows = num_rows;\n    self->max_rows = num_rows;\n    self->flags = flags;\n    self->time = time;\n\n    ret = takeset_optional_id_column(num_rows, population, &self->population);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = takeset_optional_id_column(num_rows, individual, &self->individual);\n    if (ret != 0) {\n        goto out;\n    }\n\n    ret = takeset_ragged_column(num_rows, metadata, metadata_offset,\n        (void *) &self->metadata, &self->metadata_offset, &self->metadata_length);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nint\ntsk_node_table_append_columns(tsk_node_table_t *self, 
tsk_size_t num_rows,\n    const tsk_flags_t *flags, const double *time, const tsk_id_t *population,\n    const tsk_id_t *individual, const char *metadata, const tsk_size_t *metadata_offset)\n{\n    int ret;\n    tsk_size_t j, metadata_length;\n\n    if (flags == NULL || time == NULL) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    if ((metadata == NULL) != (metadata_offset == NULL)) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    ret = tsk_node_table_expand_main_columns(self, num_rows);\n    if (ret != 0) {\n        goto out;\n    }\n    tsk_memcpy(self->time + self->num_rows, time, num_rows * sizeof(double));\n    tsk_memcpy(self->flags + self->num_rows, flags, num_rows * sizeof(tsk_flags_t));\n    if (metadata == NULL) {\n        for (j = 0; j < num_rows; j++) {\n            self->metadata_offset[self->num_rows + j + 1] = self->metadata_length;\n        }\n    } else {\n        ret = check_offsets(num_rows, metadata_offset, 0, false);\n        if (ret != 0) {\n            goto out;\n        }\n        for (j = 0; j < num_rows; j++) {\n            self->metadata_offset[self->num_rows + j]\n                = (tsk_size_t) self->metadata_length + metadata_offset[j];\n        }\n        metadata_length = metadata_offset[num_rows];\n        ret = tsk_node_table_expand_metadata(self, metadata_length);\n        if (ret != 0) {\n            goto out;\n        }\n        tsk_memcpy(self->metadata + self->metadata_length, metadata,\n            metadata_length * sizeof(char));\n        self->metadata_length += metadata_length;\n    }\n    if (population == NULL) {\n        /* Set population to NULL_POPULATION (-1) if not specified */\n        tsk_memset(self->population + self->num_rows, 0xff, num_rows * sizeof(tsk_id_t));\n    } else {\n        tsk_memcpy(\n            self->population + self->num_rows, population, num_rows * sizeof(tsk_id_t));\n    }\n    if (individual == NULL) {\n        
/* Set individual to NULL_INDIVIDUAL (-1) if not specified */\n        tsk_memset(self->individual + self->num_rows, 0xff, num_rows * sizeof(tsk_id_t));\n    } else {\n        tsk_memcpy(\n            self->individual + self->num_rows, individual, num_rows * sizeof(tsk_id_t));\n    }\n    self->num_rows += (tsk_size_t) num_rows;\n    self->metadata_offset[self->num_rows] = self->metadata_length;\nout:\n    return ret;\n}\n\nstatic tsk_id_t\ntsk_node_table_add_row_internal(tsk_node_table_t *self, tsk_flags_t flags, double time,\n    tsk_id_t population, tsk_id_t individual, const char *metadata,\n    tsk_size_t metadata_length)\n{\n    tsk_bug_assert(self->num_rows < self->max_rows);\n    tsk_bug_assert(self->metadata_length + metadata_length <= self->max_metadata_length);\n    tsk_memmove(self->metadata + self->metadata_length, metadata, metadata_length);\n    self->flags[self->num_rows] = flags;\n    self->time[self->num_rows] = time;\n    self->population[self->num_rows] = population;\n    self->individual[self->num_rows] = individual;\n    self->metadata_offset[self->num_rows + 1] = self->metadata_length + metadata_length;\n    self->metadata_length += metadata_length;\n    self->num_rows++;\n    return (tsk_id_t) self->num_rows - 1;\n}\n\ntsk_id_t\ntsk_node_table_add_row(tsk_node_table_t *self, tsk_flags_t flags, double time,\n    tsk_id_t population, tsk_id_t individual, const char *metadata,\n    tsk_size_t metadata_length)\n{\n    tsk_id_t ret = 0;\n\n    ret = tsk_node_table_expand_main_columns(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_node_table_expand_metadata(self, metadata_length);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_node_table_add_row_internal(\n        self, flags, time, population, individual, metadata, metadata_length);\nout:\n    return ret;\n}\n\nstatic int\ntsk_node_table_update_row_rewrite(tsk_node_table_t *self, tsk_id_t index,\n    tsk_flags_t flags, double time, tsk_id_t population, 
tsk_id_t individual,\n    const char *metadata, tsk_size_t metadata_length)\n{\n    int ret = 0;\n    tsk_id_t j, ret_id;\n    tsk_node_table_t copy;\n    tsk_size_t num_rows;\n    tsk_id_t *rows = NULL;\n\n    ret = tsk_node_table_copy(self, &copy, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    rows = tsk_malloc(self->num_rows * sizeof(*rows));\n    if (rows == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    ret = tsk_node_table_truncate(self, (tsk_size_t) index);\n    tsk_bug_assert(ret == 0);\n    ret_id = tsk_node_table_add_row(\n        self, flags, time, population, individual, metadata, metadata_length);\n    if (ret_id < 0) {\n        ret = (int) ret_id;\n        goto out;\n    }\n    num_rows = 0;\n    for (j = index + 1; j < (tsk_id_t) copy.num_rows; j++) {\n        rows[num_rows] = j;\n        num_rows++;\n    }\n    ret = tsk_node_table_extend(self, &copy, num_rows, rows, 0);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    tsk_node_table_free(&copy);\n    tsk_safe_free(rows);\n    return ret;\n}\n\nint\ntsk_node_table_update_row(tsk_node_table_t *self, tsk_id_t index, tsk_flags_t flags,\n    double time, tsk_id_t population, tsk_id_t individual, const char *metadata,\n    tsk_size_t metadata_length)\n{\n    int ret = 0;\n    tsk_node_t current_row;\n\n    ret = tsk_node_table_get_row(self, index, &current_row);\n    if (ret != 0) {\n        goto out;\n    }\n    if (current_row.metadata_length == metadata_length) {\n        self->flags[index] = flags;\n        self->time[index] = time;\n        self->population[index] = population;\n        self->individual[index] = individual;\n        /* Note: important to use tsk_memmove here as we may be provided pointers\n         * to the column memory as input via get_row */\n        tsk_memmove(&self->metadata[self->metadata_offset[index]], metadata,\n            metadata_length * sizeof(*metadata));\n    } else {\n        ret = 
tsk_node_table_update_row_rewrite(\n            self, index, flags, time, population, individual, metadata, metadata_length);\n        if (ret != 0) {\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_node_table_clear(tsk_node_table_t *self)\n{\n    return tsk_node_table_truncate(self, 0);\n}\n\nint\ntsk_node_table_truncate(tsk_node_table_t *self, tsk_size_t num_rows)\n{\n    int ret = 0;\n\n    if (num_rows > self->num_rows) {\n        ret = tsk_trace_error(TSK_ERR_BAD_TABLE_POSITION);\n        goto out;\n    }\n    self->num_rows = num_rows;\n    self->metadata_length = self->metadata_offset[num_rows];\nout:\n    return ret;\n}\n\nint\ntsk_node_table_extend(tsk_node_table_t *self, const tsk_node_table_t *other,\n    tsk_size_t num_rows, const tsk_id_t *row_indexes, tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = 0;\n    tsk_id_t ret_id;\n    tsk_size_t j;\n    tsk_node_t node;\n\n    if (self == other) {\n        ret = tsk_trace_error(TSK_ERR_CANNOT_EXTEND_FROM_SELF);\n        goto out;\n    }\n\n    /* We know how much to expand the non-ragged columns, so do it ahead of time */\n    ret = tsk_node_table_expand_main_columns(self, num_rows);\n    if (ret != 0) {\n        goto out;\n    }\n    for (j = 0; j < num_rows; j++) {\n        ret = tsk_node_table_get_row(\n            other, row_indexes == NULL ? 
(tsk_id_t) j : row_indexes[j], &node);\n        if (ret != 0) {\n            goto out;\n        }\n        ret_id = tsk_node_table_add_row(self, node.flags, node.time, node.population,\n            node.individual, node.metadata, node.metadata_length);\n        if (ret_id < 0) {\n            ret = (int) ret_id;\n            goto out;\n        }\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nvoid\ntsk_node_table_print_state(const tsk_node_table_t *self, FILE *out)\n{\n    tsk_size_t j, k;\n\n    fprintf(out, \"\\n\" TABLE_SEP);\n    fprintf(out, \"tsk_node_tbl: %p:\\n\", (const void *) self);\n    fprintf(out, \"num_rows          = %lld\\tmax= %lld\\tincrement = %lld)\\n\",\n        (long long) self->num_rows, (long long) self->max_rows,\n        (long long) self->max_rows_increment);\n    fprintf(out, \"metadata_length = %lld\\tmax= %lld\\tincrement = %lld)\\n\",\n        (long long) self->metadata_length, (long long) self->max_metadata_length,\n        (long long) self->max_metadata_length_increment);\n    fprintf(out, TABLE_SEP);\n    /* We duplicate the dump_text code here for simplicity because we want to output\n     * the flags column directly. 
*/\n    write_metadata_schema_header(\n        out, self->metadata_schema, self->metadata_schema_length);\n    fprintf(out, \"id\\tflags\\ttime\\tpopulation\\tindividual\\tmetadata_offset\\tmetadata\\n\");\n    for (j = 0; j < self->num_rows; j++) {\n        fprintf(out, \"%lld\\t%lld\\t%f\\t%lld\\t%lld\\t%lld\\t\", (long long) j,\n            (long long) self->flags[j], self->time[j], (long long) self->population[j],\n            (long long) self->individual[j], (long long) self->metadata_offset[j]);\n        for (k = self->metadata_offset[j]; k < self->metadata_offset[j + 1]; k++) {\n            fprintf(out, \"%c\", self->metadata[k]);\n        }\n        fprintf(out, \"\\n\");\n    }\n    tsk_bug_assert(self->metadata_offset[0] == 0);\n    tsk_bug_assert(self->metadata_offset[self->num_rows] == self->metadata_length);\n}\n\nint\ntsk_node_table_set_metadata_schema(tsk_node_table_t *self, const char *metadata_schema,\n    tsk_size_t metadata_schema_length)\n{\n    return replace_string(&self->metadata_schema, &self->metadata_schema_length,\n        metadata_schema, metadata_schema_length);\n}\n\nint\ntsk_node_table_dump_text(const tsk_node_table_t *self, FILE *out)\n{\n    int ret = TSK_ERR_IO;\n    tsk_size_t j;\n    tsk_size_t metadata_len;\n    int err;\n\n    err = write_metadata_schema_header(\n        out, self->metadata_schema, self->metadata_schema_length);\n    if (err < 0) {\n        goto out;\n    }\n    err = fprintf(out, \"id\\tis_sample\\ttime\\tpopulation\\tindividual\\tmetadata\\n\");\n    if (err < 0) {\n        goto out;\n    }\n    for (j = 0; j < self->num_rows; j++) {\n        metadata_len = self->metadata_offset[j + 1] - self->metadata_offset[j];\n        err = fprintf(out, \"%lld\\t%lld\\t%f\\t%lld\\t%lld\\t%.*s\\n\", (long long) j,\n            (long long) (self->flags[j] & TSK_NODE_IS_SAMPLE), self->time[j],\n            (long long) self->population[j], (long long) self->individual[j],\n            (int) metadata_len, self->metadata + 
self->metadata_offset[j]);\n        if (err < 0) {\n            goto out;\n        }\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nbool\ntsk_node_table_equals(\n    const tsk_node_table_t *self, const tsk_node_table_t *other, tsk_flags_t options)\n{\n    bool ret\n        = self->num_rows == other->num_rows\n          && tsk_memcmp(self->time, other->time, self->num_rows * sizeof(double)) == 0\n          && tsk_memcmp(self->flags, other->flags, self->num_rows * sizeof(tsk_flags_t))\n                 == 0\n          && tsk_memcmp(\n                 self->population, other->population, self->num_rows * sizeof(tsk_id_t))\n                 == 0\n          && tsk_memcmp(\n                 self->individual, other->individual, self->num_rows * sizeof(tsk_id_t))\n                 == 0;\n    if (!(options & TSK_CMP_IGNORE_METADATA)) {\n        ret = ret && self->metadata_length == other->metadata_length\n              && self->metadata_schema_length == other->metadata_schema_length\n              && tsk_memcmp(self->metadata_offset, other->metadata_offset,\n                     (self->num_rows + 1) * sizeof(tsk_size_t))\n                     == 0\n              && tsk_memcmp(self->metadata, other->metadata,\n                     self->metadata_length * sizeof(char))\n                     == 0\n              && tsk_memcmp(self->metadata_schema, other->metadata_schema,\n                     self->metadata_schema_length * sizeof(char))\n                     == 0;\n    }\n    return ret;\n}\n\nstatic inline void\ntsk_node_table_get_row_unsafe(\n    const tsk_node_table_t *self, tsk_id_t index, tsk_node_t *row)\n{\n    row->id = (tsk_id_t) index;\n    row->flags = self->flags[index];\n    row->time = self->time[index];\n    row->population = self->population[index];\n    row->individual = self->individual[index];\n    row->metadata_length\n        = self->metadata_offset[index + 1] - self->metadata_offset[index];\n    row->metadata = self->metadata + 
self->metadata_offset[index];\n}\n\nint\ntsk_node_table_get_row(const tsk_node_table_t *self, tsk_id_t index, tsk_node_t *row)\n{\n    int ret = 0;\n\n    if (index < 0 || index >= (tsk_id_t) self->num_rows) {\n        ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);\n        goto out;\n    }\n    tsk_node_table_get_row_unsafe(self, index, row);\nout:\n    return ret;\n}\n\nint\ntsk_node_table_keep_rows(tsk_node_table_t *self, const tsk_bool_t *keep,\n    tsk_flags_t TSK_UNUSED(options), tsk_id_t *id_map)\n{\n    int ret = 0;\n    tsk_size_t remaining_rows;\n\n    if (id_map != NULL) {\n        keep_mask_to_id_map(self->num_rows, keep, id_map);\n    }\n\n    remaining_rows = subset_flags_column(self->flags, self->num_rows, keep);\n    subset_double_column(self->time, self->num_rows, keep);\n    subset_id_column(self->population, self->num_rows, keep);\n    subset_id_column(self->individual, self->num_rows, keep);\n    if (self->metadata_length > 0) {\n        self->metadata_length = subset_ragged_char_column(\n            self->metadata, self->metadata_offset, self->num_rows, keep);\n    }\n    self->num_rows = remaining_rows;\n    return ret;\n}\n\nstatic int\ntsk_node_table_dump(const tsk_node_table_t *self, kastore_t *store, tsk_flags_t options)\n{\n    const write_table_col_t cols[] = {\n        { \"nodes/time\", (void *) self->time, self->num_rows, KAS_FLOAT64 },\n        { \"nodes/flags\", (void *) self->flags, self->num_rows, TSK_FLAGS_STORAGE_TYPE },\n        { \"nodes/population\", (void *) self->population, self->num_rows,\n            TSK_ID_STORAGE_TYPE },\n        { \"nodes/individual\", (void *) self->individual, self->num_rows,\n            TSK_ID_STORAGE_TYPE },\n        { \"nodes/metadata_schema\", (void *) self->metadata_schema,\n            self->metadata_schema_length, KAS_UINT8 },\n        { .name = NULL },\n    };\n    const write_table_ragged_col_t ragged_cols[] = {\n        { \"nodes/metadata\", (void *) self->metadata, 
self->metadata_length, KAS_UINT8,\n            self->metadata_offset, self->num_rows },\n        { .name = NULL },\n    };\n\n    return write_table(store, cols, ragged_cols, options);\n}\n\nstatic int\ntsk_node_table_load(tsk_node_table_t *self, kastore_t *store)\n{\n    int ret = 0;\n    char *metadata_schema = NULL;\n    double *time = NULL;\n    tsk_flags_t *flags = NULL;\n    tsk_id_t *population = NULL;\n    tsk_id_t *individual = NULL;\n    char *metadata = NULL;\n    tsk_size_t *metadata_offset = NULL;\n    tsk_size_t num_rows, metadata_length, metadata_schema_length;\n    read_table_col_t cols[] = {\n        { \"nodes/time\", (void **) &time, KAS_FLOAT64, 0 },\n        { \"nodes/flags\", (void **) &flags, TSK_FLAGS_STORAGE_TYPE, 0 },\n        { \"nodes/population\", (void **) &population, TSK_ID_STORAGE_TYPE, 0 },\n        { \"nodes/individual\", (void **) &individual, TSK_ID_STORAGE_TYPE, 0 },\n        { .name = NULL },\n    };\n    read_table_ragged_col_t ragged_cols[] = {\n        { \"nodes/metadata\", (void **) &metadata, &metadata_length, KAS_UINT8,\n            &metadata_offset, 0 },\n        { .name = NULL },\n    };\n    read_table_property_t properties[] = {\n        { \"nodes/metadata_schema\", (void **) &metadata_schema, &metadata_schema_length,\n            KAS_UINT8, TSK_COL_OPTIONAL },\n        { .name = NULL },\n    };\n\n    ret = read_table(store, &num_rows, cols, ragged_cols, properties, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    if (metadata_schema != NULL) {\n        ret = tsk_node_table_set_metadata_schema(\n            self, metadata_schema, metadata_schema_length);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    ret = tsk_node_table_takeset_columns(\n        self, num_rows, flags, time, population, individual, metadata, metadata_offset);\n    if (ret != 0) {\n        goto out;\n    }\n    flags = NULL;\n    time = NULL;\n    population = NULL;\n    individual = NULL;\n    metadata = NULL;\n    
metadata_offset = NULL;\nout:\n    free_read_table_mem(cols, ragged_cols, properties);\n    return ret;\n}\n\n/*************************\n * edge table\n *************************/\n\nstatic void\ntsk_edge_table_free_columns(tsk_edge_table_t *self)\n{\n    tsk_safe_free(self->left);\n    tsk_safe_free(self->right);\n    tsk_safe_free(self->parent);\n    tsk_safe_free(self->child);\n    tsk_safe_free(self->metadata);\n    tsk_safe_free(self->metadata_offset);\n}\n\nint\ntsk_edge_table_free(tsk_edge_table_t *self)\n{\n    tsk_edge_table_free_columns(self);\n    tsk_safe_free(self->metadata_schema);\n    return 0;\n}\n\nstatic int\ntsk_edge_table_has_metadata(const tsk_edge_table_t *self)\n{\n    return !(self->options & TSK_TABLE_NO_METADATA);\n}\n\nstatic int\ntsk_edge_table_expand_main_columns(tsk_edge_table_t *self, tsk_size_t additional_rows)\n{\n    int ret = 0;\n    tsk_size_t new_max_rows;\n\n    ret = calculate_max_rows(self->num_rows, self->max_rows, self->max_rows_increment,\n        additional_rows, &new_max_rows);\n    if (ret != 0) {\n        goto out;\n    }\n    if ((self->num_rows + additional_rows) > self->max_rows) {\n        ret = expand_column((void **) &self->left, new_max_rows, sizeof(double));\n        if (ret != 0) {\n            goto out;\n        }\n        ret = expand_column((void **) &self->right, new_max_rows, sizeof(double));\n        if (ret != 0) {\n            goto out;\n        }\n        ret = expand_column((void **) &self->parent, new_max_rows, sizeof(tsk_id_t));\n        if (ret != 0) {\n            goto out;\n        }\n        ret = expand_column((void **) &self->child, new_max_rows, sizeof(tsk_id_t));\n        if (ret != 0) {\n            goto out;\n        }\n        if (tsk_edge_table_has_metadata(self)) {\n            ret = expand_column(\n                (void **) &self->metadata_offset, new_max_rows + 1, sizeof(tsk_size_t));\n            if (ret != 0) {\n                goto out;\n            }\n        }\n        
self->max_rows = new_max_rows;\n    }\nout:\n    return ret;\n}\n\nstatic int\ntsk_edge_table_expand_metadata(tsk_edge_table_t *self, tsk_size_t additional_length)\n{\n    return expand_ragged_column(self->metadata_length, additional_length,\n        self->max_metadata_length_increment, &self->max_metadata_length,\n        (void **) &self->metadata, sizeof(*self->metadata));\n}\n\nint\ntsk_edge_table_set_max_rows_increment(\n    tsk_edge_table_t *self, tsk_size_t max_rows_increment)\n{\n    self->max_rows_increment = max_rows_increment;\n    return 0;\n}\n\nint\ntsk_edge_table_set_max_metadata_length_increment(\n    tsk_edge_table_t *self, tsk_size_t max_metadata_length_increment)\n{\n    self->max_metadata_length_increment = max_metadata_length_increment;\n    return 0;\n}\n\nint\ntsk_edge_table_init(tsk_edge_table_t *self, tsk_flags_t options)\n{\n    int ret = 0;\n\n    tsk_memset(self, 0, sizeof(*self));\n    self->options = options;\n\n    /* Allocate space for one row initially, ensuring we always have valid\n     * pointers even if the table is empty */\n    self->max_rows_increment = 1;\n    self->max_metadata_length_increment = 1;\n    ret = tsk_edge_table_expand_main_columns(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    if (tsk_edge_table_has_metadata(self)) {\n        ret = tsk_edge_table_expand_metadata(self, 1);\n        if (ret != 0) {\n            goto out;\n        }\n        self->metadata_offset[0] = 0;\n    }\n    self->max_rows_increment = 0;\n    self->max_metadata_length_increment = 0;\n    tsk_edge_table_set_metadata_schema(self, NULL, 0);\nout:\n    return ret;\n}\n\ntsk_id_t\ntsk_edge_table_add_row(tsk_edge_table_t *self, double left, double right,\n    tsk_id_t parent, tsk_id_t child, const char *metadata, tsk_size_t metadata_length)\n{\n    tsk_id_t ret = 0;\n\n    if (metadata_length > 0 && !tsk_edge_table_has_metadata(self)) {\n        ret = tsk_trace_error(TSK_ERR_METADATA_DISABLED);\n        goto out;\n    }\n\n    ret 
= tsk_edge_table_expand_main_columns(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n\n    tsk_bug_assert(self->num_rows < self->max_rows);\n    self->left[self->num_rows] = left;\n    self->right[self->num_rows] = right;\n    self->parent[self->num_rows] = parent;\n    self->child[self->num_rows] = child;\n\n    if (tsk_edge_table_has_metadata(self)) {\n        ret = tsk_edge_table_expand_metadata(self, metadata_length);\n        if (ret != 0) {\n            goto out;\n        }\n        tsk_bug_assert(\n            self->metadata_length + metadata_length <= self->max_metadata_length);\n        tsk_memmove(self->metadata + self->metadata_length, metadata, metadata_length);\n        self->metadata_offset[self->num_rows + 1]\n            = self->metadata_length + metadata_length;\n        self->metadata_length += metadata_length;\n    }\n    ret = (tsk_id_t) self->num_rows;\n    self->num_rows++;\nout:\n    return ret;\n}\n\nstatic int\ntsk_edge_table_update_row_rewrite(tsk_edge_table_t *self, tsk_id_t index, double left,\n    double right, tsk_id_t parent, tsk_id_t child, const char *metadata,\n    tsk_size_t metadata_length)\n{\n    int ret = 0;\n    tsk_id_t j, ret_id;\n    tsk_edge_table_t copy;\n    tsk_size_t num_rows;\n    tsk_id_t *rows = NULL;\n\n    ret = tsk_edge_table_copy(self, &copy, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    rows = tsk_malloc(self->num_rows * sizeof(*rows));\n    if (rows == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    ret = tsk_edge_table_truncate(self, (tsk_size_t) index);\n    tsk_bug_assert(ret == 0);\n    ret_id = tsk_edge_table_add_row(\n        self, left, right, parent, child, metadata, metadata_length);\n    if (ret_id < 0) {\n        ret = (int) ret_id;\n        goto out;\n    }\n    num_rows = 0;\n    for (j = index + 1; j < (tsk_id_t) copy.num_rows; j++) {\n        rows[num_rows] = j;\n        num_rows++;\n    }\n    ret = tsk_edge_table_extend(self, 
&copy, num_rows, rows, 0);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    tsk_edge_table_free(&copy);\n    tsk_safe_free(rows);\n    return ret;\n}\n\nint\ntsk_edge_table_update_row(tsk_edge_table_t *self, tsk_id_t index, double left,\n    double right, tsk_id_t parent, tsk_id_t child, const char *metadata,\n    tsk_size_t metadata_length)\n{\n    int ret = 0;\n    tsk_edge_t current_row;\n\n    ret = tsk_edge_table_get_row(self, index, &current_row);\n    if (ret != 0) {\n        goto out;\n    }\n    if (current_row.metadata_length == metadata_length) {\n        self->left[index] = left;\n        self->right[index] = right;\n        self->parent[index] = parent;\n        self->child[index] = child;\n        if (tsk_edge_table_has_metadata(self)) {\n            /* Note: important to use tsk_memmove here as we may be provided pointers\n             * to the column memory as input via get_row */\n            tsk_memmove(&self->metadata[self->metadata_offset[index]], metadata,\n                metadata_length * sizeof(*metadata));\n        }\n    } else {\n        ret = tsk_edge_table_update_row_rewrite(\n            self, index, left, right, parent, child, metadata, metadata_length);\n        if (ret != 0) {\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_edge_table_copy(\n    const tsk_edge_table_t *self, tsk_edge_table_t *dest, tsk_flags_t options)\n{\n    int ret = 0;\n    char *metadata = NULL;\n    tsk_size_t *metadata_offset = NULL;\n\n    if (!(options & TSK_NO_INIT)) {\n        ret = tsk_edge_table_init(dest, options);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n\n    /* We can't use TSK_TABLE_NO_METADATA in dest if metadata_length is non-zero.\n     * This also captures the case where TSK_TABLE_NO_METADATA is set on this table.\n     */\n    if (self->metadata_length > 0 && !tsk_edge_table_has_metadata(dest)) {\n        ret = tsk_trace_error(TSK_ERR_METADATA_DISABLED);\n        
goto out;\n    }\n    if (tsk_edge_table_has_metadata(dest)) {\n        metadata = self->metadata;\n        metadata_offset = self->metadata_offset;\n    }\n    ret = tsk_edge_table_set_columns(dest, self->num_rows, self->left, self->right,\n        self->parent, self->child, metadata, metadata_offset);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_edge_table_set_metadata_schema(\n        dest, self->metadata_schema, self->metadata_schema_length);\nout:\n    return ret;\n}\n\nint\ntsk_edge_table_set_columns(tsk_edge_table_t *self, tsk_size_t num_rows,\n    const double *left, const double *right, const tsk_id_t *parent,\n    const tsk_id_t *child, const char *metadata, const tsk_size_t *metadata_offset)\n{\n    int ret = 0;\n\n    ret = tsk_edge_table_clear(self);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_edge_table_append_columns(\n        self, num_rows, left, right, parent, child, metadata, metadata_offset);\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_edge_table_takeset_columns(tsk_edge_table_t *self, tsk_size_t num_rows, double *left,\n    double *right, tsk_id_t *parent, tsk_id_t *child, char *metadata,\n    tsk_size_t *metadata_offset)\n{\n    int ret = 0;\n\n    /* We need to check all the inputs before we start freeing or taking memory */\n    if (left == NULL || right == NULL || parent == NULL || child == NULL) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    if (metadata != NULL && !tsk_edge_table_has_metadata(self)) {\n        ret = tsk_trace_error(TSK_ERR_METADATA_DISABLED);\n        goto out;\n    }\n    ret = check_ragged_column(num_rows, metadata, metadata_offset);\n    if (ret != 0) {\n        goto out;\n    }\n\n    tsk_edge_table_free_columns(self);\n    self->num_rows = num_rows;\n    self->max_rows = num_rows;\n    self->left = left;\n    self->right = right;\n    self->parent = parent;\n    self->child = child;\n\n    ret = takeset_ragged_column(num_rows, 
metadata, metadata_offset,\n        (void *) &self->metadata, &self->metadata_offset, &self->metadata_length);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nint\ntsk_edge_table_append_columns(tsk_edge_table_t *self, tsk_size_t num_rows,\n    const double *left, const double *right, const tsk_id_t *parent,\n    const tsk_id_t *child, const char *metadata, const tsk_size_t *metadata_offset)\n{\n    int ret;\n    tsk_size_t j, metadata_length;\n\n    if (left == NULL || right == NULL || parent == NULL || child == NULL) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    if ((metadata == NULL) != (metadata_offset == NULL)) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    if (metadata != NULL && !tsk_edge_table_has_metadata(self)) {\n        ret = tsk_trace_error(TSK_ERR_METADATA_DISABLED);\n        goto out;\n    }\n\n    ret = tsk_edge_table_expand_main_columns(self, num_rows);\n    if (ret != 0) {\n        goto out;\n    }\n    tsk_memcpy(self->left + self->num_rows, left, num_rows * sizeof(double));\n    tsk_memcpy(self->right + self->num_rows, right, num_rows * sizeof(double));\n    tsk_memcpy(self->parent + self->num_rows, parent, num_rows * sizeof(tsk_id_t));\n    tsk_memcpy(self->child + self->num_rows, child, num_rows * sizeof(tsk_id_t));\n    if (tsk_edge_table_has_metadata(self)) {\n        if (metadata == NULL) {\n            for (j = 0; j < num_rows; j++) {\n                self->metadata_offset[self->num_rows + j + 1] = self->metadata_length;\n            }\n        } else {\n            ret = check_offsets(num_rows, metadata_offset, 0, false);\n            if (ret != 0) {\n                goto out;\n            }\n            for (j = 0; j < num_rows; j++) {\n                self->metadata_offset[self->num_rows + j]\n                    = (tsk_size_t) self->metadata_length + metadata_offset[j];\n            }\n            metadata_length = 
metadata_offset[num_rows];\n            ret = tsk_edge_table_expand_metadata(self, metadata_length);\n            if (ret != 0) {\n                goto out;\n            }\n            tsk_memcpy(self->metadata + self->metadata_length, metadata,\n                metadata_length * sizeof(char));\n            self->metadata_length += metadata_length;\n        }\n        self->num_rows += num_rows;\n        self->metadata_offset[self->num_rows] = self->metadata_length;\n    } else {\n        self->num_rows += num_rows;\n    }\nout:\n    return ret;\n}\n\nint\ntsk_edge_table_clear(tsk_edge_table_t *self)\n{\n    return tsk_edge_table_truncate(self, 0);\n}\n\nint\ntsk_edge_table_truncate(tsk_edge_table_t *self, tsk_size_t num_rows)\n{\n    int ret = 0;\n\n    if (num_rows > self->num_rows) {\n        ret = tsk_trace_error(TSK_ERR_BAD_TABLE_POSITION);\n        goto out;\n    }\n    self->num_rows = num_rows;\n    if (tsk_edge_table_has_metadata(self)) {\n        self->metadata_length = self->metadata_offset[num_rows];\n    }\nout:\n    return ret;\n}\n\nint\ntsk_edge_table_extend(tsk_edge_table_t *self, const tsk_edge_table_t *other,\n    tsk_size_t num_rows, const tsk_id_t *row_indexes, tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = 0;\n    tsk_id_t ret_id;\n    tsk_size_t j;\n    tsk_edge_t edge;\n\n    if (self == other) {\n        ret = tsk_trace_error(TSK_ERR_CANNOT_EXTEND_FROM_SELF);\n        goto out;\n    }\n\n    /* We know how much to expand the non-ragged columns, so do it ahead of time */\n    ret = tsk_edge_table_expand_main_columns(self, num_rows);\n    if (ret != 0) {\n        goto out;\n    }\n    for (j = 0; j < num_rows; j++) {\n        ret = tsk_edge_table_get_row(\n            other, row_indexes == NULL ? 
(tsk_id_t) j : row_indexes[j], &edge);\n        if (ret != 0) {\n            goto out;\n        }\n        ret_id = tsk_edge_table_add_row(self, edge.left, edge.right, edge.parent,\n            edge.child, edge.metadata, edge.metadata_length);\n        if (ret_id < 0) {\n            ret = (int) ret_id;\n            goto out;\n        }\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic inline void\ntsk_edge_table_get_row_unsafe(\n    const tsk_edge_table_t *self, tsk_id_t index, tsk_edge_t *row)\n{\n    row->id = (tsk_id_t) index;\n    row->left = self->left[index];\n    row->right = self->right[index];\n    row->parent = self->parent[index];\n    row->child = self->child[index];\n    if (tsk_edge_table_has_metadata(self)) {\n        row->metadata_length\n            = self->metadata_offset[index + 1] - self->metadata_offset[index];\n        row->metadata = self->metadata + self->metadata_offset[index];\n    } else {\n        row->metadata_length = 0;\n        row->metadata = NULL;\n    }\n}\n\nint\ntsk_edge_table_get_row(const tsk_edge_table_t *self, tsk_id_t index, tsk_edge_t *row)\n{\n    int ret = 0;\n\n    if (index < 0 || index >= (tsk_id_t) self->num_rows) {\n        ret = tsk_trace_error(TSK_ERR_EDGE_OUT_OF_BOUNDS);\n        goto out;\n    }\n    tsk_edge_table_get_row_unsafe(self, index, row);\nout:\n    return ret;\n}\n\nvoid\ntsk_edge_table_print_state(const tsk_edge_table_t *self, FILE *out)\n{\n    int ret;\n\n    fprintf(out, \"\\n\" TABLE_SEP);\n    fprintf(out, \"edge_table: %p:\\n\", (const void *) self);\n    fprintf(out, \"options         = 0x%X\\n\", self->options);\n    fprintf(out, \"num_rows        = %lld\\tmax= %lld\\tincrement = %lld)\\n\",\n        (long long) self->num_rows, (long long) self->max_rows,\n        (long long) self->max_rows_increment);\n    fprintf(out, \"metadata_length = %lld\\tmax= %lld\\tincrement = %lld)\\n\",\n        (long long) self->metadata_length, (long long) self->max_metadata_length,\n        (long long) 
self->max_metadata_length_increment);\n    fprintf(out, TABLE_SEP);\n    ret = tsk_edge_table_dump_text(self, out);\n    tsk_bug_assert(ret == 0);\n}\n\nint\ntsk_edge_table_set_metadata_schema(tsk_edge_table_t *self, const char *metadata_schema,\n    tsk_size_t metadata_schema_length)\n{\n    return replace_string(&self->metadata_schema, &self->metadata_schema_length,\n        metadata_schema, metadata_schema_length);\n}\n\nint\ntsk_edge_table_dump_text(const tsk_edge_table_t *self, FILE *out)\n{\n    tsk_id_t j;\n    int ret = TSK_ERR_IO;\n    tsk_edge_t row;\n    int err;\n\n    err = write_metadata_schema_header(\n        out, self->metadata_schema, self->metadata_schema_length);\n    if (err < 0) {\n        goto out;\n    }\n    err = fprintf(out, \"id\\tleft\\tright\\tparent\\tchild\\tmetadata\\n\");\n    if (err < 0) {\n        goto out;\n    }\n    for (j = 0; j < (tsk_id_t) self->num_rows; j++) {\n        tsk_edge_table_get_row_unsafe(self, j, &row);\n        err = fprintf(out, \"%lld\\t%.3f\\t%.3f\\t%lld\\t%lld\\t%.*s\\n\", (long long) j,\n            row.left, row.right, (long long) row.parent, (long long) row.child,\n            (int) row.metadata_length, row.metadata);\n        if (err < 0) {\n            goto out;\n        }\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nbool\ntsk_edge_table_equals(\n    const tsk_edge_table_t *self, const tsk_edge_table_t *other, tsk_flags_t options)\n{\n    bool metadata_equal;\n    bool ret\n        = self->num_rows == other->num_rows\n          && tsk_memcmp(self->left, other->left, self->num_rows * sizeof(double)) == 0\n          && tsk_memcmp(self->right, other->right, self->num_rows * sizeof(double)) == 0\n          && tsk_memcmp(self->parent, other->parent, self->num_rows * sizeof(tsk_id_t))\n                 == 0\n          && tsk_memcmp(self->child, other->child, self->num_rows * sizeof(tsk_id_t))\n                 == 0;\n\n    if (!(options & TSK_CMP_IGNORE_METADATA)) {\n        ret = ret && 
self->metadata_schema_length == other->metadata_schema_length\n              && tsk_memcmp(self->metadata_schema, other->metadata_schema,\n                     self->metadata_schema_length * sizeof(char))\n                     == 0;\n        metadata_equal = false;\n        if (self->metadata_length == other->metadata_length) {\n            if (tsk_edge_table_has_metadata(self)\n                && tsk_edge_table_has_metadata(other)) {\n                metadata_equal\n                    = tsk_memcmp(self->metadata_offset, other->metadata_offset,\n                          (self->num_rows + 1) * sizeof(tsk_size_t))\n                          == 0\n                      && tsk_memcmp(self->metadata, other->metadata,\n                             self->metadata_length * sizeof(char))\n                             == 0;\n            } else {\n                /* The only way that the metadata lengths can be equal (which\n                 * we've already tested) and either one or the other of the tables\n                 * hasn't got metadata is if they are both zero length. 
*/\n                tsk_bug_assert(self->metadata_length == 0);\n                metadata_equal = true;\n            }\n        }\n        ret = ret && metadata_equal;\n    }\n    return ret;\n}\n\nint\ntsk_edge_table_keep_rows(tsk_edge_table_t *self, const tsk_bool_t *keep,\n    tsk_flags_t TSK_UNUSED(options), tsk_id_t *id_map)\n{\n    int ret = 0;\n    tsk_size_t remaining_rows;\n\n    if (id_map != NULL) {\n        keep_mask_to_id_map(self->num_rows, keep, id_map);\n    }\n    remaining_rows = subset_double_column(self->left, self->num_rows, keep);\n    subset_double_column(self->right, self->num_rows, keep);\n    subset_id_column(self->parent, self->num_rows, keep);\n    subset_id_column(self->child, self->num_rows, keep);\n    if (self->metadata_length > 0) {\n        tsk_bug_assert(!(self->options & TSK_TABLE_NO_METADATA));\n        self->metadata_length = subset_ragged_char_column(\n            self->metadata, self->metadata_offset, self->num_rows, keep);\n    }\n    self->num_rows = remaining_rows;\n    return ret;\n}\n\nstatic int\ntsk_edge_table_dump(const tsk_edge_table_t *self, kastore_t *store, tsk_flags_t options)\n{\n    int ret = 0;\n    const write_table_col_t write_cols[] = {\n        { \"edges/left\", (void *) self->left, self->num_rows, KAS_FLOAT64 },\n        { \"edges/right\", (void *) self->right, self->num_rows, KAS_FLOAT64 },\n        { \"edges/parent\", (void *) self->parent, self->num_rows, TSK_ID_STORAGE_TYPE },\n        { \"edges/child\", (void *) self->child, self->num_rows, TSK_ID_STORAGE_TYPE },\n        { \"edges/metadata_schema\", (void *) self->metadata_schema,\n            self->metadata_schema_length, KAS_UINT8 },\n        { .name = NULL },\n    };\n    const write_table_ragged_col_t ragged_cols[] = {\n        { \"edges/metadata\", (void *) self->metadata, self->metadata_length, KAS_UINT8,\n            self->metadata_offset, self->num_rows },\n        { .name = NULL },\n    };\n\n    /* TODO when the general code has been 
updated to only write out the\n     * column when the length of ragged columns is > 0 we can get rid of\n     * this special case here and use write_table. */\n    ret = write_table_cols(store, write_cols, options);\n    if (ret != 0) {\n        goto out;\n    }\n    if (tsk_edge_table_has_metadata(self)) {\n        ret = write_table_ragged_cols(store, ragged_cols, options);\n        if (ret != 0) {\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int\ntsk_edge_table_load(tsk_edge_table_t *self, kastore_t *store)\n{\n    int ret = 0;\n    char *metadata_schema = NULL;\n    double *left = NULL;\n    double *right = NULL;\n    tsk_id_t *parent = NULL;\n    tsk_id_t *child = NULL;\n    char *metadata = NULL;\n    tsk_size_t *metadata_offset = NULL;\n    tsk_size_t num_rows, metadata_length, metadata_schema_length;\n\n    read_table_col_t cols[] = {\n        { \"edges/left\", (void **) &left, KAS_FLOAT64, 0 },\n        { \"edges/right\", (void **) &right, KAS_FLOAT64, 0 },\n        { \"edges/parent\", (void **) &parent, TSK_ID_STORAGE_TYPE, 0 },\n        { \"edges/child\", (void **) &child, TSK_ID_STORAGE_TYPE, 0 },\n        { .name = NULL },\n    };\n    read_table_ragged_col_t ragged_cols[] = {\n        { \"edges/metadata\", (void **) &metadata, &metadata_length, KAS_UINT8,\n            &metadata_offset, TSK_COL_OPTIONAL },\n        { .name = NULL },\n    };\n    read_table_property_t properties[] = {\n        { \"edges/metadata_schema\", (void **) &metadata_schema, &metadata_schema_length,\n            KAS_UINT8, TSK_COL_OPTIONAL },\n        { .name = NULL },\n    };\n\n    ret = read_table(store, &num_rows, cols, ragged_cols, properties, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    if (metadata_schema != NULL) {\n        ret = tsk_edge_table_set_metadata_schema(\n            self, metadata_schema, metadata_schema_length);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    ret = 
tsk_edge_table_takeset_columns(\n        self, num_rows, left, right, parent, child, metadata, metadata_offset);\n    if (ret != 0) {\n        goto out;\n    }\n    left = NULL;\n    right = NULL;\n    parent = NULL;\n    child = NULL;\n    metadata = NULL;\n    metadata_offset = NULL;\nout:\n    free_read_table_mem(cols, ragged_cols, properties);\n    return ret;\n}\n\nint\ntsk_edge_table_squash(tsk_edge_table_t *self)\n{\n    int k;\n    int ret = 0;\n    tsk_edge_t *edges = NULL;\n    tsk_size_t num_output_edges;\n\n    if (self->metadata_length > 0) {\n        ret = tsk_trace_error(TSK_ERR_CANT_PROCESS_EDGES_WITH_METADATA);\n        goto out;\n    }\n\n    edges = tsk_malloc(self->num_rows * sizeof(tsk_edge_t));\n    if (edges == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    for (k = 0; k < (int) self->num_rows; k++) {\n        edges[k].left = self->left[k];\n        edges[k].right = self->right[k];\n        edges[k].parent = self->parent[k];\n        edges[k].child = self->child[k];\n        edges[k].metadata_length = 0;\n    }\n\n    ret = tsk_squash_edges(edges, self->num_rows, &num_output_edges);\n    if (ret != 0) {\n        goto out;\n    }\n    tsk_edge_table_clear(self);\n    tsk_bug_assert(num_output_edges <= self->max_rows);\n    self->num_rows = num_output_edges;\n    for (k = 0; k < (int) num_output_edges; k++) {\n        self->left[k] = edges[k].left;\n        self->right[k] = edges[k].right;\n        self->parent[k] = edges[k].parent;\n        self->child[k] = edges[k].child;\n    }\nout:\n    tsk_safe_free(edges);\n    return ret;\n}\n\n/*************************\n * site table\n *************************/\n\nstatic void\ntsk_site_table_free_columns(tsk_site_table_t *self)\n{\n    tsk_safe_free(self->position);\n    tsk_safe_free(self->ancestral_state);\n    tsk_safe_free(self->ancestral_state_offset);\n    tsk_safe_free(self->metadata);\n    
tsk_safe_free(self->metadata_offset);\n}\n\nint\ntsk_site_table_free(tsk_site_table_t *self)\n{\n    tsk_site_table_free_columns(self);\n    tsk_safe_free(self->metadata_schema);\n    return 0;\n}\n\nstatic int\ntsk_site_table_expand_main_columns(tsk_site_table_t *self, tsk_size_t additional_rows)\n{\n    int ret = 0;\n    tsk_size_t new_max_rows;\n\n    ret = calculate_max_rows(self->num_rows, self->max_rows, self->max_rows_increment,\n        additional_rows, &new_max_rows);\n    if (ret != 0) {\n        goto out;\n    }\n    if ((self->num_rows + additional_rows) > self->max_rows) {\n        ret = expand_column((void **) &self->position, new_max_rows, sizeof(double));\n        if (ret != 0) {\n            goto out;\n        }\n        ret = expand_column((void **) &self->ancestral_state_offset, new_max_rows + 1,\n            sizeof(tsk_size_t));\n        if (ret != 0) {\n            goto out;\n        }\n        ret = expand_column(\n            (void **) &self->metadata_offset, new_max_rows + 1, sizeof(tsk_size_t));\n        if (ret != 0) {\n            goto out;\n        }\n        self->max_rows = new_max_rows;\n    }\nout:\n    return ret;\n}\n\nstatic int\ntsk_site_table_expand_ancestral_state(\n    tsk_site_table_t *self, tsk_size_t additional_length)\n{\n    return expand_ragged_column(self->ancestral_state_length, additional_length,\n        self->max_ancestral_state_length_increment, &self->max_ancestral_state_length,\n        (void **) &self->ancestral_state, sizeof(*self->ancestral_state));\n}\n\nstatic int\ntsk_site_table_expand_metadata(tsk_site_table_t *self, tsk_size_t additional_length)\n{\n    return expand_ragged_column(self->metadata_length, additional_length,\n        self->max_metadata_length_increment, &self->max_metadata_length,\n        (void **) &self->metadata, sizeof(*self->metadata));\n}\n\nint\ntsk_site_table_set_max_rows_increment(\n    tsk_site_table_t *self, tsk_size_t max_rows_increment)\n{\n    self->max_rows_increment = 
max_rows_increment;\n    return 0;\n}\n\nint\ntsk_site_table_set_max_metadata_length_increment(\n    tsk_site_table_t *self, tsk_size_t max_metadata_length_increment)\n{\n    self->max_metadata_length_increment = max_metadata_length_increment;\n    return 0;\n}\n\nint\ntsk_site_table_set_max_ancestral_state_length_increment(\n    tsk_site_table_t *self, tsk_size_t max_ancestral_state_length_increment)\n{\n    self->max_ancestral_state_length_increment = max_ancestral_state_length_increment;\n    return 0;\n}\n\nint\ntsk_site_table_init(tsk_site_table_t *self, tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = 0;\n\n    tsk_memset(self, 0, sizeof(tsk_site_table_t));\n\n    /* Allocate space for one row initially, ensuring we always have valid pointers\n     * even if the table is empty */\n    self->max_rows_increment = 1;\n    self->max_ancestral_state_length_increment = 1;\n    self->max_metadata_length_increment = 1;\n    ret = tsk_site_table_expand_main_columns(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_site_table_expand_ancestral_state(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_site_table_expand_metadata(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    self->ancestral_state_offset[0] = 0;\n    self->metadata_offset[0] = 0;\n    self->max_rows_increment = 0;\n    self->max_ancestral_state_length_increment = 0;\n    self->max_metadata_length_increment = 0;\n    tsk_site_table_set_metadata_schema(self, NULL, 0);\nout:\n    return ret;\n}\n\ntsk_id_t\ntsk_site_table_add_row(tsk_site_table_t *self, double position,\n    const char *ancestral_state, tsk_size_t ancestral_state_length, const char *metadata,\n    tsk_size_t metadata_length)\n{\n    tsk_id_t ret = 0;\n    tsk_size_t ancestral_state_offset, metadata_offset;\n\n    ret = tsk_site_table_expand_main_columns(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    self->position[self->num_rows] = position;\n\n    
ancestral_state_offset = (tsk_size_t) self->ancestral_state_length;\n    tsk_bug_assert(\n        self->ancestral_state_offset[self->num_rows] == ancestral_state_offset);\n    ret = tsk_site_table_expand_ancestral_state(self, ancestral_state_length);\n    if (ret != 0) {\n        goto out;\n    }\n    self->ancestral_state_length += ancestral_state_length;\n    tsk_memmove(self->ancestral_state + ancestral_state_offset, ancestral_state,\n        ancestral_state_length);\n    self->ancestral_state_offset[self->num_rows + 1] = self->ancestral_state_length;\n\n    metadata_offset = (tsk_size_t) self->metadata_length;\n    tsk_bug_assert(self->metadata_offset[self->num_rows] == metadata_offset);\n    ret = tsk_site_table_expand_metadata(self, metadata_length);\n    if (ret != 0) {\n        goto out;\n    }\n    self->metadata_length += metadata_length;\n    tsk_memmove(self->metadata + metadata_offset, metadata, metadata_length);\n    self->metadata_offset[self->num_rows + 1] = self->metadata_length;\n\n    ret = (tsk_id_t) self->num_rows;\n    self->num_rows++;\nout:\n    return ret;\n}\n\nstatic int\ntsk_site_table_update_row_rewrite(tsk_site_table_t *self, tsk_id_t index,\n    double position, const char *ancestral_state, tsk_size_t ancestral_state_length,\n    const char *metadata, tsk_size_t metadata_length)\n{\n    int ret = 0;\n    tsk_id_t j, ret_id;\n    tsk_site_table_t copy;\n    tsk_size_t num_rows;\n    tsk_id_t *rows = NULL;\n\n    ret = tsk_site_table_copy(self, &copy, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    rows = tsk_malloc(self->num_rows * sizeof(*rows));\n    if (rows == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    ret = tsk_site_table_truncate(self, (tsk_size_t) index);\n    tsk_bug_assert(ret == 0);\n    ret_id = tsk_site_table_add_row(self, position, ancestral_state,\n        ancestral_state_length, metadata, metadata_length);\n    if (ret_id < 0) {\n        ret = (int) ret_id;\n       
 goto out;\n    }\n    num_rows = 0;\n    for (j = index + 1; j < (tsk_id_t) copy.num_rows; j++) {\n        rows[num_rows] = j;\n        num_rows++;\n    }\n    ret = tsk_site_table_extend(self, &copy, num_rows, rows, 0);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    tsk_site_table_free(&copy);\n    tsk_safe_free(rows);\n    return ret;\n}\n\nint\ntsk_site_table_update_row(tsk_site_table_t *self, tsk_id_t index, double position,\n    const char *ancestral_state, tsk_size_t ancestral_state_length, const char *metadata,\n    tsk_size_t metadata_length)\n{\n    int ret = 0;\n    tsk_site_t current_row;\n\n    ret = tsk_site_table_get_row(self, index, &current_row);\n    if (ret != 0) {\n        goto out;\n    }\n    if (current_row.metadata_length == metadata_length\n        && current_row.ancestral_state_length == ancestral_state_length) {\n        self->position[index] = position;\n        /* Note: important to use tsk_memmove here as we may be provided pointers\n         * to the column memory as input via get_row */\n        tsk_memmove(&self->ancestral_state[self->ancestral_state_offset[index]],\n            ancestral_state, ancestral_state_length * sizeof(*ancestral_state));\n        tsk_memmove(&self->metadata[self->metadata_offset[index]], metadata,\n            metadata_length * sizeof(*metadata));\n    } else {\n        ret = tsk_site_table_update_row_rewrite(self, index, position, ancestral_state,\n            ancestral_state_length, metadata, metadata_length);\n        if (ret != 0) {\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nint\ntsk_site_table_append_columns(tsk_site_table_t *self, tsk_size_t num_rows,\n    const double *position, const char *ancestral_state,\n    const tsk_size_t *ancestral_state_offset, const char *metadata,\n    const tsk_size_t *metadata_offset)\n{\n    int ret = 0;\n    tsk_size_t j, ancestral_state_length, metadata_length;\n\n    if (position == NULL || ancestral_state == NULL || 
ancestral_state_offset == NULL) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    if ((metadata == NULL) != (metadata_offset == NULL)) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n\n    ret = tsk_site_table_expand_main_columns(self, num_rows);\n    if (ret != 0) {\n        goto out;\n    }\n    tsk_memcpy(self->position + self->num_rows, position, num_rows * sizeof(double));\n\n    /* Metadata column */\n    if (metadata == NULL) {\n        for (j = 0; j < num_rows; j++) {\n            self->metadata_offset[self->num_rows + j + 1] = self->metadata_length;\n        }\n    } else {\n        ret = check_offsets(num_rows, metadata_offset, 0, false);\n        if (ret != 0) {\n            goto out;\n        }\n        metadata_length = metadata_offset[num_rows];\n        ret = tsk_site_table_expand_metadata(self, metadata_length);\n        if (ret != 0) {\n            goto out;\n        }\n        tsk_memcpy(self->metadata + self->metadata_length, metadata,\n            metadata_length * sizeof(char));\n        for (j = 0; j < num_rows; j++) {\n            self->metadata_offset[self->num_rows + j]\n                = self->metadata_length + metadata_offset[j];\n        }\n        self->metadata_length += metadata_length;\n    }\n    self->metadata_offset[self->num_rows + num_rows] = self->metadata_length;\n\n    /* Ancestral state column */\n    ret = check_offsets(num_rows, ancestral_state_offset, 0, false);\n    if (ret != 0) {\n        goto out;\n    }\n    ancestral_state_length = ancestral_state_offset[num_rows];\n    ret = tsk_site_table_expand_ancestral_state(self, ancestral_state_length);\n    if (ret != 0) {\n        goto out;\n    }\n    tsk_memcpy(self->ancestral_state + self->ancestral_state_length, ancestral_state,\n        ancestral_state_length * sizeof(char));\n    for (j = 0; j < num_rows; j++) {\n        self->ancestral_state_offset[self->num_rows + j]\n            = 
self->ancestral_state_length + ancestral_state_offset[j];\n    }\n    self->ancestral_state_length += ancestral_state_length;\n    self->ancestral_state_offset[self->num_rows + num_rows]\n        = self->ancestral_state_length;\n\n    self->num_rows += num_rows;\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_site_table_copy(\n    const tsk_site_table_t *self, tsk_site_table_t *dest, tsk_flags_t options)\n{\n    int ret = 0;\n\n    if (!(options & TSK_NO_INIT)) {\n        ret = tsk_site_table_init(dest, 0);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    ret = tsk_site_table_set_columns(dest, self->num_rows, self->position,\n        self->ancestral_state, self->ancestral_state_offset, self->metadata,\n        self->metadata_offset);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_site_table_set_metadata_schema(\n        dest, self->metadata_schema, self->metadata_schema_length);\nout:\n    return ret;\n}\n\nint\ntsk_site_table_set_columns(tsk_site_table_t *self, tsk_size_t num_rows,\n    const double *position, const char *ancestral_state,\n    const tsk_size_t *ancestral_state_offset, const char *metadata,\n    const tsk_size_t *metadata_offset)\n{\n    int ret = 0;\n\n    ret = tsk_site_table_clear(self);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_site_table_append_columns(self, num_rows, position, ancestral_state,\n        ancestral_state_offset, metadata, metadata_offset);\nout:\n    return ret;\n}\n\nint\ntsk_site_table_takeset_columns(tsk_site_table_t *self, tsk_size_t num_rows,\n    double *position, char *ancestral_state, tsk_size_t *ancestral_state_offset,\n    char *metadata, tsk_size_t *metadata_offset)\n{\n    int ret = 0;\n\n    /* We need to check all the inputs before we start freeing or taking memory */\n    if (position == NULL || ancestral_state == NULL || ancestral_state_offset == NULL) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    ret = 
check_ragged_column(num_rows, ancestral_state, ancestral_state_offset);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = check_ragged_column(num_rows, metadata, metadata_offset);\n    if (ret != 0) {\n        goto out;\n    }\n\n    tsk_site_table_free_columns(self);\n    self->num_rows = num_rows;\n    self->max_rows = num_rows;\n    self->position = position;\n\n    ret = takeset_ragged_column(num_rows, ancestral_state, ancestral_state_offset,\n        (void *) &self->ancestral_state, &self->ancestral_state_offset,\n        &self->ancestral_state_length);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = takeset_ragged_column(num_rows, metadata, metadata_offset,\n        (void *) &self->metadata, &self->metadata_offset, &self->metadata_length);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nbool\ntsk_site_table_equals(\n    const tsk_site_table_t *self, const tsk_site_table_t *other, tsk_flags_t options)\n{\n    bool ret\n        = self->num_rows == other->num_rows\n          && self->ancestral_state_length == other->ancestral_state_length\n          && tsk_memcmp(self->position, other->position, self->num_rows * sizeof(double))\n                 == 0\n          && tsk_memcmp(self->ancestral_state_offset, other->ancestral_state_offset,\n                 (self->num_rows + 1) * sizeof(tsk_size_t))\n                 == 0\n          && tsk_memcmp(self->ancestral_state, other->ancestral_state,\n                 self->ancestral_state_length * sizeof(char))\n                 == 0;\n    if (!(options & TSK_CMP_IGNORE_METADATA)) {\n        ret = ret && self->metadata_length == other->metadata_length\n              && self->metadata_schema_length == other->metadata_schema_length\n              && tsk_memcmp(self->metadata_offset, other->metadata_offset,\n                     (self->num_rows + 1) * sizeof(tsk_size_t))\n                     == 0\n              && tsk_memcmp(self->metadata, other->metadata,\n                     
self->metadata_length * sizeof(char))\n                     == 0\n              && tsk_memcmp(self->metadata_schema, other->metadata_schema,\n                     self->metadata_schema_length * sizeof(char))\n                     == 0;\n    }\n    return ret;\n}\n\nint\ntsk_site_table_clear(tsk_site_table_t *self)\n{\n    return tsk_site_table_truncate(self, 0);\n}\n\nint\ntsk_site_table_truncate(tsk_site_table_t *self, tsk_size_t num_rows)\n{\n    int ret = 0;\n\n    if (num_rows > self->num_rows) {\n        ret = tsk_trace_error(TSK_ERR_BAD_TABLE_POSITION);\n        goto out;\n    }\n    self->num_rows = num_rows;\n    self->ancestral_state_length = self->ancestral_state_offset[num_rows];\n    self->metadata_length = self->metadata_offset[num_rows];\nout:\n    return ret;\n}\n\nint\ntsk_site_table_extend(tsk_site_table_t *self, const tsk_site_table_t *other,\n    tsk_size_t num_rows, const tsk_id_t *row_indexes, tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = 0;\n    tsk_id_t ret_id;\n    tsk_size_t j;\n    tsk_site_t site;\n\n    if (self == other) {\n        ret = tsk_trace_error(TSK_ERR_CANNOT_EXTEND_FROM_SELF);\n        goto out;\n    }\n\n    /* We know how much to expand the non-ragged columns, so do it ahead of time */\n    ret = tsk_site_table_expand_main_columns(self, num_rows);\n    if (ret != 0) {\n        goto out;\n    }\n    for (j = 0; j < num_rows; j++) {\n        ret = tsk_site_table_get_row(\n            other, row_indexes == NULL ? 
(tsk_id_t) j : row_indexes[j], &site);\n        if (ret != 0) {\n            goto out;\n        }\n        ret_id = tsk_site_table_add_row(self, site.position, site.ancestral_state,\n            site.ancestral_state_length, site.metadata, site.metadata_length);\n        if (ret_id < 0) {\n            ret = (int) ret_id;\n            goto out;\n        }\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nvoid\ntsk_site_table_print_state(const tsk_site_table_t *self, FILE *out)\n{\n    int ret;\n\n    fprintf(out, \"\\n\" TABLE_SEP);\n    fprintf(out, \"site_table: %p:\\n\", (const void *) self);\n    fprintf(out, \"num_rows = %lld\\t(max= %lld\\tincrement = %lld)\\n\",\n        (long long) self->num_rows, (long long) self->max_rows,\n        (long long) self->max_rows_increment);\n    fprintf(out, \"ancestral_state_length = %lld\\t(max= %lld\\tincrement = %lld)\\n\",\n        (long long) self->ancestral_state_length,\n        (long long) self->max_ancestral_state_length,\n        (long long) self->max_ancestral_state_length_increment);\n    fprintf(out, \"metadata_length = %lld(\\tmax= %lld\\tincrement = %lld)\\n\",\n        (long long) self->metadata_length, (long long) self->max_metadata_length,\n        (long long) self->max_metadata_length_increment);\n    fprintf(out, TABLE_SEP);\n    ret = tsk_site_table_dump_text(self, out);\n    tsk_bug_assert(ret == 0);\n\n    tsk_bug_assert(self->ancestral_state_offset[0] == 0);\n    tsk_bug_assert(\n        self->ancestral_state_length == self->ancestral_state_offset[self->num_rows]);\n    tsk_bug_assert(self->metadata_offset[0] == 0);\n    tsk_bug_assert(self->metadata_length == self->metadata_offset[self->num_rows]);\n}\n\nstatic inline void\ntsk_site_table_get_row_unsafe(\n    const tsk_site_table_t *self, tsk_id_t index, tsk_site_t *row)\n{\n    row->id = (tsk_id_t) index;\n    row->position = self->position[index];\n    row->ancestral_state_length\n        = self->ancestral_state_offset[index + 1] - 
self->ancestral_state_offset[index];\n    row->ancestral_state = self->ancestral_state + self->ancestral_state_offset[index];\n    row->metadata_length\n        = self->metadata_offset[index + 1] - self->metadata_offset[index];\n    row->metadata = self->metadata + self->metadata_offset[index];\n    /* This struct has a placeholder for mutations. Probably should be separate\n     * structs for this (tsk_site_table_row_t?) */\n    row->mutations_length = 0;\n    row->mutations = NULL;\n}\n\nint\ntsk_site_table_get_row(const tsk_site_table_t *self, tsk_id_t index, tsk_site_t *row)\n{\n    int ret = 0;\n\n    if (index < 0 || index >= (tsk_id_t) self->num_rows) {\n        ret = tsk_trace_error(TSK_ERR_SITE_OUT_OF_BOUNDS);\n        goto out;\n    }\n    tsk_site_table_get_row_unsafe(self, index, row);\nout:\n    return ret;\n}\n\nint\ntsk_site_table_set_metadata_schema(tsk_site_table_t *self, const char *metadata_schema,\n    tsk_size_t metadata_schema_length)\n{\n    return replace_string(&self->metadata_schema, &self->metadata_schema_length,\n        metadata_schema, metadata_schema_length);\n}\n\nint\ntsk_site_table_dump_text(const tsk_site_table_t *self, FILE *out)\n{\n    tsk_size_t j;\n    int ret = TSK_ERR_IO;\n    int err;\n    tsk_size_t ancestral_state_len, metadata_len;\n\n    err = write_metadata_schema_header(\n        out, self->metadata_schema, self->metadata_schema_length);\n    if (err < 0) {\n        goto out;\n    }\n    err = fprintf(out, \"id\\tposition\\tancestral_state\\tmetadata\\n\");\n    if (err < 0) {\n        goto out;\n    }\n    for (j = 0; j < self->num_rows; j++) {\n        ancestral_state_len\n            = self->ancestral_state_offset[j + 1] - self->ancestral_state_offset[j];\n        metadata_len = self->metadata_offset[j + 1] - self->metadata_offset[j];\n        err = fprintf(out, \"%lld\\t%f\\t%.*s\\t%.*s\\n\", (long long) j, self->position[j],\n            (int) ancestral_state_len,\n            self->ancestral_state + 
self->ancestral_state_offset[j], (int) metadata_len,\n            self->metadata + self->metadata_offset[j]);\n        if (err < 0) {\n            goto out;\n        }\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nint\ntsk_site_table_keep_rows(tsk_site_table_t *self, const tsk_bool_t *keep,\n    tsk_flags_t TSK_UNUSED(options), tsk_id_t *id_map)\n{\n    int ret = 0;\n    tsk_size_t remaining_rows;\n\n    if (id_map != NULL) {\n        keep_mask_to_id_map(self->num_rows, keep, id_map);\n    }\n\n    remaining_rows = subset_double_column(self->position, self->num_rows, keep);\n    self->ancestral_state_length = subset_ragged_char_column(\n        self->ancestral_state, self->ancestral_state_offset, self->num_rows, keep);\n    if (self->metadata_length > 0) {\n        self->metadata_length = subset_ragged_char_column(\n            self->metadata, self->metadata_offset, self->num_rows, keep);\n    }\n    self->num_rows = remaining_rows;\n    return ret;\n}\n\nstatic int\ntsk_site_table_dump(const tsk_site_table_t *self, kastore_t *store, tsk_flags_t options)\n{\n    const write_table_col_t cols[] = {\n        { \"sites/position\", (void *) self->position, self->num_rows, KAS_FLOAT64 },\n        { \"sites/metadata_schema\", (void *) self->metadata_schema,\n            self->metadata_schema_length, KAS_UINT8 },\n        { .name = NULL },\n    };\n    const write_table_ragged_col_t ragged_cols[] = {\n        { \"sites/ancestral_state\", (void *) self->ancestral_state,\n            self->ancestral_state_length, KAS_UINT8, self->ancestral_state_offset,\n            self->num_rows },\n        { \"sites/metadata\", (void *) self->metadata, self->metadata_length, KAS_UINT8,\n            self->metadata_offset, self->num_rows },\n        { .name = NULL },\n    };\n\n    return write_table(store, cols, ragged_cols, options);\n}\n\nstatic int\ntsk_site_table_load(tsk_site_table_t *self, kastore_t *store)\n{\n    int ret = 0;\n    char *metadata_schema = NULL;\n    double 
*position = NULL;\n    char *ancestral_state = NULL;\n    tsk_size_t *ancestral_state_offset = NULL;\n    char *metadata = NULL;\n    tsk_size_t *metadata_offset = NULL;\n    tsk_size_t num_rows, ancestral_state_length, metadata_length, metadata_schema_length;\n\n    read_table_col_t cols[] = {\n        { \"sites/position\", (void **) &position, KAS_FLOAT64, 0 },\n        { .name = NULL },\n    };\n    read_table_ragged_col_t ragged_cols[] = {\n        { \"sites/ancestral_state\", (void **) &ancestral_state, &ancestral_state_length,\n            KAS_UINT8, &ancestral_state_offset, 0 },\n        { \"sites/metadata\", (void **) &metadata, &metadata_length, KAS_UINT8,\n            &metadata_offset, 0 },\n        { .name = NULL },\n    };\n    read_table_property_t properties[] = {\n        { \"sites/metadata_schema\", (void **) &metadata_schema, &metadata_schema_length,\n            KAS_UINT8, TSK_COL_OPTIONAL },\n        { .name = NULL },\n    };\n\n    ret = read_table(store, &num_rows, cols, ragged_cols, properties, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    if (metadata_schema != NULL) {\n        ret = tsk_site_table_set_metadata_schema(\n            self, metadata_schema, metadata_schema_length);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    ret = tsk_site_table_takeset_columns(self, num_rows, position, ancestral_state,\n        ancestral_state_offset, metadata, metadata_offset);\n    if (ret != 0) {\n        goto out;\n    }\n    position = NULL;\n    ancestral_state = NULL;\n    ancestral_state_offset = NULL;\n    metadata = NULL;\n    metadata_offset = NULL;\n\nout:\n    free_read_table_mem(cols, ragged_cols, properties);\n    return ret;\n}\n\n/*************************\n * mutation table\n *************************/\n\nstatic void\ntsk_mutation_table_free_columns(tsk_mutation_table_t *self)\n{\n    tsk_safe_free(self->node);\n    tsk_safe_free(self->site);\n    tsk_safe_free(self->parent);\n    
tsk_safe_free(self->time);\n    tsk_safe_free(self->derived_state);\n    tsk_safe_free(self->derived_state_offset);\n    tsk_safe_free(self->metadata);\n    tsk_safe_free(self->metadata_offset);\n}\n\nint\ntsk_mutation_table_free(tsk_mutation_table_t *self)\n{\n    tsk_mutation_table_free_columns(self);\n    tsk_safe_free(self->metadata_schema);\n    return 0;\n}\n\nstatic int\ntsk_mutation_table_expand_main_columns(\n    tsk_mutation_table_t *self, tsk_size_t additional_rows)\n{\n    int ret = 0;\n    tsk_size_t new_max_rows;\n\n    ret = calculate_max_rows(self->num_rows, self->max_rows, self->max_rows_increment,\n        additional_rows, &new_max_rows);\n    if (ret != 0) {\n        goto out;\n    }\n    if ((self->num_rows + additional_rows) > self->max_rows) {\n        ret = expand_column((void **) &self->site, new_max_rows, sizeof(tsk_id_t));\n        if (ret != 0) {\n            goto out;\n        }\n        ret = expand_column((void **) &self->node, new_max_rows, sizeof(tsk_id_t));\n        if (ret != 0) {\n            goto out;\n        }\n        ret = expand_column((void **) &self->parent, new_max_rows, sizeof(tsk_id_t));\n        if (ret != 0) {\n            goto out;\n        }\n        ret = expand_column((void **) &self->time, new_max_rows, sizeof(double));\n        if (ret != 0) {\n            goto out;\n        }\n        ret = expand_column(\n            (void **) &self->derived_state_offset, new_max_rows + 1, sizeof(tsk_size_t));\n        if (ret != 0) {\n            goto out;\n        }\n        ret = expand_column(\n            (void **) &self->metadata_offset, new_max_rows + 1, sizeof(tsk_size_t));\n        if (ret != 0) {\n            goto out;\n        }\n        self->max_rows = new_max_rows;\n    }\nout:\n    return ret;\n}\n\nstatic int\ntsk_mutation_table_expand_derived_state(\n    tsk_mutation_table_t *self, tsk_size_t additional_length)\n{\n    return expand_ragged_column(self->derived_state_length, additional_length,\n        
self->max_derived_state_length_increment, &self->max_derived_state_length,\n        (void **) &self->derived_state, sizeof(*self->derived_state));\n}\n\nstatic int\ntsk_mutation_table_expand_metadata(\n    tsk_mutation_table_t *self, tsk_size_t additional_length)\n{\n    return expand_ragged_column(self->metadata_length, additional_length,\n        self->max_metadata_length_increment, &self->max_metadata_length,\n        (void **) &self->metadata, sizeof(*self->metadata));\n}\n\nint\ntsk_mutation_table_set_max_rows_increment(\n    tsk_mutation_table_t *self, tsk_size_t max_rows_increment)\n{\n    self->max_rows_increment = max_rows_increment;\n    return 0;\n}\n\nint\ntsk_mutation_table_set_max_metadata_length_increment(\n    tsk_mutation_table_t *self, tsk_size_t max_metadata_length_increment)\n{\n    self->max_metadata_length_increment = max_metadata_length_increment;\n    return 0;\n}\n\nint\ntsk_mutation_table_set_max_derived_state_length_increment(\n    tsk_mutation_table_t *self, tsk_size_t max_derived_state_length_increment)\n{\n    self->max_derived_state_length_increment = max_derived_state_length_increment;\n    return 0;\n}\n\nint\ntsk_mutation_table_init(tsk_mutation_table_t *self, tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = 0;\n\n    tsk_memset(self, 0, sizeof(tsk_mutation_table_t));\n\n    /* Allocate space for one row initially, ensuring we always have valid pointers\n     * even if the table is empty */\n    self->max_rows_increment = 1;\n    self->max_derived_state_length_increment = 1;\n    self->max_metadata_length_increment = 1;\n    ret = tsk_mutation_table_expand_main_columns(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_mutation_table_expand_derived_state(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_mutation_table_expand_metadata(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    self->derived_state_offset[0] = 0;\n    self->metadata_offset[0] = 0;\n    
self->max_rows_increment = 0;\n    self->max_derived_state_length_increment = 0;\n    self->max_metadata_length_increment = 0;\n    tsk_mutation_table_set_metadata_schema(self, NULL, 0);\nout:\n    return ret;\n}\n\ntsk_id_t\ntsk_mutation_table_add_row(tsk_mutation_table_t *self, tsk_id_t site, tsk_id_t node,\n    tsk_id_t parent, double time, const char *derived_state,\n    tsk_size_t derived_state_length, const char *metadata, tsk_size_t metadata_length)\n{\n    tsk_id_t ret;\n    tsk_size_t derived_state_offset, metadata_offset;\n\n    ret = tsk_mutation_table_expand_main_columns(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    self->site[self->num_rows] = site;\n    self->node[self->num_rows] = node;\n    self->parent[self->num_rows] = parent;\n    self->time[self->num_rows] = time;\n\n    derived_state_offset = self->derived_state_length;\n    tsk_bug_assert(self->derived_state_offset[self->num_rows] == derived_state_offset);\n    ret = tsk_mutation_table_expand_derived_state(self, derived_state_length);\n    if (ret != 0) {\n        goto out;\n    }\n    self->derived_state_length += derived_state_length;\n    tsk_memmove(\n        self->derived_state + derived_state_offset, derived_state, derived_state_length);\n    self->derived_state_offset[self->num_rows + 1] = self->derived_state_length;\n\n    metadata_offset = self->metadata_length;\n    tsk_bug_assert(self->metadata_offset[self->num_rows] == metadata_offset);\n    ret = tsk_mutation_table_expand_metadata(self, metadata_length);\n    if (ret != 0) {\n        goto out;\n    }\n    self->metadata_length += metadata_length;\n    tsk_memmove(self->metadata + metadata_offset, metadata, metadata_length);\n    self->metadata_offset[self->num_rows + 1] = self->metadata_length;\n\n    ret = (tsk_id_t) self->num_rows;\n    self->num_rows++;\nout:\n    return ret;\n}\n\nstatic int\ntsk_mutation_table_update_row_rewrite(tsk_mutation_table_t *self, tsk_id_t index,\n    tsk_id_t site, tsk_id_t node, 
tsk_id_t parent, double time,\n    const char *derived_state, tsk_size_t derived_state_length, const char *metadata,\n    tsk_size_t metadata_length)\n{\n    int ret = 0;\n    tsk_id_t j, ret_id;\n    tsk_mutation_table_t copy;\n    tsk_size_t num_rows;\n    tsk_id_t *rows = NULL;\n\n    ret = tsk_mutation_table_copy(self, &copy, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    rows = tsk_malloc(self->num_rows * sizeof(*rows));\n    if (rows == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    ret = tsk_mutation_table_truncate(self, (tsk_size_t) index);\n    tsk_bug_assert(ret == 0);\n    ret_id = tsk_mutation_table_add_row(self, site, node, parent, time, derived_state,\n        derived_state_length, metadata, metadata_length);\n    if (ret_id < 0) {\n        ret = (int) ret_id;\n        goto out;\n    }\n    num_rows = 0;\n    for (j = index + 1; j < (tsk_id_t) copy.num_rows; j++) {\n        rows[num_rows] = j;\n        num_rows++;\n    }\n    ret = tsk_mutation_table_extend(self, &copy, num_rows, rows, 0);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    tsk_mutation_table_free(&copy);\n    tsk_safe_free(rows);\n    return ret;\n}\n\nint\ntsk_mutation_table_update_row(tsk_mutation_table_t *self, tsk_id_t index, tsk_id_t site,\n    tsk_id_t node, tsk_id_t parent, double time, const char *derived_state,\n    tsk_size_t derived_state_length, const char *metadata, tsk_size_t metadata_length)\n{\n    int ret = 0;\n    tsk_mutation_t current_row;\n\n    ret = tsk_mutation_table_get_row(self, index, &current_row);\n    if (ret != 0) {\n        goto out;\n    }\n    if (current_row.metadata_length == metadata_length\n        && current_row.derived_state_length == derived_state_length) {\n        self->site[index] = site;\n        self->node[index] = node;\n        self->parent[index] = parent;\n        self->time[index] = time;\n        /* Note: important to use tsk_memmove here as we may be provided pointers\n    
     * to the column memory as input via get_row */\n        tsk_memmove(&self->derived_state[self->derived_state_offset[index]],\n            derived_state, derived_state_length * sizeof(*derived_state));\n        tsk_memmove(&self->metadata[self->metadata_offset[index]], metadata,\n            metadata_length * sizeof(*metadata));\n    } else {\n        ret = tsk_mutation_table_update_row_rewrite(self, index, site, node, parent,\n            time, derived_state, derived_state_length, metadata, metadata_length);\n        if (ret != 0) {\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nint\ntsk_mutation_table_append_columns(tsk_mutation_table_t *self, tsk_size_t num_rows,\n    const tsk_id_t *site, const tsk_id_t *node, const tsk_id_t *parent,\n    const double *time, const char *derived_state,\n    const tsk_size_t *derived_state_offset, const char *metadata,\n    const tsk_size_t *metadata_offset)\n{\n    int ret = 0;\n    tsk_size_t j, derived_state_length, metadata_length;\n\n    if (site == NULL || node == NULL || derived_state == NULL\n        || derived_state_offset == NULL) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    if ((metadata == NULL) != (metadata_offset == NULL)) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n\n    ret = tsk_mutation_table_expand_main_columns(self, num_rows);\n    if (ret != 0) {\n        goto out;\n    }\n    tsk_memcpy(self->site + self->num_rows, site, num_rows * sizeof(tsk_id_t));\n    tsk_memcpy(self->node + self->num_rows, node, num_rows * sizeof(tsk_id_t));\n    if (parent == NULL) {\n        /* If parent is NULL, set all parents to the null mutation */\n        tsk_memset(self->parent + self->num_rows, 0xff, num_rows * sizeof(tsk_id_t));\n    } else {\n        tsk_memcpy(self->parent + self->num_rows, parent, num_rows * sizeof(tsk_id_t));\n    }\n    if (time == NULL) {\n        /* If time is NULL, set all times to 
TSK_UNKNOWN_TIME which is the\n         * default */\n        for (j = 0; j < num_rows; j++) {\n            self->time[self->num_rows + j] = TSK_UNKNOWN_TIME;\n        }\n    } else {\n        tsk_memcpy(self->time + self->num_rows, time, num_rows * sizeof(double));\n    }\n\n    /* Metadata column */\n    if (metadata == NULL) {\n        for (j = 0; j < num_rows; j++) {\n            self->metadata_offset[self->num_rows + j + 1] = self->metadata_length;\n        }\n    } else {\n        ret = check_offsets(num_rows, metadata_offset, 0, false);\n        if (ret != 0) {\n            goto out;\n        }\n        metadata_length = metadata_offset[num_rows];\n        ret = tsk_mutation_table_expand_metadata(self, metadata_length);\n        if (ret != 0) {\n            goto out;\n        }\n        tsk_memcpy(self->metadata + self->metadata_length, metadata,\n            metadata_length * sizeof(char));\n        for (j = 0; j < num_rows; j++) {\n            self->metadata_offset[self->num_rows + j]\n                = self->metadata_length + metadata_offset[j];\n        }\n        self->metadata_length += metadata_length;\n    }\n    self->metadata_offset[self->num_rows + num_rows] = self->metadata_length;\n\n    /* Derived state column */\n    ret = check_offsets(num_rows, derived_state_offset, 0, false);\n    if (ret != 0) {\n        goto out;\n    }\n    derived_state_length = derived_state_offset[num_rows];\n    ret = tsk_mutation_table_expand_derived_state(self, derived_state_length);\n    if (ret != 0) {\n        goto out;\n    }\n    tsk_memcpy(self->derived_state + self->derived_state_length, derived_state,\n        derived_state_length * sizeof(char));\n    for (j = 0; j < num_rows; j++) {\n        self->derived_state_offset[self->num_rows + j]\n            = self->derived_state_length + derived_state_offset[j];\n    }\n    self->derived_state_length += derived_state_length;\n    self->derived_state_offset[self->num_rows + num_rows] = 
self->derived_state_length;\n\n    self->num_rows += num_rows;\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_mutation_table_takeset_columns(tsk_mutation_table_t *self, tsk_size_t num_rows,\n    tsk_id_t *site, tsk_id_t *node, tsk_id_t *parent, double *time, char *derived_state,\n    tsk_size_t *derived_state_offset, char *metadata, tsk_size_t *metadata_offset)\n{\n    tsk_size_t j;\n    int ret = 0;\n\n    if (site == NULL || node == NULL || derived_state == NULL\n        || derived_state_offset == NULL) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    /* We need to check all the inputs before we start freeing or taking memory */\n    ret = check_ragged_column(num_rows, derived_state, derived_state_offset);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = check_ragged_column(num_rows, metadata, metadata_offset);\n    if (ret != 0) {\n        goto out;\n    }\n\n    tsk_mutation_table_free_columns(self);\n    self->num_rows = num_rows;\n    self->max_rows = num_rows;\n    self->site = site;\n    self->node = node;\n\n    ret = takeset_optional_id_column(num_rows, parent, &self->parent);\n    if (ret != 0) {\n        goto out;\n    }\n    if (time == NULL) {\n        /* Time defaults to unknown time if not specified. 
*/\n        self->time = tsk_malloc(num_rows * sizeof(*self->time));\n        if (self->time == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n        for (j = 0; j < num_rows; j++) {\n            self->time[j] = TSK_UNKNOWN_TIME;\n        }\n\n    } else {\n        self->time = time;\n    }\n\n    ret = takeset_ragged_column(num_rows, derived_state, derived_state_offset,\n        (void *) &self->derived_state, &self->derived_state_offset,\n        &self->derived_state_length);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = takeset_ragged_column(num_rows, metadata, metadata_offset,\n        (void *) &self->metadata, &self->metadata_offset, &self->metadata_length);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_mutation_table_copy(\n    const tsk_mutation_table_t *self, tsk_mutation_table_t *dest, tsk_flags_t options)\n{\n    int ret = 0;\n\n    if (!(options & TSK_NO_INIT)) {\n        ret = tsk_mutation_table_init(dest, 0);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    ret = tsk_mutation_table_set_columns(dest, self->num_rows, self->site, self->node,\n        self->parent, self->time, self->derived_state, self->derived_state_offset,\n        self->metadata, self->metadata_offset);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_mutation_table_set_metadata_schema(\n        dest, self->metadata_schema, self->metadata_schema_length);\nout:\n    return ret;\n}\n\nint\ntsk_mutation_table_set_columns(tsk_mutation_table_t *self, tsk_size_t num_rows,\n    const tsk_id_t *site, const tsk_id_t *node, const tsk_id_t *parent,\n    const double *time, const char *derived_state,\n    const tsk_size_t *derived_state_offset, const char *metadata,\n    const tsk_size_t *metadata_offset)\n{\n    int ret = 0;\n\n    ret = tsk_mutation_table_clear(self);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = 
tsk_mutation_table_append_columns(self, num_rows, site, node, parent, time,\n        derived_state, derived_state_offset, metadata, metadata_offset);\nout:\n    return ret;\n}\n\nbool\ntsk_mutation_table_equals(const tsk_mutation_table_t *self,\n    const tsk_mutation_table_t *other, tsk_flags_t options)\n{\n    bool ret\n        = self->num_rows == other->num_rows\n          && self->derived_state_length == other->derived_state_length\n          && tsk_memcmp(self->site, other->site, self->num_rows * sizeof(tsk_id_t)) == 0\n          && tsk_memcmp(self->node, other->node, self->num_rows * sizeof(tsk_id_t)) == 0\n          && tsk_memcmp(self->parent, other->parent, self->num_rows * sizeof(tsk_id_t))\n                 == 0\n          && tsk_memcmp(self->time, other->time, self->num_rows * sizeof(double)) == 0\n          && tsk_memcmp(self->derived_state_offset, other->derived_state_offset,\n                 (self->num_rows + 1) * sizeof(tsk_size_t))\n                 == 0\n          && tsk_memcmp(self->derived_state, other->derived_state,\n                 self->derived_state_length * sizeof(char))\n                 == 0;\n    if (!(options & TSK_CMP_IGNORE_METADATA)) {\n        ret = ret && self->metadata_length == other->metadata_length\n              && self->metadata_schema_length == other->metadata_schema_length\n              && tsk_memcmp(self->metadata_offset, other->metadata_offset,\n                     (self->num_rows + 1) * sizeof(tsk_size_t))\n                     == 0\n              && tsk_memcmp(self->metadata, other->metadata,\n                     self->metadata_length * sizeof(char))\n                     == 0\n              && tsk_memcmp(self->metadata_schema, other->metadata_schema,\n                     self->metadata_schema_length * sizeof(char))\n                     == 0\n              && tsk_memcmp(self->metadata_schema, other->metadata_schema,\n                     self->metadata_schema_length * sizeof(char))\n                     == 0;\n   
 }\n    return ret;\n}\n\nint\ntsk_mutation_table_clear(tsk_mutation_table_t *self)\n{\n    return tsk_mutation_table_truncate(self, 0);\n}\n\nint\ntsk_mutation_table_truncate(tsk_mutation_table_t *mutations, tsk_size_t num_rows)\n{\n    int ret = 0;\n\n    if (num_rows > mutations->num_rows) {\n        ret = tsk_trace_error(TSK_ERR_BAD_TABLE_POSITION);\n        goto out;\n    }\n    mutations->num_rows = num_rows;\n    mutations->derived_state_length = mutations->derived_state_offset[num_rows];\n    mutations->metadata_length = mutations->metadata_offset[num_rows];\nout:\n    return ret;\n}\n\nint\ntsk_mutation_table_extend(tsk_mutation_table_t *self, const tsk_mutation_table_t *other,\n    tsk_size_t num_rows, const tsk_id_t *row_indexes, tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = 0;\n    tsk_id_t ret_id;\n    tsk_size_t j;\n    tsk_mutation_t mutation;\n\n    if (self == other) {\n        ret = tsk_trace_error(TSK_ERR_CANNOT_EXTEND_FROM_SELF);\n        goto out;\n    }\n\n    /* We know how much to expand the non-ragged columns, so do it ahead of time */\n    ret = tsk_mutation_table_expand_main_columns(self, num_rows);\n    if (ret != 0) {\n        goto out;\n    }\n    for (j = 0; j < num_rows; j++) {\n        ret = tsk_mutation_table_get_row(\n            other, row_indexes == NULL ? 
(tsk_id_t) j : row_indexes[j], &mutation);\n        if (ret != 0) {\n            goto out;\n        }\n        ret_id = tsk_mutation_table_add_row(self, mutation.site, mutation.node,\n            mutation.parent, mutation.time, mutation.derived_state,\n            mutation.derived_state_length, mutation.metadata, mutation.metadata_length);\n        if (ret_id < 0) {\n            ret = (int) ret_id;\n            goto out;\n        }\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nvoid\ntsk_mutation_table_print_state(const tsk_mutation_table_t *self, FILE *out)\n{\n    int ret;\n\n    fprintf(out, \"\\n\" TABLE_SEP);\n    fprintf(out, \"mutation_table: %p:\\n\", (const void *) self);\n    fprintf(out, \"num_rows = %lld\\tmax= %lld\\tincrement = %lld)\\n\",\n        (long long) self->num_rows, (long long) self->max_rows,\n        (long long) self->max_rows_increment);\n    fprintf(out, \"derived_state_length = %lld\\tmax= %lld\\tincrement = %lld)\\n\",\n        (long long) self->derived_state_length,\n        (long long) self->max_derived_state_length,\n        (long long) self->max_derived_state_length_increment);\n    fprintf(out, \"metadata_length = %lld\\tmax= %lld\\tincrement = %lld)\\n\",\n        (long long) self->metadata_length, (long long) self->max_metadata_length,\n        (long long) self->max_metadata_length_increment);\n    fprintf(out, TABLE_SEP);\n    ret = tsk_mutation_table_dump_text(self, out);\n    tsk_bug_assert(ret == 0);\n    tsk_bug_assert(self->derived_state_offset[0] == 0);\n    tsk_bug_assert(\n        self->derived_state_length == self->derived_state_offset[self->num_rows]);\n    tsk_bug_assert(self->metadata_offset[0] == 0);\n    tsk_bug_assert(self->metadata_length == self->metadata_offset[self->num_rows]);\n}\n\nstatic inline void\ntsk_mutation_table_get_row_unsafe(\n    const tsk_mutation_table_t *self, tsk_id_t index, tsk_mutation_t *row)\n{\n    row->id = (tsk_id_t) index;\n    row->site = self->site[index];\n    row->node = 
self->node[index];\n    row->parent = self->parent[index];\n    row->time = self->time[index];\n    row->derived_state_length\n        = self->derived_state_offset[index + 1] - self->derived_state_offset[index];\n    row->derived_state = self->derived_state + self->derived_state_offset[index];\n    row->metadata_length\n        = self->metadata_offset[index + 1] - self->metadata_offset[index];\n    row->metadata = self->metadata + self->metadata_offset[index];\n    row->edge = TSK_NULL;\n}\n\nint\ntsk_mutation_table_get_row(\n    const tsk_mutation_table_t *self, tsk_id_t index, tsk_mutation_t *row)\n{\n    int ret = 0;\n\n    if (index < 0 || index >= (tsk_id_t) self->num_rows) {\n        ret = tsk_trace_error(TSK_ERR_MUTATION_OUT_OF_BOUNDS);\n        goto out;\n    }\n    tsk_mutation_table_get_row_unsafe(self, index, row);\nout:\n    return ret;\n}\n\nint\ntsk_mutation_table_set_metadata_schema(tsk_mutation_table_t *self,\n    const char *metadata_schema, tsk_size_t metadata_schema_length)\n{\n    return replace_string(&self->metadata_schema, &self->metadata_schema_length,\n        metadata_schema, metadata_schema_length);\n}\n\nint\ntsk_mutation_table_dump_text(const tsk_mutation_table_t *self, FILE *out)\n{\n    int ret = TSK_ERR_IO;\n    int err;\n    tsk_size_t j, derived_state_len, metadata_len;\n\n    err = write_metadata_schema_header(\n        out, self->metadata_schema, self->metadata_schema_length);\n    if (err < 0) {\n        goto out;\n    }\n    err = fprintf(out, \"id\\tsite\\tnode\\tparent\\ttime\\tderived_state\\tmetadata\\n\");\n    if (err < 0) {\n        goto out;\n    }\n    for (j = 0; j < self->num_rows; j++) {\n        derived_state_len\n            = self->derived_state_offset[j + 1] - self->derived_state_offset[j];\n        metadata_len = self->metadata_offset[j + 1] - self->metadata_offset[j];\n        err = fprintf(out, \"%lld\\t%lld\\t%lld\\t%lld\\t%f\\t%.*s\\t%.*s\\n\", (long long) j,\n            (long long) self->site[j], (long 
long) self->node[j],\n            (long long) self->parent[j], self->time[j], (int) derived_state_len,\n            self->derived_state + self->derived_state_offset[j], (int) metadata_len,\n            self->metadata + self->metadata_offset[j]);\n        if (err < 0) {\n            goto out;\n        }\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nint\ntsk_mutation_table_keep_rows(tsk_mutation_table_t *self, const tsk_bool_t *keep,\n    tsk_flags_t TSK_UNUSED(options), tsk_id_t *ret_id_map)\n{\n    int ret = 0;\n    const tsk_size_t current_num_rows = self->num_rows;\n    tsk_size_t j, remaining_rows;\n    tsk_id_t pj;\n    tsk_id_t *id_map = ret_id_map;\n    tsk_id_t *restrict parent = self->parent;\n\n    if (ret_id_map == NULL) {\n        id_map = tsk_malloc(current_num_rows * sizeof(*id_map));\n        if (id_map == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n    }\n\n    keep_mask_to_id_map(current_num_rows, keep, id_map);\n\n    /* Note: we could add some options to avoid these checks if we wanted.\n     * MAP_DELETED_TO_NULL is an obvious one, and I guess it might be\n     * helpful to also provide NO_REMAP to prevent reference remapping\n     * entirely. 
*/\n    for (j = 0; j < current_num_rows; j++) {\n        if (keep[j]) {\n            pj = parent[j];\n            if (pj != TSK_NULL) {\n                if (pj < 0 || pj >= (tsk_id_t) current_num_rows) {\n                    ret = tsk_trace_error(TSK_ERR_MUTATION_OUT_OF_BOUNDS);\n                    goto out;\n                }\n                if (id_map[pj] == TSK_NULL) {\n                    ret = tsk_trace_error(TSK_ERR_KEEP_ROWS_MAP_TO_DELETED);\n                    goto out;\n                }\n            }\n        }\n    }\n\n    remaining_rows = subset_id_column(self->site, current_num_rows, keep);\n    subset_id_column(self->node, current_num_rows, keep);\n    subset_remap_id_column(parent, current_num_rows, keep, id_map);\n    subset_double_column(self->time, current_num_rows, keep);\n    self->derived_state_length = subset_ragged_char_column(\n        self->derived_state, self->derived_state_offset, current_num_rows, keep);\n    if (self->metadata_length > 0) {\n        self->metadata_length = subset_ragged_char_column(\n            self->metadata, self->metadata_offset, current_num_rows, keep);\n    }\n    self->num_rows = remaining_rows;\nout:\n    if (ret_id_map == NULL) {\n        tsk_safe_free(id_map);\n    }\n    return ret;\n}\n\nstatic int\ntsk_mutation_table_dump(\n    const tsk_mutation_table_t *self, kastore_t *store, tsk_flags_t options)\n{\n    const write_table_col_t cols[] = {\n        { \"mutations/site\", (void *) self->site, self->num_rows, TSK_ID_STORAGE_TYPE },\n        { \"mutations/node\", (void *) self->node, self->num_rows, TSK_ID_STORAGE_TYPE },\n        { \"mutations/parent\", (void *) self->parent, self->num_rows,\n            TSK_ID_STORAGE_TYPE },\n        { \"mutations/time\", (void *) self->time, self->num_rows, KAS_FLOAT64 },\n        { \"mutations/metadata_schema\", (void *) self->metadata_schema,\n            self->metadata_schema_length, KAS_UINT8 },\n        { .name = NULL },\n    };\n    const 
write_table_ragged_col_t ragged_cols[] = {\n        { \"mutations/derived_state\", (void *) self->derived_state,\n            self->derived_state_length, KAS_UINT8, self->derived_state_offset,\n            self->num_rows },\n        { \"mutations/metadata\", (void *) self->metadata, self->metadata_length,\n            KAS_UINT8, self->metadata_offset, self->num_rows },\n        { .name = NULL },\n    };\n\n    return write_table(store, cols, ragged_cols, options);\n}\n\nstatic int\ntsk_mutation_table_load(tsk_mutation_table_t *self, kastore_t *store)\n{\n    int ret = 0;\n    tsk_id_t *node = NULL;\n    tsk_id_t *site = NULL;\n    tsk_id_t *parent = NULL;\n    double *time = NULL;\n    char *derived_state = NULL;\n    tsk_size_t *derived_state_offset = NULL;\n    char *metadata = NULL;\n    tsk_size_t *metadata_offset = NULL;\n    char *metadata_schema = NULL;\n    tsk_size_t num_rows, derived_state_length, metadata_length, metadata_schema_length;\n\n    read_table_col_t cols[] = {\n        { \"mutations/site\", (void **) &site, TSK_ID_STORAGE_TYPE, 0 },\n        { \"mutations/node\", (void **) &node, TSK_ID_STORAGE_TYPE, 0 },\n        { \"mutations/parent\", (void **) &parent, TSK_ID_STORAGE_TYPE, 0 },\n        { \"mutations/time\", (void **) &time, KAS_FLOAT64, TSK_COL_OPTIONAL },\n        { .name = NULL },\n    };\n    read_table_ragged_col_t ragged_cols[] = {\n        { \"mutations/derived_state\", (void **) &derived_state, &derived_state_length,\n            KAS_UINT8, &derived_state_offset, 0 },\n        { \"mutations/metadata\", (void **) &metadata, &metadata_length, KAS_UINT8,\n            &metadata_offset, 0 },\n        { .name = NULL },\n    };\n    read_table_property_t properties[] = {\n        { \"mutations/metadata_schema\", (void **) &metadata_schema,\n            &metadata_schema_length, KAS_UINT8, TSK_COL_OPTIONAL },\n        { .name = NULL },\n    };\n\n    ret = read_table(store, &num_rows, cols, ragged_cols, properties, 0);\n    if (ret != 0) 
{\n        goto out;\n    }\n    if (metadata_schema != NULL) {\n        ret = tsk_mutation_table_set_metadata_schema(\n            self, metadata_schema, metadata_schema_length);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    ret = tsk_mutation_table_takeset_columns(self, num_rows, site, node, parent, time,\n        derived_state, derived_state_offset, metadata, metadata_offset);\n    if (ret != 0) {\n        goto out;\n    }\n    site = NULL;\n    node = NULL;\n    parent = NULL;\n    time = NULL;\n    derived_state = NULL;\n    derived_state_offset = NULL;\n    metadata = NULL;\n    metadata_offset = NULL;\n\nout:\n    free_read_table_mem(cols, ragged_cols, properties);\n    return ret;\n}\n\n/*************************\n * migration table\n *************************/\n\nstatic void\ntsk_migration_table_free_columns(tsk_migration_table_t *self)\n{\n    tsk_safe_free(self->left);\n    tsk_safe_free(self->right);\n    tsk_safe_free(self->node);\n    tsk_safe_free(self->source);\n    tsk_safe_free(self->dest);\n    tsk_safe_free(self->time);\n    tsk_safe_free(self->metadata);\n    tsk_safe_free(self->metadata_offset);\n}\n\nint\ntsk_migration_table_free(tsk_migration_table_t *self)\n{\n    tsk_migration_table_free_columns(self);\n    tsk_safe_free(self->metadata_schema);\n    return 0;\n}\n\nstatic int\ntsk_migration_table_expand_main_columns(\n    tsk_migration_table_t *self, tsk_size_t additional_rows)\n{\n    int ret = 0;\n    tsk_size_t new_max_rows;\n\n    ret = calculate_max_rows(self->num_rows, self->max_rows, self->max_rows_increment,\n        additional_rows, &new_max_rows);\n    if (ret != 0) {\n        goto out;\n    }\n    if ((self->num_rows + additional_rows) > self->max_rows) {\n        ret = expand_column((void **) &self->left, new_max_rows, sizeof(double));\n        if (ret != 0) {\n            goto out;\n        }\n        ret = expand_column((void **) &self->right, new_max_rows, sizeof(double));\n        if (ret != 0) {\n  
          goto out;\n        }\n        ret = expand_column((void **) &self->node, new_max_rows, sizeof(tsk_id_t));\n        if (ret != 0) {\n            goto out;\n        }\n        ret = expand_column((void **) &self->source, new_max_rows, sizeof(tsk_id_t));\n        if (ret != 0) {\n            goto out;\n        }\n        ret = expand_column((void **) &self->dest, new_max_rows, sizeof(tsk_id_t));\n        if (ret != 0) {\n            goto out;\n        }\n        ret = expand_column((void **) &self->time, new_max_rows, sizeof(double));\n        if (ret != 0) {\n            goto out;\n        }\n        ret = expand_column(\n            (void **) &self->metadata_offset, new_max_rows + 1, sizeof(tsk_size_t));\n        if (ret != 0) {\n            goto out;\n        }\n\n        self->max_rows = new_max_rows;\n    }\nout:\n    return ret;\n}\n\nstatic int\ntsk_migration_table_expand_metadata(\n    tsk_migration_table_t *self, tsk_size_t additional_length)\n{\n    return expand_ragged_column(self->metadata_length, additional_length,\n        self->max_metadata_length_increment, &self->max_metadata_length,\n        (void **) &self->metadata, sizeof(*self->metadata));\n}\n\nint\ntsk_migration_table_set_max_rows_increment(\n    tsk_migration_table_t *self, tsk_size_t max_rows_increment)\n{\n    self->max_rows_increment = max_rows_increment;\n    return 0;\n}\n\nint\ntsk_migration_table_set_max_metadata_length_increment(\n    tsk_migration_table_t *self, tsk_size_t max_metadata_length_increment)\n{\n    self->max_metadata_length_increment = max_metadata_length_increment;\n    return 0;\n}\n\nint\ntsk_migration_table_init(tsk_migration_table_t *self, tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = 0;\n\n    tsk_memset(self, 0, sizeof(tsk_migration_table_t));\n\n    /* Allocate space for one row initially, ensuring we always have valid pointers\n     * even if the table is empty */\n    self->max_rows_increment = 1;\n    self->max_metadata_length_increment = 1;\n    
ret = tsk_migration_table_expand_main_columns(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_migration_table_expand_metadata(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    self->metadata_offset[0] = 0;\n    self->max_rows_increment = 0;\n    self->max_metadata_length_increment = 0;\n    tsk_migration_table_set_metadata_schema(self, NULL, 0);\nout:\n    return ret;\n}\n\nint\ntsk_migration_table_append_columns(tsk_migration_table_t *self, tsk_size_t num_rows,\n    const double *left, const double *right, const tsk_id_t *node,\n    const tsk_id_t *source, const tsk_id_t *dest, const double *time,\n    const char *metadata, const tsk_size_t *metadata_offset)\n{\n    int ret;\n    tsk_size_t j, metadata_length;\n\n    if (left == NULL || right == NULL || node == NULL || source == NULL || dest == NULL\n        || time == NULL) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    if ((metadata == NULL) != (metadata_offset == NULL)) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n\n    ret = tsk_migration_table_expand_main_columns(self, num_rows);\n    if (ret != 0) {\n        goto out;\n    }\n    tsk_memcpy(self->left + self->num_rows, left, num_rows * sizeof(double));\n    tsk_memcpy(self->right + self->num_rows, right, num_rows * sizeof(double));\n    tsk_memcpy(self->node + self->num_rows, node, num_rows * sizeof(tsk_id_t));\n    tsk_memcpy(self->source + self->num_rows, source, num_rows * sizeof(tsk_id_t));\n    tsk_memcpy(self->dest + self->num_rows, dest, num_rows * sizeof(tsk_id_t));\n    tsk_memcpy(self->time + self->num_rows, time, num_rows * sizeof(double));\n    if (metadata == NULL) {\n        for (j = 0; j < num_rows; j++) {\n            self->metadata_offset[self->num_rows + j + 1] = self->metadata_length;\n        }\n    } else {\n        ret = check_offsets(num_rows, metadata_offset, 0, false);\n        if (ret != 0) {\n            goto 
out;\n        }\n        for (j = 0; j < num_rows; j++) {\n            self->metadata_offset[self->num_rows + j]\n                = (tsk_size_t) self->metadata_length + metadata_offset[j];\n        }\n        metadata_length = metadata_offset[num_rows];\n        ret = tsk_migration_table_expand_metadata(self, metadata_length);\n        if (ret != 0) {\n            goto out;\n        }\n        tsk_memcpy(self->metadata + self->metadata_length, metadata,\n            metadata_length * sizeof(char));\n        self->metadata_length += metadata_length;\n    }\n\n    self->num_rows += num_rows;\n    self->metadata_offset[self->num_rows] = self->metadata_length;\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_migration_table_takeset_columns(tsk_migration_table_t *self, tsk_size_t num_rows,\n    double *left, double *right, tsk_id_t *node, tsk_id_t *source, tsk_id_t *dest,\n    double *time, char *metadata, tsk_size_t *metadata_offset)\n{\n    int ret = 0;\n\n    if (left == NULL || right == NULL || node == NULL || source == NULL || dest == NULL\n        || time == NULL) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n\n    /* We need to check all the inputs before we start freeing or taking memory */\n    ret = check_ragged_column(num_rows, metadata, metadata_offset);\n    if (ret != 0) {\n        goto out;\n    }\n\n    tsk_migration_table_free_columns(self);\n    self->num_rows = num_rows;\n    self->max_rows = num_rows;\n    self->left = left;\n    self->right = right;\n    self->node = node;\n    self->source = source;\n    self->dest = dest;\n    self->time = time;\n\n    ret = takeset_ragged_column(num_rows, metadata, metadata_offset,\n        (void *) &self->metadata, &self->metadata_offset, &self->metadata_length);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_migration_table_copy(\n    const tsk_migration_table_t *self, tsk_migration_table_t *dest, tsk_flags_t 
options)\n{\n    int ret = 0;\n\n    if (!(options & TSK_NO_INIT)) {\n        ret = tsk_migration_table_init(dest, 0);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    ret = tsk_migration_table_set_columns(dest, self->num_rows, self->left, self->right,\n        self->node, self->source, self->dest, self->time, self->metadata,\n        self->metadata_offset);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_migration_table_set_metadata_schema(\n        dest, self->metadata_schema, self->metadata_schema_length);\nout:\n    return ret;\n}\n\nint\ntsk_migration_table_set_columns(tsk_migration_table_t *self, tsk_size_t num_rows,\n    const double *left, const double *right, const tsk_id_t *node,\n    const tsk_id_t *source, const tsk_id_t *dest, const double *time,\n    const char *metadata, const tsk_size_t *metadata_offset)\n{\n    int ret;\n\n    ret = tsk_migration_table_clear(self);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_migration_table_append_columns(self, num_rows, left, right, node, source,\n        dest, time, metadata, metadata_offset);\nout:\n    return ret;\n}\n\ntsk_id_t\ntsk_migration_table_add_row(tsk_migration_table_t *self, double left, double right,\n    tsk_id_t node, tsk_id_t source, tsk_id_t dest, double time, const char *metadata,\n    tsk_size_t metadata_length)\n{\n    tsk_id_t ret = 0;\n\n    ret = tsk_migration_table_expand_main_columns(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_migration_table_expand_metadata(self, metadata_length);\n    if (ret != 0) {\n        goto out;\n    }\n\n    tsk_bug_assert(self->num_rows < self->max_rows);\n    tsk_bug_assert(self->metadata_length + metadata_length <= self->max_metadata_length);\n    tsk_memmove(self->metadata + self->metadata_length, metadata, metadata_length);\n    self->left[self->num_rows] = left;\n    self->right[self->num_rows] = right;\n    self->node[self->num_rows] = node;\n    
self->source[self->num_rows] = source;\n    self->dest[self->num_rows] = dest;\n    self->time[self->num_rows] = time;\n    self->metadata_offset[self->num_rows + 1] = self->metadata_length + metadata_length;\n    self->metadata_length += metadata_length;\n\n    ret = (tsk_id_t) self->num_rows;\n    self->num_rows++;\nout:\n    return ret;\n}\n\nstatic int\ntsk_migration_table_update_row_rewrite(tsk_migration_table_t *self, tsk_id_t index,\n    double left, double right, tsk_id_t node, tsk_id_t source, tsk_id_t dest,\n    double time, const char *metadata, tsk_size_t metadata_length)\n{\n    int ret = 0;\n    tsk_id_t j, ret_id;\n    tsk_migration_table_t copy;\n    tsk_size_t num_rows;\n    tsk_id_t *rows = NULL;\n\n    ret = tsk_migration_table_copy(self, &copy, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    rows = tsk_malloc(self->num_rows * sizeof(*rows));\n    if (rows == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    ret = tsk_migration_table_truncate(self, (tsk_size_t) index);\n    tsk_bug_assert(ret == 0);\n    ret_id = tsk_migration_table_add_row(\n        self, left, right, node, source, dest, time, metadata, metadata_length);\n    if (ret_id < 0) {\n        ret = (int) ret_id;\n        goto out;\n    }\n    num_rows = 0;\n    for (j = index + 1; j < (tsk_id_t) copy.num_rows; j++) {\n        rows[num_rows] = j;\n        num_rows++;\n    }\n    ret = tsk_migration_table_extend(self, &copy, num_rows, rows, 0);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    tsk_migration_table_free(&copy);\n    tsk_safe_free(rows);\n    return ret;\n}\n\nint\ntsk_migration_table_update_row(tsk_migration_table_t *self, tsk_id_t index, double left,\n    double right, tsk_id_t node, tsk_id_t source, tsk_id_t dest, double time,\n    const char *metadata, tsk_size_t metadata_length)\n{\n    int ret = 0;\n    tsk_migration_t current_row;\n\n    ret = tsk_migration_table_get_row(self, index, &current_row);\n    if 
(ret != 0) {\n        goto out;\n    }\n    if (current_row.metadata_length == metadata_length) {\n        self->left[index] = left;\n        self->right[index] = right;\n        self->node[index] = node;\n        self->source[index] = source;\n        self->dest[index] = dest;\n        self->time[index] = time;\n        /* Note: important to use tsk_memmove here as we may be provided pointers\n         * to the column memory as input via get_row */\n        tsk_memmove(&self->metadata[self->metadata_offset[index]], metadata,\n            metadata_length * sizeof(*metadata));\n    } else {\n        ret = tsk_migration_table_update_row_rewrite(self, index, left, right, node,\n            source, dest, time, metadata, metadata_length);\n        if (ret != 0) {\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nint\ntsk_migration_table_clear(tsk_migration_table_t *self)\n{\n    return tsk_migration_table_truncate(self, 0);\n}\n\nint\ntsk_migration_table_truncate(tsk_migration_table_t *self, tsk_size_t num_rows)\n{\n    int ret = 0;\n\n    if (num_rows > self->num_rows) {\n        ret = tsk_trace_error(TSK_ERR_BAD_TABLE_POSITION);\n        goto out;\n    }\n    self->num_rows = num_rows;\n    self->metadata_length = self->metadata_offset[num_rows];\nout:\n    return ret;\n}\n\nint\ntsk_migration_table_extend(tsk_migration_table_t *self,\n    const tsk_migration_table_t *other, tsk_size_t num_rows, const tsk_id_t *row_indexes,\n    tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = 0;\n    tsk_id_t ret_id;\n    tsk_size_t j;\n    tsk_migration_t migration;\n\n    if (self == other) {\n        ret = tsk_trace_error(TSK_ERR_CANNOT_EXTEND_FROM_SELF);\n        goto out;\n    }\n\n    /* We know how much to expand the non-ragged columns, so do it ahead of time */\n    ret = tsk_migration_table_expand_main_columns(self, num_rows);\n    if (ret != 0) {\n        goto out;\n    }\n    for (j = 0; j < num_rows; j++) {\n        ret = 
tsk_migration_table_get_row(\n            other, row_indexes == NULL ? (tsk_id_t) j : row_indexes[j], &migration);\n        if (ret != 0) {\n            goto out;\n        }\n        ret_id = tsk_migration_table_add_row(self, migration.left, migration.right,\n            migration.node, migration.source, migration.dest, migration.time,\n            migration.metadata, migration.metadata_length);\n        if (ret_id < 0) {\n            ret = (int) ret_id;\n            goto out;\n        }\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nvoid\ntsk_migration_table_print_state(const tsk_migration_table_t *self, FILE *out)\n{\n    int ret;\n\n    fprintf(out, \"\\n\" TABLE_SEP);\n    fprintf(out, \"migration_table: %p:\\n\", (const void *) self);\n    fprintf(out, \"num_rows = %lld\\tmax= %lld\\tincrement = %lld)\\n\",\n        (long long) self->num_rows, (long long) self->max_rows,\n        (long long) self->max_rows_increment);\n    fprintf(out, \"metadata_length = %lld\\tmax= %lld\\tincrement = %lld)\\n\",\n        (long long) self->metadata_length, (long long) self->max_metadata_length,\n        (long long) self->max_metadata_length_increment);\n    fprintf(out, TABLE_SEP);\n    ret = tsk_migration_table_dump_text(self, out);\n    tsk_bug_assert(ret == 0);\n}\n\nstatic inline void\ntsk_migration_table_get_row_unsafe(\n    const tsk_migration_table_t *self, tsk_id_t index, tsk_migration_t *row)\n{\n    row->id = (tsk_id_t) index;\n    row->left = self->left[index];\n    row->right = self->right[index];\n    row->node = self->node[index];\n    row->source = self->source[index];\n    row->dest = self->dest[index];\n    row->time = self->time[index];\n    row->metadata_length\n        = self->metadata_offset[index + 1] - self->metadata_offset[index];\n    row->metadata = self->metadata + self->metadata_offset[index];\n}\n\nint\ntsk_migration_table_get_row(\n    const tsk_migration_table_t *self, tsk_id_t index, tsk_migration_t *row)\n{\n    int ret = 0;\n\n    if 
(index < 0 || index >= (tsk_id_t) self->num_rows) {\n        ret = tsk_trace_error(TSK_ERR_MIGRATION_OUT_OF_BOUNDS);\n        goto out;\n    }\n    tsk_migration_table_get_row_unsafe(self, index, row);\nout:\n    return ret;\n}\n\nint\ntsk_migration_table_set_metadata_schema(tsk_migration_table_t *self,\n    const char *metadata_schema, tsk_size_t metadata_schema_length)\n{\n    return replace_string(&self->metadata_schema, &self->metadata_schema_length,\n        metadata_schema, metadata_schema_length);\n}\n\nint\ntsk_migration_table_dump_text(const tsk_migration_table_t *self, FILE *out)\n{\n    tsk_size_t j;\n    int ret = TSK_ERR_IO;\n    tsk_size_t metadata_len;\n    int err;\n\n    err = write_metadata_schema_header(\n        out, self->metadata_schema, self->metadata_schema_length);\n    if (err < 0) {\n        goto out;\n    }\n    err = fprintf(out, \"left\\tright\\tnode\\tsource\\tdest\\ttime\\tmetadata\\n\");\n    if (err < 0) {\n        goto out;\n    }\n    for (j = 0; j < self->num_rows; j++) {\n        metadata_len = self->metadata_offset[j + 1] - self->metadata_offset[j];\n        err = fprintf(out, \"%.3f\\t%.3f\\t%lld\\t%lld\\t%lld\\t%f\\t%.*s\\n\", self->left[j],\n            self->right[j], (long long) self->node[j], (long long) self->source[j],\n            (long long) self->dest[j], self->time[j], (int) metadata_len,\n            self->metadata + self->metadata_offset[j]);\n        if (err < 0) {\n            goto out;\n        }\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nbool\ntsk_migration_table_equals(const tsk_migration_table_t *self,\n    const tsk_migration_table_t *other, tsk_flags_t options)\n{\n    bool ret\n        = self->num_rows == other->num_rows\n          && tsk_memcmp(self->left, other->left, self->num_rows * sizeof(double)) == 0\n          && tsk_memcmp(self->right, other->right, self->num_rows * sizeof(double)) == 0\n          && tsk_memcmp(self->node, other->node, self->num_rows * sizeof(tsk_id_t)) == 0\n          && 
tsk_memcmp(self->source, other->source, self->num_rows * sizeof(tsk_id_t))\n                 == 0\n          && tsk_memcmp(self->dest, other->dest, self->num_rows * sizeof(tsk_id_t)) == 0\n          && tsk_memcmp(self->time, other->time, self->num_rows * sizeof(double)) == 0;\n    if (!(options & TSK_CMP_IGNORE_METADATA)) {\n        ret = ret && self->metadata_length == other->metadata_length\n              && self->metadata_schema_length == other->metadata_schema_length\n              && tsk_memcmp(self->metadata_offset, other->metadata_offset,\n                     (self->num_rows + 1) * sizeof(tsk_size_t))\n                     == 0\n              && tsk_memcmp(self->metadata, other->metadata,\n                     self->metadata_length * sizeof(char))\n                     == 0\n              && tsk_memcmp(self->metadata_schema, other->metadata_schema,\n                     self->metadata_schema_length * sizeof(char))\n                     == 0;\n    }\n    return ret;\n}\n\nint\ntsk_migration_table_keep_rows(tsk_migration_table_t *self, const tsk_bool_t *keep,\n    tsk_flags_t TSK_UNUSED(options), tsk_id_t *id_map)\n{\n    int ret = 0;\n    tsk_size_t remaining_rows;\n\n    if (id_map != NULL) {\n        keep_mask_to_id_map(self->num_rows, keep, id_map);\n    }\n\n    remaining_rows = subset_double_column(self->left, self->num_rows, keep);\n    subset_double_column(self->right, self->num_rows, keep);\n    subset_id_column(self->node, self->num_rows, keep);\n    subset_id_column(self->source, self->num_rows, keep);\n    subset_id_column(self->dest, self->num_rows, keep);\n    subset_double_column(self->time, self->num_rows, keep);\n    if (self->metadata_length > 0) {\n        self->metadata_length = subset_ragged_char_column(\n            self->metadata, self->metadata_offset, self->num_rows, keep);\n    }\n    self->num_rows = remaining_rows;\n    return ret;\n}\n\nstatic int\ntsk_migration_table_dump(\n    const tsk_migration_table_t *self, kastore_t *store, 
tsk_flags_t options)\n{\n    const write_table_col_t cols[] = {\n        { \"migrations/left\", (void *) self->left, self->num_rows, KAS_FLOAT64 },\n        { \"migrations/right\", (void *) self->right, self->num_rows, KAS_FLOAT64 },\n        { \"migrations/node\", (void *) self->node, self->num_rows, TSK_ID_STORAGE_TYPE },\n        { \"migrations/source\", (void *) self->source, self->num_rows,\n            TSK_ID_STORAGE_TYPE },\n        { \"migrations/dest\", (void *) self->dest, self->num_rows, TSK_ID_STORAGE_TYPE },\n        { \"migrations/time\", (void *) self->time, self->num_rows, KAS_FLOAT64 },\n        { \"migrations/metadata_schema\", (void *) self->metadata_schema,\n            self->metadata_schema_length, KAS_UINT8 },\n        { .name = NULL },\n    };\n    const write_table_ragged_col_t ragged_cols[] = {\n        { \"migrations/metadata\", (void *) self->metadata, self->metadata_length,\n            KAS_UINT8, self->metadata_offset, self->num_rows },\n        { .name = NULL },\n    };\n\n    return write_table(store, cols, ragged_cols, options);\n}\n\nstatic int\ntsk_migration_table_load(tsk_migration_table_t *self, kastore_t *store)\n{\n    int ret = 0;\n    tsk_id_t *source = NULL;\n    tsk_id_t *dest = NULL;\n    tsk_id_t *node = NULL;\n    double *left = NULL;\n    double *right = NULL;\n    double *time = NULL;\n    char *metadata = NULL;\n    tsk_size_t *metadata_offset = NULL;\n    char *metadata_schema = NULL;\n    tsk_size_t num_rows, metadata_length, metadata_schema_length;\n\n    read_table_col_t cols[] = {\n        { \"migrations/left\", (void **) &left, KAS_FLOAT64, 0 },\n        { \"migrations/right\", (void **) &right, KAS_FLOAT64, 0 },\n        { \"migrations/node\", (void **) &node, TSK_ID_STORAGE_TYPE, 0 },\n        { \"migrations/source\", (void **) &source, TSK_ID_STORAGE_TYPE, 0 },\n        { \"migrations/dest\", (void **) &dest, TSK_ID_STORAGE_TYPE, 0 },\n        { \"migrations/time\", (void **) &time, KAS_FLOAT64, 0 },\n        
{ .name = NULL },\n    };\n    read_table_ragged_col_t ragged_cols[] = {\n        { \"migrations/metadata\", (void **) &metadata, &metadata_length, KAS_UINT8,\n            &metadata_offset, TSK_COL_OPTIONAL },\n        { .name = NULL },\n    };\n    read_table_property_t properties[] = {\n        { \"migrations/metadata_schema\", (void **) &metadata_schema,\n            &metadata_schema_length, KAS_UINT8, TSK_COL_OPTIONAL },\n        { .name = NULL },\n    };\n\n    ret = read_table(store, &num_rows, cols, ragged_cols, properties, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    if (metadata_schema != NULL) {\n        ret = tsk_migration_table_set_metadata_schema(\n            self, metadata_schema, metadata_schema_length);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    ret = tsk_migration_table_takeset_columns(self, num_rows, left, right, node, source,\n        dest, time, metadata, metadata_offset);\n    if (ret != 0) {\n        goto out;\n    }\n    left = NULL;\n    right = NULL;\n    node = NULL;\n    source = NULL;\n    dest = NULL;\n    time = NULL;\n    metadata = NULL;\n    metadata_offset = NULL;\n\nout:\n    free_read_table_mem(cols, ragged_cols, properties);\n    return ret;\n}\n\n/*************************\n * population table\n *************************/\n\nstatic void\ntsk_population_table_free_columns(tsk_population_table_t *self)\n{\n    tsk_safe_free(self->metadata);\n    tsk_safe_free(self->metadata_offset);\n}\n\nint\ntsk_population_table_free(tsk_population_table_t *self)\n{\n    tsk_population_table_free_columns(self);\n    tsk_safe_free(self->metadata_schema);\n    return 0;\n}\n\nstatic int\ntsk_population_table_expand_main_columns(\n    tsk_population_table_t *self, tsk_size_t additional_rows)\n{\n    int ret = 0;\n    tsk_size_t new_max_rows;\n\n    ret = calculate_max_rows(self->num_rows, self->max_rows, self->max_rows_increment,\n        additional_rows, &new_max_rows);\n    if (ret != 0) {\n        goto 
out;\n    }\n    if ((self->num_rows + additional_rows) > self->max_rows) {\n        ret = expand_column(\n            (void **) &self->metadata_offset, new_max_rows + 1, sizeof(tsk_size_t));\n        if (ret != 0) {\n            goto out;\n        }\n        self->max_rows = new_max_rows;\n    }\nout:\n    return ret;\n}\n\nstatic int\ntsk_population_table_expand_metadata(\n    tsk_population_table_t *self, tsk_size_t additional_length)\n{\n    return expand_ragged_column(self->metadata_length, additional_length,\n        self->max_metadata_length_increment, &self->max_metadata_length,\n        (void **) &self->metadata, sizeof(*self->metadata));\n}\n\nint\ntsk_population_table_set_max_rows_increment(\n    tsk_population_table_t *self, tsk_size_t max_rows_increment)\n{\n    self->max_rows_increment = max_rows_increment;\n    return 0;\n}\n\nint\ntsk_population_table_set_max_metadata_length_increment(\n    tsk_population_table_t *self, tsk_size_t max_metadata_length_increment)\n{\n    self->max_metadata_length_increment = max_metadata_length_increment;\n    return 0;\n}\n\nint\ntsk_population_table_init(tsk_population_table_t *self, tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = 0;\n\n    tsk_memset(self, 0, sizeof(tsk_population_table_t));\n    /* Allocate space for one row initially, ensuring we always have valid pointers\n     * even if the table is empty */\n    self->max_rows_increment = 1;\n    self->max_metadata_length_increment = 1;\n    ret = tsk_population_table_expand_main_columns(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_population_table_expand_metadata(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    self->metadata_offset[0] = 0;\n    self->max_rows_increment = 0;\n    self->max_metadata_length_increment = 0;\n    tsk_population_table_set_metadata_schema(self, NULL, 0);\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_population_table_copy(const tsk_population_table_t *self,\n    
tsk_population_table_t *dest, tsk_flags_t options)\n{\n    int ret = 0;\n\n    if (!(options & TSK_NO_INIT)) {\n        ret = tsk_population_table_init(dest, 0);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    ret = tsk_population_table_set_columns(\n        dest, self->num_rows, self->metadata, self->metadata_offset);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_population_table_set_metadata_schema(\n        dest, self->metadata_schema, self->metadata_schema_length);\nout:\n    return ret;\n}\n\nint\ntsk_population_table_set_columns(tsk_population_table_t *self, tsk_size_t num_rows,\n    const char *metadata, const tsk_size_t *metadata_offset)\n{\n    int ret;\n\n    ret = tsk_population_table_clear(self);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_population_table_append_columns(self, num_rows, metadata, metadata_offset);\nout:\n    return ret;\n}\n\nint\ntsk_population_table_append_columns(tsk_population_table_t *self, tsk_size_t num_rows,\n    const char *metadata, const tsk_size_t *metadata_offset)\n{\n    int ret;\n    tsk_size_t j, metadata_length;\n\n    if (metadata == NULL || metadata_offset == NULL) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    ret = tsk_population_table_expand_main_columns(self, num_rows);\n    if (ret != 0) {\n        goto out;\n    }\n\n    ret = check_offsets(num_rows, metadata_offset, 0, false);\n    if (ret != 0) {\n        goto out;\n    }\n    for (j = 0; j < num_rows; j++) {\n        self->metadata_offset[self->num_rows + j]\n            = self->metadata_length + metadata_offset[j];\n    }\n    metadata_length = metadata_offset[num_rows];\n    ret = tsk_population_table_expand_metadata(self, metadata_length);\n    if (ret != 0) {\n        goto out;\n    }\n    tsk_memcpy(self->metadata + self->metadata_length, metadata,\n        metadata_length * sizeof(char));\n    self->metadata_length += metadata_length;\n\n    
self->num_rows += num_rows;\n    self->metadata_offset[self->num_rows] = self->metadata_length;\nout:\n    return ret;\n}\n\nint\ntsk_population_table_takeset_columns(tsk_population_table_t *self, tsk_size_t num_rows,\n    char *metadata, tsk_size_t *metadata_offset)\n{\n    int ret = 0;\n\n    /* We need to check all the inputs before we start freeing or taking memory */\n    if (metadata == NULL || metadata_offset == NULL) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    ret = check_ragged_column(num_rows, metadata, metadata_offset);\n    if (ret != 0) {\n        goto out;\n    }\n\n    tsk_population_table_free_columns(self);\n    self->num_rows = num_rows;\n    self->max_rows = num_rows;\n\n    ret = takeset_ragged_column(num_rows, metadata, metadata_offset,\n        (void *) &self->metadata, &self->metadata_offset, &self->metadata_length);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nstatic tsk_id_t\ntsk_population_table_add_row_internal(\n    tsk_population_table_t *self, const char *metadata, tsk_size_t metadata_length)\n{\n    tsk_id_t ret = 0;\n\n    tsk_bug_assert(self->num_rows < self->max_rows);\n    tsk_bug_assert(self->metadata_length + metadata_length <= self->max_metadata_length);\n    tsk_memmove(self->metadata + self->metadata_length, metadata, metadata_length);\n    self->metadata_offset[self->num_rows + 1] = self->metadata_length + metadata_length;\n    self->metadata_length += metadata_length;\n    ret = (tsk_id_t) self->num_rows;\n    self->num_rows++;\n    return ret;\n}\n\ntsk_id_t\ntsk_population_table_add_row(\n    tsk_population_table_t *self, const char *metadata, tsk_size_t metadata_length)\n{\n    tsk_id_t ret = 0;\n\n    ret = tsk_population_table_expand_main_columns(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_population_table_expand_metadata(self, metadata_length);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = 
tsk_population_table_add_row_internal(self, metadata, metadata_length);\nout:\n    return ret;\n}\n\nstatic int\ntsk_population_table_update_row_rewrite(tsk_population_table_t *self, tsk_id_t index,\n    const char *metadata, tsk_size_t metadata_length)\n{\n    int ret = 0;\n    tsk_id_t j, ret_id;\n    tsk_population_table_t copy;\n    tsk_size_t num_rows;\n    tsk_id_t *rows = NULL;\n\n    ret = tsk_population_table_copy(self, &copy, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    rows = tsk_malloc(self->num_rows * sizeof(*rows));\n    if (rows == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    ret = tsk_population_table_truncate(self, (tsk_size_t) index);\n    tsk_bug_assert(ret == 0);\n    ret_id = tsk_population_table_add_row(self, metadata, metadata_length);\n    if (ret_id < 0) {\n        ret = (int) ret_id;\n        goto out;\n    }\n    num_rows = 0;\n    for (j = index + 1; j < (tsk_id_t) copy.num_rows; j++) {\n        rows[num_rows] = j;\n        num_rows++;\n    }\n    ret = tsk_population_table_extend(self, &copy, num_rows, rows, 0);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    tsk_population_table_free(&copy);\n    tsk_safe_free(rows);\n    return ret;\n}\n\nint\ntsk_population_table_update_row(tsk_population_table_t *self, tsk_id_t index,\n    const char *metadata, tsk_size_t metadata_length)\n{\n    int ret = 0;\n    tsk_population_t current_row;\n\n    ret = tsk_population_table_get_row(self, index, &current_row);\n    if (ret != 0) {\n        goto out;\n    }\n    if (current_row.metadata_length == metadata_length) {\n        /* Note: important to use tsk_memmove here as we may be provided pointers\n         * to the column memory as input via get_row */\n        tsk_memmove(&self->metadata[self->metadata_offset[index]], metadata,\n            metadata_length * sizeof(*metadata));\n    } else {\n        ret = tsk_population_table_update_row_rewrite(\n            self, index, 
metadata, metadata_length);\n        if (ret != 0) {\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nint\ntsk_population_table_clear(tsk_population_table_t *self)\n{\n    return tsk_population_table_truncate(self, 0);\n}\n\nint\ntsk_population_table_truncate(tsk_population_table_t *self, tsk_size_t num_rows)\n{\n    int ret = 0;\n\n    if (num_rows > self->num_rows) {\n        ret = tsk_trace_error(TSK_ERR_BAD_TABLE_POSITION);\n        goto out;\n    }\n    self->num_rows = num_rows;\n    self->metadata_length = self->metadata_offset[num_rows];\nout:\n    return ret;\n}\n\nint\ntsk_population_table_extend(tsk_population_table_t *self,\n    const tsk_population_table_t *other, tsk_size_t num_rows,\n    const tsk_id_t *row_indexes, tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = 0;\n    tsk_id_t ret_id;\n    tsk_size_t j;\n    tsk_population_t population;\n\n    if (self == other) {\n        ret = tsk_trace_error(TSK_ERR_CANNOT_EXTEND_FROM_SELF);\n        goto out;\n    }\n\n    /* We know how much to expand the non-ragged columns, so do it ahead of time */\n    ret = tsk_population_table_expand_main_columns(self, num_rows);\n    if (ret != 0) {\n        goto out;\n    }\n    for (j = 0; j < num_rows; j++) {\n        ret = tsk_population_table_get_row(\n            other, row_indexes == NULL ? 
(tsk_id_t) j : row_indexes[j], &population);\n        if (ret != 0) {\n            goto out;\n        }\n        ret_id = tsk_population_table_add_row(\n            self, population.metadata, population.metadata_length);\n        if (ret_id < 0) {\n            ret = (int) ret_id;\n            goto out;\n        }\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nvoid\ntsk_population_table_print_state(const tsk_population_table_t *self, FILE *out)\n{\n    tsk_size_t j, k;\n\n    fprintf(out, \"\\n\" TABLE_SEP);\n    fprintf(out, \"population_table: %p:\\n\", (const void *) self);\n    fprintf(out, \"num_rows          = %lld\\tmax= %lld\\tincrement = %lld)\\n\",\n        (long long) self->num_rows, (long long) self->max_rows,\n        (long long) self->max_rows_increment);\n    fprintf(out, \"metadata_length  = %lld\\tmax= %lld\\tincrement = %lld)\\n\",\n        (long long) self->metadata_length, (long long) self->max_metadata_length,\n        (long long) self->max_metadata_length_increment);\n    fprintf(out, TABLE_SEP);\n    write_metadata_schema_header(\n        out, self->metadata_schema, self->metadata_schema_length);\n    fprintf(out, \"index\\tmetadata_offset\\tmetadata\\n\");\n    for (j = 0; j < self->num_rows; j++) {\n        fprintf(\n            out, \"%lld\\t%lld\\t\", (long long) j, (long long) self->metadata_offset[j]);\n        for (k = self->metadata_offset[j]; k < self->metadata_offset[j + 1]; k++) {\n            fprintf(out, \"%c\", self->metadata[k]);\n        }\n        fprintf(out, \"\\n\");\n    }\n    tsk_bug_assert(self->metadata_offset[0] == 0);\n    tsk_bug_assert(self->metadata_offset[self->num_rows] == self->metadata_length);\n}\n\nstatic inline void\ntsk_population_table_get_row_unsafe(\n    const tsk_population_table_t *self, tsk_id_t index, tsk_population_t *row)\n{\n    row->id = (tsk_id_t) index;\n    row->metadata_length\n        = self->metadata_offset[index + 1] - self->metadata_offset[index];\n    row->metadata = self->metadata + 
self->metadata_offset[index];\n}\n\nint\ntsk_population_table_get_row(\n    const tsk_population_table_t *self, tsk_id_t index, tsk_population_t *row)\n{\n    int ret = 0;\n\n    if (index < 0 || index >= (tsk_id_t) self->num_rows) {\n        ret = tsk_trace_error(TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n        goto out;\n    }\n    tsk_population_table_get_row_unsafe(self, index, row);\nout:\n    return ret;\n}\n\nint\ntsk_population_table_set_metadata_schema(tsk_population_table_t *self,\n    const char *metadata_schema, tsk_size_t metadata_schema_length)\n{\n    return replace_string(&self->metadata_schema, &self->metadata_schema_length,\n        metadata_schema, metadata_schema_length);\n}\n\nint\ntsk_population_table_dump_text(const tsk_population_table_t *self, FILE *out)\n{\n    int ret = TSK_ERR_IO;\n    int err;\n    tsk_size_t j;\n    tsk_size_t metadata_len;\n\n    err = write_metadata_schema_header(\n        out, self->metadata_schema, self->metadata_schema_length);\n    if (err < 0) {\n        goto out;\n    }\n    err = fprintf(out, \"metadata\\n\");\n    if (err < 0) {\n        goto out;\n    }\n    for (j = 0; j < self->num_rows; j++) {\n        metadata_len = self->metadata_offset[j + 1] - self->metadata_offset[j];\n        err = fprintf(out, \"%.*s\\n\", (int) metadata_len,\n            self->metadata + self->metadata_offset[j]);\n        if (err < 0) {\n            goto out;\n        }\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nbool\ntsk_population_table_equals(const tsk_population_table_t *self,\n    const tsk_population_table_t *other, tsk_flags_t options)\n{\n    /* Since we only have the metadata column in the table currently, equality\n     * reduces to comparing the number of rows if we disable metadata comparison.\n     */\n    bool ret = self->num_rows == other->num_rows;\n    if (!(options & TSK_CMP_IGNORE_METADATA)) {\n        ret = ret && self->metadata_length == other->metadata_length\n              && self->metadata_schema_length 
== other->metadata_schema_length\n              && tsk_memcmp(self->metadata_offset, other->metadata_offset,\n                     (self->num_rows + 1) * sizeof(tsk_size_t))\n                     == 0\n              && tsk_memcmp(self->metadata, other->metadata,\n                     self->metadata_length * sizeof(char))\n                     == 0\n              && tsk_memcmp(self->metadata_schema, other->metadata_schema,\n                     self->metadata_schema_length * sizeof(char))\n                     == 0;\n    }\n    return ret;\n}\n\nint\ntsk_population_table_keep_rows(tsk_population_table_t *self, const tsk_bool_t *keep,\n    tsk_flags_t TSK_UNUSED(options), tsk_id_t *id_map)\n{\n    int ret = 0;\n\n    if (id_map != NULL) {\n        keep_mask_to_id_map(self->num_rows, keep, id_map);\n    }\n\n    if (self->metadata_length > 0) {\n        self->metadata_length = subset_ragged_char_column(\n            self->metadata, self->metadata_offset, self->num_rows, keep);\n    }\n    self->num_rows = count_true(self->num_rows, keep);\n    return ret;\n}\n\nstatic int\ntsk_population_table_dump(\n    const tsk_population_table_t *self, kastore_t *store, tsk_flags_t options)\n{\n    const write_table_col_t cols[] = {\n        { \"populations/metadata_schema\", (void *) self->metadata_schema,\n            self->metadata_schema_length, KAS_UINT8 },\n        { .name = NULL },\n    };\n    const write_table_ragged_col_t ragged_cols[] = {\n        { \"populations/metadata\", (void *) self->metadata, self->metadata_length,\n            KAS_UINT8, self->metadata_offset, self->num_rows },\n        { .name = NULL },\n    };\n\n    return write_table(store, cols, ragged_cols, options);\n}\n\nstatic int\ntsk_population_table_load(tsk_population_table_t *self, kastore_t *store)\n{\n    int ret = 0;\n    char *metadata = NULL;\n    tsk_size_t *metadata_offset = NULL;\n    char *metadata_schema = NULL;\n    tsk_size_t num_rows, metadata_length, metadata_schema_length;\n\n    
read_table_ragged_col_t ragged_cols[] = {\n        { \"populations/metadata\", (void **) &metadata, &metadata_length, KAS_UINT8,\n            &metadata_offset, 0 },\n        { .name = NULL },\n    };\n    read_table_property_t properties[] = {\n        { \"populations/metadata_schema\", (void **) &metadata_schema,\n            &metadata_schema_length, KAS_UINT8, TSK_COL_OPTIONAL },\n        { .name = NULL },\n    };\n\n    ret = read_table(store, &num_rows, NULL, ragged_cols, properties, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    if (metadata_schema != NULL) {\n        ret = tsk_population_table_set_metadata_schema(\n            self, metadata_schema, metadata_schema_length);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    ret = tsk_population_table_takeset_columns(\n        self, num_rows, metadata, metadata_offset);\n    if (ret != 0) {\n        goto out;\n    }\n    metadata = NULL;\n    metadata_offset = NULL;\n\nout:\n    free_read_table_mem(NULL, ragged_cols, properties);\n    return ret;\n}\n\n/*************************\n * provenance table\n *************************/\n\nstatic void\ntsk_provenance_table_free_columns(tsk_provenance_table_t *self)\n{\n    tsk_safe_free(self->timestamp);\n    tsk_safe_free(self->timestamp_offset);\n    tsk_safe_free(self->record);\n    tsk_safe_free(self->record_offset);\n}\n\nint\ntsk_provenance_table_free(tsk_provenance_table_t *self)\n{\n    tsk_provenance_table_free_columns(self);\n    return 0;\n}\n\nstatic int\ntsk_provenance_table_expand_main_columns(\n    tsk_provenance_table_t *self, tsk_size_t additional_rows)\n{\n    int ret = 0;\n    tsk_size_t new_max_rows;\n\n    ret = calculate_max_rows(self->num_rows, self->max_rows, self->max_rows_increment,\n        additional_rows, &new_max_rows);\n    if (ret != 0) {\n        goto out;\n    }\n    if ((self->num_rows + additional_rows) > self->max_rows) {\n        ret = expand_column(\n            (void **) &self->timestamp_offset, 
new_max_rows + 1, sizeof(tsk_size_t));\n        if (ret != 0) {\n            goto out;\n        }\n        ret = expand_column(\n            (void **) &self->record_offset, new_max_rows + 1, sizeof(tsk_size_t));\n        if (ret != 0) {\n            goto out;\n        }\n        self->max_rows = new_max_rows;\n    }\nout:\n    return ret;\n}\n\nstatic int\ntsk_provenance_table_expand_timestamp(\n    tsk_provenance_table_t *self, tsk_size_t additional_length)\n{\n    return expand_ragged_column(self->timestamp_length, additional_length,\n        self->max_timestamp_length_increment, &self->max_timestamp_length,\n        (void **) &self->timestamp, sizeof(*self->timestamp));\n}\n\nstatic int\ntsk_provenance_table_expand_record(\n    tsk_provenance_table_t *self, tsk_size_t additional_length)\n{\n    return expand_ragged_column(self->record_length, additional_length,\n        self->max_record_length_increment, &self->max_record_length,\n        (void **) &self->record, sizeof(*self->record));\n}\n\nint\ntsk_provenance_table_set_max_rows_increment(\n    tsk_provenance_table_t *self, tsk_size_t max_rows_increment)\n{\n    self->max_rows_increment = max_rows_increment;\n    return 0;\n}\n\nint\ntsk_provenance_table_set_max_timestamp_length_increment(\n    tsk_provenance_table_t *self, tsk_size_t max_timestamp_length_increment)\n{\n    self->max_timestamp_length_increment = max_timestamp_length_increment;\n    return 0;\n}\n\nint\ntsk_provenance_table_set_max_record_length_increment(\n    tsk_provenance_table_t *self, tsk_size_t max_record_length_increment)\n{\n    self->max_record_length_increment = max_record_length_increment;\n    return 0;\n}\n\nint\ntsk_provenance_table_init(tsk_provenance_table_t *self, tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = 0;\n\n    tsk_memset(self, 0, sizeof(tsk_provenance_table_t));\n    /* Allocate space for one row initially, ensuring we always have valid pointers\n     * even if the table is empty */\n    self->max_rows_increment 
= 1;\n    self->max_timestamp_length_increment = 1;\n    self->max_record_length_increment = 1;\n    ret = tsk_provenance_table_expand_main_columns(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_provenance_table_expand_timestamp(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    self->timestamp_offset[0] = 0;\n    ret = tsk_provenance_table_expand_record(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    self->record_offset[0] = 0;\n    self->max_rows_increment = 0;\n    self->max_timestamp_length_increment = 0;\n    self->max_record_length_increment = 0;\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_provenance_table_copy(const tsk_provenance_table_t *self,\n    tsk_provenance_table_t *dest, tsk_flags_t options)\n{\n    int ret = 0;\n\n    if (!(options & TSK_NO_INIT)) {\n        ret = tsk_provenance_table_init(dest, 0);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    ret = tsk_provenance_table_set_columns(dest, self->num_rows, self->timestamp,\n        self->timestamp_offset, self->record, self->record_offset);\nout:\n    return ret;\n}\n\nint\ntsk_provenance_table_set_columns(tsk_provenance_table_t *self, tsk_size_t num_rows,\n    const char *timestamp, const tsk_size_t *timestamp_offset, const char *record,\n    const tsk_size_t *record_offset)\n{\n    int ret;\n\n    ret = tsk_provenance_table_clear(self);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_provenance_table_append_columns(\n        self, num_rows, timestamp, timestamp_offset, record, record_offset);\nout:\n    return ret;\n}\n\nint\ntsk_provenance_table_append_columns(tsk_provenance_table_t *self, tsk_size_t num_rows,\n    const char *timestamp, const tsk_size_t *timestamp_offset, const char *record,\n    const tsk_size_t *record_offset)\n{\n    int ret;\n    tsk_size_t j, timestamp_length, record_length;\n\n    if (timestamp == NULL || timestamp_offset == NULL || record == NULL\n        || record_offset == 
NULL) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    ret = tsk_provenance_table_expand_main_columns(self, num_rows);\n    if (ret != 0) {\n        goto out;\n    }\n\n    ret = check_offsets(num_rows, timestamp_offset, 0, false);\n    if (ret != 0) {\n        goto out;\n    }\n    for (j = 0; j < num_rows; j++) {\n        self->timestamp_offset[self->num_rows + j]\n            = self->timestamp_length + timestamp_offset[j];\n    }\n    timestamp_length = timestamp_offset[num_rows];\n    ret = tsk_provenance_table_expand_timestamp(self, timestamp_length);\n    if (ret != 0) {\n        goto out;\n    }\n    tsk_memcpy(self->timestamp + self->timestamp_length, timestamp,\n        timestamp_length * sizeof(char));\n    self->timestamp_length += timestamp_length;\n\n    ret = check_offsets(num_rows, record_offset, 0, false);\n    if (ret != 0) {\n        goto out;\n    }\n    for (j = 0; j < num_rows; j++) {\n        self->record_offset[self->num_rows + j] = self->record_length + record_offset[j];\n    }\n    record_length = record_offset[num_rows];\n    ret = tsk_provenance_table_expand_record(self, record_length);\n    if (ret != 0) {\n        goto out;\n    }\n    tsk_memcpy(self->record + self->record_length, record, record_length * sizeof(char));\n    self->record_length += record_length;\n\n    self->num_rows += num_rows;\n    self->timestamp_offset[self->num_rows] = self->timestamp_length;\n    self->record_offset[self->num_rows] = self->record_length;\nout:\n    return ret;\n}\n\nint\ntsk_provenance_table_takeset_columns(tsk_provenance_table_t *self, tsk_size_t num_rows,\n    char *timestamp, tsk_size_t *timestamp_offset, char *record,\n    tsk_size_t *record_offset)\n{\n    int ret = 0;\n\n    /* We need to check all the inputs before we start freeing or taking memory */\n    if (timestamp == NULL || timestamp_offset == NULL || record == NULL\n        || record_offset == NULL) {\n        ret = 
tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    ret = check_ragged_column(num_rows, timestamp, timestamp_offset);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = check_ragged_column(num_rows, record, record_offset);\n    if (ret != 0) {\n        goto out;\n    }\n\n    tsk_provenance_table_free_columns(self);\n    self->num_rows = num_rows;\n    self->max_rows = num_rows;\n\n    ret = takeset_ragged_column(num_rows, timestamp, timestamp_offset,\n        (void *) &self->timestamp, &self->timestamp_offset, &self->timestamp_length);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = takeset_ragged_column(num_rows, record, record_offset, (void *) &self->record,\n        &self->record_offset, &self->record_length);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nstatic tsk_id_t\ntsk_provenance_table_add_row_internal(tsk_provenance_table_t *self,\n    const char *timestamp, tsk_size_t timestamp_length, const char *record,\n    tsk_size_t record_length)\n{\n    tsk_id_t ret = 0;\n\n    tsk_bug_assert(self->num_rows < self->max_rows);\n    tsk_bug_assert(\n        self->timestamp_length + timestamp_length <= self->max_timestamp_length);\n    tsk_memmove(self->timestamp + self->timestamp_length, timestamp, timestamp_length);\n    self->timestamp_offset[self->num_rows + 1]\n        = self->timestamp_length + timestamp_length;\n    self->timestamp_length += timestamp_length;\n    tsk_bug_assert(self->record_length + record_length <= self->max_record_length);\n    tsk_memmove(self->record + self->record_length, record, record_length);\n    self->record_offset[self->num_rows + 1] = self->record_length + record_length;\n    self->record_length += record_length;\n    ret = (tsk_id_t) self->num_rows;\n    self->num_rows++;\n    return ret;\n}\n\ntsk_id_t\ntsk_provenance_table_add_row(tsk_provenance_table_t *self, const char *timestamp,\n    tsk_size_t timestamp_length, const char *record, tsk_size_t 
record_length)\n{\n    tsk_id_t ret = 0;\n\n    ret = tsk_provenance_table_expand_main_columns(self, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_provenance_table_expand_timestamp(self, timestamp_length);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_provenance_table_expand_record(self, record_length);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_provenance_table_add_row_internal(\n        self, timestamp, timestamp_length, record, record_length);\nout:\n    return ret;\n}\n\nstatic int\ntsk_provenance_table_update_row_rewrite(tsk_provenance_table_t *self, tsk_id_t index,\n    const char *timestamp, tsk_size_t timestamp_length, const char *record,\n    tsk_size_t record_length)\n{\n    int ret = 0;\n    tsk_id_t j, ret_id;\n    tsk_provenance_table_t copy;\n    tsk_size_t num_rows;\n    tsk_id_t *rows = NULL;\n\n    ret = tsk_provenance_table_copy(self, &copy, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    rows = tsk_malloc(self->num_rows * sizeof(*rows));\n    if (rows == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    ret = tsk_provenance_table_truncate(self, (tsk_size_t) index);\n    tsk_bug_assert(ret == 0);\n    ret_id = tsk_provenance_table_add_row(\n        self, timestamp, timestamp_length, record, record_length);\n    if (ret_id < 0) {\n        ret = (int) ret_id;\n        goto out;\n    }\n    num_rows = 0;\n    for (j = index + 1; j < (tsk_id_t) copy.num_rows; j++) {\n        rows[num_rows] = j;\n        num_rows++;\n    }\n    ret = tsk_provenance_table_extend(self, &copy, num_rows, rows, 0);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    tsk_provenance_table_free(&copy);\n    tsk_safe_free(rows);\n    return ret;\n}\n\nint\ntsk_provenance_table_update_row(tsk_provenance_table_t *self, tsk_id_t index,\n    const char *timestamp, tsk_size_t timestamp_length, const char *record,\n    tsk_size_t record_length)\n{\n    int ret = 0;\n    
tsk_provenance_t current_row;\n\n    ret = tsk_provenance_table_get_row(self, index, &current_row);\n    if (ret != 0) {\n        goto out;\n    }\n    if (current_row.timestamp_length == timestamp_length\n        && current_row.record_length == record_length) {\n        /* Note: important to use tsk_memmove here as we may be provided pointers\n         * to the column memory as input via get_row */\n        tsk_memmove(&self->timestamp[self->timestamp_offset[index]], timestamp,\n            timestamp_length * sizeof(*timestamp));\n        tsk_memmove(&self->record[self->record_offset[index]], record,\n            record_length * sizeof(*record));\n    } else {\n        ret = tsk_provenance_table_update_row_rewrite(\n            self, index, timestamp, timestamp_length, record, record_length);\n        if (ret != 0) {\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nint\ntsk_provenance_table_clear(tsk_provenance_table_t *self)\n{\n    return tsk_provenance_table_truncate(self, 0);\n}\n\nint\ntsk_provenance_table_truncate(tsk_provenance_table_t *self, tsk_size_t num_rows)\n{\n    int ret = 0;\n\n    if (num_rows > self->num_rows) {\n        ret = tsk_trace_error(TSK_ERR_BAD_TABLE_POSITION);\n        goto out;\n    }\n    self->num_rows = num_rows;\n    self->timestamp_length = self->timestamp_offset[num_rows];\n    self->record_length = self->record_offset[num_rows];\nout:\n    return ret;\n}\n\nint\ntsk_provenance_table_extend(tsk_provenance_table_t *self,\n    const tsk_provenance_table_t *other, tsk_size_t num_rows,\n    const tsk_id_t *row_indexes, tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = 0;\n    tsk_id_t ret_id;\n    tsk_size_t j;\n    tsk_provenance_t provenance;\n\n    if (self == other) {\n        ret = tsk_trace_error(TSK_ERR_CANNOT_EXTEND_FROM_SELF);\n        goto out;\n    }\n\n    /* We know how much to expand the non-ragged columns, so do it ahead of time */\n    ret = tsk_provenance_table_expand_main_columns(self, 
num_rows);\n    if (ret != 0) {\n        goto out;\n    }\n    for (j = 0; j < num_rows; j++) {\n        ret = tsk_provenance_table_get_row(\n            other, row_indexes == NULL ? (tsk_id_t) j : row_indexes[j], &provenance);\n        if (ret != 0) {\n            goto out;\n        }\n        ret_id = tsk_provenance_table_add_row(self, provenance.timestamp,\n            provenance.timestamp_length, provenance.record, provenance.record_length);\n        if (ret_id < 0) {\n            ret = (int) ret_id;\n            goto out;\n        }\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nvoid\ntsk_provenance_table_print_state(const tsk_provenance_table_t *self, FILE *out)\n{\n    tsk_size_t j, k;\n\n    fprintf(out, \"\\n\" TABLE_SEP);\n    fprintf(out, \"provenance_table: %p:\\n\", (const void *) self);\n    fprintf(out, \"num_rows          = %lld\\tmax= %lld\\tincrement = %lld)\\n\",\n        (long long) self->num_rows, (long long) self->max_rows,\n        (long long) self->max_rows_increment);\n    fprintf(out, \"timestamp_length  = %lld\\tmax= %lld\\tincrement = %lld)\\n\",\n        (long long) self->timestamp_length, (long long) self->max_timestamp_length,\n        (long long) self->max_timestamp_length_increment);\n    fprintf(out, \"record_length = %lld\\tmax= %lld\\tincrement = %lld)\\n\",\n        (long long) self->record_length, (long long) self->max_record_length,\n        (long long) self->max_record_length_increment);\n    fprintf(out, TABLE_SEP);\n    fprintf(out, \"index\\ttimestamp_offset\\ttimestamp\\trecord_offset\\tprovenance\\n\");\n    for (j = 0; j < self->num_rows; j++) {\n        fprintf(\n            out, \"%lld\\t%lld\\t\", (long long) j, (long long) self->timestamp_offset[j]);\n        for (k = self->timestamp_offset[j]; k < self->timestamp_offset[j + 1]; k++) {\n            fprintf(out, \"%c\", self->timestamp[k]);\n        }\n        fprintf(out, \"\\t%lld\\t\", (long long) self->record_offset[j]);\n        for (k = 
self->record_offset[j]; k < self->record_offset[j + 1]; k++) {\n            fprintf(out, \"%c\", self->record[k]);\n        }\n        fprintf(out, \"\\n\");\n    }\n    tsk_bug_assert(self->timestamp_offset[0] == 0);\n    tsk_bug_assert(self->timestamp_offset[self->num_rows] == self->timestamp_length);\n    tsk_bug_assert(self->record_offset[0] == 0);\n    tsk_bug_assert(self->record_offset[self->num_rows] == self->record_length);\n}\n\nstatic inline void\ntsk_provenance_table_get_row_unsafe(\n    const tsk_provenance_table_t *self, tsk_id_t index, tsk_provenance_t *row)\n{\n    row->id = (tsk_id_t) index;\n    row->timestamp_length\n        = self->timestamp_offset[index + 1] - self->timestamp_offset[index];\n    row->timestamp = self->timestamp + self->timestamp_offset[index];\n    row->record_length = self->record_offset[index + 1] - self->record_offset[index];\n    row->record = self->record + self->record_offset[index];\n}\n\nint\ntsk_provenance_table_get_row(\n    const tsk_provenance_table_t *self, tsk_id_t index, tsk_provenance_t *row)\n{\n    int ret = 0;\n\n    if (index < 0 || index >= (tsk_id_t) self->num_rows) {\n        ret = tsk_trace_error(TSK_ERR_PROVENANCE_OUT_OF_BOUNDS);\n        goto out;\n    }\n    tsk_provenance_table_get_row_unsafe(self, index, row);\nout:\n    return ret;\n}\n\nint\ntsk_provenance_table_dump_text(const tsk_provenance_table_t *self, FILE *out)\n{\n    int ret = TSK_ERR_IO;\n    int err;\n    tsk_size_t j, timestamp_len, record_len;\n\n    err = fprintf(out, \"record\\ttimestamp\\n\");\n    if (err < 0) {\n        goto out;\n    }\n    for (j = 0; j < self->num_rows; j++) {\n        record_len = self->record_offset[j + 1] - self->record_offset[j];\n        timestamp_len = self->timestamp_offset[j + 1] - self->timestamp_offset[j];\n        err = fprintf(out, \"%.*s\\t%.*s\\n\", (int) record_len,\n            self->record + self->record_offset[j], (int) timestamp_len,\n            self->timestamp + 
self->timestamp_offset[j]);\n        if (err < 0) {\n            goto out;\n        }\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nbool\ntsk_provenance_table_equals(const tsk_provenance_table_t *self,\n    const tsk_provenance_table_t *other, tsk_flags_t options)\n{\n    bool ret\n        = self->num_rows == other->num_rows\n          && self->record_length == other->record_length\n          && tsk_memcmp(self->record_offset, other->record_offset,\n                 (self->num_rows + 1) * sizeof(tsk_size_t))\n                 == 0\n          && tsk_memcmp(self->record, other->record, self->record_length * sizeof(char))\n                 == 0;\n    if (!(options & TSK_CMP_IGNORE_TIMESTAMPS)) {\n        ret = ret && self->timestamp_length == other->timestamp_length\n              && tsk_memcmp(self->timestamp_offset, other->timestamp_offset,\n                     (self->num_rows + 1) * sizeof(tsk_size_t))\n                     == 0\n              && tsk_memcmp(self->timestamp, other->timestamp,\n                     self->timestamp_length * sizeof(char))\n                     == 0;\n    }\n    return ret;\n}\n\nint\ntsk_provenance_table_keep_rows(tsk_provenance_table_t *self, const tsk_bool_t *keep,\n    tsk_flags_t TSK_UNUSED(options), tsk_id_t *id_map)\n{\n    int ret = 0;\n\n    if (id_map != NULL) {\n        keep_mask_to_id_map(self->num_rows, keep, id_map);\n    }\n    self->timestamp_length = subset_ragged_char_column(\n        self->timestamp, self->timestamp_offset, self->num_rows, keep);\n    self->record_length = subset_ragged_char_column(\n        self->record, self->record_offset, self->num_rows, keep);\n    self->num_rows = count_true(self->num_rows, keep);\n\n    return ret;\n}\n\nstatic int\ntsk_provenance_table_dump(\n    const tsk_provenance_table_t *self, kastore_t *store, tsk_flags_t options)\n{\n    write_table_ragged_col_t ragged_cols[] = {\n        { \"provenances/timestamp\", (void *) self->timestamp, self->timestamp_length,\n            
KAS_UINT8, self->timestamp_offset, self->num_rows },\n        { \"provenances/record\", (void *) self->record, self->record_length, KAS_UINT8,\n            self->record_offset, self->num_rows },\n        { .name = NULL },\n    };\n\n    return write_table_ragged_cols(store, ragged_cols, options);\n}\n\nstatic int\ntsk_provenance_table_load(tsk_provenance_table_t *self, kastore_t *store)\n{\n    int ret;\n    char *timestamp = NULL;\n    tsk_size_t *timestamp_offset = NULL;\n    char *record = NULL;\n    tsk_size_t *record_offset = NULL;\n    tsk_size_t num_rows, timestamp_length, record_length;\n\n    read_table_ragged_col_t ragged_cols[] = {\n        { \"provenances/timestamp\", (void **) &timestamp, &timestamp_length, KAS_UINT8,\n            &timestamp_offset, 0 },\n        { \"provenances/record\", (void **) &record, &record_length, KAS_UINT8,\n            &record_offset, 0 },\n        { .name = NULL },\n    };\n\n    ret = read_table(store, &num_rows, NULL, ragged_cols, NULL, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_provenance_table_takeset_columns(\n        self, num_rows, timestamp, timestamp_offset, record, record_offset);\n    if (ret != 0) {\n        goto out;\n    }\n    timestamp = NULL;\n    timestamp_offset = NULL;\n    record = NULL;\n    record_offset = NULL;\n\nout:\n    free_read_table_mem(NULL, ragged_cols, NULL);\n    return ret;\n}\n\n/*************************\n * sort_tables\n *************************/\n\ntypedef struct {\n    double left;\n    double right;\n    tsk_id_t parent;\n    tsk_id_t child;\n    double time;\n    /* It would be a little bit more convenient to store a pointer to the\n     * metadata here in the struct rather than an offset back into the\n     * original array. 
However, this would increase the size of the struct\n     * from 40 bytes to 48 and we will allocate very large numbers of these.\n     */\n    tsk_size_t metadata_offset;\n    tsk_size_t metadata_length;\n} edge_sort_t;\n\ntypedef struct {\n    tsk_mutation_t mut;\n    int num_descendants;\n    double node_time;\n} mutation_sort_t;\n\ntypedef struct {\n    tsk_individual_t ind;\n    tsk_id_t first_node;\n    tsk_size_t num_descendants;\n} individual_canonical_sort_t;\n\ntypedef struct {\n    double left;\n    double right;\n    tsk_id_t node;\n    tsk_id_t source;\n    tsk_id_t dest;\n    double time;\n    tsk_size_t metadata_offset;\n    tsk_size_t metadata_length;\n} migration_sort_t;\n\nstatic int\ncmp_site(const void *a, const void *b)\n{\n    const tsk_site_t *ia = (const tsk_site_t *) a;\n    const tsk_site_t *ib = (const tsk_site_t *) b;\n    /* Compare sites by position */\n    int ret = (ia->position > ib->position) - (ia->position < ib->position);\n    if (ret == 0) {\n        /* Within a particular position sort by ID.  This ensures that relative\n         * ordering of multiple sites at the same position is maintained; the\n         * redundant sites will get compacted down by clean_tables(), but in the\n         * meantime if the order of the redundant sites changes it will cause the\n         * sort order of mutations to be corrupted, as the mutations will follow\n         * their sites. 
*/\n        ret = (ia->id > ib->id) - (ia->id < ib->id);\n    }\n    return ret;\n}\n\nstatic int\ncmp_mutation(const void *a, const void *b)\n{\n    const mutation_sort_t *ia = (const mutation_sort_t *) a;\n    const mutation_sort_t *ib = (const mutation_sort_t *) b;\n    /* Compare mutations by site */\n    int ret = (ia->mut.site > ib->mut.site) - (ia->mut.site < ib->mut.site);\n\n    /* Within a particular site sort by time if known */\n    if (ret == 0 && !tsk_is_unknown_time(ia->mut.time)\n        && !tsk_is_unknown_time(ib->mut.time)) {\n        ret = (ia->mut.time < ib->mut.time) - (ia->mut.time > ib->mut.time);\n    }\n    /* Or node times when mutation times are unknown or equal */\n    if (ret == 0) {\n        ret = (ia->node_time < ib->node_time) - (ia->node_time > ib->node_time);\n    }\n    /* If node times are equal, sort by number of descendants */\n    if (ret == 0) {\n        ret = (ia->num_descendants < ib->num_descendants)\n              - (ia->num_descendants > ib->num_descendants);\n    }\n    /* If number of descendants are equal, sort by node */\n    if (ret == 0) {\n        ret = (ia->mut.node > ib->mut.node) - (ia->mut.node < ib->mut.node);\n    }\n    /* Final tiebreaker: ID */\n    if (ret == 0) {\n        ret = (ia->mut.id > ib->mut.id) - (ia->mut.id < ib->mut.id);\n    }\n    return ret;\n}\n\nstatic int\ncmp_individual_canonical(const void *a, const void *b)\n{\n    const individual_canonical_sort_t *ia = (const individual_canonical_sort_t *) a;\n    const individual_canonical_sort_t *ib = (const individual_canonical_sort_t *) b;\n    int ret = (ia->num_descendants < ib->num_descendants)\n              - (ia->num_descendants > ib->num_descendants);\n    if (ret == 0) {\n        ret = (ia->first_node > ib->first_node) - (ia->first_node < ib->first_node);\n    }\n    if (ret == 0) {\n        ret = (ia->ind.id > ib->ind.id) - (ia->ind.id < ib->ind.id);\n    }\n    return ret;\n}\n\nstatic int\ncmp_edge(const void *a, const void *b)\n{\n  
  const edge_sort_t *ca = (const edge_sort_t *) a;\n    const edge_sort_t *cb = (const edge_sort_t *) b;\n\n    int ret = (ca->time > cb->time) - (ca->time < cb->time);\n    /* If time values are equal, sort by the parent node */\n    if (ret == 0) {\n        ret = (ca->parent > cb->parent) - (ca->parent < cb->parent);\n        /* If the parent nodes are equal, sort by the child ID. */\n        if (ret == 0) {\n            ret = (ca->child > cb->child) - (ca->child < cb->child);\n            /* If the child nodes are equal, sort by the left coordinate. */\n            if (ret == 0) {\n                ret = (ca->left > cb->left) - (ca->left < cb->left);\n            }\n        }\n    }\n    return ret;\n}\n\nstatic int\ncmp_migration(const void *a, const void *b)\n{\n    const migration_sort_t *ca = (const migration_sort_t *) a;\n    const migration_sort_t *cb = (const migration_sort_t *) b;\n\n    int ret = (ca->time > cb->time) - (ca->time < cb->time);\n    /* If time values are equal, sort by the source population */\n    if (ret == 0) {\n        ret = (ca->source > cb->source) - (ca->source < cb->source);\n        /* If the source populations are equal, sort by the dest */\n        if (ret == 0) {\n            ret = (ca->dest > cb->dest) - (ca->dest < cb->dest);\n            /* If the dest populations are equal, sort by the left coordinate. 
*/\n            if (ret == 0) {\n                ret = (ca->left > cb->left) - (ca->left < cb->left);\n                /* If everything else is equal, compare by node */\n                if (ret == 0) {\n                    ret = (ca->node > cb->node) - (ca->node < cb->node);\n                }\n            }\n        }\n    }\n    return ret;\n}\n\nstatic int\ntsk_table_sorter_sort_edges(tsk_table_sorter_t *self, tsk_size_t start)\n{\n    int ret = 0;\n    const tsk_edge_table_t *edges = &self->tables->edges;\n    const double *restrict node_time = self->tables->nodes.time;\n    edge_sort_t *e;\n    tsk_size_t j, k, metadata_offset;\n    tsk_size_t n = edges->num_rows - start;\n    edge_sort_t *sorted_edges = tsk_malloc(n * sizeof(*sorted_edges));\n    char *old_metadata = tsk_malloc(edges->metadata_length);\n    bool has_metadata = tsk_edge_table_has_metadata(edges);\n\n    if (sorted_edges == NULL || old_metadata == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    tsk_memcpy(old_metadata, edges->metadata, edges->metadata_length);\n    for (j = 0; j < n; j++) {\n        e = sorted_edges + j;\n        k = start + j;\n        e->left = edges->left[k];\n        e->right = edges->right[k];\n        e->parent = edges->parent[k];\n        e->child = edges->child[k];\n        e->time = node_time[e->parent];\n        if (has_metadata) {\n            e->metadata_offset = edges->metadata_offset[k];\n            e->metadata_length\n                = edges->metadata_offset[k + 1] - edges->metadata_offset[k];\n        }\n    }\n    qsort(sorted_edges, (size_t) n, sizeof(edge_sort_t), cmp_edge);\n    /* Copy the edges back into the table. 
*/\n    metadata_offset = 0;\n    for (j = 0; j < n; j++) {\n        e = sorted_edges + j;\n        k = start + j;\n        edges->left[k] = e->left;\n        edges->right[k] = e->right;\n        edges->parent[k] = e->parent;\n        edges->child[k] = e->child;\n        if (has_metadata) {\n            tsk_memcpy(edges->metadata + metadata_offset,\n                old_metadata + e->metadata_offset, e->metadata_length);\n            edges->metadata_offset[k] = metadata_offset;\n            metadata_offset += e->metadata_length;\n        }\n    }\nout:\n    tsk_safe_free(sorted_edges);\n    tsk_safe_free(old_metadata);\n    return ret;\n}\n\nstatic int\ntsk_table_sorter_sort_migrations(tsk_table_sorter_t *self, tsk_size_t start)\n{\n    int ret = 0;\n    const tsk_migration_table_t *migrations = &self->tables->migrations;\n    migration_sort_t *m;\n    tsk_size_t j, k, metadata_offset;\n    tsk_size_t n = migrations->num_rows - start;\n    migration_sort_t *sorted_migrations = tsk_malloc(n * sizeof(*sorted_migrations));\n    char *old_metadata = tsk_malloc(migrations->metadata_length);\n\n    if (sorted_migrations == NULL || old_metadata == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    tsk_memcpy(old_metadata, migrations->metadata, migrations->metadata_length);\n    for (j = 0; j < n; j++) {\n        m = sorted_migrations + j;\n        k = start + j;\n        m->left = migrations->left[k];\n        m->right = migrations->right[k];\n        m->node = migrations->node[k];\n        m->source = migrations->source[k];\n        m->dest = migrations->dest[k];\n        m->time = migrations->time[k];\n        m->metadata_offset = migrations->metadata_offset[k];\n        m->metadata_length\n            = migrations->metadata_offset[k + 1] - migrations->metadata_offset[k];\n    }\n    qsort(sorted_migrations, (size_t) n, sizeof(migration_sort_t), cmp_migration);\n    /* Copy the migrations back into the table. 
*/\n    metadata_offset = 0;\n    for (j = 0; j < n; j++) {\n        m = sorted_migrations + j;\n        k = start + j;\n        migrations->left[k] = m->left;\n        migrations->right[k] = m->right;\n        migrations->node[k] = m->node;\n        migrations->source[k] = m->source;\n        migrations->dest[k] = m->dest;\n        migrations->time[k] = m->time;\n        tsk_memcpy(migrations->metadata + metadata_offset,\n            old_metadata + m->metadata_offset, m->metadata_length);\n        migrations->metadata_offset[k] = metadata_offset;\n        metadata_offset += m->metadata_length;\n    }\nout:\n    tsk_safe_free(sorted_migrations);\n    tsk_safe_free(old_metadata);\n    return ret;\n}\n\nstatic int\ntsk_table_sorter_sort_sites(tsk_table_sorter_t *self)\n{\n    int ret = 0;\n    tsk_id_t ret_id;\n    tsk_site_table_t *sites = &self->tables->sites;\n    tsk_site_table_t copy;\n    tsk_size_t j;\n    tsk_size_t num_sites = sites->num_rows;\n    tsk_site_t *sorted_sites = tsk_malloc(num_sites * sizeof(*sorted_sites));\n\n    ret = tsk_site_table_copy(sites, &copy, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    if (sorted_sites == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    for (j = 0; j < num_sites; j++) {\n        tsk_site_table_get_row_unsafe(&copy, (tsk_id_t) j, sorted_sites + j);\n    }\n\n    /* Sort the sites by position */\n    qsort(sorted_sites, (size_t) num_sites, sizeof(*sorted_sites), cmp_site);\n\n    /* Build the mapping from old site IDs to new site IDs and copy back into the\n     * table\n     */\n    tsk_site_table_clear(sites);\n    for (j = 0; j < num_sites; j++) {\n        self->site_id_map[sorted_sites[j].id] = (tsk_id_t) j;\n        ret_id = tsk_site_table_add_row(sites, sorted_sites[j].position,\n            sorted_sites[j].ancestral_state, sorted_sites[j].ancestral_state_length,\n            sorted_sites[j].metadata, sorted_sites[j].metadata_length);\n        if (ret_id < 0) 
{\n            ret = (int) ret_id;\n            goto out;\n        }\n    }\n    ret = 0;\nout:\n    tsk_safe_free(sorted_sites);\n    tsk_site_table_free(&copy);\n    return ret;\n}\n\nstatic int\ntsk_table_sorter_sort_mutations(tsk_table_sorter_t *self)\n{\n    int ret = 0;\n    tsk_size_t j;\n    tsk_id_t ret_id, parent, mapped_parent, p;\n    tsk_mutation_table_t *mutations = &self->tables->mutations;\n    tsk_node_table_t *nodes = &self->tables->nodes;\n    tsk_size_t num_mutations = mutations->num_rows;\n    tsk_mutation_table_t copy;\n    mutation_sort_t *sorted_mutations\n        = tsk_malloc(num_mutations * sizeof(*sorted_mutations));\n    tsk_id_t *mutation_id_map = tsk_malloc(num_mutations * sizeof(*mutation_id_map));\n\n    ret = tsk_mutation_table_copy(mutations, &copy, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    if (mutation_id_map == NULL || sorted_mutations == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    /* compute numbers of descendants for each mutation */\n    for (j = 0; j < num_mutations; j++) {\n        sorted_mutations[j].num_descendants = 0;\n    }\n    for (j = 0; j < num_mutations; j++) {\n        p = mutations->parent[j];\n        while (p != TSK_NULL) {\n            sorted_mutations[p].num_descendants += 1;\n            if (sorted_mutations[p].num_descendants > (int) num_mutations) {\n                ret = tsk_trace_error(TSK_ERR_MUTATION_PARENT_INCONSISTENT);\n                goto out;\n            }\n            p = mutations->parent[p];\n        }\n    }\n\n    for (j = 0; j < num_mutations; j++) {\n        tsk_mutation_table_get_row_unsafe(&copy, (tsk_id_t) j, &sorted_mutations[j].mut);\n        sorted_mutations[j].mut.site = self->site_id_map[sorted_mutations[j].mut.site];\n        sorted_mutations[j].node_time = nodes->time[sorted_mutations[j].mut.node];\n    }\n    ret = tsk_mutation_table_clear(mutations);\n    if (ret != 0) {\n        goto out;\n    }\n\n    
qsort(sorted_mutations, (size_t) num_mutations, sizeof(*sorted_mutations),\n        cmp_mutation);\n\n    /* Make a first pass through the sorted mutations to build the ID map. */\n    for (j = 0; j < num_mutations; j++) {\n        mutation_id_map[sorted_mutations[j].mut.id] = (tsk_id_t) j;\n    }\n\n    for (j = 0; j < num_mutations; j++) {\n        mapped_parent = TSK_NULL;\n        parent = sorted_mutations[j].mut.parent;\n        if (parent != TSK_NULL) {\n            mapped_parent = mutation_id_map[parent];\n        }\n        ret_id = tsk_mutation_table_add_row(mutations, sorted_mutations[j].mut.site,\n            sorted_mutations[j].mut.node, mapped_parent, sorted_mutations[j].mut.time,\n            sorted_mutations[j].mut.derived_state,\n            sorted_mutations[j].mut.derived_state_length,\n            sorted_mutations[j].mut.metadata, sorted_mutations[j].mut.metadata_length);\n        if (ret_id < 0) {\n            ret = (int) ret_id;\n            goto out;\n        }\n    }\n    ret = 0;\n\nout:\n    tsk_safe_free(mutation_id_map);\n    tsk_safe_free(sorted_mutations);\n    tsk_mutation_table_free(&copy);\n    return ret;\n}\n\nstatic int\ntsk_individual_table_topological_sort(\n    tsk_individual_table_t *self, tsk_id_t *traversal_order, tsk_size_t *num_descendants)\n{\n    int ret = 0;\n    tsk_id_t i, j, p;\n    tsk_individual_t individual;\n    tsk_size_t num_individuals = self->num_rows;\n    tsk_size_t current_todo = 0;\n    tsk_size_t todo_insertion_point = 0;\n    tsk_size_t *incoming_edge_count\n        = tsk_malloc(num_individuals * sizeof(*incoming_edge_count));\n    bool count_descendants = (num_descendants != NULL);\n\n    if (incoming_edge_count == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    for (i = 0; i < (tsk_id_t) num_individuals; i++) {\n        incoming_edge_count[i] = 0;\n        traversal_order[i] = TSK_NULL;\n        if (count_descendants) {\n            num_descendants[i] = 0;\n  
      }\n    }\n\n    /* First find the set of individuals that have no children by creating\n     * an array of incoming edge counts */\n    for (i = 0; i < (tsk_id_t) self->parents_length; i++) {\n        if (self->parents[i] != TSK_NULL) {\n            incoming_edge_count[self->parents[i]]++;\n        }\n    }\n    /* Use these as the starting points for checking all individuals,\n     * doing this in reverse makes the sort stable */\n    for (i = (tsk_id_t) num_individuals - 1; i >= 0; i--) {\n        if (incoming_edge_count[i] == 0) {\n            traversal_order[todo_insertion_point] = i;\n            todo_insertion_point++;\n        }\n    }\n\n    /* Now process individuals from the set that have no children, updating their\n     * parents' information as we go, and adding their parents to the list if\n     * this was their last child */\n    while (current_todo < todo_insertion_point) {\n        j = traversal_order[current_todo];\n        tsk_individual_table_get_row_unsafe(self, j, &individual);\n        for (i = 0; i < (tsk_id_t) individual.parents_length; i++) {\n            p = individual.parents[i];\n            if (p != TSK_NULL) {\n                incoming_edge_count[p]--;\n                if (count_descendants) {\n                    num_descendants[p] += 1 + num_descendants[j];\n                }\n                if (incoming_edge_count[p] == 0) {\n                    traversal_order[todo_insertion_point] = p;\n                    todo_insertion_point++;\n                }\n            }\n        }\n        current_todo++;\n    }\n\n    /* Any edges left are parts of cycles */\n    for (i = 0; i < (tsk_id_t) num_individuals; i++) {\n        if (incoming_edge_count[i] > 0) {\n            ret = tsk_trace_error(TSK_ERR_INDIVIDUAL_PARENT_CYCLE);\n            goto out;\n        }\n    }\n\nout:\n    tsk_safe_free(incoming_edge_count);\n    return ret;\n}\n\nint\ntsk_table_collection_individual_topological_sort(\n    tsk_table_collection_t *self, 
tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = 0;\n    tsk_id_t i, ret_id;\n    tsk_individual_table_t copy;\n    tsk_individual_t individual;\n    tsk_individual_table_t *individuals = &self->individuals;\n    tsk_node_table_t *nodes = &self->nodes;\n    tsk_size_t num_individuals = individuals->num_rows;\n    tsk_id_t *traversal_order = tsk_malloc(num_individuals * sizeof(*traversal_order));\n    tsk_id_t *new_id_map = tsk_malloc(num_individuals * sizeof(*new_id_map));\n\n    if (new_id_map == NULL || traversal_order == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    tsk_memset(new_id_map, 0xff, num_individuals * sizeof(*new_id_map));\n\n    ret = tsk_individual_table_copy(individuals, &copy, 0);\n    if (ret != 0) {\n        goto out;\n    }\n\n    ret_id = tsk_table_collection_check_integrity(self, 0);\n    if (ret_id != 0) {\n        ret = (int) ret_id;\n        goto out;\n    }\n\n    ret = tsk_individual_table_clear(individuals);\n    if (ret != 0) {\n        goto out;\n    }\n\n    ret = tsk_individual_table_topological_sort(&copy, traversal_order, NULL);\n    if (ret != 0) {\n        goto out;\n    }\n\n    /* The sorted individuals are in reverse order */\n    for (i = (tsk_id_t) num_individuals - 1; i >= 0; i--) {\n        tsk_individual_table_get_row_unsafe(&copy, traversal_order[i], &individual);\n        ret_id = tsk_individual_table_add_row(individuals, individual.flags,\n            individual.location, individual.location_length, individual.parents,\n            individual.parents_length, individual.metadata, individual.metadata_length);\n        if (ret_id < 0) {\n            ret = (int) ret_id;\n            goto out;\n        }\n        new_id_map[traversal_order[i]] = ret_id;\n    }\n\n    /* Rewrite the parent ids */\n    for (i = 0; i < (tsk_id_t) individuals->parents_length; i++) {\n        if (individuals->parents[i] != TSK_NULL) {\n            individuals->parents[i] = 
new_id_map[individuals->parents[i]];\n        }\n    }\n    /* Rewrite the node individual ids */\n    for (i = 0; i < (tsk_id_t) nodes->num_rows; i++) {\n        if (nodes->individual[i] != TSK_NULL) {\n            nodes->individual[i] = new_id_map[nodes->individual[i]];\n        }\n    }\n\n    ret = 0;\nout:\n    tsk_safe_free(traversal_order);\n    tsk_safe_free(new_id_map);\n    tsk_individual_table_free(&copy);\n    return ret;\n}\n\nstatic int\ntsk_table_sorter_sort_individuals_canonical(tsk_table_sorter_t *self)\n{\n    int ret = 0;\n    tsk_id_t ret_id, i, j, parent, mapped_parent;\n    tsk_individual_table_t *individuals = &self->tables->individuals;\n    tsk_node_table_t *nodes = &self->tables->nodes;\n    tsk_individual_table_t copy;\n    tsk_size_t num_individuals = individuals->num_rows;\n    individual_canonical_sort_t *sorted_individuals\n        = tsk_malloc(num_individuals * sizeof(*sorted_individuals));\n    tsk_id_t *individual_id_map\n        = tsk_malloc(num_individuals * sizeof(*individual_id_map));\n    tsk_size_t *num_descendants = tsk_malloc(num_individuals * sizeof(*num_descendants));\n    tsk_id_t *traversal_order = tsk_malloc(num_individuals * sizeof(*traversal_order));\n\n    if (individual_id_map == NULL || sorted_individuals == NULL\n        || traversal_order == NULL || num_descendants == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    ret = tsk_individual_table_copy(individuals, &copy, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_individual_table_clear(individuals);\n    if (ret != 0) {\n        goto out;\n    }\n\n    ret = tsk_individual_table_topological_sort(&copy, traversal_order, num_descendants);\n    if (ret != 0) {\n        goto out;\n    }\n\n    for (i = 0; i < (tsk_id_t) num_individuals; i++) {\n        sorted_individuals[i].num_descendants = num_descendants[i];\n        sorted_individuals[i].first_node = (tsk_id_t) nodes->num_rows;\n    }\n\n    /* find 
first referring node */\n    for (j = 0; j < (tsk_id_t) nodes->num_rows; j++) {\n        if (nodes->individual[j] != TSK_NULL) {\n            sorted_individuals[nodes->individual[j]].first_node\n                = TSK_MIN(j, sorted_individuals[nodes->individual[j]].first_node);\n        }\n    }\n\n    for (j = 0; j < (tsk_id_t) num_individuals; j++) {\n        tsk_individual_table_get_row_unsafe(\n            &copy, (tsk_id_t) j, &sorted_individuals[j].ind);\n    }\n\n    qsort(sorted_individuals, (size_t) num_individuals, sizeof(*sorted_individuals),\n        cmp_individual_canonical);\n\n    /* Make a first pass through the sorted individuals to build the ID map. */\n    for (j = 0; j < (tsk_id_t) num_individuals; j++) {\n        individual_id_map[sorted_individuals[j].ind.id] = (tsk_id_t) j;\n    }\n\n    for (i = 0; i < (tsk_id_t) num_individuals; i++) {\n        for (j = 0; j < (tsk_id_t) sorted_individuals[i].ind.parents_length; j++) {\n            parent = sorted_individuals[i].ind.parents[j];\n            if (parent != TSK_NULL) {\n                mapped_parent = individual_id_map[parent];\n                sorted_individuals[i].ind.parents[j] = mapped_parent;\n            }\n        }\n        ret_id = tsk_individual_table_add_row(individuals,\n            sorted_individuals[i].ind.flags, sorted_individuals[i].ind.location,\n            sorted_individuals[i].ind.location_length, sorted_individuals[i].ind.parents,\n            sorted_individuals[i].ind.parents_length, sorted_individuals[i].ind.metadata,\n            sorted_individuals[i].ind.metadata_length);\n        if (ret_id < 0) {\n            ret = (int) ret_id;\n            goto out;\n        }\n    }\n    ret = 0;\n\n    /* remap individuals in the node table */\n    for (i = 0; i < (tsk_id_t) nodes->num_rows; i++) {\n        j = nodes->individual[i];\n        if (j != TSK_NULL) {\n            nodes->individual[i] = individual_id_map[j];\n        }\n    }\n\nout:\n    
tsk_safe_free(sorted_individuals);\n    tsk_safe_free(individual_id_map);\n    tsk_safe_free(traversal_order);\n    tsk_safe_free(num_descendants);\n    tsk_individual_table_free(&copy);\n    return ret;\n}\n\nint\ntsk_table_sorter_run(tsk_table_sorter_t *self, const tsk_bookmark_t *start)\n{\n    int ret = 0;\n    tsk_size_t edge_start = 0;\n    tsk_size_t migration_start = 0;\n    bool skip_sites = false;\n    bool skip_individuals = false;\n\n    if (start != NULL) {\n        if (start->edges > self->tables->edges.num_rows) {\n            ret = tsk_trace_error(TSK_ERR_EDGE_OUT_OF_BOUNDS);\n            goto out;\n        }\n        edge_start = start->edges;\n        if (start->migrations > self->tables->migrations.num_rows) {\n            ret = tsk_trace_error(TSK_ERR_MIGRATION_OUT_OF_BOUNDS);\n            goto out;\n        }\n        migration_start = start->migrations;\n\n        /* We only allow sites and mutations to be specified as a way to\n         * skip sorting them entirely. 
Both sites and mutations must be\n         * equal to the number of rows */\n        if (start->sites == self->tables->sites.num_rows\n            && start->mutations == self->tables->mutations.num_rows) {\n            skip_sites = true;\n        } else if (start->sites != 0 || start->mutations != 0) {\n            ret = tsk_trace_error(TSK_ERR_SORT_OFFSET_NOT_SUPPORTED);\n            goto out;\n        }\n    }\n    /* The indexes will be invalidated, so drop them */\n    ret = tsk_table_collection_drop_index(self->tables, 0);\n    if (ret != 0) {\n        goto out;\n    }\n\n    if (self->sort_edges != NULL) {\n        ret = self->sort_edges(self, edge_start);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    /* Avoid calling sort_migrations in the common case when it's a no-op */\n    if (self->tables->migrations.num_rows > 0) {\n        ret = tsk_table_sorter_sort_migrations(self, migration_start);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    if (!skip_sites) {\n        ret = tsk_table_sorter_sort_sites(self);\n        if (ret != 0) {\n            goto out;\n        }\n        ret = self->sort_mutations(self);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    if (!skip_individuals && self->sort_individuals != NULL) {\n        ret = self->sort_individuals(self);\n        if (ret != 0) {\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nint\ntsk_table_sorter_init(\n    tsk_table_sorter_t *self, tsk_table_collection_t *tables, tsk_flags_t options)\n{\n    int ret = 0;\n    tsk_id_t ret_id;\n\n    tsk_memset(self, 0, sizeof(tsk_table_sorter_t));\n    if (!(options & TSK_NO_CHECK_INTEGRITY)) {\n        ret_id = tsk_table_collection_check_integrity(tables, 0);\n        if (ret_id != 0) {\n            ret = (int) ret_id;\n            goto out;\n        }\n    }\n    self->tables = tables;\n\n    self->site_id_map = tsk_malloc(self->tables->sites.num_rows * sizeof(tsk_id_t));\n    
if (self->site_id_map == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    /* Set the sort_edges and sort_mutations methods to the default. */\n    self->sort_edges = tsk_table_sorter_sort_edges;\n    self->sort_mutations = tsk_table_sorter_sort_mutations;\n    /* Default sort doesn't touch individuals */\n    self->sort_individuals = NULL;\nout:\n    return ret;\n}\n\nint\ntsk_table_sorter_free(tsk_table_sorter_t *self)\n{\n    tsk_safe_free(self->site_id_map);\n    return 0;\n}\n\n/*************************\n * segment overlapper\n *************************/\n\ntypedef struct _interval_list_t {\n    double left;\n    double right;\n    struct _interval_list_t *next;\n} interval_list_t;\n\ntypedef struct _mutation_id_list_t {\n    tsk_id_t mutation;\n    struct _mutation_id_list_t *next;\n} mutation_id_list_t;\n\ntypedef struct _tsk_segment_t {\n    double left;\n    double right;\n    struct _tsk_segment_t *next;\n    tsk_id_t node;\n} tsk_segment_t;\n\n/* segment overlap finding algorithm */\ntypedef struct {\n    /* The input segments. This buffer is sorted by the algorithm and we also\n     * assume that there is space for an extra element at the end */\n    tsk_segment_t *segments;\n    tsk_size_t num_segments;\n    tsk_size_t index;\n    tsk_size_t num_overlapping;\n    double left;\n    double right;\n    /* Output buffer */\n    tsk_size_t max_overlapping;\n    tsk_segment_t **overlapping;\n} segment_overlapper_t;\n\ntypedef struct {\n    tsk_size_t num_samples;\n    tsk_flags_t options;\n    tsk_table_collection_t *tables;\n    /* Keep a copy of the input tables */\n    tsk_table_collection_t input_tables;\n    /* State for topology */\n    tsk_segment_t **ancestor_map_head;\n    tsk_segment_t **ancestor_map_tail;\n    /* Mapping of input node IDs to output node IDs. 
*/\n    tsk_id_t *node_id_map;\n    bool *is_sample;\n    /* Segments for a particular parent that are processed together */\n    tsk_segment_t *segment_queue;\n    tsk_size_t segment_queue_size;\n    tsk_size_t max_segment_queue_size;\n    segment_overlapper_t segment_overlapper;\n    tsk_blkalloc_t segment_heap;\n    /* Buffer for output edges. For each child we keep a linked list of\n     * intervals, and also store the actual children that have been buffered. */\n    tsk_blkalloc_t interval_list_heap;\n    interval_list_t **child_edge_map_head;\n    interval_list_t **child_edge_map_tail;\n    tsk_id_t *buffered_children;\n    tsk_size_t num_buffered_children;\n    /* For each mutation, map its output node. */\n    tsk_id_t *mutation_node_map;\n    /* Map of input nodes to the list of input mutation IDs */\n    mutation_id_list_t **node_mutation_list_map_head;\n    mutation_id_list_t **node_mutation_list_map_tail;\n    mutation_id_list_t *node_mutation_list_mem;\n    /* When reducing topology, we need a map positions to their corresponding\n     * sites.*/\n    double *position_lookup;\n    int64_t edge_sort_offset;\n} simplifier_t;\n\nstatic int\ncmp_segment(const void *a, const void *b)\n{\n    const tsk_segment_t *ia = (const tsk_segment_t *) a;\n    const tsk_segment_t *ib = (const tsk_segment_t *) b;\n    int ret = (ia->left > ib->left) - (ia->left < ib->left);\n    /* Break ties using the node */\n    if (ret == 0) {\n        ret = (ia->node > ib->node) - (ia->node < ib->node);\n    }\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\nsegment_overlapper_alloc(segment_overlapper_t *self)\n{\n    int ret = 0;\n\n    tsk_memset(self, 0, sizeof(*self));\n    self->max_overlapping = 8; /* Making sure we call tsk_realloc in tests */\n    self->overlapping = tsk_malloc(self->max_overlapping * sizeof(*self->overlapping));\n    if (self->overlapping == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\nout:\n    return 
ret;\n}\n\nstatic int\nsegment_overlapper_free(segment_overlapper_t *self)\n{\n    tsk_safe_free(self->overlapping);\n    return 0;\n}\n\n/* Initialise the segment overlapper for use. Note that the segments\n * array must have space for num_segments + 1 elements!\n */\nstatic int TSK_WARN_UNUSED\nsegment_overlapper_start(\n    segment_overlapper_t *self, tsk_segment_t *segments, tsk_size_t num_segments)\n{\n    int ret = 0;\n    tsk_segment_t *sentinel;\n    void *p;\n\n    if (self->max_overlapping < num_segments) {\n        self->max_overlapping = num_segments;\n        p = tsk_realloc(\n            self->overlapping, self->max_overlapping * sizeof(*self->overlapping));\n        if (p == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n        self->overlapping = p;\n    }\n    self->segments = segments;\n    self->num_segments = num_segments;\n    self->index = 0;\n    self->num_overlapping = 0;\n    self->left = 0;\n    self->right = DBL_MAX;\n\n    /* Sort the segments in the buffer by left coordinate */\n    qsort(\n        self->segments, (size_t) self->num_segments, sizeof(tsk_segment_t), cmp_segment);\n    /* NOTE! We are assuming that there's space for another element on the end\n     * here. This is to insert a sentinel which simplifies the logic. 
*/\n    sentinel = self->segments + self->num_segments;\n    sentinel->left = DBL_MAX;\nout:\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\nsegment_overlapper_next(segment_overlapper_t *self, double *left, double *right,\n    tsk_segment_t ***overlapping, tsk_size_t *num_overlapping)\n{\n    int ret = 0;\n    tsk_size_t j, k;\n    tsk_size_t n = self->num_segments;\n    tsk_segment_t *S = self->segments;\n\n    if (self->index < n) {\n        self->left = self->right;\n        /* Remove any elements of X with right <= left */\n        k = 0;\n        for (j = 0; j < self->num_overlapping; j++) {\n            if (self->overlapping[j]->right > self->left) {\n                self->overlapping[k] = self->overlapping[j];\n                k++;\n            }\n        }\n        self->num_overlapping = k;\n        if (k == 0) {\n            self->left = S[self->index].left;\n        }\n        while (self->index < n && S[self->index].left == self->left) {\n            tsk_bug_assert(self->num_overlapping < self->max_overlapping);\n            self->overlapping[self->num_overlapping] = &S[self->index];\n            self->num_overlapping++;\n            self->index++;\n        }\n        self->index--;\n        self->right = S[self->index + 1].left;\n        for (j = 0; j < self->num_overlapping; j++) {\n            self->right = TSK_MIN(self->right, self->overlapping[j]->right);\n        }\n        tsk_bug_assert(self->left < self->right);\n        self->index++;\n        ret = 1;\n    } else {\n        self->left = self->right;\n        self->right = DBL_MAX;\n        k = 0;\n        for (j = 0; j < self->num_overlapping; j++) {\n            if (self->overlapping[j]->right > self->left) {\n                self->right = TSK_MIN(self->right, self->overlapping[j]->right);\n                self->overlapping[k] = self->overlapping[j];\n                k++;\n            }\n        }\n        self->num_overlapping = k;\n        if (k > 0) {\n            ret = 1;\n        }\n 
   }\n\n    *left = self->left;\n    *right = self->right;\n    *overlapping = self->overlapping;\n    *num_overlapping = self->num_overlapping;\n    return ret;\n}\n\nstatic int\ncmp_node_id(const void *a, const void *b)\n{\n    const tsk_id_t *ia = (const tsk_id_t *) a;\n    const tsk_id_t *ib = (const tsk_id_t *) b;\n    return (*ia > *ib) - (*ia < *ib);\n}\n\n/*************************\n * Ancestor mapper\n *************************/\n\n/* NOTE: this struct shares a lot with the simplifier_t, mostly in\n * terms of infrastructure for managing the list of intervals, saving\n * edges etc. We should try to abstract the common functionality out\n * into a separate class, which handles this.\n */\ntypedef struct {\n    tsk_id_t *samples;\n    tsk_size_t num_samples;\n    tsk_id_t *ancestors;\n    tsk_size_t num_ancestors;\n    tsk_table_collection_t *tables;\n    tsk_edge_table_t *result;\n    tsk_segment_t **ancestor_map_head;\n    tsk_segment_t **ancestor_map_tail;\n    bool *is_sample;\n    bool *is_ancestor;\n    tsk_segment_t *segment_queue;\n    tsk_size_t segment_queue_size;\n    tsk_size_t max_segment_queue_size;\n    segment_overlapper_t segment_overlapper;\n    tsk_blkalloc_t segment_heap;\n    tsk_blkalloc_t interval_list_heap;\n    interval_list_t **child_edge_map_head;\n    interval_list_t **child_edge_map_tail;\n    tsk_id_t *buffered_children;\n    tsk_size_t num_buffered_children;\n    double sequence_length;\n    double oldest_node_time;\n} ancestor_mapper_t;\n\nstatic tsk_segment_t *TSK_WARN_UNUSED\nancestor_mapper_alloc_segment(\n    ancestor_mapper_t *self, double left, double right, tsk_id_t node)\n{\n    tsk_segment_t *seg = NULL;\n\n    seg = tsk_blkalloc_get(&self->segment_heap, sizeof(*seg));\n    if (seg == NULL) {\n        goto out;\n    }\n    seg->next = NULL;\n    seg->left = left;\n    seg->right = right;\n    seg->node = node;\nout:\n    return seg;\n}\n\nstatic interval_list_t 
*TSK_WARN_UNUSED\nancestor_mapper_alloc_interval_list(ancestor_mapper_t *self, double left, double right)\n{\n    interval_list_t *x = NULL;\n\n    x = tsk_blkalloc_get(&self->interval_list_heap, sizeof(*x));\n    if (x == NULL) {\n        goto out;\n    }\n    x->next = NULL;\n    x->left = left;\n    x->right = right;\nout:\n    return x;\n}\n\nstatic int\nancestor_mapper_flush_edges(\n    ancestor_mapper_t *self, tsk_id_t parent, tsk_size_t *ret_num_edges)\n{\n    int ret = 0;\n    tsk_id_t ret_id;\n    tsk_size_t j;\n    tsk_id_t child;\n    interval_list_t *x;\n    tsk_size_t num_edges = 0;\n\n    qsort(self->buffered_children, (size_t) self->num_buffered_children,\n        sizeof(tsk_id_t), cmp_node_id);\n    for (j = 0; j < self->num_buffered_children; j++) {\n        child = self->buffered_children[j];\n        for (x = self->child_edge_map_head[child]; x != NULL; x = x->next) {\n            ret_id = tsk_edge_table_add_row(\n                self->result, x->left, x->right, parent, child, NULL, 0);\n            if (ret_id < 0) {\n                ret = (int) ret_id;\n                goto out;\n            }\n            num_edges++;\n        }\n        self->child_edge_map_head[child] = NULL;\n        self->child_edge_map_tail[child] = NULL;\n    }\n    self->num_buffered_children = 0;\n    *ret_num_edges = num_edges;\n    ret = tsk_blkalloc_reset(&self->interval_list_heap);\nout:\n    return ret;\n}\n\nstatic int\nancestor_mapper_record_edge(\n    ancestor_mapper_t *self, double left, double right, tsk_id_t child)\n{\n    int ret = 0;\n    interval_list_t *tail, *x;\n\n    tail = self->child_edge_map_tail[child];\n    if (tail == NULL) {\n        tsk_bug_assert(self->num_buffered_children < self->tables->nodes.num_rows);\n        self->buffered_children[self->num_buffered_children] = child;\n        self->num_buffered_children++;\n        x = ancestor_mapper_alloc_interval_list(self, left, right);\n        if (x == NULL) {\n            ret = 
tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n        self->child_edge_map_head[child] = x;\n        self->child_edge_map_tail[child] = x;\n    } else {\n        if (tail->right == left) {\n            tail->right = right;\n        } else {\n            x = ancestor_mapper_alloc_interval_list(self, left, right);\n            if (x == NULL) {\n                ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n                goto out;\n            }\n            tail->next = x;\n            self->child_edge_map_tail[child] = x;\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\nancestor_mapper_add_ancestry(ancestor_mapper_t *self, tsk_id_t input_id, double left,\n    double right, tsk_id_t output_id)\n{\n    int ret = 0;\n    tsk_segment_t *tail = self->ancestor_map_tail[input_id];\n    tsk_segment_t *x;\n\n    tsk_bug_assert(left < right);\n    if (tail == NULL) {\n        x = ancestor_mapper_alloc_segment(self, left, right, output_id);\n        if (x == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n        self->ancestor_map_head[input_id] = x;\n        self->ancestor_map_tail[input_id] = x;\n    } else {\n        if (tail->right == left && tail->node == output_id) {\n            tail->right = right;\n        } else {\n            x = ancestor_mapper_alloc_segment(self, left, right, output_id);\n            if (x == NULL) {\n                ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n                goto out;\n            }\n            tail->next = x;\n            self->ancestor_map_tail[input_id] = x;\n        }\n    }\nout:\n    return ret;\n}\n\nstatic void\nancestor_mapper_find_oldest_node(ancestor_mapper_t *self)\n{\n    const double *node_time = self->tables->nodes.time;\n    tsk_size_t j;\n    double max_time = -1;\n\n    for (j = 0; j < self->num_ancestors; j++) {\n        max_time = TSK_MAX(max_time, node_time[self->ancestors[j]]);\n    }\n    for (j = 0; j < 
self->num_samples; j++) {\n        max_time = TSK_MAX(max_time, node_time[self->samples[j]]);\n    }\n\n    self->oldest_node_time = max_time;\n}\n\nstatic int\nancestor_mapper_init_samples(ancestor_mapper_t *self, tsk_id_t *samples)\n{\n    int ret = 0;\n    tsk_size_t j;\n\n    /* Go through the samples to check for errors. */\n    for (j = 0; j < self->num_samples; j++) {\n        if (samples[j] < 0 || samples[j] > (tsk_id_t) self->tables->nodes.num_rows) {\n            ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);\n            goto out;\n        }\n        if (self->is_sample[samples[j]]) {\n            ret = tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE);\n            goto out;\n        }\n        self->is_sample[samples[j]] = true;\n        ret = ancestor_mapper_add_ancestry(\n            self, samples[j], 0, self->tables->sequence_length, samples[j]);\n        if (ret != 0) {\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int\nancestor_mapper_init_ancestors(ancestor_mapper_t *self, tsk_id_t *ancestors)\n{\n    int ret = 0;\n    tsk_size_t j;\n\n    /* Go through the ancestors to check for errors. 
*/\n    for (j = 0; j < self->num_ancestors; j++) {\n        if (ancestors[j] < 0 || ancestors[j] > (tsk_id_t) self->tables->nodes.num_rows) {\n            ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);\n            goto out;\n        }\n        if (self->is_ancestor[ancestors[j]]) {\n            ret = tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE);\n            goto out;\n        }\n        self->is_ancestor[ancestors[j]] = true;\n    }\nout:\n    return ret;\n}\n\nstatic int\nancestor_mapper_init(ancestor_mapper_t *self, tsk_id_t *samples, tsk_size_t num_samples,\n    tsk_id_t *ancestors, tsk_size_t num_ancestors, tsk_table_collection_t *tables,\n    tsk_edge_table_t *result)\n{\n    int ret = 0;\n    tsk_size_t num_nodes;\n\n    tsk_memset(self, 0, sizeof(ancestor_mapper_t));\n    self->num_samples = num_samples;\n    self->num_ancestors = num_ancestors;\n    self->samples = samples;\n    self->ancestors = ancestors;\n    self->tables = tables;\n    self->result = result;\n    self->sequence_length = self->tables->sequence_length;\n\n    if (samples == NULL || num_samples == 0 || ancestors == NULL || num_ancestors == 0) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n\n    /* Allocate the heaps used for small objects. Assuming 8K is a good chunk size\n     */\n    ret = tsk_blkalloc_init(&self->segment_heap, 8192);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_blkalloc_init(&self->interval_list_heap, 8192);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = segment_overlapper_alloc(&self->segment_overlapper);\n    if (ret != 0) {\n        goto out;\n    }\n\n    num_nodes = tables->nodes.num_rows;\n    /* Make the maps and set the initial state */\n    self->ancestor_map_head = tsk_calloc(num_nodes, sizeof(tsk_segment_t *));\n    self->ancestor_map_tail = tsk_calloc(num_nodes, sizeof(tsk_segment_t *));\n    self->child_edge_map_head = tsk_calloc(num_nodes, sizeof(interval_list_t *));\n    
self->child_edge_map_tail = tsk_calloc(num_nodes, sizeof(interval_list_t *));\n    self->buffered_children = tsk_malloc(num_nodes * sizeof(tsk_id_t));\n    self->is_sample = tsk_calloc(num_nodes, sizeof(bool));\n    self->is_ancestor = tsk_calloc(num_nodes, sizeof(bool));\n    self->max_segment_queue_size = 64;\n    self->segment_queue\n        = tsk_malloc(self->max_segment_queue_size * sizeof(tsk_segment_t));\n    if (self->ancestor_map_head == NULL || self->ancestor_map_tail == NULL\n        || self->child_edge_map_head == NULL || self->child_edge_map_tail == NULL\n        || self->is_sample == NULL || self->is_ancestor == NULL\n        || self->segment_queue == NULL || self->buffered_children == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    // Clear memory.\n    ret = ancestor_mapper_init_samples(self, samples);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = ancestor_mapper_init_ancestors(self, ancestors);\n    if (ret != 0) {\n        goto out;\n    }\n    ancestor_mapper_find_oldest_node(self);\n    ret = tsk_edge_table_clear(self->result);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nstatic int\nancestor_mapper_free(ancestor_mapper_t *self)\n{\n    tsk_blkalloc_free(&self->segment_heap);\n    tsk_blkalloc_free(&self->interval_list_heap);\n    segment_overlapper_free(&self->segment_overlapper);\n    tsk_safe_free(self->ancestor_map_head);\n    tsk_safe_free(self->ancestor_map_tail);\n    tsk_safe_free(self->child_edge_map_head);\n    tsk_safe_free(self->child_edge_map_tail);\n    tsk_safe_free(self->segment_queue);\n    tsk_safe_free(self->is_sample);\n    tsk_safe_free(self->is_ancestor);\n    tsk_safe_free(self->buffered_children);\n    return 0;\n}\n\nstatic int TSK_WARN_UNUSED\nancestor_mapper_enqueue_segment(\n    ancestor_mapper_t *self, double left, double right, tsk_id_t node)\n{\n    int ret = 0;\n    tsk_segment_t *seg;\n    void *p;\n\n    tsk_bug_assert(left < 
right);\n    /* Make sure we always have room for one more segment in the queue so we\n     * can put a tail sentinel on it */\n    if (self->segment_queue_size == self->max_segment_queue_size - 1) {\n        self->max_segment_queue_size *= 2;\n        p = tsk_realloc(self->segment_queue,\n            self->max_segment_queue_size * sizeof(*self->segment_queue));\n        if (p == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n        self->segment_queue = p;\n    }\n    seg = self->segment_queue + self->segment_queue_size;\n    seg->left = left;\n    seg->right = right;\n    seg->node = node;\n    self->segment_queue_size++;\nout:\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\nancestor_mapper_merge_ancestors(ancestor_mapper_t *self, tsk_id_t input_id)\n{\n    int ret = 0;\n    tsk_segment_t **X, *x;\n    tsk_size_t j, num_overlapping, num_flushed_edges;\n    double left, right, prev_right;\n    bool is_sample = self->is_sample[input_id];\n    bool is_ancestor = self->is_ancestor[input_id];\n\n    if (is_sample) {\n        /* Free up the existing ancestry mapping. 
*/\n        x = self->ancestor_map_tail[input_id];\n        tsk_bug_assert(x->left == 0 && x->right == self->sequence_length);\n        self->ancestor_map_head[input_id] = NULL;\n        self->ancestor_map_tail[input_id] = NULL;\n    }\n    ret = segment_overlapper_start(\n        &self->segment_overlapper, self->segment_queue, self->segment_queue_size);\n    if (ret != 0) {\n        goto out;\n    }\n\n    prev_right = 0;\n    while ((ret = segment_overlapper_next(\n                &self->segment_overlapper, &left, &right, &X, &num_overlapping))\n           == 1) {\n        tsk_bug_assert(left < right);\n        tsk_bug_assert(num_overlapping > 0);\n        if (is_ancestor || is_sample) {\n            for (j = 0; j < num_overlapping; j++) {\n                ret = ancestor_mapper_record_edge(self, left, right, X[j]->node);\n                if (ret != 0) {\n                    goto out;\n                }\n            }\n            ret = ancestor_mapper_add_ancestry(self, input_id, left, right, input_id);\n            if (ret != 0) {\n                goto out;\n            }\n            if (is_sample && left != prev_right) {\n                /* Fill in any gaps in ancestry for the sample */\n                ret = ancestor_mapper_add_ancestry(\n                    self, input_id, prev_right, left, input_id);\n                if (ret != 0) {\n                    goto out;\n                }\n            }\n        } else {\n            for (j = 0; j < num_overlapping; j++) {\n                ret = ancestor_mapper_add_ancestry(\n                    self, input_id, left, right, X[j]->node);\n                if (ret != 0) {\n                    goto out;\n                }\n            }\n        }\n        prev_right = right;\n    }\n    if (is_sample && prev_right != self->tables->sequence_length) {\n        /* If a trailing gap exists in the sample ancestry, fill it in. 
*/\n        ret = ancestor_mapper_add_ancestry(\n            self, input_id, prev_right, self->sequence_length, input_id);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    if (input_id != TSK_NULL) {\n        ret = ancestor_mapper_flush_edges(self, input_id, &num_flushed_edges);\n        if (ret != 0) {\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\nancestor_mapper_process_parent_edges(\n    ancestor_mapper_t *self, tsk_id_t parent, tsk_size_t start, tsk_size_t end)\n{\n    int ret = 0;\n    tsk_size_t j;\n    tsk_segment_t *x;\n    const tsk_edge_table_t *input_edges = &self->tables->edges;\n    tsk_id_t child;\n    double left, right;\n\n    /* Go through the edges and queue up ancestry segments for processing. */\n    self->segment_queue_size = 0;\n    for (j = start; j < end; j++) {\n        tsk_bug_assert(parent == input_edges->parent[j]);\n        child = input_edges->child[j];\n        left = input_edges->left[j];\n        right = input_edges->right[j];\n        // printf(\"C: %i, L: %f, R: %f\\n\", child, left, right);\n        for (x = self->ancestor_map_head[child]; x != NULL; x = x->next) {\n            if (x->right > left && right > x->left) {\n                ret = ancestor_mapper_enqueue_segment(\n                    self, TSK_MAX(x->left, left), TSK_MIN(x->right, right), x->node);\n                if (ret != 0) {\n                    goto out;\n                }\n            }\n        }\n    }\n    // We can now merge the ancestral segments for the parent\n    ret = ancestor_mapper_merge_ancestors(self, parent);\n    if (ret != 0) {\n        goto out;\n    }\n\nout:\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\nancestor_mapper_run(ancestor_mapper_t *self)\n{\n    int ret = 0;\n    tsk_size_t j, start;\n    tsk_id_t parent, current_parent;\n    const tsk_edge_table_t *input_edges = &self->tables->edges;\n    tsk_size_t num_edges = input_edges->num_rows;\n    const double 
*node_time = self->tables->nodes.time;\n    bool early_exit = false;\n\n    if (num_edges > 0) {\n        start = 0;\n        current_parent = input_edges->parent[0];\n        for (j = 0; j < num_edges; j++) {\n            parent = input_edges->parent[j];\n            if (parent != current_parent) {\n                ret = ancestor_mapper_process_parent_edges(\n                    self, current_parent, start, j);\n                if (ret != 0) {\n                    goto out;\n                }\n                start = j;\n                current_parent = parent;\n                if (node_time[current_parent] > self->oldest_node_time) {\n                    early_exit = true;\n                    break;\n                }\n            }\n        }\n        if (!early_exit) {\n            /* If we didn't break out of the loop early, we need to still process\n             * the final parent */\n            ret = ancestor_mapper_process_parent_edges(self, current_parent, start, j);\n            if (ret != 0) {\n                goto out;\n            }\n        }\n    }\nout:\n    return ret;\n}\n\n/*************************\n * IBD Segments\n *************************/\n\n/* This maps two positive integers 0 <= a < b < N into the set\n * {0, ..., N^2}. For us to overflow an int64, N would need to\n * be > sqrt(2^63), ~3 * 10^9. The maximum value for a 32bit int\n * is ~2 * 10^9, so this can't happen here, however it is\n * theoretically possible with 64 bit IDs. It would require\n * a *very* large node table --- assuming 24 bytes per row\n * it would be at least 67GiB. 
To make sure this eventuality\n * doesn't happen, we have a tsk_bug_assert in the\n * tsk_identity_segments_init.\n */\nstatic inline int64_t\npair_to_integer(tsk_id_t a, tsk_id_t b, tsk_size_t N)\n{\n    tsk_id_t tmp;\n    if (a > b) {\n        tmp = a;\n        a = b;\n        b = tmp;\n    }\n    return ((int64_t) a) * (int64_t) N + (int64_t) b;\n}\n\nstatic inline void\ninteger_to_pair(int64_t index, tsk_size_t N, tsk_id_t *a, tsk_id_t *b)\n{\n    *a = (tsk_id_t) (index / (int64_t) N);\n    *b = (tsk_id_t) (index % (int64_t) N);\n}\n\nstatic int64_t\ntsk_identity_segments_get_key(\n    const tsk_identity_segments_t *self, tsk_id_t a, tsk_id_t b)\n{\n    int64_t ret;\n    tsk_id_t N = (tsk_id_t) self->num_nodes;\n\n    if (a < 0 || b < 0 || a >= N || b >= N) {\n        ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);\n        goto out;\n    }\n    if (a == b) {\n        ret = tsk_trace_error(TSK_ERR_SAME_NODES_IN_PAIR);\n        goto out;\n    }\n    ret = pair_to_integer(a, b, self->num_nodes);\nout:\n    return ret;\n}\n\nstatic tsk_identity_segment_t *TSK_WARN_UNUSED\ntsk_identity_segments_alloc_segment(\n    tsk_identity_segments_t *self, double left, double right, tsk_id_t node)\n{\n    tsk_identity_segment_t *seg = tsk_blkalloc_get(&self->heap, sizeof(*seg));\n    if (seg == NULL) {\n        goto out;\n    }\n    tsk_bug_assert(left < right);\n    tsk_bug_assert(node >= 0 && node < (tsk_id_t) self->num_nodes);\n\n    seg->next = NULL;\n    seg->left = left;\n    seg->right = right;\n    seg->node = node;\nout:\n    return seg;\n}\n\nstatic tsk_avl_node_int_t *\ntsk_identity_segments_alloc_new_pair(tsk_identity_segments_t *self, int64_t key)\n{\n    tsk_avl_node_int_t *avl_node = tsk_blkalloc_get(&self->heap, sizeof(*avl_node));\n    tsk_identity_segment_list_t *list = tsk_blkalloc_get(&self->heap, sizeof(*list));\n\n    if (avl_node == NULL || list == NULL) {\n        return NULL;\n    }\n    avl_node->key = key;\n    avl_node->value = list;\n    
memset(list, 0, sizeof(*list));\n    return avl_node;\n}\n\n/* Deliberately not making this a part of the public interface for now,\n * so we don't have to worry about the signature */\nstatic int\ntsk_identity_segments_init(\n    tsk_identity_segments_t *self, tsk_size_t num_nodes, tsk_flags_t options)\n{\n    int ret = 0;\n    /* Make sure we don't overflow in the ID mapping. See the comments in pair_to_integer\n     * for details. */\n    double max_num_nodes = sqrt(1ULL << 63);\n    tsk_bug_assert((double) num_nodes < max_num_nodes);\n\n    memset(self, 0, sizeof(*self));\n    self->num_nodes = num_nodes;\n    /* Storing segments implies storing pairs */\n    if (options & TSK_IBD_STORE_SEGMENTS) {\n        self->store_pairs = true;\n        self->store_segments = true;\n    } else if (options & TSK_IBD_STORE_PAIRS) {\n        self->store_pairs = true;\n    }\n    ret = tsk_avl_tree_int_init(&self->pair_map);\n    if (ret != 0) {\n        goto out;\n    }\n    /* Allocate heap memory in 1MiB blocks */\n    ret = tsk_blkalloc_init(&self->heap, 1024 * 1024);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nvoid\ntsk_identity_segments_print_state(tsk_identity_segments_t *self, FILE *out)\n{\n    tsk_avl_node_int_t **nodes = tsk_malloc(self->pair_map.size * sizeof(*nodes));\n    int64_t key;\n    tsk_identity_segment_list_t *value;\n    tsk_identity_segment_t *seg;\n    tsk_size_t j;\n    tsk_id_t a, b;\n\n    tsk_bug_assert(nodes != NULL);\n\n    fprintf(out, \"===\\nIBD Result\\n===\\n\");\n    fprintf(out, \"total_span     = %f\\n\", self->total_span);\n    fprintf(out, \"num_segments   = %lld\\n\", (unsigned long long) self->num_segments);\n    fprintf(out, \"store_pairs    = %d\\n\", self->store_pairs);\n    fprintf(out, \"store_segments = %d\\n\", self->store_segments);\n    if (self->store_pairs) {\n        fprintf(out, \"num_keys       = %d\\n\", (int) self->pair_map.size);\n        tsk_avl_tree_int_ordered_nodes(&self->pair_map, 
nodes);\n        for (j = 0; j < self->pair_map.size; j++) {\n            key = nodes[j]->key;\n            value = (tsk_identity_segment_list_t *) nodes[j]->value;\n            integer_to_pair(key, self->num_nodes, &a, &b);\n            fprintf(out, \"%lld\\t(%d,%d) n=%d total_span=%f\\t\", (long long) key, (int) a,\n                (int) b, (int) value->num_segments, value->total_span);\n            if (self->store_segments) {\n                for (seg = value->head; seg != NULL; seg = seg->next) {\n                    fprintf(\n                        out, \"(%f, %f)->%d, \", seg->left, seg->right, (int) seg->node);\n                }\n            }\n            fprintf(out, \"\\n\");\n        }\n    }\n    fprintf(out, \"Segment memory\\n\");\n    tsk_blkalloc_print_state(&self->heap, out);\n    tsk_safe_free(nodes);\n}\n\ntsk_size_t\ntsk_identity_segments_get_num_segments(const tsk_identity_segments_t *self)\n{\n    return self->num_segments;\n}\n\ndouble\ntsk_identity_segments_get_total_span(const tsk_identity_segments_t *self)\n{\n    return self->total_span;\n}\n\ntsk_size_t\ntsk_identity_segments_get_num_pairs(const tsk_identity_segments_t *self)\n{\n    return self->pair_map.size;\n}\n\n/* Use an inorder traversal on the AVL tree to get the pairs in order.\n * Recursion is safe here because it's a balanced tree (see the AVL tree\n * code for notes on this).\n */\nstatic int\nget_keys_traverse(tsk_avl_node_int_t *node, int index, tsk_size_t N, tsk_id_t *pairs)\n{\n    tsk_id_t a, b;\n\n    if (node == NULL) {\n        return index;\n    }\n    index = get_keys_traverse(node->llink, index, N, pairs);\n    integer_to_pair(node->key, N, &a, &b);\n    pairs[2 * index] = a;\n    pairs[2 * index + 1] = b;\n    return get_keys_traverse(node->rlink, index + 1, N, pairs);\n}\n\nint\ntsk_identity_segments_get_keys(const tsk_identity_segments_t *self, tsk_id_t *pairs)\n{\n    if (!self->store_pairs) {\n        return TSK_ERR_IBD_PAIRS_NOT_STORED;\n    }\n    
get_keys_traverse(\n        tsk_avl_tree_int_get_root(&self->pair_map), 0, self->num_nodes, pairs);\n    return 0;\n}\n\nstatic int\nget_items_traverse(tsk_avl_node_int_t *node, int index, tsk_size_t N, tsk_id_t *pairs,\n    tsk_identity_segment_list_t **lists)\n{\n    tsk_id_t a, b;\n\n    if (node == NULL) {\n        return index;\n    }\n    index = get_items_traverse(node->llink, index, N, pairs, lists);\n    integer_to_pair(node->key, N, &a, &b);\n    pairs[2 * index] = a;\n    pairs[2 * index + 1] = b;\n    lists[index] = node->value;\n    return get_items_traverse(node->rlink, index + 1, N, pairs, lists);\n}\n\nint\ntsk_identity_segments_get_items(const tsk_identity_segments_t *self, tsk_id_t *pairs,\n    tsk_identity_segment_list_t **lists)\n{\n    if (!self->store_pairs) {\n        return TSK_ERR_IBD_PAIRS_NOT_STORED;\n    }\n    get_items_traverse(\n        tsk_avl_tree_int_get_root(&self->pair_map), 0, self->num_nodes, pairs, lists);\n    return 0;\n}\n\nint\ntsk_identity_segments_free(tsk_identity_segments_t *self)\n{\n    tsk_blkalloc_free(&self->heap);\n    tsk_avl_tree_int_free(&self->pair_map);\n    return 0;\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_identity_segments_update_pair(tsk_identity_segments_t *self, tsk_id_t a, tsk_id_t b,\n    double left, double right, tsk_id_t node)\n{\n    int ret = 0;\n    tsk_identity_segment_t *x;\n    tsk_identity_segment_list_t *list;\n    /* skip the error checking here since this is an internal API */\n    int64_t key = pair_to_integer(a, b, self->num_nodes);\n    tsk_avl_node_int_t *avl_node = tsk_avl_tree_int_search(&self->pair_map, key);\n\n    if (avl_node == NULL) {\n        /* We haven't seen this pair before */\n        avl_node = tsk_identity_segments_alloc_new_pair(self, key);\n        if (avl_node == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n        ret = tsk_avl_tree_int_insert(&self->pair_map, avl_node);\n        tsk_bug_assert(ret == 0);\n    }\n    
list = (tsk_identity_segment_list_t *) avl_node->value;\n    list->num_segments++;\n    list->total_span += right - left;\n    if (self->store_segments) {\n        x = tsk_identity_segments_alloc_segment(self, left, right, node);\n        if (x == NULL) {\n            goto out;\n        }\n        if (list->tail == NULL) {\n            list->head = x;\n            list->tail = x;\n        } else {\n            list->tail->next = x;\n            list->tail = x;\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_identity_segments_add_segment(tsk_identity_segments_t *self, tsk_id_t a, tsk_id_t b,\n    double left, double right, tsk_id_t node)\n{\n    int ret = 0;\n\n    if (self->store_pairs) {\n        ret = tsk_identity_segments_update_pair(self, a, b, left, right, node);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    self->total_span += right - left;\n    self->num_segments++;\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_identity_segments_get(const tsk_identity_segments_t *self, tsk_id_t sample_a,\n    tsk_id_t sample_b, tsk_identity_segment_list_t **ret_list)\n{\n    int ret = 0;\n    int64_t key = tsk_identity_segments_get_key(self, sample_a, sample_b);\n    tsk_avl_node_int_t *avl_node;\n\n    if (key < 0) {\n        ret = (int) key;\n        goto out;\n    }\n    if (!self->store_pairs) {\n        ret = tsk_trace_error(TSK_ERR_IBD_PAIRS_NOT_STORED);\n        goto out;\n    }\n    avl_node = tsk_avl_tree_int_search(&self->pair_map, key);\n    *ret_list = NULL;\n    if (avl_node != NULL) {\n        *ret_list = (tsk_identity_segment_list_t *) avl_node->value;\n    }\nout:\n    return ret;\n}\n\n/*************************\n * IBD finder\n *************************/\n\ntypedef struct {\n    tsk_identity_segments_t *result;\n    double min_span;\n    double max_time;\n    const tsk_table_collection_t *tables;\n    /* Maps nodes to their sample set IDs. 
Input samples map to set 0\n     * in the \"within\" case. */\n    tsk_id_t *sample_set_id;\n    /* True if we're finding IBD between sample sets, false otherwise. */\n    bool finding_between;\n    tsk_segment_t **ancestor_map_head;\n    tsk_segment_t **ancestor_map_tail;\n    tsk_segment_t *segment_queue;\n    tsk_size_t segment_queue_size;\n    tsk_size_t max_segment_queue_size;\n    tsk_blkalloc_t segment_heap;\n} tsk_ibd_finder_t;\n\nstatic tsk_segment_t *TSK_WARN_UNUSED\ntsk_ibd_finder_alloc_segment(\n    tsk_ibd_finder_t *self, double left, double right, tsk_id_t node)\n{\n    tsk_segment_t *seg = NULL;\n\n    seg = tsk_blkalloc_get(&self->segment_heap, sizeof(*seg));\n    if (seg == NULL) {\n        goto out;\n    }\n    seg->next = NULL;\n    seg->left = left;\n    seg->right = right;\n    seg->node = node;\n\nout:\n    return seg;\n}\nstatic int TSK_WARN_UNUSED\ntsk_ibd_finder_add_ancestry(tsk_ibd_finder_t *self, tsk_id_t input_id, double left,\n    double right, tsk_id_t output_id)\n{\n    int ret = 0;\n    tsk_segment_t *tail = self->ancestor_map_tail[input_id];\n    tsk_segment_t *x = NULL;\n\n    tsk_bug_assert(left < right);\n    x = tsk_ibd_finder_alloc_segment(self, left, right, output_id);\n    if (x == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    if (tail == NULL) {\n        self->ancestor_map_head[input_id] = x;\n        self->ancestor_map_tail[input_id] = x;\n    } else {\n        tail->next = x;\n        self->ancestor_map_tail[input_id] = x;\n    }\nout:\n    return ret;\n}\n\nstatic int\ntsk_ibd_finder_init_samples_from_set(\n    tsk_ibd_finder_t *self, const tsk_id_t *samples, tsk_size_t num_samples)\n{\n    int ret = 0;\n    tsk_size_t j;\n    tsk_id_t u;\n\n    for (j = 0; j < num_samples; j++) {\n        u = samples[j];\n\n        if (u < 0 || u > (tsk_id_t) self->tables->nodes.num_rows) {\n            ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);\n            goto out;\n        }\n       
 if (self->sample_set_id[u] != TSK_NULL) {\n            ret = tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE);\n            goto out;\n        }\n        self->sample_set_id[u] = 0;\n    }\nout:\n    return ret;\n}\n\nstatic void\ntsk_ibd_finder_init_samples_from_nodes(tsk_ibd_finder_t *self)\n{\n    tsk_id_t u;\n    const tsk_id_t num_nodes = (tsk_id_t) self->tables->nodes.num_rows;\n    const tsk_flags_t *restrict flags = self->tables->nodes.flags;\n\n    for (u = 0; u < num_nodes; u++) {\n        if (flags[u] & TSK_NODE_IS_SAMPLE) {\n            self->sample_set_id[u] = 0;\n        }\n    }\n}\n\nstatic int\ntsk_ibd_finder_add_sample_ancestry(tsk_ibd_finder_t *self)\n{\n\n    int ret = 0;\n    tsk_id_t u;\n    const tsk_id_t num_nodes = (tsk_id_t) self->tables->nodes.num_rows;\n    const double L = self->tables->sequence_length;\n\n    for (u = 0; u < num_nodes; u++) {\n        if (self->sample_set_id[u] != TSK_NULL) {\n            ret = tsk_ibd_finder_add_ancestry(self, u, 0, L, u);\n            if (ret != 0) {\n                goto out;\n            }\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_ibd_finder_init(tsk_ibd_finder_t *self, const tsk_table_collection_t *tables,\n    tsk_identity_segments_t *result, double min_span, double max_time)\n{\n    int ret = 0;\n    tsk_size_t num_nodes;\n\n    tsk_memset(self, 0, sizeof(tsk_ibd_finder_t));\n\n    if (min_span < 0) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    if (max_time < 0) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n\n    self->tables = tables;\n    self->result = result;\n    self->max_time = max_time;\n    self->min_span = min_span;\n\n    ret = tsk_blkalloc_init(&self->segment_heap, 8192);\n    if (ret != 0) {\n        goto out;\n    }\n\n    num_nodes = tables->nodes.num_rows;\n    self->ancestor_map_head = tsk_calloc(num_nodes, sizeof(*self->ancestor_map_head));\n    
self->ancestor_map_tail = tsk_calloc(num_nodes, sizeof(*self->ancestor_map_tail));\n    self->sample_set_id = tsk_malloc(num_nodes * sizeof(*self->sample_set_id));\n    self->segment_queue_size = 0;\n    self->max_segment_queue_size = 64;\n    self->segment_queue\n        = tsk_malloc(self->max_segment_queue_size * sizeof(*self->segment_queue));\n    if (self->ancestor_map_head == NULL || self->ancestor_map_tail == NULL\n        || self->sample_set_id == NULL || self->segment_queue == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    tsk_memset(self->sample_set_id, TSK_NULL, num_nodes * sizeof(*self->sample_set_id));\nout:\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_ibd_finder_enqueue_segment(\n    tsk_ibd_finder_t *self, double left, double right, tsk_id_t node)\n{\n    int ret = 0;\n    tsk_segment_t *seg;\n    void *p;\n\n    if ((right - left) > self->min_span) {\n        /* Make sure we always have room for one more segment in the queue so we\n         * can put a tail sentinel on it */\n        if (self->segment_queue_size == self->max_segment_queue_size - 1) {\n            self->max_segment_queue_size *= 2;\n            p = tsk_realloc(self->segment_queue,\n                self->max_segment_queue_size * sizeof(*self->segment_queue));\n            if (p == NULL) {\n                ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n                goto out;\n            }\n            self->segment_queue = p;\n        }\n        seg = self->segment_queue + self->segment_queue_size;\n        seg->left = left;\n        seg->right = right;\n        seg->node = node;\n        self->segment_queue_size++;\n    }\nout:\n    return ret;\n}\n\nstatic bool\ntsk_ibd_finder_passes_filters(\n    const tsk_ibd_finder_t *self, tsk_id_t a, tsk_id_t b, double left, double right)\n{\n    if (a == b) {\n        return false;\n    }\n    if ((right - left) <= self->min_span) {\n        return false;\n    }\n    if (self->finding_between) 
{\n        return self->sample_set_id[a] != self->sample_set_id[b];\n    } else {\n        return true;\n    }\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_ibd_finder_record_ibd(tsk_ibd_finder_t *self, tsk_id_t parent)\n{\n    int ret = 0;\n    tsk_size_t j;\n    tsk_segment_t *seg0, *seg1;\n    double left, right;\n\n    for (seg0 = self->ancestor_map_head[parent]; seg0 != NULL; seg0 = seg0->next) {\n        for (j = 0; j < self->segment_queue_size; j++) {\n            seg1 = &self->segment_queue[j];\n            left = TSK_MAX(seg0->left, seg1->left);\n            right = TSK_MIN(seg0->right, seg1->right);\n            if (tsk_ibd_finder_passes_filters(\n                    self, seg0->node, seg1->node, left, right)) {\n                ret = tsk_identity_segments_add_segment(\n                    self->result, seg0->node, seg1->node, left, right, parent);\n                if (ret != 0) {\n                    goto out;\n                }\n            }\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_ibd_finder_add_queued_ancestry(tsk_ibd_finder_t *self, tsk_id_t parent)\n{\n    int ret = 0;\n    tsk_size_t j;\n    tsk_segment_t seg;\n\n    for (j = 0; j < self->segment_queue_size; j++) {\n        seg = self->segment_queue[j];\n        ret = tsk_ibd_finder_add_ancestry(self, parent, seg.left, seg.right, seg.node);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    self->segment_queue_size = 0;\nout:\n    return ret;\n}\n\nstatic void\ntsk_ibd_finder_print_state(tsk_ibd_finder_t *self, FILE *out)\n{\n    tsk_size_t j;\n    tsk_segment_t *u = NULL;\n\n    fprintf(out, \"--ibd-finder stats--\\n\");\n    fprintf(out, \"max_time = %f\\n\", self->max_time);\n    fprintf(out, \"min_span = %f\\n\", self->min_span);\n    fprintf(out, \"finding_between = %d\\n\", self->finding_between);\n    fprintf(out, \"===\\nEdges\\n===\\n\");\n    for (j = 0; j < self->tables->edges.num_rows; j++) {\n        fprintf(out, \"L:%f, R:%f, P:%lld, 
C:%lld\\n\", self->tables->edges.left[j],\n            self->tables->edges.right[j], (long long) self->tables->edges.parent[j],\n            (long long) self->tables->edges.child[j]);\n    }\n    fprintf(out, \"===\\nNodes\\n===\\n\");\n    for (j = 0; j < self->tables->nodes.num_rows; j++) {\n        fprintf(out, \"ID:%d, Time:%f, Flag:%lld Sample set:%d\\n\", (int) j,\n            self->tables->nodes.time[j], (long long) self->tables->nodes.flags[j],\n            (int) self->sample_set_id[j]);\n    }\n    fprintf(out, \"===\\nAncestral map\\n===\\n\");\n    for (j = 0; j < self->tables->nodes.num_rows; j++) {\n        fprintf(out, \"Node %lld: \", (long long) j);\n        for (u = self->ancestor_map_head[j]; u != NULL; u = u->next) {\n            fprintf(out, \"(%f,%f->%lld)\", u->left, u->right, (long long) u->node);\n        }\n        fprintf(out, \"\\n\");\n    }\n    tsk_identity_segments_print_state(self->result, out);\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_ibd_finder_init_within(\n    tsk_ibd_finder_t *self, const tsk_id_t *samples, tsk_size_t num_samples)\n{\n    int ret;\n\n    if (samples == NULL) {\n        tsk_ibd_finder_init_samples_from_nodes(self);\n    } else {\n        ret = tsk_ibd_finder_init_samples_from_set(self, samples, num_samples);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    self->finding_between = false;\n    ret = tsk_ibd_finder_add_sample_ancestry(self);\nout:\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_ibd_finder_init_between(tsk_ibd_finder_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets)\n{\n    int ret = 0;\n    tsk_size_t j, k, index;\n    tsk_id_t u;\n\n    index = 0;\n    for (j = 0; j < num_sample_sets; j++) {\n        for (k = 0; k < sample_set_sizes[j]; k++) {\n            u = sample_sets[index];\n            if (u < 0 || u > (tsk_id_t) self->tables->nodes.num_rows) {\n                ret = 
tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);\n                goto out;\n            }\n            if (self->sample_set_id[u] != TSK_NULL) {\n                ret = tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE);\n                goto out;\n            }\n            self->sample_set_id[u] = (tsk_id_t) j;\n            index++;\n        }\n    }\n    self->finding_between = true;\n    ret = tsk_ibd_finder_add_sample_ancestry(self);\nout:\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_ibd_finder_run(tsk_ibd_finder_t *self)\n{\n    const tsk_edge_table_t *input_edges = &self->tables->edges;\n    const tsk_size_t num_edges = input_edges->num_rows;\n    int ret = 0;\n    tsk_size_t j;\n    tsk_segment_t *s;\n    tsk_id_t parent, child;\n    double left, right, intvl_l, intvl_r, time;\n\n    for (j = 0; j < num_edges; j++) {\n        parent = input_edges->parent[j];\n        left = input_edges->left[j];\n        right = input_edges->right[j];\n        child = input_edges->child[j];\n        time = self->tables->nodes.time[parent];\n        if (time > self->max_time) {\n            break;\n        }\n\n        for (s = self->ancestor_map_head[child]; s != NULL; s = s->next) {\n            intvl_l = TSK_MAX(left, s->left);\n            intvl_r = TSK_MIN(right, s->right);\n            ret = tsk_ibd_finder_enqueue_segment(self, intvl_l, intvl_r, s->node);\n            if (ret != 0) {\n                goto out;\n            }\n        }\n        ret = tsk_ibd_finder_record_ibd(self, parent);\n        if (ret != 0) {\n            goto out;\n        }\n        ret = tsk_ibd_finder_add_queued_ancestry(self, parent);\n        if (ret != 0) {\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int\ntsk_ibd_finder_free(tsk_ibd_finder_t *self)\n{\n    tsk_blkalloc_free(&self->segment_heap);\n    tsk_safe_free(self->sample_set_id);\n    tsk_safe_free(self->ancestor_map_head);\n    tsk_safe_free(self->ancestor_map_tail);\n    
tsk_safe_free(self->segment_queue);\n    return 0;\n}\n\n/*************************\n * simplifier\n *************************/\n\nstatic void\nsimplifier_check_state(simplifier_t *self)\n{\n    tsk_size_t j, k;\n    tsk_segment_t *u;\n    mutation_id_list_t *list_node;\n    tsk_id_t site;\n    interval_list_t *int_list;\n    tsk_id_t child;\n    double position, last_position;\n    bool found;\n    tsk_size_t num_intervals;\n\n    for (j = 0; j < self->input_tables.nodes.num_rows; j++) {\n        tsk_bug_assert((self->ancestor_map_head[j] == NULL)\n                       == (self->ancestor_map_tail[j] == NULL));\n        for (u = self->ancestor_map_head[j]; u != NULL; u = u->next) {\n            tsk_bug_assert(u->left < u->right);\n            if (u->next != NULL) {\n                tsk_bug_assert(u->right <= u->next->left);\n                if (u->right == u->next->left) {\n                    tsk_bug_assert(u->node != u->next->node);\n                }\n            } else {\n                tsk_bug_assert(u == self->ancestor_map_tail[j]);\n            }\n        }\n    }\n\n    for (j = 0; j < self->segment_queue_size; j++) {\n        tsk_bug_assert(self->segment_queue[j].left < self->segment_queue[j].right);\n    }\n\n    for (j = 0; j < self->input_tables.nodes.num_rows; j++) {\n        last_position = -1;\n        for (list_node = self->node_mutation_list_map_head[j]; list_node != NULL;\n            list_node = list_node->next) {\n            tsk_bug_assert(\n                self->input_tables.mutations.node[list_node->mutation] == (tsk_id_t) j);\n            site = self->input_tables.mutations.site[list_node->mutation];\n            position = self->input_tables.sites.position[site];\n            tsk_bug_assert(last_position <= position);\n            last_position = position;\n        }\n    }\n\n    /* check the buffered edges */\n    for (j = 0; j < self->input_tables.nodes.num_rows; j++) {\n        tsk_bug_assert((self->child_edge_map_head[j] == NULL)\n  
                     == (self->child_edge_map_tail[j] == NULL));\n        if (self->child_edge_map_head[j] != NULL) {\n            /* Make sure that the child is in our list */\n            found = false;\n            for (k = 0; k < self->num_buffered_children; k++) {\n                if (self->buffered_children[k] == (tsk_id_t) j) {\n                    found = true;\n                    break;\n                }\n            }\n            tsk_bug_assert(found);\n        }\n    }\n    num_intervals = 0;\n    for (j = 0; j < self->num_buffered_children; j++) {\n        child = self->buffered_children[j];\n        tsk_bug_assert(self->child_edge_map_head[child] != NULL);\n        for (int_list = self->child_edge_map_head[child]; int_list != NULL;\n            int_list = int_list->next) {\n            tsk_bug_assert(int_list->left < int_list->right);\n            if (int_list->next != NULL) {\n                tsk_bug_assert(int_list->right < int_list->next->left);\n            }\n            num_intervals++;\n        }\n    }\n    tsk_bug_assert(\n        num_intervals\n        == self->interval_list_heap.total_allocated / (sizeof(interval_list_t)));\n}\n\nstatic void\nprint_segment_chain(tsk_segment_t *head, FILE *out)\n{\n    tsk_segment_t *u;\n\n    for (u = head; u != NULL; u = u->next) {\n        fprintf(out, \"(%f,%f->%lld)\", u->left, u->right, (long long) u->node);\n    }\n}\n\nstatic void\nsimplifier_print_state(simplifier_t *self, FILE *out)\n{\n    tsk_size_t j;\n    tsk_segment_t *u;\n    mutation_id_list_t *list_node;\n    interval_list_t *int_list;\n    tsk_id_t child;\n\n    fprintf(out, \"--simplifier state--\\n\");\n    fprintf(out, \"options:\\n\");\n    fprintf(out, \"\\tfilter_unreferenced_sites   : %d\\n\",\n        !!(self->options & TSK_SIMPLIFY_FILTER_SITES));\n    fprintf(out, \"\\tno_filter_nodes   : %d\\n\",\n        !!(self->options & TSK_SIMPLIFY_NO_FILTER_NODES));\n    fprintf(out, \"\\treduce_to_site_topology : %d\\n\",\n        
!!(self->options & TSK_SIMPLIFY_REDUCE_TO_SITE_TOPOLOGY));\n    fprintf(out, \"\\tkeep_unary              : %d\\n\",\n        !!(self->options & TSK_SIMPLIFY_KEEP_UNARY));\n    fprintf(out, \"\\tkeep_input_roots        : %d\\n\",\n        !!(self->options & TSK_SIMPLIFY_KEEP_INPUT_ROOTS));\n    fprintf(out, \"\\tkeep_unary_in_individuals : %d\\n\",\n        !!(self->options & TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS));\n\n    fprintf(out, \"===\\nInput tables\\n==\\n\");\n    tsk_table_collection_print_state(&self->input_tables, out);\n    fprintf(out, \"===\\nOutput tables\\n==\\n\");\n    tsk_table_collection_print_state(self->tables, out);\n    fprintf(out, \"===\\nmemory heaps\\n==\\n\");\n    fprintf(out, \"segment_heap:\\n\");\n    tsk_blkalloc_print_state(&self->segment_heap, out);\n    fprintf(out, \"interval_list_heap:\\n\");\n    tsk_blkalloc_print_state(&self->interval_list_heap, out);\n    fprintf(out, \"===\\nancestors\\n==\\n\");\n    for (j = 0; j < self->input_tables.nodes.num_rows; j++) {\n        fprintf(out, \"%lld:\\t\", (long long) j);\n        print_segment_chain(self->ancestor_map_head[j], out);\n        fprintf(out, \"\\n\");\n    }\n    fprintf(out, \"===\\nnode_id map (input->output)\\n==\\n\");\n    for (j = 0; j < self->input_tables.nodes.num_rows; j++) {\n        if (self->node_id_map[j] != TSK_NULL) {\n            fprintf(\n                out, \"%lld->%lld\\n\", (long long) j, (long long) self->node_id_map[j]);\n        }\n    }\n    fprintf(out, \"===\\nsegment queue\\n==\\n\");\n    for (j = 0; j < self->segment_queue_size; j++) {\n        u = &self->segment_queue[j];\n        fprintf(out, \"(%f,%f->%lld)\", u->left, u->right, (long long) u->node);\n        fprintf(out, \"\\n\");\n    }\n    fprintf(out, \"===\\nbuffered children\\n==\\n\");\n    for (j = 0; j < self->num_buffered_children; j++) {\n        child = self->buffered_children[j];\n        fprintf(out, \"%lld -> \", (long long) j);\n        for (int_list = 
self->child_edge_map_head[child]; int_list != NULL;\n            int_list = int_list->next) {\n            fprintf(out, \"(%f, %f), \", int_list->left, int_list->right);\n        }\n        fprintf(out, \"\\n\");\n    }\n    fprintf(out, \"===\\nmutation node map\\n==\\n\");\n    for (j = 0; j < self->input_tables.mutations.num_rows; j++) {\n        fprintf(out, \"%lld\\t-> %lld\\n\", (long long) j,\n            (long long) self->mutation_node_map[j]);\n    }\n    fprintf(out, \"===\\nnode mutation id list map\\n==\\n\");\n    for (j = 0; j < self->input_tables.nodes.num_rows; j++) {\n        if (self->node_mutation_list_map_head[j] != NULL) {\n            fprintf(out, \"%lld\\t-> [\", (long long) j);\n            for (list_node = self->node_mutation_list_map_head[j]; list_node != NULL;\n                list_node = list_node->next) {\n                fprintf(out, \"%lld,\", (long long) list_node->mutation);\n            }\n            fprintf(out, \"]\\n\");\n        }\n    }\n    if (!!(self->options & TSK_SIMPLIFY_REDUCE_TO_SITE_TOPOLOGY)) {\n        fprintf(out, \"===\\nposition_lookup\\n==\\n\");\n        for (j = 0; j < self->input_tables.sites.num_rows + 2; j++) {\n            fprintf(out, \"%lld\\t-> %f\\n\", (long long) j, self->position_lookup[j]);\n        }\n    }\n    simplifier_check_state(self);\n}\n\nstatic tsk_segment_t *TSK_WARN_UNUSED\nsimplifier_alloc_segment(simplifier_t *self, double left, double right, tsk_id_t node)\n{\n    tsk_segment_t *seg = NULL;\n\n    seg = tsk_blkalloc_get(&self->segment_heap, sizeof(*seg));\n    if (seg == NULL) {\n        goto out;\n    }\n    seg->next = NULL;\n    seg->left = left;\n    seg->right = right;\n    seg->node = node;\nout:\n    return seg;\n}\n\nstatic interval_list_t *TSK_WARN_UNUSED\nsimplifier_alloc_interval_list(simplifier_t *self, double left, double right)\n{\n    interval_list_t *x = NULL;\n\n    x = tsk_blkalloc_get(&self->interval_list_heap, sizeof(*x));\n    if (x == NULL) {\n        goto 
out;\n    }\n    x->next = NULL;\n    x->left = left;\n    x->right = right;\nout:\n    return x;\n}\n\n/* Add a new node to the output node table corresponding to the specified input id.\n * Returns the new ID. */\nstatic tsk_id_t TSK_WARN_UNUSED\nsimplifier_record_node(simplifier_t *self, tsk_id_t input_id)\n{\n    tsk_node_t node;\n    bool update_flags = !(self->options & TSK_SIMPLIFY_NO_UPDATE_SAMPLE_FLAGS);\n\n    tsk_node_table_get_row_unsafe(&self->input_tables.nodes, (tsk_id_t) input_id, &node);\n    if (update_flags) {\n        /* Zero out the sample bit */\n        node.flags &= (tsk_flags_t) ~TSK_NODE_IS_SAMPLE;\n        if (self->is_sample[input_id]) {\n            node.flags |= TSK_NODE_IS_SAMPLE;\n        }\n    }\n    self->node_id_map[input_id] = (tsk_id_t) self->tables->nodes.num_rows;\n    return tsk_node_table_add_row(&self->tables->nodes, node.flags, node.time,\n        node.population, node.individual, node.metadata, node.metadata_length);\n}\n\n/* Remove the mapping for the last recorded node. 
*/\nstatic int\nsimplifier_rewind_node(simplifier_t *self, tsk_id_t input_id, tsk_id_t output_id)\n{\n    self->node_id_map[input_id] = TSK_NULL;\n    return tsk_node_table_truncate(&self->tables->nodes, (tsk_size_t) output_id);\n}\n\nstatic int\nsimplifier_flush_edges(simplifier_t *self, tsk_id_t parent, tsk_size_t *ret_num_edges)\n{\n    int ret = 0;\n    tsk_id_t ret_id;\n    tsk_size_t j;\n    tsk_id_t child;\n    interval_list_t *x;\n    tsk_size_t num_edges = 0;\n\n    qsort(self->buffered_children, (size_t) self->num_buffered_children,\n        sizeof(tsk_id_t), cmp_node_id);\n    for (j = 0; j < self->num_buffered_children; j++) {\n        child = self->buffered_children[j];\n        for (x = self->child_edge_map_head[child]; x != NULL; x = x->next) {\n            ret_id = tsk_edge_table_add_row(\n                &self->tables->edges, x->left, x->right, parent, child, NULL, 0);\n            if (ret_id < 0) {\n                ret = (int) ret_id;\n                goto out;\n            }\n            num_edges++;\n        }\n        self->child_edge_map_head[child] = NULL;\n        self->child_edge_map_tail[child] = NULL;\n    }\n    self->num_buffered_children = 0;\n    *ret_num_edges = num_edges;\n    ret = tsk_blkalloc_reset(&self->interval_list_heap);\nout:\n    return ret;\n}\n\n/* When we are reducing topology down to what is visible at the sites we need a\n * lookup table to find the closest site position for each edge. 
We do this with\n * a sorted array and binary search */\nstatic int\nsimplifier_init_position_lookup(simplifier_t *self)\n{\n    int ret = 0;\n    tsk_size_t num_sites = self->input_tables.sites.num_rows;\n\n    self->position_lookup = tsk_malloc((num_sites + 2) * sizeof(*self->position_lookup));\n    if (self->position_lookup == NULL) {\n        goto out;\n    }\n    self->position_lookup[0] = 0;\n    self->position_lookup[num_sites + 1] = self->input_tables.sequence_length;\n    tsk_memcpy(self->position_lookup + 1, self->input_tables.sites.position,\n        num_sites * sizeof(double));\nout:\n    return ret;\n}\n/*\n * Find the smallest site position index greater than or equal to left\n * and right, i.e., slide each endpoint of an interval to the right\n * until they hit a site position. If both left and right map to the\n * same position then we discard this edge. We also discard an\n * edge if left = 0 and right is less than the first site position.\n */\nstatic bool\nsimplifier_map_reduced_coordinates(simplifier_t *self, double *left, double *right)\n{\n    double *X = self->position_lookup;\n    tsk_size_t N = self->input_tables.sites.num_rows + 2;\n    tsk_size_t left_index, right_index;\n    bool skip = false;\n\n    left_index = tsk_search_sorted(X, N, *left);\n    right_index = tsk_search_sorted(X, N, *right);\n    if (left_index == right_index || (left_index == 0 && right_index == 1)) {\n        skip = true;\n    } else {\n        /* Remap back to zero if the left end maps to the first site. 
*/\n        if (left_index == 1) {\n            left_index = 0;\n        }\n        *left = X[left_index];\n        *right = X[right_index];\n    }\n    return skip;\n}\n\n/* Records the specified edge for the current parent by buffering it */\nstatic int\nsimplifier_record_edge(simplifier_t *self, double left, double right, tsk_id_t child)\n{\n    int ret = 0;\n    interval_list_t *tail, *x;\n    bool skip;\n\n    if (self->options & TSK_SIMPLIFY_REDUCE_TO_SITE_TOPOLOGY) {\n        skip = simplifier_map_reduced_coordinates(self, &left, &right);\n        /* NOTE: we exit early here when simplifier_map_reduced_coordinates has\n         * told us to skip this edge, as it is not visible in the reduced tree\n         * sequence */\n        if (skip) {\n            goto out;\n        }\n    }\n\n    tail = self->child_edge_map_tail[child];\n    if (tail == NULL) {\n        tsk_bug_assert(self->num_buffered_children < self->input_tables.nodes.num_rows);\n        self->buffered_children[self->num_buffered_children] = child;\n        self->num_buffered_children++;\n        x = simplifier_alloc_interval_list(self, left, right);\n        if (x == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n        self->child_edge_map_head[child] = x;\n        self->child_edge_map_tail[child] = x;\n    } else {\n        if (tail->right == left) {\n            tail->right = right;\n        } else {\n            x = simplifier_alloc_interval_list(self, left, right);\n            if (x == NULL) {\n                ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n                goto out;\n            }\n            tail->next = x;\n            self->child_edge_map_tail[child] = x;\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int\nsimplifier_init_sites(simplifier_t *self)\n{\n    int ret = 0;\n    tsk_id_t node;\n    mutation_id_list_t *list_node;\n    tsk_size_t j;\n\n    self->mutation_node_map\n        = tsk_calloc(self->input_tables.mutations.num_rows, 
sizeof(tsk_id_t));\n    self->node_mutation_list_mem\n        = tsk_malloc(self->input_tables.mutations.num_rows * sizeof(mutation_id_list_t));\n    self->node_mutation_list_map_head\n        = tsk_calloc(self->input_tables.nodes.num_rows, sizeof(mutation_id_list_t *));\n    self->node_mutation_list_map_tail\n        = tsk_calloc(self->input_tables.nodes.num_rows, sizeof(mutation_id_list_t *));\n    if (self->mutation_node_map == NULL || self->node_mutation_list_mem == NULL\n        || self->node_mutation_list_map_head == NULL\n        || self->node_mutation_list_map_tail == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    tsk_memset(self->mutation_node_map, 0xff,\n        self->input_tables.mutations.num_rows * sizeof(tsk_id_t));\n\n    for (j = 0; j < self->input_tables.mutations.num_rows; j++) {\n        node = self->input_tables.mutations.node[j];\n        list_node = self->node_mutation_list_mem + j;\n        list_node->mutation = (tsk_id_t) j;\n        list_node->next = NULL;\n        if (self->node_mutation_list_map_head[node] == NULL) {\n            self->node_mutation_list_map_head[node] = list_node;\n        } else {\n            self->node_mutation_list_map_tail[node]->next = list_node;\n        }\n        self->node_mutation_list_map_tail[node] = list_node;\n    }\nout:\n    return ret;\n}\n\nstatic void\nsimplifier_map_mutations(\n    simplifier_t *self, tsk_id_t input_id, double left, double right, tsk_id_t output_id)\n{\n    mutation_id_list_t *m_node;\n    double position;\n    tsk_id_t site;\n\n    m_node = self->node_mutation_list_map_head[input_id];\n    while (m_node != NULL) {\n        site = self->input_tables.mutations.site[m_node->mutation];\n        position = self->input_tables.sites.position[site];\n        if (left <= position && position < right) {\n            self->mutation_node_map[m_node->mutation] = output_id;\n        }\n        m_node = m_node->next;\n    }\n}\n\nstatic int 
TSK_WARN_UNUSED\nsimplifier_add_ancestry(\n    simplifier_t *self, tsk_id_t input_id, double left, double right, tsk_id_t output_id)\n{\n    int ret = 0;\n    tsk_segment_t *tail = self->ancestor_map_tail[input_id];\n    tsk_segment_t *x;\n\n    tsk_bug_assert(left < right);\n    if (tail == NULL) {\n        x = simplifier_alloc_segment(self, left, right, output_id);\n        if (x == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n        self->ancestor_map_head[input_id] = x;\n        self->ancestor_map_tail[input_id] = x;\n    } else {\n        if (tail->right == left && tail->node == output_id) {\n            tail->right = right;\n        } else {\n            x = simplifier_alloc_segment(self, left, right, output_id);\n            if (x == NULL) {\n                ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n                goto out;\n            }\n            tail->next = x;\n            self->ancestor_map_tail[input_id] = x;\n        }\n    }\n    simplifier_map_mutations(self, input_id, left, right, output_id);\nout:\n    return ret;\n}\n\n/* Sets up the internal working copies of the various tables, as needed\n * depending on the specified options. */\nstatic int\nsimplifier_init_tables(simplifier_t *self)\n{\n    int ret;\n    bool filter_nodes = !(self->options & TSK_SIMPLIFY_NO_FILTER_NODES);\n    bool filter_populations = self->options & TSK_SIMPLIFY_FILTER_POPULATIONS;\n    bool filter_individuals = self->options & TSK_SIMPLIFY_FILTER_INDIVIDUALS;\n    bool filter_sites = self->options & TSK_SIMPLIFY_FILTER_SITES;\n    tsk_bookmark_t rows_to_retain;\n\n    /* NOTE: this is a bit inefficient here as we're taking copies of\n     * the tables even in the no-filter case where the original tables\n     * won't be touched (beyond references to external tables that may\n     * need updating). 
Future versions may do something a bit more\n     * complicated like temporarily stealing the pointers to the\n     * underlying column memory in these tables, and then being careful\n     * not to free the table at the end.\n     */\n    ret = tsk_table_collection_copy(self->tables, &self->input_tables, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    memset(&rows_to_retain, 0, sizeof(rows_to_retain));\n    rows_to_retain.provenances = self->tables->provenances.num_rows;\n    if (!filter_nodes) {\n        rows_to_retain.nodes = self->tables->nodes.num_rows;\n    }\n    if (!filter_populations) {\n        rows_to_retain.populations = self->tables->populations.num_rows;\n    }\n    if (!filter_individuals) {\n        rows_to_retain.individuals = self->tables->individuals.num_rows;\n    }\n    if (!filter_sites) {\n        rows_to_retain.sites = self->tables->sites.num_rows;\n    }\n\n    ret = tsk_table_collection_truncate(self->tables, &rows_to_retain);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nstatic int\nsimplifier_init_nodes(simplifier_t *self, const tsk_id_t *samples)\n{\n    int ret = 0;\n    tsk_id_t node_id;\n    tsk_size_t j;\n    const tsk_size_t num_nodes = self->input_tables.nodes.num_rows;\n    bool filter_nodes = !(self->options & TSK_SIMPLIFY_NO_FILTER_NODES);\n    bool update_flags = !(self->options & TSK_SIMPLIFY_NO_UPDATE_SAMPLE_FLAGS);\n    tsk_flags_t *node_flags = self->tables->nodes.flags;\n    tsk_id_t *node_id_map = self->node_id_map;\n\n    if (filter_nodes) {\n        tsk_bug_assert(self->tables->nodes.num_rows == 0);\n        /* The node table has been cleared. Add nodes for the samples. 
*/\n        for (j = 0; j < self->num_samples; j++) {\n            node_id = simplifier_record_node(self, samples[j]);\n            if (node_id < 0) {\n                ret = (int) node_id;\n                goto out;\n            }\n        }\n    } else {\n        tsk_bug_assert(self->tables->nodes.num_rows == num_nodes);\n        if (update_flags) {\n            for (j = 0; j < num_nodes; j++) {\n                /* Reset the sample flags */\n                node_flags[j] &= (tsk_flags_t) ~TSK_NODE_IS_SAMPLE;\n                if (self->is_sample[j]) {\n                    node_flags[j] |= TSK_NODE_IS_SAMPLE;\n                }\n            }\n        }\n\n        for (j = 0; j < num_nodes; j++) {\n            node_id_map[j] = (tsk_id_t) j;\n        }\n    }\n    /* Add the initial ancestry */\n    for (j = 0; j < self->num_samples; j++) {\n        node_id = samples[j];\n        ret = simplifier_add_ancestry(self, node_id, 0,\n            self->input_tables.sequence_length, self->node_id_map[node_id]);\n        if (ret != 0) {\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int\nsimplifier_init(simplifier_t *self, const tsk_id_t *samples, tsk_size_t num_samples,\n    tsk_table_collection_t *tables, tsk_flags_t options)\n{\n    int ret = 0;\n    tsk_size_t j;\n    tsk_id_t ret_id;\n    tsk_size_t num_nodes;\n\n    tsk_memset(self, 0, sizeof(simplifier_t));\n    self->num_samples = num_samples;\n    self->options = options;\n    self->tables = tables;\n\n    /* TODO we can add a flag to skip these checks for when we know they are\n     * unnecessary */\n    /* TODO Current unit tests require TSK_CHECK_SITE_DUPLICATES but it's\n     * debateable whether we need it. If we remove, we definitely need explicit\n     * tests to ensure we're doing sensible things with duplicate sites.\n     * (Particularly, re TSK_SIMPLIFY_REDUCE_TO_SITE_TOPOLOGY.) 
*/\n    ret_id = tsk_table_collection_check_integrity(tables,\n        TSK_CHECK_EDGE_ORDERING | TSK_CHECK_SITE_ORDERING | TSK_CHECK_SITE_DUPLICATES);\n    if (ret_id != 0) {\n        ret = (int) ret_id;\n        goto out;\n    }\n\n    /* Allocate the heaps used for small objects. Assuming 8K is a good chunk size\n     */\n    ret = tsk_blkalloc_init(&self->segment_heap, 8192);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_blkalloc_init(&self->interval_list_heap, 8192);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = segment_overlapper_alloc(&self->segment_overlapper);\n    if (ret != 0) {\n        goto out;\n    }\n    num_nodes = tables->nodes.num_rows;\n    /* Make the maps and set the initial state */\n    self->ancestor_map_head = tsk_calloc(num_nodes, sizeof(tsk_segment_t *));\n    self->ancestor_map_tail = tsk_calloc(num_nodes, sizeof(tsk_segment_t *));\n    self->child_edge_map_head = tsk_calloc(num_nodes, sizeof(interval_list_t *));\n    self->child_edge_map_tail = tsk_calloc(num_nodes, sizeof(interval_list_t *));\n    self->node_id_map = tsk_malloc(num_nodes * sizeof(tsk_id_t));\n    self->buffered_children = tsk_malloc(num_nodes * sizeof(tsk_id_t));\n    self->is_sample = tsk_calloc(num_nodes, sizeof(bool));\n    self->max_segment_queue_size = 64;\n    self->segment_queue\n        = tsk_malloc(self->max_segment_queue_size * sizeof(tsk_segment_t));\n    if (self->ancestor_map_head == NULL || self->ancestor_map_tail == NULL\n        || self->child_edge_map_head == NULL || self->child_edge_map_tail == NULL\n        || self->node_id_map == NULL || self->is_sample == NULL\n        || self->segment_queue == NULL || self->buffered_children == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    /* Go through the samples to check for errors before we clear the tables. 
*/\n    for (j = 0; j < self->num_samples; j++) {\n        if (samples[j] < 0 || samples[j] >= (tsk_id_t) num_nodes) {\n            ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);\n            goto out;\n        }\n        if (self->is_sample[samples[j]]) {\n            ret = tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE);\n            goto out;\n        }\n        self->is_sample[samples[j]] = true;\n    }\n    tsk_memset(self->node_id_map, 0xff, num_nodes * sizeof(tsk_id_t));\n\n    ret = simplifier_init_tables(self);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = simplifier_init_sites(self);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = simplifier_init_nodes(self, samples);\n    if (ret != 0) {\n        goto out;\n    }\n    if (self->options & TSK_SIMPLIFY_REDUCE_TO_SITE_TOPOLOGY) {\n        ret = simplifier_init_position_lookup(self);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n\n    self->edge_sort_offset = TSK_NULL;\nout:\n    return ret;\n}\n\nstatic int\nsimplifier_free(simplifier_t *self)\n{\n    tsk_table_collection_free(&self->input_tables);\n    tsk_blkalloc_free(&self->segment_heap);\n    tsk_blkalloc_free(&self->interval_list_heap);\n    segment_overlapper_free(&self->segment_overlapper);\n    tsk_safe_free(self->ancestor_map_head);\n    tsk_safe_free(self->ancestor_map_tail);\n    tsk_safe_free(self->child_edge_map_head);\n    tsk_safe_free(self->child_edge_map_tail);\n    tsk_safe_free(self->node_id_map);\n    tsk_safe_free(self->segment_queue);\n    tsk_safe_free(self->is_sample);\n    tsk_safe_free(self->mutation_node_map);\n    tsk_safe_free(self->node_mutation_list_mem);\n    tsk_safe_free(self->node_mutation_list_map_head);\n    tsk_safe_free(self->node_mutation_list_map_tail);\n    tsk_safe_free(self->buffered_children);\n    tsk_safe_free(self->position_lookup);\n    return 0;\n}\n\nstatic int TSK_WARN_UNUSED\nsimplifier_enqueue_segment(simplifier_t *self, double left, double right, tsk_id_t 
node)\n{\n    int ret = 0;\n    tsk_segment_t *seg;\n    void *p;\n\n    tsk_bug_assert(left < right);\n    /* Make sure we always have room for one more segment in the queue so we\n     * can put a tail sentinel on it */\n    if (self->segment_queue_size == self->max_segment_queue_size - 1) {\n        self->max_segment_queue_size *= 2;\n        p = tsk_realloc(self->segment_queue,\n            self->max_segment_queue_size * sizeof(*self->segment_queue));\n        if (p == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n        self->segment_queue = p;\n    }\n    seg = self->segment_queue + self->segment_queue_size;\n    seg->left = left;\n    seg->right = right;\n    seg->node = node;\n    self->segment_queue_size++;\nout:\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\nsimplifier_merge_ancestors(simplifier_t *self, tsk_id_t input_id)\n{\n    int ret = 0;\n    tsk_segment_t **X, *x;\n    tsk_size_t j, num_overlapping, num_flushed_edges;\n    double left, right, prev_right;\n    tsk_id_t ancestry_node;\n    tsk_id_t output_id = self->node_id_map[input_id];\n    bool is_sample = self->is_sample[input_id];\n    bool filter_nodes = !(self->options & TSK_SIMPLIFY_NO_FILTER_NODES);\n    bool keep_unary = self->options & TSK_SIMPLIFY_KEEP_UNARY;\n\n    if ((self->options & TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS)\n        && (self->input_tables.nodes.individual[input_id] != TSK_NULL)) {\n        keep_unary = true;\n    }\n\n    if (is_sample) {\n        /* Free up the existing ancestry mapping. 
*/\n        x = self->ancestor_map_tail[input_id];\n        tsk_bug_assert(x->left == 0 && x->right == self->tables->sequence_length);\n        self->ancestor_map_head[input_id] = NULL;\n        self->ancestor_map_tail[input_id] = NULL;\n    }\n\n    ret = segment_overlapper_start(\n        &self->segment_overlapper, self->segment_queue, self->segment_queue_size);\n    if (ret != 0) {\n        goto out;\n    }\n    prev_right = 0;\n    while ((ret = segment_overlapper_next(\n                &self->segment_overlapper, &left, &right, &X, &num_overlapping))\n           == 1) {\n        tsk_bug_assert(left < right);\n        tsk_bug_assert(num_overlapping > 0);\n        if (num_overlapping == 1) {\n            ancestry_node = X[0]->node;\n            if (is_sample) {\n                ret = simplifier_record_edge(self, left, right, ancestry_node);\n                if (ret != 0) {\n                    goto out;\n                }\n                ancestry_node = output_id;\n            } else if (keep_unary) {\n                if (output_id == TSK_NULL) {\n                    output_id = simplifier_record_node(self, input_id);\n                }\n                ret = simplifier_record_edge(self, left, right, ancestry_node);\n                if (ret != 0) {\n                    goto out;\n                }\n            }\n        } else {\n            if (output_id == TSK_NULL) {\n                output_id = simplifier_record_node(self, input_id);\n                if (output_id < 0) {\n                    ret = (int) output_id;\n                    goto out;\n                }\n            }\n            ancestry_node = output_id;\n            for (j = 0; j < num_overlapping; j++) {\n                ret = simplifier_record_edge(self, left, right, X[j]->node);\n                if (ret != 0) {\n                    goto out;\n                }\n            }\n        }\n        if (is_sample && left != prev_right) {\n            /* Fill in any gaps in ancestry for the 
sample */\n            ret = simplifier_add_ancestry(self, input_id, prev_right, left, output_id);\n            if (ret != 0) {\n                goto out;\n            }\n        }\n        if (keep_unary) {\n            ancestry_node = output_id;\n        }\n        ret = simplifier_add_ancestry(self, input_id, left, right, ancestry_node);\n        if (ret != 0) {\n            goto out;\n        }\n        prev_right = right;\n    }\n    /* Check for errors occurring in the loop condition */\n    if (ret != 0) {\n        goto out;\n    }\n    if (is_sample && prev_right != self->tables->sequence_length) {\n        /* If a trailing gap exists in the sample ancestry, fill it in. */\n        ret = simplifier_add_ancestry(\n            self, input_id, prev_right, self->tables->sequence_length, output_id);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    if (output_id != TSK_NULL) {\n        ret = simplifier_flush_edges(self, output_id, &num_flushed_edges);\n        if (ret != 0) {\n            goto out;\n        }\n        if (filter_nodes && (num_flushed_edges == 0) && !is_sample) {\n            ret = simplifier_rewind_node(self, input_id, output_id);\n        }\n    }\nout:\n    return ret;\n}\n\n/* Extract the ancestry for the specified input node over the specified\n * interval and queue it up for merging.\n */\nstatic int TSK_WARN_UNUSED\nsimplifier_extract_ancestry(\n    simplifier_t *self, double left, double right, tsk_id_t input_id)\n{\n    int ret = 0;\n    tsk_segment_t *x = self->ancestor_map_head[input_id];\n    tsk_segment_t y; /* y is the segment that has been removed */\n    tsk_segment_t *x_head, *x_prev, *seg_left, *seg_right;\n\n    x_head = NULL;\n    x_prev = NULL;\n    while (x != NULL) {\n        if (x->right > left && right > x->left) {\n            y.left = TSK_MAX(x->left, left);\n            y.right = TSK_MIN(x->right, right);\n            y.node = x->node;\n            ret = simplifier_enqueue_segment(self, y.left, 
y.right, y.node);\n            if (ret != 0) {\n                goto out;\n            }\n            seg_left = NULL;\n            seg_right = NULL;\n            if (x->left != y.left) {\n                seg_left = simplifier_alloc_segment(self, x->left, y.left, x->node);\n                if (seg_left == NULL) {\n                    ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n                    goto out;\n                }\n                if (x_prev == NULL) {\n                    x_head = seg_left;\n                } else {\n                    x_prev->next = seg_left;\n                }\n                x_prev = seg_left;\n            }\n            if (x->right != y.right) {\n                x->left = y.right;\n                seg_right = x;\n            } else {\n                seg_right = x->next;\n                // TODO free x\n            }\n            if (x_prev == NULL) {\n                x_head = seg_right;\n            } else {\n                x_prev->next = seg_right;\n            }\n            x = seg_right;\n        } else {\n            if (x_prev == NULL) {\n                x_head = x;\n            }\n            x_prev = x;\n            x = x->next;\n        }\n    }\n\n    self->ancestor_map_head[input_id] = x_head;\n    self->ancestor_map_tail[input_id] = x_prev;\nout:\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\nsimplifier_process_parent_edges(\n    simplifier_t *self, tsk_id_t parent, tsk_size_t start, tsk_size_t end)\n{\n    int ret = 0;\n    tsk_size_t j;\n    const tsk_edge_table_t *input_edges = &self->input_tables.edges;\n    tsk_id_t child;\n    double left, right;\n\n    /* Go through the edges and queue up ancestry segments for processing. 
*/\n    self->segment_queue_size = 0;\n    for (j = start; j < end; j++) {\n        tsk_bug_assert(parent == input_edges->parent[j]);\n        child = input_edges->child[j];\n        left = input_edges->left[j];\n        right = input_edges->right[j];\n        ret = simplifier_extract_ancestry(self, left, right, child);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    /* We can now merge the ancestral segments for the parent */\n    ret = simplifier_merge_ancestors(self, parent);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\nsimplifier_finalise_site_references(\n    simplifier_t *self, const bool *site_referenced, tsk_id_t *site_id_map)\n{\n    int ret = 0;\n    tsk_id_t ret_id;\n    tsk_size_t j;\n    tsk_site_t site;\n    const tsk_size_t num_sites = self->input_tables.sites.num_rows;\n\n    if (self->options & TSK_SIMPLIFY_FILTER_SITES) {\n        for (j = 0; j < num_sites; j++) {\n            tsk_site_table_get_row_unsafe(\n                &self->input_tables.sites, (tsk_id_t) j, &site);\n            site_id_map[j] = TSK_NULL;\n            if (site_referenced[j]) {\n                ret_id = tsk_site_table_add_row(&self->tables->sites, site.position,\n                    site.ancestral_state, site.ancestral_state_length, site.metadata,\n                    site.metadata_length);\n                if (ret_id < 0) {\n                    ret = (int) ret_id;\n                    goto out;\n                }\n                site_id_map[j] = ret_id;\n            }\n        }\n    } else {\n        tsk_bug_assert(self->tables->sites.num_rows == num_sites);\n        for (j = 0; j < num_sites; j++) {\n            site_id_map[j] = (tsk_id_t) j;\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\nsimplifier_finalise_population_references(simplifier_t *self)\n{\n    int ret = 0;\n    tsk_size_t j;\n    tsk_id_t pop_id, ret_id;\n    tsk_population_t pop;\n    tsk_id_t 
*node_population = self->tables->nodes.population;\n    const tsk_size_t num_nodes = self->tables->nodes.num_rows;\n    const tsk_size_t num_populations = self->input_tables.populations.num_rows;\n    bool *population_referenced\n        = tsk_calloc(num_populations, sizeof(*population_referenced));\n    tsk_id_t *population_id_map\n        = tsk_malloc(num_populations * sizeof(*population_id_map));\n\n    tsk_bug_assert(self->options & TSK_SIMPLIFY_FILTER_POPULATIONS);\n\n    if (population_referenced == NULL || population_id_map == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    for (j = 0; j < num_nodes; j++) {\n        pop_id = node_population[j];\n        if (pop_id != TSK_NULL) {\n            population_referenced[pop_id] = true;\n        }\n    }\n\n    for (j = 0; j < num_populations; j++) {\n        tsk_population_table_get_row_unsafe(\n            &self->input_tables.populations, (tsk_id_t) j, &pop);\n        population_id_map[j] = TSK_NULL;\n        if (population_referenced[j]) {\n            ret_id = tsk_population_table_add_row(\n                &self->tables->populations, pop.metadata, pop.metadata_length);\n            if (ret_id < 0) {\n                ret = (int) ret_id;\n                goto out;\n            }\n            population_id_map[j] = ret_id;\n        }\n    }\n\n    /* Remap the IDs in the node table */\n    for (j = 0; j < num_nodes; j++) {\n        pop_id = node_population[j];\n        if (pop_id != TSK_NULL) {\n            node_population[j] = population_id_map[pop_id];\n        }\n    }\nout:\n    tsk_safe_free(population_id_map);\n    tsk_safe_free(population_referenced);\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\nsimplifier_finalise_individual_references(simplifier_t *self)\n{\n    int ret = 0;\n    tsk_size_t j;\n    tsk_id_t pop_id, ret_id;\n    tsk_individual_t ind;\n    tsk_id_t *node_individual = self->tables->nodes.individual;\n    tsk_id_t *parents;\n    const 
tsk_size_t num_nodes = self->tables->nodes.num_rows;\n    const tsk_size_t num_individuals = self->input_tables.individuals.num_rows;\n    bool *individual_referenced\n        = tsk_calloc(num_individuals, sizeof(*individual_referenced));\n    tsk_id_t *individual_id_map\n        = tsk_malloc(num_individuals * sizeof(*individual_id_map));\n\n    tsk_bug_assert(self->options & TSK_SIMPLIFY_FILTER_INDIVIDUALS);\n\n    if (individual_referenced == NULL || individual_id_map == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    for (j = 0; j < num_nodes; j++) {\n        pop_id = node_individual[j];\n        if (pop_id != TSK_NULL) {\n            individual_referenced[pop_id] = true;\n        }\n    }\n\n    for (j = 0; j < num_individuals; j++) {\n        tsk_individual_table_get_row_unsafe(\n            &self->input_tables.individuals, (tsk_id_t) j, &ind);\n        individual_id_map[j] = TSK_NULL;\n        if (individual_referenced[j]) {\n            /* Can't remap the parents inline here because we have no\n             * guarantees about sortedness */\n            ret_id = tsk_individual_table_add_row(&self->tables->individuals, ind.flags,\n                ind.location, ind.location_length, ind.parents, ind.parents_length,\n                ind.metadata, ind.metadata_length);\n            if (ret_id < 0) {\n                ret = (int) ret_id;\n                goto out;\n            }\n            individual_id_map[j] = ret_id;\n        }\n    }\n\n    /* Remap the IDs in the node table */\n    for (j = 0; j < num_nodes; j++) {\n        pop_id = node_individual[j];\n        if (pop_id != TSK_NULL) {\n            node_individual[j] = individual_id_map[pop_id];\n        }\n    }\n\n    /* Remap parent IDs. *\n     * NOTE! 
must take the pointer reference here as it can change from\n     * the start of the function */\n    parents = self->tables->individuals.parents;\n    for (j = 0; j < self->tables->individuals.parents_length; j++) {\n        if (parents[j] != TSK_NULL) {\n            parents[j] = individual_id_map[parents[j]];\n        }\n    }\n\nout:\n    tsk_safe_free(individual_id_map);\n    tsk_safe_free(individual_referenced);\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\nsimplifier_output_sites(simplifier_t *self)\n{\n    int ret = 0;\n    tsk_id_t ret_id;\n    tsk_size_t j;\n    tsk_mutation_t mutation;\n    const tsk_size_t num_sites = self->input_tables.sites.num_rows;\n    const tsk_size_t num_mutations = self->input_tables.mutations.num_rows;\n    bool *site_referenced = tsk_calloc(num_sites, sizeof(*site_referenced));\n    tsk_id_t *site_id_map = tsk_malloc(num_sites * sizeof(*site_id_map));\n    tsk_id_t *mutation_id_map = tsk_malloc(num_mutations * sizeof(*mutation_id_map));\n    const tsk_id_t *mutation_node_map = self->mutation_node_map;\n    const tsk_id_t *mutation_site = self->input_tables.mutations.site;\n\n    if (site_referenced == NULL || site_id_map == NULL || mutation_id_map == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    for (j = 0; j < num_mutations; j++) {\n        if (mutation_node_map[j] != TSK_NULL) {\n            site_referenced[mutation_site[j]] = true;\n        }\n    }\n    ret = simplifier_finalise_site_references(self, site_referenced, site_id_map);\n    if (ret != 0) {\n        goto out;\n    }\n\n    for (j = 0; j < num_mutations; j++) {\n        mutation_id_map[j] = TSK_NULL;\n        if (mutation_node_map[j] != TSK_NULL) {\n            tsk_mutation_table_get_row_unsafe(\n                &self->input_tables.mutations, (tsk_id_t) j, &mutation);\n            mutation.node = mutation_node_map[j];\n            mutation.site = site_id_map[mutation.site];\n            if (mutation.parent != 
TSK_NULL) {\n                mutation.parent = mutation_id_map[mutation.parent];\n            }\n            ret_id = tsk_mutation_table_add_row(&self->tables->mutations, mutation.site,\n                mutation.node, mutation.parent, mutation.time, mutation.derived_state,\n                mutation.derived_state_length, mutation.metadata,\n                mutation.metadata_length);\n            if (ret_id < 0) {\n                ret = (int) ret_id;\n                goto out;\n            }\n            mutation_id_map[j] = ret_id;\n        }\n    }\nout:\n    tsk_safe_free(site_referenced);\n    tsk_safe_free(site_id_map);\n    tsk_safe_free(mutation_id_map);\n    return ret;\n}\n\n/* Flush the remaining non-edge and node data in the model to the\n * output tables. */\nstatic int TSK_WARN_UNUSED\nsimplifier_flush_output(simplifier_t *self)\n{\n    int ret = 0;\n\n    /* TODO Migrations fit reasonably neatly into the pattern that we have here. We\n     * can consider references to populations from migration objects in the same way\n     * as from nodes, so that we only remove a population if it's referenced by\n     * neither. Mapping the population IDs in migrations is then easy. In principle\n     * nodes are similar, but the semantics are slightly different because we've\n     * already allocated all the nodes by their references from edges. 
We then\n     * need to decide whether we remove migrations that reference unmapped nodes\n     * or whether to add these nodes back in (probably the former is the correct\n     * approach).*/\n    if (self->input_tables.migrations.num_rows != 0) {\n        ret = tsk_trace_error(TSK_ERR_SIMPLIFY_MIGRATIONS_NOT_SUPPORTED);\n        goto out;\n    }\n\n    ret = simplifier_output_sites(self);\n    if (ret != 0) {\n        goto out;\n    }\n\n    if (self->options & TSK_SIMPLIFY_FILTER_POPULATIONS) {\n        ret = simplifier_finalise_population_references(self);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    if (self->options & TSK_SIMPLIFY_FILTER_INDIVIDUALS) {\n        ret = simplifier_finalise_individual_references(self);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n\nout:\n    return ret;\n}\n\nstatic void\nsimplifier_set_edge_sort_offset(simplifier_t *self, double youngest_root_time)\n{\n    const tsk_edge_table_t edges = self->tables->edges;\n    const double *node_time = self->tables->nodes.time;\n    int64_t offset;\n\n    for (offset = 0; offset < (int64_t) edges.num_rows; offset++) {\n        if (node_time[edges.parent[offset]] >= youngest_root_time) {\n            break;\n        }\n    }\n    self->edge_sort_offset = offset;\n}\n\nstatic int TSK_WARN_UNUSED\nsimplifier_sort_edges(simplifier_t *self)\n{\n    /* designated initialisers are guaranteed to set any missing fields to\n     * zero, so we don't need to set the rest of them. 
*/\n    tsk_bookmark_t bookmark = {\n        .edges = (tsk_size_t) self->edge_sort_offset,\n        .sites = self->tables->sites.num_rows,\n        .mutations = self->tables->mutations.num_rows,\n    };\n    tsk_bug_assert(self->edge_sort_offset >= 0);\n    return tsk_table_collection_sort(self->tables, &bookmark, 0);\n}\n\nstatic int TSK_WARN_UNUSED\nsimplifier_insert_input_roots(simplifier_t *self)\n{\n    int ret = 0;\n    tsk_id_t input_id, output_id;\n    tsk_segment_t *x;\n    tsk_size_t num_flushed_edges;\n    double youngest_root_time = DBL_MAX;\n    const double *node_time = self->tables->nodes.time;\n\n    for (input_id = 0; input_id < (tsk_id_t) self->input_tables.nodes.num_rows;\n        input_id++) {\n        x = self->ancestor_map_head[input_id];\n        if (x != NULL) {\n            output_id = self->node_id_map[input_id];\n            if (output_id == TSK_NULL) {\n                output_id = simplifier_record_node(self, input_id);\n                if (output_id < 0) {\n                    ret = (int) output_id;\n                    goto out;\n                }\n            }\n            youngest_root_time = TSK_MIN(youngest_root_time, node_time[output_id]);\n            while (x != NULL) {\n                if (x->node != output_id) {\n                    ret = simplifier_record_edge(self, x->left, x->right, x->node);\n                    if (ret != 0) {\n                        goto out;\n                    }\n                    simplifier_map_mutations(\n                        self, input_id, x->left, x->right, output_id);\n                }\n                x = x->next;\n            }\n            ret = simplifier_flush_edges(self, output_id, &num_flushed_edges);\n            if (ret != 0) {\n                goto out;\n            }\n        }\n    }\n    if (youngest_root_time != DBL_MAX) {\n        simplifier_set_edge_sort_offset(self, youngest_root_time);\n    }\nout:\n    return ret;\n}\n\nstatic int 
TSK_WARN_UNUSED\nsimplifier_run(simplifier_t *self, tsk_id_t *node_map)\n{\n    int ret = 0;\n    tsk_size_t j, start;\n    tsk_id_t parent, current_parent;\n    const tsk_edge_table_t *input_edges = &self->input_tables.edges;\n    tsk_size_t num_edges = input_edges->num_rows;\n\n    if (num_edges > 0) {\n        start = 0;\n        current_parent = input_edges->parent[0];\n        for (j = 0; j < num_edges; j++) {\n            parent = input_edges->parent[j];\n            if (parent != current_parent) {\n                ret = simplifier_process_parent_edges(self, current_parent, start, j);\n                if (ret != 0) {\n                    goto out;\n                }\n                current_parent = parent;\n                start = j;\n            }\n        }\n        ret = simplifier_process_parent_edges(self, current_parent, start, num_edges);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    if (self->options & TSK_SIMPLIFY_KEEP_INPUT_ROOTS) {\n        ret = simplifier_insert_input_roots(self);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    ret = simplifier_flush_output(self);\n    if (ret != 0) {\n        goto out;\n    }\n    if (node_map != NULL) {\n        /* Finally, output the new IDs for the nodes, if required. 
*/\n        tsk_memcpy(node_map, self->node_id_map,\n            self->input_tables.nodes.num_rows * sizeof(tsk_id_t));\n    }\n    if (self->edge_sort_offset != TSK_NULL) {\n        tsk_bug_assert(self->options & TSK_SIMPLIFY_KEEP_INPUT_ROOTS);\n        ret = simplifier_sort_edges(self);\n        if (ret != 0) {\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\n/*************************\n * table_collection\n *************************/\n\ntypedef struct {\n    tsk_id_t index;\n    /* These are the sort keys in order */\n    double first;\n    double second;\n    tsk_id_t third;\n    tsk_id_t fourth;\n} index_sort_t;\n\nstatic int\ncmp_index_sort(const void *a, const void *b)\n{\n    const index_sort_t *ca = (const index_sort_t *) a;\n    const index_sort_t *cb = (const index_sort_t *) b;\n    int ret = (ca->first > cb->first) - (ca->first < cb->first);\n    if (ret == 0) {\n        ret = (ca->second > cb->second) - (ca->second < cb->second);\n        if (ret == 0) {\n            ret = (ca->third > cb->third) - (ca->third < cb->third);\n            if (ret == 0) {\n                ret = (ca->fourth > cb->fourth) - (ca->fourth < cb->fourth);\n            }\n        }\n    }\n    return ret;\n}\n\nstatic int\ntsk_table_collection_check_offsets(const tsk_table_collection_t *self)\n{\n    int ret = 0;\n\n    ret = check_offsets(self->nodes.num_rows, self->nodes.metadata_offset,\n        self->nodes.metadata_length, true);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = check_offsets(self->sites.num_rows, self->sites.ancestral_state_offset,\n        self->sites.ancestral_state_length, true);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = check_offsets(self->sites.num_rows, self->sites.metadata_offset,\n        self->sites.metadata_length, true);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = check_offsets(self->mutations.num_rows, self->mutations.derived_state_offset,\n        self->mutations.derived_state_length, 
true);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = check_offsets(self->mutations.num_rows, self->mutations.metadata_offset,\n        self->mutations.metadata_length, true);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = check_offsets(self->individuals.num_rows, self->individuals.metadata_offset,\n        self->individuals.metadata_length, true);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = check_offsets(self->provenances.num_rows, self->provenances.timestamp_offset,\n        self->provenances.timestamp_length, true);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = check_offsets(self->provenances.num_rows, self->provenances.record_offset,\n        self->provenances.record_length, true);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic int\ntsk_table_collection_check_node_integrity(\n    const tsk_table_collection_t *self, tsk_flags_t options)\n{\n    int ret = 0;\n    tsk_size_t j;\n    double node_time;\n    tsk_id_t population, individual;\n    tsk_id_t num_populations = (tsk_id_t) self->populations.num_rows;\n    tsk_id_t num_individuals = (tsk_id_t) self->individuals.num_rows;\n    const bool check_population_refs = !(options & TSK_NO_CHECK_POPULATION_REFS);\n\n    for (j = 0; j < self->nodes.num_rows; j++) {\n        node_time = self->nodes.time[j];\n        if (!tsk_isfinite(node_time)) {\n            ret = tsk_trace_error(TSK_ERR_TIME_NONFINITE);\n            goto out;\n        }\n        if (check_population_refs) {\n            population = self->nodes.population[j];\n            if (population < TSK_NULL || population >= num_populations) {\n                ret = tsk_trace_error(TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n                goto out;\n            }\n        }\n        individual = self->nodes.individual[j];\n        if (individual < TSK_NULL || individual >= num_individuals) {\n            ret = tsk_trace_error(TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n           
 goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int\ntsk_table_collection_check_edge_integrity(\n    const tsk_table_collection_t *self, tsk_flags_t options)\n{\n    int ret = 0;\n    tsk_size_t j;\n    tsk_id_t parent, last_parent, child, last_child;\n    double left, last_left, right;\n    const double *time = self->nodes.time;\n    const double L = self->sequence_length;\n    const tsk_edge_table_t edges = self->edges;\n    const tsk_id_t num_nodes = (tsk_id_t) self->nodes.num_rows;\n    const bool check_ordering = !!(options & TSK_CHECK_EDGE_ORDERING);\n    bool *parent_seen = NULL;\n\n    if (check_ordering) {\n        parent_seen = tsk_calloc((tsk_size_t) num_nodes, sizeof(*parent_seen));\n        if (parent_seen == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n    }\n\n    /* Just keeping compiler happy; these values don't matter. */\n    last_left = 0;\n    last_parent = 0;\n    last_child = 0;\n    for (j = 0; j < edges.num_rows; j++) {\n        parent = edges.parent[j];\n        child = edges.child[j];\n        left = edges.left[j];\n        right = edges.right[j];\n        /* Node ID integrity */\n        if (parent == TSK_NULL) {\n            ret = tsk_trace_error(TSK_ERR_NULL_PARENT);\n            goto out;\n        }\n        if (parent < 0 || parent >= num_nodes) {\n            ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);\n            goto out;\n        }\n        if (child == TSK_NULL) {\n            ret = tsk_trace_error(TSK_ERR_NULL_CHILD);\n            goto out;\n        }\n        if (child < 0 || child >= num_nodes) {\n            ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);\n            goto out;\n        }\n        /* Spatial requirements for edges */\n        if (!(tsk_isfinite(left) && tsk_isfinite(right))) {\n            ret = tsk_trace_error(TSK_ERR_GENOME_COORDS_NONFINITE);\n            goto out;\n        }\n        if (left < 0) {\n            ret = 
tsk_trace_error(TSK_ERR_LEFT_LESS_ZERO);\n            goto out;\n        }\n        if (right > L) {\n            ret = tsk_trace_error(TSK_ERR_RIGHT_GREATER_SEQ_LENGTH);\n            goto out;\n        }\n        if (left >= right) {\n            ret = tsk_trace_error(TSK_ERR_BAD_EDGE_INTERVAL);\n            goto out;\n        }\n        /* time[child] must be < time[parent] */\n        if (time[child] >= time[parent]) {\n            ret = tsk_trace_error(TSK_ERR_BAD_NODE_TIME_ORDERING);\n            goto out;\n        }\n\n        if (check_ordering) {\n            if (parent_seen[parent]) {\n                ret = tsk_trace_error(TSK_ERR_EDGES_NONCONTIGUOUS_PARENTS);\n                goto out;\n            }\n            if (j > 0) {\n                /* Input data must be sorted by (time[parent], parent, child, left). */\n                if (time[parent] < time[last_parent]) {\n                    ret = tsk_trace_error(TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME);\n                    goto out;\n                }\n                if (time[parent] == time[last_parent]) {\n                    if (parent == last_parent) {\n                        if (child < last_child) {\n                            ret = tsk_trace_error(TSK_ERR_EDGES_NOT_SORTED_CHILD);\n                            goto out;\n                        }\n                        if (child == last_child) {\n                            if (left == last_left) {\n                                ret = tsk_trace_error(TSK_ERR_DUPLICATE_EDGES);\n                                goto out;\n                            } else if (left < last_left) {\n                                ret = tsk_trace_error(TSK_ERR_EDGES_NOT_SORTED_LEFT);\n                                goto out;\n                            }\n                        }\n                    } else {\n                        parent_seen[last_parent] = true;\n                    }\n                }\n            }\n            last_parent = parent;\n           
 last_child = child;\n            last_left = left;\n        }\n    }\nout:\n    tsk_safe_free(parent_seen);\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_table_collection_check_site_integrity(\n    const tsk_table_collection_t *self, tsk_flags_t options)\n{\n    int ret = 0;\n    tsk_size_t j;\n    double position;\n    const double L = self->sequence_length;\n    const tsk_site_table_t sites = self->sites;\n    const bool check_site_ordering = !!(options & TSK_CHECK_SITE_ORDERING);\n    const bool check_site_duplicates = !!(options & TSK_CHECK_SITE_DUPLICATES);\n\n    for (j = 0; j < sites.num_rows; j++) {\n        position = sites.position[j];\n        /* Spatial requirements */\n        if (!tsk_isfinite(position)) {\n            ret = tsk_trace_error(TSK_ERR_BAD_SITE_POSITION);\n            goto out;\n        }\n        if (position < 0 || position >= L) {\n            ret = tsk_trace_error(TSK_ERR_BAD_SITE_POSITION);\n            goto out;\n        }\n        if (j > 0) {\n            if (check_site_duplicates && sites.position[j - 1] == position) {\n                ret = tsk_trace_error(TSK_ERR_DUPLICATE_SITE_POSITION);\n                goto out;\n            }\n            if (check_site_ordering && sites.position[j - 1] > position) {\n                ret = tsk_trace_error(TSK_ERR_UNSORTED_SITES);\n                goto out;\n            }\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_table_collection_check_mutation_integrity(\n    const tsk_table_collection_t *self, tsk_flags_t options)\n{\n    int ret = 0;\n    tsk_size_t j;\n    tsk_id_t parent_mut;\n    double mutation_time;\n    double last_known_time = INFINITY;\n    const tsk_mutation_table_t mutations = self->mutations;\n    const tsk_id_t num_nodes = (tsk_id_t) self->nodes.num_rows;\n    const tsk_id_t num_sites = (tsk_id_t) self->sites.num_rows;\n    const tsk_id_t num_mutations = (tsk_id_t) self->mutations.num_rows;\n    const double *node_time = 
self->nodes.time;\n    const bool check_mutation_ordering = !!(options & TSK_CHECK_MUTATION_ORDERING);\n    bool unknown_time;\n    int num_known_times = 0;\n    int num_unknown_times = 0;\n\n    for (j = 0; j < mutations.num_rows; j++) {\n        /* Basic reference integrity */\n        if (mutations.site[j] < 0 || mutations.site[j] >= num_sites) {\n            ret = tsk_trace_error(TSK_ERR_SITE_OUT_OF_BOUNDS);\n            goto out;\n        }\n        if (mutations.node[j] < 0 || mutations.node[j] >= num_nodes) {\n            ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);\n            goto out;\n        }\n        /* Integrity check for mutation parent */\n        parent_mut = mutations.parent[j];\n        if (parent_mut < TSK_NULL || parent_mut >= num_mutations) {\n            ret = tsk_trace_error(TSK_ERR_MUTATION_OUT_OF_BOUNDS);\n            goto out;\n        }\n        if (parent_mut == (tsk_id_t) j) {\n            ret = tsk_trace_error(TSK_ERR_MUTATION_PARENT_EQUAL);\n            goto out;\n        }\n        /* Check that time is finite and not more recent than node time */\n        mutation_time = mutations.time[j];\n        unknown_time = tsk_is_unknown_time(mutation_time);\n        if (!unknown_time) {\n            if (!tsk_isfinite(mutation_time)) {\n                ret = tsk_trace_error(TSK_ERR_TIME_NONFINITE);\n                goto out;\n            }\n            if (mutation_time < node_time[mutations.node[j]]) {\n                ret = tsk_trace_error(TSK_ERR_MUTATION_TIME_YOUNGER_THAN_NODE);\n                goto out;\n            }\n        }\n\n        /* reset checks when reaching a new site */\n        if (j > 0 && mutations.site[j - 1] != mutations.site[j]) {\n            last_known_time = INFINITY;\n            num_known_times = 0;\n            num_unknown_times = 0;\n        }\n\n        /* Check known/unknown times are not both present on a site */\n        if (unknown_time) {\n            num_unknown_times++;\n        } else {\n      
      num_known_times++;\n        }\n        if ((num_unknown_times > 0) && (num_known_times > 0)) {\n            ret = tsk_trace_error(TSK_ERR_MUTATION_TIME_HAS_BOTH_KNOWN_AND_UNKNOWN);\n            goto out;\n        }\n\n        /* check parent site agrees */\n        if (parent_mut != TSK_NULL) {\n            if (mutations.site[parent_mut] != mutations.site[j]) {\n                ret = tsk_trace_error(TSK_ERR_MUTATION_PARENT_DIFFERENT_SITE);\n                goto out;\n            }\n            /* If this mutation time is known, then the parent time\n             * must also be, or else the\n             * TSK_ERR_MUTATION_TIME_HAS_BOTH_KNOWN_AND_UNKNOWN check\n             * above will fail. */\n            if (!unknown_time && mutation_time > mutations.time[parent_mut]) {\n                ret = tsk_trace_error(TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_MUTATION);\n                goto out;\n            }\n        }\n\n        if (check_mutation_ordering) {\n            /* Check site ordering */\n            if (j > 0 && mutations.site[j - 1] > mutations.site[j]) {\n                ret = tsk_trace_error(TSK_ERR_UNSORTED_MUTATIONS);\n                goto out;\n            }\n\n            /* Check if parents are listed before their children */\n            if (parent_mut != TSK_NULL && parent_mut > (tsk_id_t) j) {\n                ret = tsk_trace_error(TSK_ERR_MUTATION_PARENT_AFTER_CHILD);\n                goto out;\n            }\n\n            /* Check time ordering. 
We do this after the other checks above,\n             * so that more specific errors trigger first */\n            if (!unknown_time) {\n                if (mutation_time > last_known_time) {\n                    ret = tsk_trace_error(TSK_ERR_UNSORTED_MUTATIONS);\n                    goto out;\n                }\n                last_known_time = mutation_time;\n            }\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_table_collection_check_migration_integrity(\n    const tsk_table_collection_t *self, tsk_flags_t options)\n{\n    int ret = 0;\n    tsk_size_t j;\n    double left, right, time;\n    const double L = self->sequence_length;\n    const tsk_migration_table_t migrations = self->migrations;\n    const tsk_id_t num_nodes = (tsk_id_t) self->nodes.num_rows;\n    const tsk_id_t num_populations = (tsk_id_t) self->populations.num_rows;\n    const bool check_population_refs = !(options & TSK_NO_CHECK_POPULATION_REFS);\n    const bool check_migration_ordering = !!(options & TSK_CHECK_MIGRATION_ORDERING);\n\n    for (j = 0; j < migrations.num_rows; j++) {\n        if (migrations.node[j] < 0 || migrations.node[j] >= num_nodes) {\n            ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);\n            goto out;\n        }\n        if (check_population_refs) {\n            if (migrations.source[j] < 0 || migrations.source[j] >= num_populations) {\n                ret = tsk_trace_error(TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n                goto out;\n            }\n            if (migrations.dest[j] < 0 || migrations.dest[j] >= num_populations) {\n                ret = tsk_trace_error(TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n                goto out;\n            }\n        }\n        time = migrations.time[j];\n        if (!tsk_isfinite(time)) {\n            ret = tsk_trace_error(TSK_ERR_TIME_NONFINITE);\n            goto out;\n        }\n        if (j > 0) {\n            if (check_migration_ordering && migrations.time[j - 1] > time) 
{\n                ret = tsk_trace_error(TSK_ERR_UNSORTED_MIGRATIONS);\n                goto out;\n            }\n        }\n        left = migrations.left[j];\n        right = migrations.right[j];\n        /* Spatial requirements */\n        /* TODO it's a bit misleading to use the edge-specific errors here. */\n        if (!(tsk_isfinite(left) && tsk_isfinite(right))) {\n            ret = tsk_trace_error(TSK_ERR_GENOME_COORDS_NONFINITE);\n            goto out;\n        }\n        if (left < 0) {\n            ret = tsk_trace_error(TSK_ERR_LEFT_LESS_ZERO);\n            goto out;\n        }\n        if (right > L) {\n            ret = tsk_trace_error(TSK_ERR_RIGHT_GREATER_SEQ_LENGTH);\n            goto out;\n        }\n        if (left >= right) {\n            ret = tsk_trace_error(TSK_ERR_BAD_EDGE_INTERVAL);\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_table_collection_check_individual_integrity(\n    const tsk_table_collection_t *self, tsk_flags_t options)\n{\n    int ret = 0;\n    tsk_size_t j, k;\n    const tsk_individual_table_t individuals = self->individuals;\n    const tsk_id_t num_individuals = (tsk_id_t) individuals.num_rows;\n    const bool check_individual_ordering = options & TSK_CHECK_INDIVIDUAL_ORDERING;\n\n    for (j = 0; j < (tsk_size_t) num_individuals; j++) {\n        for (k = individuals.parents_offset[j]; k < individuals.parents_offset[j + 1];\n            k++) {\n            /* Check parent references are valid */\n            if (individuals.parents[k] != TSK_NULL\n                && (individuals.parents[k] < 0\n                    || individuals.parents[k] >= num_individuals)) {\n                ret = tsk_trace_error(TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);\n                goto out;\n            }\n            /* Check no-one is their own parent */\n            if (individuals.parents[k] == (tsk_id_t) j) {\n                ret = tsk_trace_error(TSK_ERR_INDIVIDUAL_SELF_PARENT);\n              
  goto out;\n            }\n            /* Check parents are ordered */\n            if (check_individual_ordering && individuals.parents[k] != TSK_NULL\n                && individuals.parents[k] >= (tsk_id_t) j) {\n                ret = tsk_trace_error(TSK_ERR_UNSORTED_INDIVIDUALS);\n                goto out;\n            }\n        }\n    }\nout:\n    return ret;\n}\n\nstatic tsk_id_t TSK_WARN_UNUSED\ntsk_table_collection_check_tree_integrity(const tsk_table_collection_t *self)\n{\n    tsk_id_t ret = 0;\n    tsk_size_t j, k;\n    tsk_id_t e, u, site, mutation;\n    double tree_left, tree_right;\n    const double sequence_length = self->sequence_length;\n    const tsk_id_t num_sites = (tsk_id_t) self->sites.num_rows;\n    const tsk_id_t num_mutations = (tsk_id_t) self->mutations.num_rows;\n    const tsk_size_t num_edges = self->edges.num_rows;\n    const double *restrict site_position = self->sites.position;\n    const tsk_id_t *restrict mutation_site = self->mutations.site;\n    const tsk_id_t *restrict mutation_node = self->mutations.node;\n    const double *restrict mutation_time = self->mutations.time;\n    const double *restrict node_time = self->nodes.time;\n    const tsk_id_t *restrict I = self->indexes.edge_insertion_order;\n    const tsk_id_t *restrict O = self->indexes.edge_removal_order;\n    const double *restrict edge_right = self->edges.right;\n    const double *restrict edge_left = self->edges.left;\n    const tsk_id_t *restrict edge_child = self->edges.child;\n    const tsk_id_t *restrict edge_parent = self->edges.parent;\n    tsk_id_t *restrict parent = NULL;\n    int8_t *restrict used_edges = NULL;\n    tsk_id_t num_trees = 0;\n\n    parent = tsk_malloc(self->nodes.num_rows * sizeof(*parent));\n    used_edges = tsk_malloc(num_edges * sizeof(*used_edges));\n    if (parent == NULL || used_edges == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    tsk_memset(parent, 0xff, self->nodes.num_rows * 
sizeof(*parent));\n    tsk_memset(used_edges, 0, num_edges * sizeof(*used_edges));\n\n    tree_left = 0;\n    num_trees = 0;\n    j = 0;\n    k = 0;\n    site = 0;\n    mutation = 0;\n    tsk_bug_assert(I != NULL && O != NULL);\n    tsk_bug_assert(self->indexes.num_edges == num_edges);\n\n    while (j < num_edges || tree_left < sequence_length) {\n        while (k < num_edges && edge_right[O[k]] == tree_left) {\n            e = O[k];\n            if (used_edges[e] != 1) {\n                ret = tsk_trace_error(TSK_ERR_TABLES_BAD_INDEXES);\n                goto out;\n            }\n            parent[edge_child[e]] = TSK_NULL;\n            used_edges[e]++;\n            k++;\n        }\n        while (j < num_edges && edge_left[I[j]] == tree_left) {\n            e = I[j];\n            if (used_edges[e] != 0) {\n                ret = tsk_trace_error(TSK_ERR_TABLES_BAD_INDEXES);\n                goto out;\n            }\n            used_edges[e]++;\n            u = edge_child[e];\n            if (parent[u] != TSK_NULL) {\n                ret = tsk_trace_error(TSK_ERR_BAD_EDGES_CONTRADICTORY_CHILDREN);\n                goto out;\n            }\n            parent[u] = edge_parent[e];\n            j++;\n        }\n        tree_right = sequence_length;\n        if (j < num_edges) {\n            tree_right = TSK_MIN(tree_right, edge_left[I[j]]);\n        }\n        if (k < num_edges) {\n            tree_right = TSK_MIN(tree_right, edge_right[O[k]]);\n        }\n        while (site < num_sites && site_position[site] < tree_right) {\n            while (mutation < num_mutations && mutation_site[mutation] == site) {\n                if (!tsk_is_unknown_time(mutation_time[mutation])\n                    && parent[mutation_node[mutation]] != TSK_NULL\n                    && node_time[parent[mutation_node[mutation]]]\n                           <= mutation_time[mutation]) {\n                    ret = tsk_trace_error(TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_NODE);\n                
    goto out;\n                }\n                mutation++;\n            }\n            site++;\n        }\n        if (tree_right <= tree_left) {\n            ret = tsk_trace_error(TSK_ERR_TABLES_BAD_INDEXES);\n            goto out;\n        }\n        tree_left = tree_right;\n        /* This is technically possible; if we have 2**31 edges each defining\n         * a single tree, and there's a gap between each of these edges we\n         * would overflow this counter. */\n        if (num_trees == TSK_MAX_ID) {\n            ret = tsk_trace_error(TSK_ERR_TREE_OVERFLOW);\n            goto out;\n        }\n        num_trees++;\n    }\n    tsk_bug_assert(j == num_edges);\n    while (k < num_edges) {\n        /* At this point it must be that used_edges[O[k]] == 1,\n         * since otherwise we would have added a different edge twice,\n         * and so hit the error above. */\n        e = O[k];\n        if (edge_right[e] != sequence_length) {\n            ret = tsk_trace_error(TSK_ERR_TABLES_BAD_INDEXES);\n            goto out;\n        }\n        used_edges[e]++;\n        k++;\n    }\n    ret = num_trees;\nout:\n    /* Can't use tsk_safe_free because of restrict*/\n    if (parent != NULL) {\n        free(parent);\n    }\n    if (used_edges != NULL) {\n        free(used_edges);\n    }\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_table_collection_check_index_integrity(const tsk_table_collection_t *self)\n{\n    int ret = 0;\n    tsk_id_t j;\n    const tsk_id_t num_edges = (tsk_id_t) self->edges.num_rows;\n    const tsk_id_t *edge_insertion_order = self->indexes.edge_insertion_order;\n    const tsk_id_t *edge_removal_order = self->indexes.edge_removal_order;\n\n    if (!tsk_table_collection_has_index(self, 0)) {\n        ret = tsk_trace_error(TSK_ERR_TABLES_NOT_INDEXED);\n        goto out;\n    }\n    for (j = 0; j < num_edges; j++) {\n        if (edge_insertion_order[j] < 0 || edge_insertion_order[j] >= num_edges) {\n            ret = 
tsk_trace_error(TSK_ERR_EDGE_OUT_OF_BOUNDS);\n            goto out;\n        }\n        if (edge_removal_order[j] < 0 || edge_removal_order[j] >= num_edges) {\n            ret = tsk_trace_error(TSK_ERR_EDGE_OUT_OF_BOUNDS);\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_table_collection_compute_mutation_parents_to_array(\n    const tsk_table_collection_t *self, tsk_id_t *mutation_parent)\n{\n    int ret = 0;\n    const tsk_id_t *I, *O;\n    const tsk_edge_table_t edges = self->edges;\n    const tsk_node_table_t nodes = self->nodes;\n    const tsk_site_table_t sites = self->sites;\n    const tsk_mutation_table_t mutations = self->mutations;\n    const tsk_id_t M = (tsk_id_t) edges.num_rows;\n    tsk_id_t tj, tk;\n    tsk_id_t *parent = NULL;\n    tsk_id_t *bottom_mutation = NULL;\n    tsk_id_t u;\n    double left, right;\n    tsk_id_t site;\n    /* Using unsigned values here avoids potentially undefined behaviour */\n    tsk_size_t j, mutation, first_mutation;\n\n    parent = tsk_malloc(nodes.num_rows * sizeof(*parent));\n    bottom_mutation = tsk_malloc(nodes.num_rows * sizeof(*bottom_mutation));\n    if (parent == NULL || bottom_mutation == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    tsk_memset(parent, 0xff, nodes.num_rows * sizeof(*parent));\n    tsk_memset(bottom_mutation, 0xff, nodes.num_rows * sizeof(*bottom_mutation));\n    tsk_memset(mutation_parent, 0xff, self->mutations.num_rows * sizeof(tsk_id_t));\n\n    I = self->indexes.edge_insertion_order;\n    O = self->indexes.edge_removal_order;\n    tj = 0;\n    tk = 0;\n    site = 0;\n    mutation = 0;\n    left = 0;\n    while (tj < M || left < self->sequence_length) {\n        while (tk < M && edges.right[O[tk]] == left) {\n            parent[edges.child[O[tk]]] = TSK_NULL;\n            tk++;\n        }\n        while (tj < M && edges.left[I[tj]] == left) {\n            parent[edges.child[I[tj]]] = 
edges.parent[I[tj]];\n            tj++;\n        }\n        right = self->sequence_length;\n        if (tj < M) {\n            right = TSK_MIN(right, edges.left[I[tj]]);\n        }\n        if (tk < M) {\n            right = TSK_MIN(right, edges.right[O[tk]]);\n        }\n\n        /* Tree is now ready. We look at each site on this tree in turn */\n        while (site < (tsk_id_t) sites.num_rows && sites.position[site] < right) {\n            /* Create a mapping from mutations to nodes. If we see more than one\n             * mutation at a node, the previously seen one must be the parent\n             * of the current since we assume they are in order. */\n            first_mutation = mutation;\n            while (mutation < mutations.num_rows && mutations.site[mutation] == site) {\n                u = mutations.node[mutation];\n                if (bottom_mutation[u] != TSK_NULL) {\n                    mutation_parent[mutation] = bottom_mutation[u];\n                }\n                bottom_mutation[u] = (tsk_id_t) mutation;\n                mutation++;\n            }\n            /* Make the common case of 1 mutation fast */\n            if (mutation > first_mutation + 1) {\n                /* If we have more than one mutation, compute the parent for each\n                 * one by traversing up the tree until we find a node that has a\n                 * mutation. 
*/\n                for (j = first_mutation; j < mutation; j++) {\n                    if (mutation_parent[j] == TSK_NULL) {\n                        u = parent[mutations.node[j]];\n                        while (u != TSK_NULL && bottom_mutation[u] == TSK_NULL) {\n                            u = parent[u];\n                        }\n                        if (u != TSK_NULL) {\n                            mutation_parent[j] = bottom_mutation[u];\n                        }\n                    }\n                }\n            }\n            /* Reset the mapping for the next site */\n            for (j = first_mutation; j < mutation; j++) {\n                u = mutations.node[j];\n                bottom_mutation[u] = TSK_NULL;\n                /* Check that we haven't violated the sortedness property */\n                if (mutation_parent[j] > (tsk_id_t) j) {\n                    ret = tsk_trace_error(TSK_ERR_MUTATION_PARENT_AFTER_CHILD);\n                    goto out;\n                }\n            }\n            site++;\n        }\n        /* Move on to the next tree */\n        left = right;\n    }\n\nout:\n    tsk_safe_free(parent);\n    tsk_safe_free(bottom_mutation);\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_table_collection_check_mutation_parents(const tsk_table_collection_t *self)\n{\n    int ret = 0;\n    tsk_mutation_table_t mutations = self->mutations;\n    tsk_id_t *new_parents = NULL;\n    tsk_size_t j;\n\n    if (mutations.num_rows == 0) {\n        return ret;\n    }\n\n    new_parents = tsk_malloc(mutations.num_rows * sizeof(*new_parents));\n    if (new_parents == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    ret = tsk_table_collection_compute_mutation_parents_to_array(self, new_parents);\n    if (ret != 0) {\n        goto out;\n    }\n\n    for (j = 0; j < mutations.num_rows; j++) {\n        if (mutations.parent[j] != new_parents[j]) {\n            ret = 
tsk_trace_error(TSK_ERR_BAD_MUTATION_PARENT);\n            goto out;\n        }\n    }\n\nout:\n    tsk_safe_free(new_parents);\n    return ret;\n}\n\ntsk_id_t TSK_WARN_UNUSED\ntsk_table_collection_check_integrity(\n    const tsk_table_collection_t *self, tsk_flags_t options)\n{\n    tsk_id_t ret = 0;\n    int mut_ret = 0;\n\n    if (options & TSK_CHECK_MUTATION_PARENTS) {\n        /* If we're checking mutation parents, we need to check the trees first */\n        options |= TSK_CHECK_TREES;\n    }\n\n    if (options & TSK_CHECK_TREES) {\n        /* Checking the trees implies these checks */\n        options |= TSK_CHECK_EDGE_ORDERING | TSK_CHECK_SITE_ORDERING\n                   | TSK_CHECK_SITE_DUPLICATES | TSK_CHECK_MUTATION_ORDERING\n                   | TSK_CHECK_MIGRATION_ORDERING | TSK_CHECK_INDEXES;\n    }\n\n    if (!tsk_isfinite(self->sequence_length) || self->sequence_length <= 0) {\n        ret = tsk_trace_error(TSK_ERR_BAD_SEQUENCE_LENGTH);\n        goto out;\n    }\n    ret = tsk_table_collection_check_offsets(self);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_table_collection_check_node_integrity(self, options);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_table_collection_check_edge_integrity(self, options);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_table_collection_check_site_integrity(self, options);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_table_collection_check_mutation_integrity(self, options);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_table_collection_check_migration_integrity(self, options);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_table_collection_check_individual_integrity(self, options);\n    if (ret != 0) {\n        goto out;\n    }\n\n    if (options & TSK_CHECK_INDEXES) {\n        ret = tsk_table_collection_check_index_integrity(self);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    if (options & 
TSK_CHECK_TREES) {\n        ret = tsk_table_collection_check_tree_integrity(self);\n        if (ret < 0) {\n            goto out;\n        }\n        /* This check requires tree integrity so do it last */\n        if (options & TSK_CHECK_MUTATION_PARENTS) {\n            mut_ret = tsk_table_collection_check_mutation_parents(self);\n            if (mut_ret != 0) {\n                ret = mut_ret;\n                goto out;\n            }\n        }\n    }\nout:\n    return ret;\n}\n\nvoid\ntsk_table_collection_print_state(const tsk_table_collection_t *self, FILE *out)\n{\n    fprintf(out, \"Table collection state\\n\");\n    fprintf(out, \"sequence_length = %f\\n\", self->sequence_length);\n\n    write_metadata_schema_header(\n        out, self->metadata_schema, self->metadata_schema_length);\n    fprintf(out, \"#metadata#\\n\");\n    fprintf(out, \"%.*s\\n\", (int) self->metadata_length, self->metadata);\n    fprintf(out, \"#end#metadata\\n\");\n    fprintf(out, \"#time_units#\\n\");\n    fprintf(out, \"%.*s\\n\", (int) self->time_units_length, self->time_units);\n    fprintf(out, \"#end#time_units\\n\");\n    tsk_individual_table_print_state(&self->individuals, out);\n    tsk_node_table_print_state(&self->nodes, out);\n    tsk_edge_table_print_state(&self->edges, out);\n    tsk_migration_table_print_state(&self->migrations, out);\n    tsk_site_table_print_state(&self->sites, out);\n    tsk_mutation_table_print_state(&self->mutations, out);\n    tsk_population_table_print_state(&self->populations, out);\n    tsk_provenance_table_print_state(&self->provenances, out);\n}\n\nint TSK_WARN_UNUSED\ntsk_table_collection_init(tsk_table_collection_t *self, tsk_flags_t options)\n{\n    int ret = 0;\n    tsk_flags_t edge_options = 0;\n\n    tsk_memset(self, 0, sizeof(*self));\n    if (options & TSK_TC_NO_EDGE_METADATA) {\n        edge_options |= TSK_TABLE_NO_METADATA;\n    }\n\n    /* Set default time_units value */\n    ret = tsk_table_collection_set_time_units(\n        self, 
TSK_TIME_UNITS_UNKNOWN, strlen(TSK_TIME_UNITS_UNKNOWN));\n    if (ret != 0) {\n        goto out;\n    }\n\n    ret = tsk_node_table_init(&self->nodes, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_edge_table_init(&self->edges, edge_options);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_migration_table_init(&self->migrations, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_site_table_init(&self->sites, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_mutation_table_init(&self->mutations, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_individual_table_init(&self->individuals, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_population_table_init(&self->populations, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_provenance_table_init(&self->provenances, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_reference_sequence_init(&self->reference_sequence, 0);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nint\ntsk_table_collection_free(tsk_table_collection_t *self)\n{\n    tsk_individual_table_free(&self->individuals);\n    tsk_node_table_free(&self->nodes);\n    tsk_edge_table_free(&self->edges);\n    tsk_migration_table_free(&self->migrations);\n    tsk_site_table_free(&self->sites);\n    tsk_mutation_table_free(&self->mutations);\n    tsk_population_table_free(&self->populations);\n    tsk_provenance_table_free(&self->provenances);\n    tsk_reference_sequence_free(&self->reference_sequence);\n    tsk_safe_free(self->indexes.edge_insertion_order);\n    tsk_safe_free(self->indexes.edge_removal_order);\n    tsk_safe_free(self->file_uuid);\n    tsk_safe_free(self->time_units);\n    tsk_safe_free(self->metadata);\n    tsk_safe_free(self->metadata_schema);\n    return 0;\n}\n\nbool\ntsk_table_collection_equals(const tsk_table_collection_t *self,\n    const tsk_table_collection_t *other, tsk_flags_t 
options)\n{\n    bool ret = self->sequence_length == other->sequence_length\n               && self->time_units_length == other->time_units_length\n               && tsk_memcmp(self->time_units, other->time_units,\n                      self->time_units_length * sizeof(char))\n                      == 0;\n    if (!(options & TSK_CMP_IGNORE_TABLES)) {\n        ret = ret\n              && tsk_individual_table_equals(\n                  &self->individuals, &other->individuals, options)\n              && tsk_node_table_equals(&self->nodes, &other->nodes, options)\n              && tsk_edge_table_equals(&self->edges, &other->edges, options)\n              && tsk_migration_table_equals(\n                  &self->migrations, &other->migrations, options)\n              && tsk_site_table_equals(&self->sites, &other->sites, options)\n              && tsk_mutation_table_equals(&self->mutations, &other->mutations, options)\n              && tsk_population_table_equals(\n                  &self->populations, &other->populations, options);\n        /* TSK_CMP_IGNORE_TABLES implies TSK_CMP_IGNORE_PROVENANCE */\n        if (!(options & TSK_CMP_IGNORE_PROVENANCE)) {\n            ret = ret\n                  && tsk_provenance_table_equals(\n                      &self->provenances, &other->provenances, options);\n        }\n    }\n    /* TSK_CMP_IGNORE_TS_METADATA is implied by TSK_CMP_IGNORE_METADATA */\n    if (options & TSK_CMP_IGNORE_METADATA) {\n        options |= TSK_CMP_IGNORE_TS_METADATA;\n    }\n    if (!(options & TSK_CMP_IGNORE_TS_METADATA)) {\n        ret = ret\n              && (self->metadata_length == other->metadata_length\n                  && self->metadata_schema_length == other->metadata_schema_length\n                  && tsk_memcmp(self->metadata, other->metadata,\n                         self->metadata_length * sizeof(char))\n                         == 0\n                  && tsk_memcmp(self->metadata_schema, other->metadata_schema,\n                         
self->metadata_schema_length * sizeof(char))\n                         == 0);\n    }\n\n    if (!(options & TSK_CMP_IGNORE_REFERENCE_SEQUENCE)) {\n        ret = ret\n              && tsk_reference_sequence_equals(\n                  &self->reference_sequence, &other->reference_sequence, options);\n    }\n    return ret;\n}\n\nint\ntsk_table_collection_set_time_units(\n    tsk_table_collection_t *self, const char *time_units, tsk_size_t time_units_length)\n{\n    return replace_string(\n        &self->time_units, &self->time_units_length, time_units, time_units_length);\n}\n\nint\ntsk_table_collection_set_metadata(\n    tsk_table_collection_t *self, const char *metadata, tsk_size_t metadata_length)\n{\n    return replace_string(\n        &self->metadata, &self->metadata_length, metadata, metadata_length);\n}\n\nint\ntsk_table_collection_takeset_metadata(\n    tsk_table_collection_t *self, char *metadata, tsk_size_t metadata_length)\n{\n    return takeset_string(\n        &self->metadata, &self->metadata_length, metadata, metadata_length);\n}\n\nint\ntsk_table_collection_set_metadata_schema(tsk_table_collection_t *self,\n    const char *metadata_schema, tsk_size_t metadata_schema_length)\n{\n    return replace_string(&self->metadata_schema, &self->metadata_schema_length,\n        metadata_schema, metadata_schema_length);\n}\n\nint\ntsk_table_collection_set_indexes(tsk_table_collection_t *self,\n    tsk_id_t *edge_insertion_order, tsk_id_t *edge_removal_order)\n{\n    int ret = 0;\n    tsk_size_t index_size = self->edges.num_rows * sizeof(tsk_id_t);\n\n    tsk_table_collection_drop_index(self, 0);\n    self->indexes.edge_insertion_order = tsk_malloc(index_size);\n    self->indexes.edge_removal_order = tsk_malloc(index_size);\n    if (self->indexes.edge_insertion_order == NULL\n        || self->indexes.edge_removal_order == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    tsk_memcpy(self->indexes.edge_insertion_order, 
edge_insertion_order, index_size);\n    tsk_memcpy(self->indexes.edge_removal_order, edge_removal_order, index_size);\n    self->indexes.num_edges = self->edges.num_rows;\nout:\n    return ret;\n}\n\nint\ntsk_table_collection_takeset_indexes(tsk_table_collection_t *self,\n    tsk_id_t *edge_insertion_order, tsk_id_t *edge_removal_order)\n{\n    int ret = 0;\n\n    if (edge_insertion_order == NULL || edge_removal_order == NULL) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    tsk_table_collection_drop_index(self, 0);\n    self->indexes.edge_insertion_order = edge_insertion_order;\n    self->indexes.edge_removal_order = edge_removal_order;\n    self->indexes.num_edges = self->edges.num_rows;\nout:\n    return ret;\n}\n\nbool\ntsk_table_collection_has_index(\n    const tsk_table_collection_t *self, tsk_flags_t TSK_UNUSED(options))\n{\n    return self->indexes.edge_insertion_order != NULL\n           && self->indexes.edge_removal_order != NULL\n           && self->indexes.num_edges == self->edges.num_rows;\n}\n\nbool\ntsk_table_collection_has_reference_sequence(const tsk_table_collection_t *self)\n{\n    return !tsk_reference_sequence_is_null(&self->reference_sequence);\n}\n\nint\ntsk_table_collection_drop_index(\n    tsk_table_collection_t *self, tsk_flags_t TSK_UNUSED(options))\n{\n    tsk_safe_free(self->indexes.edge_insertion_order);\n    tsk_safe_free(self->indexes.edge_removal_order);\n    self->indexes.edge_insertion_order = NULL;\n    self->indexes.edge_removal_order = NULL;\n    self->indexes.num_edges = 0;\n    return 0;\n}\n\nint TSK_WARN_UNUSED\ntsk_table_collection_build_index(\n    tsk_table_collection_t *self, tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = TSK_ERR_GENERIC;\n    tsk_id_t ret_id;\n    tsk_size_t j;\n    double *time = self->nodes.time;\n    index_sort_t *sort_buff = NULL;\n    tsk_id_t parent;\n\n    /* For build indexes to make sense we must have referential integrity and\n     * sorted edges 
*/\n    ret_id = tsk_table_collection_check_integrity(self, TSK_CHECK_EDGE_ORDERING);\n    if (ret_id != 0) {\n        ret = (int) ret_id;\n        goto out;\n    }\n\n    tsk_table_collection_drop_index(self, 0);\n    self->indexes.edge_insertion_order\n        = tsk_malloc(self->edges.num_rows * sizeof(tsk_id_t));\n    self->indexes.edge_removal_order\n        = tsk_malloc(self->edges.num_rows * sizeof(tsk_id_t));\n    sort_buff = tsk_malloc(self->edges.num_rows * sizeof(index_sort_t));\n    if (self->indexes.edge_insertion_order == NULL\n        || self->indexes.edge_removal_order == NULL || sort_buff == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    /* sort by left and increasing time to give us the order in which\n     * records should be inserted */\n    for (j = 0; j < self->edges.num_rows; j++) {\n        sort_buff[j].index = (tsk_id_t) j;\n        sort_buff[j].first = self->edges.left[j];\n        parent = self->edges.parent[j];\n        sort_buff[j].second = time[parent];\n        sort_buff[j].third = parent;\n        sort_buff[j].fourth = self->edges.child[j];\n    }\n    qsort(\n        sort_buff, (size_t) self->edges.num_rows, sizeof(index_sort_t), cmp_index_sort);\n    for (j = 0; j < self->edges.num_rows; j++) {\n        self->indexes.edge_insertion_order[j] = sort_buff[j].index;\n    }\n    /* sort by right and decreasing parent time to give us the order in which\n     * records should be removed. 
*/\n    for (j = 0; j < self->edges.num_rows; j++) {\n        sort_buff[j].index = (tsk_id_t) j;\n        sort_buff[j].first = self->edges.right[j];\n        parent = self->edges.parent[j];\n        sort_buff[j].second = -time[parent];\n        sort_buff[j].third = -parent;\n        sort_buff[j].fourth = -self->edges.child[j];\n    }\n    qsort(\n        sort_buff, (size_t) self->edges.num_rows, sizeof(index_sort_t), cmp_index_sort);\n    for (j = 0; j < self->edges.num_rows; j++) {\n        self->indexes.edge_removal_order[j] = sort_buff[j].index;\n    }\n    self->indexes.num_edges = self->edges.num_rows;\n    ret = 0;\nout:\n    tsk_safe_free(sort_buff);\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_table_collection_set_file_uuid(tsk_table_collection_t *self, const char *uuid)\n{\n    int ret = 0;\n\n    tsk_safe_free(self->file_uuid);\n    self->file_uuid = NULL;\n\n    if (uuid != NULL) {\n        /* Allow space for \\0 so we can print it as a string */\n        self->file_uuid = tsk_malloc(TSK_UUID_SIZE + 1);\n        if (self->file_uuid == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n        tsk_memcpy(self->file_uuid, uuid, TSK_UUID_SIZE);\n        self->file_uuid[TSK_UUID_SIZE] = '\\0';\n    }\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_table_collection_copy(const tsk_table_collection_t *self,\n    tsk_table_collection_t *dest, tsk_flags_t options)\n{\n    int ret = 0;\n\n    if (!(options & TSK_NO_INIT)) {\n        ret = tsk_table_collection_init(dest, options);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    ret = tsk_node_table_copy(&self->nodes, &dest->nodes, TSK_NO_INIT);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_edge_table_copy(&self->edges, &dest->edges, TSK_NO_INIT);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_migration_table_copy(&self->migrations, &dest->migrations, TSK_NO_INIT);\n    if (ret != 0) {\n        goto 
out;\n    }\n    ret = tsk_site_table_copy(&self->sites, &dest->sites, TSK_NO_INIT);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_mutation_table_copy(&self->mutations, &dest->mutations, TSK_NO_INIT);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_individual_table_copy(&self->individuals, &dest->individuals, TSK_NO_INIT);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_population_table_copy(&self->populations, &dest->populations, TSK_NO_INIT);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_provenance_table_copy(&self->provenances, &dest->provenances, TSK_NO_INIT);\n    if (ret != 0) {\n        goto out;\n    }\n    dest->sequence_length = self->sequence_length;\n    if (tsk_table_collection_has_index(self, 0)) {\n        ret = tsk_table_collection_set_indexes(\n            dest, self->indexes.edge_insertion_order, self->indexes.edge_removal_order);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    ret = tsk_table_collection_set_time_units(\n        dest, self->time_units, self->time_units_length);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_table_collection_set_metadata(dest, self->metadata, self->metadata_length);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_table_collection_set_metadata_schema(\n        dest, self->metadata_schema, self->metadata_schema_length);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_reference_sequence_copy(\n        &self->reference_sequence, &dest->reference_sequence, options);\n    if (ret != 0) {\n        goto out;\n    }\n    if (options & TSK_COPY_FILE_UUID) {\n        /* The UUID should only be generated on writing to a file (see the call\n         * to generate_uuid in tsk_table_collection_write_format_data) and\n         * no other writing access is supported. We only read the value from\n         * the file, and raise an error if it's the wrong length there. 
Thus,\n         * finding a UUID value of any other length here is undefined behaviour.\n         */\n        tsk_bug_assert(\n            self->file_uuid == NULL || strlen(self->file_uuid) == TSK_UUID_SIZE);\n        ret = tsk_table_collection_set_file_uuid(dest, self->file_uuid);\n        if (ret != 0) {\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_table_collection_read_format_data(tsk_table_collection_t *self, kastore_t *store)\n{\n    int ret = 0;\n    size_t len;\n    uint32_t *version = NULL;\n    int8_t *format_name = NULL;\n    int8_t *uuid = NULL;\n    double *L = NULL;\n    char *time_units = NULL;\n    char *metadata = NULL;\n    char *metadata_schema = NULL;\n    size_t time_units_length, metadata_length, metadata_schema_length;\n    /* TODO we could simplify this function quite a bit if we use the\n     * read_table_properties infrastructure. We would need to add the\n     * ability to have non-optional columns to that though. 
*/\n\n    ret = kastore_gets_int8(store, \"format/name\", &format_name, &len);\n    if (ret != 0) {\n        ret = tsk_set_kas_error(ret);\n        goto out;\n    }\n    if (len != TSK_FILE_FORMAT_NAME_LENGTH) {\n        ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);\n        goto out;\n    }\n    if (tsk_memcmp(TSK_FILE_FORMAT_NAME, format_name, TSK_FILE_FORMAT_NAME_LENGTH)\n        != 0) {\n        ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);\n        goto out;\n    }\n\n    ret = kastore_gets_uint32(store, \"format/version\", &version, &len);\n    if (ret != 0) {\n        ret = tsk_set_kas_error(ret);\n        goto out;\n    }\n    if (len != 2) {\n        ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);\n        goto out;\n    }\n    if (version[0] < TSK_FILE_FORMAT_VERSION_MAJOR) {\n        ret = tsk_trace_error(TSK_ERR_FILE_VERSION_TOO_OLD);\n        goto out;\n    }\n    if (version[0] > TSK_FILE_FORMAT_VERSION_MAJOR) {\n        ret = tsk_trace_error(TSK_ERR_FILE_VERSION_TOO_NEW);\n        goto out;\n    }\n\n    ret = kastore_gets_float64(store, \"sequence_length\", &L, &len);\n    if (ret != 0) {\n        ret = tsk_set_kas_error(ret);\n        goto out;\n    }\n    if (len != 1) {\n        ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);\n        goto out;\n    }\n    if (L[0] <= 0.0) {\n        ret = tsk_trace_error(TSK_ERR_BAD_SEQUENCE_LENGTH);\n        goto out;\n    }\n    self->sequence_length = L[0];\n\n    ret = kastore_gets_int8(store, \"uuid\", &uuid, &len);\n    if (ret != 0) {\n        ret = tsk_set_kas_error(ret);\n        goto out;\n    }\n    if (len != TSK_UUID_SIZE) {\n        ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);\n        goto out;\n    }\n    ret = tsk_table_collection_set_file_uuid(self, (const char *) uuid);\n    if (ret != 0) {\n        goto out;\n    }\n\n    ret = kastore_containss(store, \"time_units\");\n    if (ret < 0) {\n        ret = tsk_set_kas_error(ret);\n        goto out;\n    }\n    if (ret == 1) {\n        ret = 
kastore_gets_int8(\n            store, \"time_units\", (int8_t **) &time_units, &time_units_length);\n        if (ret != 0) {\n            ret = tsk_set_kas_error(ret);\n            goto out;\n        }\n        ret = tsk_table_collection_set_time_units(\n            self, time_units, (tsk_size_t) time_units_length);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    ret = kastore_containss(store, \"metadata\");\n    if (ret < 0) {\n        ret = tsk_set_kas_error(ret);\n        goto out;\n    }\n    if (ret == 1) {\n        ret = kastore_gets_int8(\n            store, \"metadata\", (int8_t **) &metadata, &metadata_length);\n        if (ret != 0) {\n            ret = tsk_set_kas_error(ret);\n            goto out;\n        }\n        ret = tsk_table_collection_takeset_metadata(\n            self, metadata, (tsk_size_t) metadata_length);\n        if (ret != 0) {\n            goto out;\n        }\n        metadata = NULL;\n    }\n\n    ret = kastore_containss(store, \"metadata_schema\");\n    if (ret < 0) {\n        ret = tsk_set_kas_error(ret);\n        goto out;\n    }\n    if (ret == 1) {\n        ret = kastore_gets_int8(store, \"metadata_schema\", (int8_t **) &metadata_schema,\n            (size_t *) &metadata_schema_length);\n        if (ret != 0) {\n            ret = tsk_set_kas_error(ret);\n            goto out;\n        }\n        ret = tsk_table_collection_set_metadata_schema(\n            self, metadata_schema, (tsk_size_t) metadata_schema_length);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n\nout:\n    if ((ret ^ (1 << TSK_KAS_ERR_BIT)) == KAS_ERR_KEY_NOT_FOUND) {\n        ret = tsk_trace_error(TSK_ERR_REQUIRED_COL_NOT_FOUND);\n    }\n    tsk_safe_free(version);\n    tsk_safe_free(format_name);\n    tsk_safe_free(uuid);\n    tsk_safe_free(L);\n    tsk_safe_free(time_units);\n    tsk_safe_free(metadata_schema);\n    tsk_safe_free(metadata);\n    return ret;\n}\n\nstatic int 
TSK_WARN_UNUSED\ntsk_table_collection_dump_indexes(const tsk_table_collection_t *self, kastore_t *store,\n    tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = 0;\n    write_table_col_t cols[] = {\n        { \"indexes/edge_insertion_order\", NULL, self->indexes.num_edges,\n            TSK_ID_STORAGE_TYPE },\n        { \"indexes/edge_removal_order\", NULL, self->indexes.num_edges,\n            TSK_ID_STORAGE_TYPE },\n        { .name = NULL },\n    };\n\n    if (tsk_table_collection_has_index(self, 0)) {\n        cols[0].array = self->indexes.edge_insertion_order;\n        cols[1].array = self->indexes.edge_removal_order;\n        ret = write_table_cols(store, cols, 0);\n    }\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_table_collection_load_indexes(tsk_table_collection_t *self, kastore_t *store)\n{\n    int ret = 0;\n    tsk_id_t *edge_insertion_order = NULL;\n    tsk_id_t *edge_removal_order = NULL;\n    tsk_size_t num_rows;\n\n    read_table_col_t cols[] = {\n        { \"indexes/edge_insertion_order\", (void **) &edge_insertion_order,\n            TSK_ID_STORAGE_TYPE, TSK_COL_OPTIONAL },\n        { \"indexes/edge_removal_order\", (void **) &edge_removal_order,\n            TSK_ID_STORAGE_TYPE, TSK_COL_OPTIONAL },\n        { .name = NULL },\n    };\n\n    num_rows = TSK_NUM_ROWS_UNSET;\n    ret = read_table_cols(store, &num_rows, cols, 0);\n    if (ret != 0) {\n        goto out;\n    }\n\n    if ((edge_insertion_order == NULL) != (edge_removal_order == NULL)) {\n        ret = tsk_trace_error(TSK_ERR_BOTH_COLUMNS_REQUIRED);\n        goto out;\n    }\n    if (edge_insertion_order != NULL) {\n        if (num_rows != self->edges.num_rows) {\n            ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);\n            goto out;\n        }\n        ret = tsk_table_collection_takeset_indexes(\n            self, edge_insertion_order, edge_removal_order);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    edge_insertion_order = NULL;\n    
edge_removal_order = NULL;\nout:\n    tsk_safe_free(edge_insertion_order);\n    tsk_safe_free(edge_removal_order);\n    return ret;\n}\n\nstatic int\ntsk_table_collection_load_reference_sequence(\n    tsk_table_collection_t *self, kastore_t *store)\n{\n    int ret = 0;\n    char *data = NULL;\n    char *url = NULL;\n    char *metadata = NULL;\n    char *metadata_schema = NULL;\n    tsk_size_t data_length = 0, url_length, metadata_length, metadata_schema_length;\n\n    read_table_property_t properties[] = {\n        { \"reference_sequence/data\", (void **) &data, &data_length, KAS_UINT8,\n            TSK_COL_OPTIONAL },\n        { \"reference_sequence/url\", (void **) &url, &url_length, KAS_UINT8,\n            TSK_COL_OPTIONAL },\n        { \"reference_sequence/metadata\", (void **) &metadata, &metadata_length,\n            KAS_UINT8, TSK_COL_OPTIONAL },\n        { \"reference_sequence/metadata_schema\", (void **) &metadata_schema,\n            &metadata_schema_length, KAS_UINT8, TSK_COL_OPTIONAL },\n        { .name = NULL },\n    };\n\n    ret = read_table_properties(store, properties, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    if (data != NULL) {\n        ret = tsk_reference_sequence_takeset_data(\n            &self->reference_sequence, data, (tsk_size_t) data_length);\n        if (ret != 0) {\n            goto out;\n        }\n        data = NULL;\n    }\n    if (metadata != NULL) {\n        ret = tsk_reference_sequence_takeset_metadata(\n            &self->reference_sequence, metadata, (tsk_size_t) metadata_length);\n        if (ret != 0) {\n            goto out;\n        }\n        metadata = NULL;\n    }\n    if (metadata_schema != NULL) {\n        ret = tsk_reference_sequence_set_metadata_schema(&self->reference_sequence,\n            metadata_schema, (tsk_size_t) metadata_schema_length);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    if (url != NULL) {\n        ret = tsk_reference_sequence_set_url(\n            
&self->reference_sequence, url, (tsk_size_t) url_length);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n\nout:\n    free_read_table_mem(NULL, NULL, properties);\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_table_collection_loadf_inited(\n    tsk_table_collection_t *self, FILE *file, tsk_flags_t options)\n{\n    int ret = 0;\n    kastore_t store;\n\n    int kas_flags = KAS_READ_ALL;\n    if ((options & TSK_LOAD_SKIP_TABLES)\n        || (options & TSK_LOAD_SKIP_REFERENCE_SEQUENCE)) {\n        kas_flags = 0;\n    }\n    kas_flags = kas_flags | KAS_GET_TAKES_OWNERSHIP;\n    ret = kastore_openf(&store, file, \"r\", kas_flags);\n\n    if (ret != 0) {\n        if (ret == KAS_ERR_EOF) {\n            /* KAS_ERR_EOF means that we tried to read a store from the stream\n             * and we hit EOF immediately without reading any bytes. We signal\n             * this back to the client, which allows it to read an indefinite\n             * number of stores from a stream */\n            ret = tsk_trace_error(TSK_ERR_EOF);\n        } else {\n            ret = tsk_set_kas_error(ret);\n        }\n        goto out;\n    }\n    ret = tsk_table_collection_read_format_data(self, &store);\n    if (ret != 0) {\n        goto out;\n    }\n    if (!(options & TSK_LOAD_SKIP_TABLES)) {\n        ret = tsk_node_table_load(&self->nodes, &store);\n        if (ret != 0) {\n            goto out;\n        }\n        ret = tsk_edge_table_load(&self->edges, &store);\n        if (ret != 0) {\n            goto out;\n        }\n        ret = tsk_site_table_load(&self->sites, &store);\n        if (ret != 0) {\n            goto out;\n        }\n        ret = tsk_mutation_table_load(&self->mutations, &store);\n        if (ret != 0) {\n            goto out;\n        }\n        ret = tsk_migration_table_load(&self->migrations, &store);\n        if (ret != 0) {\n            goto out;\n        }\n        ret = tsk_individual_table_load(&self->individuals, &store);\n        if 
(ret != 0) {\n            goto out;\n        }\n        ret = tsk_population_table_load(&self->populations, &store);\n        if (ret != 0) {\n            goto out;\n        }\n        ret = tsk_provenance_table_load(&self->provenances, &store);\n        if (ret != 0) {\n            goto out;\n        }\n        ret = tsk_table_collection_load_indexes(self, &store);\n        if (ret != 0) {\n            goto out;\n        }\n    } else {\n        ret = tsk_table_collection_build_index(self, 0);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    if (!(options & TSK_LOAD_SKIP_REFERENCE_SEQUENCE)) {\n        ret = tsk_table_collection_load_reference_sequence(self, &store);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    ret = kastore_close(&store);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    /* If we're exiting on an error, we ignore any further errors that might come\n     * from kastore. In the nominal case, closing an already-closed store is a\n     * safe noop */\n    kastore_close(&store);\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_table_collection_loadf(tsk_table_collection_t *self, FILE *file, tsk_flags_t options)\n{\n    int ret = 0;\n\n    if (!(options & TSK_NO_INIT)) {\n        ret = tsk_table_collection_init(self, options);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    ret = tsk_table_collection_loadf_inited(self, file, options);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_table_collection_load(\n    tsk_table_collection_t *self, const char *filename, tsk_flags_t options)\n{\n    int ret = 0;\n    FILE *file = NULL;\n\n    if (!(options & TSK_NO_INIT)) {\n        ret = tsk_table_collection_init(self, options);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    file = fopen(filename, \"rb\");\n    if (file == NULL) {\n        ret = tsk_trace_error(TSK_ERR_IO);\n        goto out;\n    }\n    ret = 
tsk_table_collection_loadf_inited(self, file, options);\n    if (ret != 0) {\n        goto out;\n    }\n    if (fclose(file) != 0) {\n        ret = tsk_trace_error(TSK_ERR_IO);\n        goto out;\n    }\n    file = NULL;\nout:\n    if (file != NULL) {\n        /* Ignore any additional errors we might get when closing the file\n         * in error conditions */\n        fclose(file);\n    }\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_table_collection_dump_reference_sequence(const tsk_table_collection_t *self,\n    kastore_t *store, tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = 0;\n    const tsk_reference_sequence_t *ref = &self->reference_sequence;\n    write_table_col_t write_cols[] = {\n        { \"reference_sequence/data\", (void *) ref->data, ref->data_length, KAS_UINT8 },\n        { \"reference_sequence/url\", (void *) ref->url, ref->url_length, KAS_UINT8 },\n        { \"reference_sequence/metadata\", (void *) ref->metadata, ref->metadata_length,\n            KAS_UINT8 },\n        { \"reference_sequence/metadata_schema\", (void *) ref->metadata_schema,\n            ref->metadata_schema_length, KAS_UINT8 },\n        { .name = NULL },\n    };\n    if (tsk_table_collection_has_reference_sequence(self)) {\n        ret = write_table_cols(store, write_cols, 0);\n    }\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_table_collection_dump(\n    const tsk_table_collection_t *self, const char *filename, tsk_flags_t options)\n{\n    int ret = 0;\n    FILE *file = fopen(filename, \"wb\");\n\n    if (file == NULL) {\n        ret = tsk_trace_error(TSK_ERR_IO);\n        goto out;\n    }\n    ret = tsk_table_collection_dumpf(self, file, options);\n    if (ret != 0) {\n        goto out;\n    }\n    if (fclose(file) != 0) {\n        ret = tsk_trace_error(TSK_ERR_IO);\n        goto out;\n    }\n    file = NULL;\nout:\n    if (file != NULL) {\n        /* Ignore any additional errors we might get when closing the file\n         * in error conditions */\n        
fclose(file);\n        /* If an error occurred make sure that the filename is removed */\n        remove(filename);\n    }\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_table_collection_dumpf(\n    const tsk_table_collection_t *self, FILE *file, tsk_flags_t options)\n{\n    int ret = 0;\n    kastore_t store;\n    char uuid[TSK_UUID_SIZE + 1]; // Must include space for trailing null.\n    write_table_col_t format_columns[] = {\n        { \"format/name\", (const void *) &TSK_FILE_FORMAT_NAME,\n            TSK_FILE_FORMAT_NAME_LENGTH, KAS_INT8 },\n        { \"format/version\",\n            (const void *) &(uint32_t[]) {\n                TSK_FILE_FORMAT_VERSION_MAJOR, TSK_FILE_FORMAT_VERSION_MINOR },\n            2, KAS_UINT32 },\n        { \"sequence_length\", (const void *) &self->sequence_length, 1, KAS_FLOAT64 },\n        { \"uuid\", (void *) uuid, TSK_UUID_SIZE, KAS_INT8 },\n        { \"time_units\", (void *) self->time_units, self->time_units_length, KAS_INT8 },\n        { \"metadata\", (void *) self->metadata, self->metadata_length, KAS_INT8 },\n        { \"metadata_schema\", (void *) self->metadata_schema,\n            self->metadata_schema_length, KAS_INT8 },\n        { .name = NULL },\n    };\n\n    tsk_memset(&store, 0, sizeof(store));\n\n    ret = kastore_openf(&store, file, \"w\", 0);\n    if (ret != 0) {\n        ret = tsk_set_kas_error(ret);\n        goto out;\n    }\n\n    /* Write format data */\n    ret = tsk_generate_uuid(uuid, 0);\n    if (ret != 0) {\n        goto out;\n    }\n\n    ret = write_table_cols(&store, format_columns, options);\n    if (ret != 0) {\n        goto out;\n    }\n\n    /* All of these functions will set the kas_error internally, so we don't have\n     * to modify the return value. 
*/\n    ret = tsk_node_table_dump(&self->nodes, &store, options);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_edge_table_dump(&self->edges, &store, options);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_site_table_dump(&self->sites, &store, options);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_migration_table_dump(&self->migrations, &store, options);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_mutation_table_dump(&self->mutations, &store, options);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_individual_table_dump(&self->individuals, &store, options);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_population_table_dump(&self->populations, &store, options);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_provenance_table_dump(&self->provenances, &store, options);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_table_collection_dump_indexes(self, &store, options);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_table_collection_dump_reference_sequence(self, &store, options);\n    if (ret != 0) {\n        goto out;\n    }\n\n    ret = kastore_close(&store);\n    if (ret != 0) {\n        ret = tsk_set_kas_error(ret);\n        goto out;\n    }\nout:\n    /* It's safe to close a kastore twice. 
*/\n    if (ret != 0) {\n        kastore_close(&store);\n    }\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_table_collection_simplify(tsk_table_collection_t *self, const tsk_id_t *samples,\n    tsk_size_t num_samples, tsk_flags_t options, tsk_id_t *node_map)\n{\n    int ret = 0;\n    simplifier_t simplifier;\n    tsk_id_t *local_samples = NULL;\n    tsk_id_t u;\n\n    /* Avoid calling to simplifier_free with uninit'd memory on error branches */\n    tsk_memset(&simplifier, 0, sizeof(simplifier_t));\n\n    if ((options & TSK_SIMPLIFY_KEEP_UNARY)\n        && (options & TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS)) {\n        ret = tsk_trace_error(TSK_ERR_KEEP_UNARY_MUTUALLY_EXCLUSIVE);\n        goto out;\n    }\n\n    /* For now we don't bother with edge metadata, but it can easily be\n     * implemented. */\n    if (self->edges.metadata_length > 0) {\n        ret = tsk_trace_error(TSK_ERR_CANT_PROCESS_EDGES_WITH_METADATA);\n        goto out;\n    }\n\n    if (samples == NULL) {\n        local_samples = tsk_malloc(self->nodes.num_rows * sizeof(*local_samples));\n        if (local_samples == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n        num_samples = 0;\n        for (u = 0; u < (tsk_id_t) self->nodes.num_rows; u++) {\n            if (!!(self->nodes.flags[u] & TSK_NODE_IS_SAMPLE)) {\n                local_samples[num_samples] = u;\n                num_samples++;\n            }\n        }\n        samples = local_samples;\n    }\n\n    ret = simplifier_init(&simplifier, samples, num_samples, self, options);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = simplifier_run(&simplifier, node_map);\n    if (ret != 0) {\n        goto out;\n    }\n    if (!!(options & TSK_DEBUG)) {\n        simplifier_print_state(&simplifier, tsk_get_debug_stream());\n    }\n    /* The indexes are invalidated now so drop them */\n    ret = tsk_table_collection_drop_index(self, 0);\nout:\n    simplifier_free(&simplifier);\n    
tsk_safe_free(local_samples);\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_table_collection_link_ancestors(tsk_table_collection_t *self, tsk_id_t *samples,\n    tsk_size_t num_samples, tsk_id_t *ancestors, tsk_size_t num_ancestors,\n    tsk_flags_t TSK_UNUSED(options), tsk_edge_table_t *result)\n{\n    int ret = 0;\n    ancestor_mapper_t ancestor_mapper;\n\n    tsk_memset(&ancestor_mapper, 0, sizeof(ancestor_mapper_t));\n\n    if (self->edges.metadata_length > 0) {\n        ret = tsk_trace_error(TSK_ERR_CANT_PROCESS_EDGES_WITH_METADATA);\n        goto out;\n    }\n\n    ret = ancestor_mapper_init(\n        &ancestor_mapper, samples, num_samples, ancestors, num_ancestors, self, result);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = ancestor_mapper_run(&ancestor_mapper);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    ancestor_mapper_free(&ancestor_mapper);\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_table_collection_ibd_within(const tsk_table_collection_t *self,\n    tsk_identity_segments_t *result, const tsk_id_t *samples, tsk_size_t num_samples,\n    double min_span, double max_time, tsk_flags_t options)\n{\n    int ret = 0;\n    tsk_ibd_finder_t ibd_finder;\n\n    /* Zero the finder so that tsk_ibd_finder_free at the out label never sees\n     * uninit'd memory if tsk_identity_segments_init fails first. */\n    tsk_memset(&ibd_finder, 0, sizeof(ibd_finder));\n\n    ret = tsk_identity_segments_init(result, self->nodes.num_rows, options);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_ibd_finder_init(&ibd_finder, self, result, min_span, max_time);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_ibd_finder_init_within(&ibd_finder, samples, num_samples);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_ibd_finder_run(&ibd_finder);\n    if (ret != 0) {\n        goto out;\n    }\n    if (!!(options & TSK_DEBUG)) {\n        tsk_ibd_finder_print_state(&ibd_finder, tsk_get_debug_stream());\n    }\nout:\n    tsk_ibd_finder_free(&ibd_finder);\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_table_collection_ibd_between(const tsk_table_collection_t *self,\n    tsk_identity_segments_t *result, tsk_size_t 
num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, double min_span,\n    double max_time, tsk_flags_t options)\n{\n    int ret = 0;\n    tsk_ibd_finder_t ibd_finder;\n\n    /* Zero the finder so that tsk_ibd_finder_free at the out label never sees\n     * uninit'd memory if tsk_identity_segments_init fails first. */\n    tsk_memset(&ibd_finder, 0, sizeof(ibd_finder));\n\n    ret = tsk_identity_segments_init(result, self->nodes.num_rows, options);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_ibd_finder_init(&ibd_finder, self, result, min_span, max_time);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_ibd_finder_init_between(\n        &ibd_finder, num_sample_sets, sample_set_sizes, sample_sets);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_ibd_finder_run(&ibd_finder);\n    if (ret != 0) {\n        goto out;\n    }\n    if (!!(options & TSK_DEBUG)) {\n        tsk_ibd_finder_print_state(&ibd_finder, tsk_get_debug_stream());\n    }\nout:\n    tsk_ibd_finder_free(&ibd_finder);\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_table_collection_sort(\n    tsk_table_collection_t *self, const tsk_bookmark_t *start, tsk_flags_t options)\n{\n    int ret = 0;\n    tsk_table_sorter_t sorter;\n\n    ret = tsk_table_sorter_init(&sorter, self, options);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_table_sorter_run(&sorter, start);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    tsk_table_sorter_free(&sorter);\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_table_collection_canonicalise(tsk_table_collection_t *self, tsk_flags_t options)\n{\n    int ret = 0;\n    tsk_id_t k;\n    tsk_id_t *nodes = NULL;\n    tsk_table_sorter_t sorter;\n    tsk_flags_t subset_options = options & TSK_SUBSET_KEEP_UNREFERENCED;\n\n    ret = tsk_table_sorter_init(&sorter, self, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    sorter.sort_mutations = tsk_table_sorter_sort_mutations;\n    sorter.sort_individuals = tsk_table_sorter_sort_individuals_canonical;\n\n    nodes = tsk_malloc(self->nodes.num_rows * sizeof(*nodes));\n    if (nodes == NULL) {\n        ret = 
tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    for (k = 0; k < (tsk_id_t) self->nodes.num_rows; k++) {\n        nodes[k] = k;\n    }\n    ret = tsk_table_collection_subset(self, nodes, self->nodes.num_rows, subset_options);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_table_sorter_run(&sorter, NULL);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    tsk_safe_free(nodes);\n    tsk_table_sorter_free(&sorter);\n    return ret;\n}\n\n/*\n * Remove any sites with duplicate positions, retaining only the *first*\n * one. Assumes the tables have been sorted, throwing an error if not.\n */\nint TSK_WARN_UNUSED\ntsk_table_collection_deduplicate_sites(\n    tsk_table_collection_t *self, tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = 0;\n    tsk_id_t ret_id;\n    tsk_size_t j;\n    /* Map of old site IDs to new site IDs. */\n    tsk_id_t *site_id_map = NULL;\n    tsk_site_table_t copy;\n    tsk_site_t row, last_row;\n\n    /* Early exit if there's 0 rows. We don't exit early for one row because\n     * we would then skip error checking, making the semantics inconsistent. 
*/\n    if (self->sites.num_rows == 0) {\n        return 0;\n    }\n\n    /* Must allocate the site table first for tsk_site_table_free to be safe */\n    ret = tsk_site_table_copy(&self->sites, &copy, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret_id = tsk_table_collection_check_integrity(self, TSK_CHECK_SITE_ORDERING);\n    if (ret_id != 0) {\n        ret = (int) ret_id;\n        goto out;\n    }\n\n    site_id_map = tsk_malloc(copy.num_rows * sizeof(*site_id_map));\n    if (site_id_map == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    ret = tsk_site_table_clear(&self->sites);\n    if (ret != 0) {\n        goto out;\n    }\n\n    last_row.position = -1;\n    site_id_map[0] = 0;\n    for (j = 0; j < copy.num_rows; j++) {\n        tsk_site_table_get_row_unsafe(&copy, (tsk_id_t) j, &row);\n        if (row.position != last_row.position) {\n            ret_id\n                = tsk_site_table_add_row(&self->sites, row.position, row.ancestral_state,\n                    row.ancestral_state_length, row.metadata, row.metadata_length);\n            if (ret_id < 0) {\n                ret = (int) ret_id;\n                goto out;\n            }\n        }\n        site_id_map[j] = (tsk_id_t) self->sites.num_rows - 1;\n        last_row = row;\n    }\n\n    if (self->sites.num_rows < copy.num_rows) {\n        // Remap sites in the mutation table\n        // (but only if there's been any changed sites)\n        for (j = 0; j < self->mutations.num_rows; j++) {\n            self->mutations.site[j] = site_id_map[self->mutations.site[j]];\n        }\n    }\n    ret = 0;\nout:\n    tsk_site_table_free(&copy);\n    tsk_safe_free(site_id_map);\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_table_collection_compute_mutation_parents(\n    tsk_table_collection_t *self, tsk_flags_t options)\n{\n    int ret = 0;\n    tsk_mutation_table_t *mutations = &self->mutations;\n    tsk_id_t *parent_backup = NULL;\n    bool restore_parents = 
false;\n\n    if (!(options & TSK_NO_CHECK_INTEGRITY)) {\n        if (mutations->num_rows > 0) {\n            /* We need to wipe the parent column before computing, as otherwise invalid\n             * parents can cause integrity checks to fail. We take a copy to restore on\n             * error */\n            parent_backup = tsk_malloc(mutations->num_rows * sizeof(*parent_backup));\n            if (parent_backup == NULL) {\n                ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n                goto out;\n            }\n            tsk_memcpy(parent_backup, mutations->parent,\n                mutations->num_rows * sizeof(*parent_backup));\n            /* Set the parent pointers to TSK_NULL */\n            tsk_memset(mutations->parent, 0xff,\n                mutations->num_rows * sizeof(*mutations->parent));\n            restore_parents = true;\n        }\n        /* Safe to cast here as we're not counting trees */\n        ret = (int) tsk_table_collection_check_integrity(self, TSK_CHECK_TREES);\n        if (ret < 0) {\n            goto out;\n        }\n    }\n\n    ret = tsk_table_collection_compute_mutation_parents_to_array(\n        self, self->mutations.parent);\n    if (ret != 0) {\n        goto out;\n    }\n\nout:\n    if (ret != 0 && restore_parents) {\n        tsk_memcpy(mutations->parent, parent_backup,\n            mutations->num_rows * sizeof(*parent_backup));\n    }\n    tsk_safe_free(parent_backup);\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_table_collection_compute_mutation_times(\n    tsk_table_collection_t *self, double *random, tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = 0;\n    tsk_id_t num_trees;\n    const tsk_id_t *restrict I = self->indexes.edge_insertion_order;\n    const tsk_id_t *restrict O = self->indexes.edge_removal_order;\n    const tsk_edge_table_t edges = self->edges;\n    const tsk_node_table_t nodes = self->nodes;\n    const tsk_site_table_t sites = self->sites;\n    const tsk_mutation_table_t mutations = 
self->mutations;\n    const tsk_id_t M = (tsk_id_t) edges.num_rows;\n    tsk_id_t tj, tk;\n    tsk_id_t *parent = NULL;\n    double *numerator = NULL;\n    double *denominator = NULL;\n    tsk_id_t u;\n    double left, right, parent_time;\n    tsk_id_t site;\n    /* Using unsigned values here avoids potentially undefined behaviour */\n    tsk_size_t j, mutation, first_mutation;\n    tsk_bookmark_t skip_edges = { 0, 0, self->edges.num_rows, 0, 0, 0, 0, 0 };\n\n    /* The random param is for future usage */\n    if (random != NULL) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n\n    /* First set the times to TSK_UNKNOWN_TIME so that check will succeed */\n    for (j = 0; j < mutations.num_rows; j++) {\n        mutations.time[j] = TSK_UNKNOWN_TIME;\n    }\n    /* TSK_CHECK_MUTATION_PARENTS isn't needed here as we're not using the parents */\n    num_trees = tsk_table_collection_check_integrity(self, TSK_CHECK_TREES);\n    if (num_trees < 0) {\n        ret = (int) num_trees;\n        goto out;\n    }\n    parent = tsk_malloc(nodes.num_rows * sizeof(*parent));\n    numerator = tsk_malloc(nodes.num_rows * sizeof(*numerator));\n    denominator = tsk_malloc(nodes.num_rows * sizeof(*denominator));\n    if (parent == NULL || numerator == NULL || denominator == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    tsk_memset(parent, 0xff, nodes.num_rows * sizeof(*parent));\n    tsk_memset(numerator, 0, nodes.num_rows * sizeof(*numerator));\n    tsk_memset(denominator, 0, nodes.num_rows * sizeof(*denominator));\n\n    tj = 0;\n    tk = 0;\n    site = 0;\n    mutation = 0;\n    left = 0;\n    while (tj < M || left < self->sequence_length) {\n        while (tk < M && edges.right[O[tk]] == left) {\n            parent[edges.child[O[tk]]] = TSK_NULL;\n            tk++;\n        }\n        while (tj < M && edges.left[I[tj]] == left) {\n            parent[edges.child[I[tj]]] = edges.parent[I[tj]];\n           
 tj++;\n        }\n        right = self->sequence_length;\n        if (tj < M) {\n            right = TSK_MIN(right, edges.left[I[tj]]);\n        }\n        if (tk < M) {\n            right = TSK_MIN(right, edges.right[O[tk]]);\n        }\n\n        /* Tree is now ready. We look at each site on this tree in turn */\n        while (site < (tsk_id_t) sites.num_rows && sites.position[site] < right) {\n            first_mutation = mutation;\n            /* Count how many mutations each edge has to get our\n               denominator */\n            while (mutation < mutations.num_rows && mutations.site[mutation] == site) {\n                denominator[mutations.node[mutation]]++;\n                mutation++;\n            }\n            /* Go over the mutations again assigning times. As the sorting\n               requirements guarantee that parents are before children, we assign\n               oldest first */\n            for (j = first_mutation; j < mutation; j++) {\n                u = mutations.node[j];\n                numerator[u]++;\n                if (parent[u] == TSK_NULL) {\n                    /* This mutation is above a root */\n                    mutations.time[j] = nodes.time[u];\n                } else {\n                    parent_time = nodes.time[parent[u]];\n                    mutations.time[j] = parent_time\n                                        - (parent_time - nodes.time[u]) * numerator[u]\n                                              / (denominator[u] + 1);\n                }\n            }\n            /* Reset the book-keeping for the next site */\n            for (j = first_mutation; j < mutation; j++) {\n                u = mutations.node[j];\n                numerator[u] = 0;\n                denominator[u] = 0;\n            }\n            site++;\n        }\n        /* Move on to the next tree */\n        left = right;\n    }\n\n    /* Now that mutations have times their sort order may have been invalidated, so\n     * re-sort. 
Safe to cast the result to an int here because we're not counting\n     * trees. */\n    ret = (int) tsk_table_collection_check_integrity(self, TSK_CHECK_MUTATION_ORDERING);\n    if (ret == TSK_ERR_UNSORTED_MUTATIONS) {\n        ret = tsk_table_collection_sort(self, &skip_edges, 0);\n        if (ret != 0) {\n            goto out;\n        }\n    } else if (ret < 0) {\n        goto out;\n    }\n\nout:\n    tsk_safe_free(parent);\n    tsk_safe_free(numerator);\n    tsk_safe_free(denominator);\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_table_collection_delete_older(\n    tsk_table_collection_t *self, double time, tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = 0;\n    tsk_edge_t edge;\n    tsk_mutation_t mutation;\n    tsk_migration_t migration;\n    tsk_edge_table_t edges;\n    tsk_mutation_table_t mutations;\n    tsk_migration_table_t migrations;\n    const double *restrict node_time = self->nodes.time;\n    tsk_id_t j, ret_id, parent;\n    double mutation_time;\n    tsk_id_t *mutation_map = NULL;\n\n    memset(&edges, 0, sizeof(edges));\n    memset(&mutations, 0, sizeof(mutations));\n    memset(&migrations, 0, sizeof(migrations));\n\n    ret = tsk_edge_table_copy(&self->edges, &edges, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_edge_table_clear(&self->edges);\n    if (ret != 0) {\n        goto out;\n    }\n    for (j = 0; j < (tsk_id_t) edges.num_rows; j++) {\n        tsk_edge_table_get_row_unsafe(&edges, j, &edge);\n        if (node_time[edge.parent] <= time) {\n            ret_id = tsk_edge_table_add_row(&self->edges, edge.left, edge.right,\n                edge.parent, edge.child, edge.metadata, edge.metadata_length);\n            if (ret_id < 0) {\n                ret = (int) ret_id;\n                goto out;\n            }\n        }\n    }\n    /* Calling x_table_free multiple times is safe, so get rid of the\n     * extra edge table memory as soon as we can. 
*/\n    tsk_edge_table_free(&edges);\n\n    mutation_map = tsk_malloc(self->mutations.num_rows * sizeof(*mutation_map));\n    if (mutation_map == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    ret = tsk_mutation_table_copy(&self->mutations, &mutations, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_mutation_table_clear(&self->mutations);\n    if (ret != 0) {\n        goto out;\n    }\n    for (j = 0; j < (tsk_id_t) mutations.num_rows; j++) {\n        tsk_mutation_table_get_row_unsafe(&mutations, j, &mutation);\n        mutation_time = tsk_is_unknown_time(mutation.time) ? node_time[mutation.node]\n                                                           : mutation.time;\n        mutation_map[j] = TSK_NULL;\n        if (mutation_time < time) {\n            ret_id = tsk_mutation_table_add_row(&self->mutations, mutation.site,\n                mutation.node, mutation.parent, mutation.time, mutation.derived_state,\n                mutation.derived_state_length, mutation.metadata,\n                mutation.metadata_length);\n            if (ret_id < 0) {\n                ret = (int) ret_id;\n                goto out;\n            }\n            mutation_map[j] = ret_id;\n        }\n    }\n    tsk_mutation_table_free(&mutations);\n    for (j = 0; j < (tsk_id_t) self->mutations.num_rows; j++) {\n        parent = self->mutations.parent[j];\n        if (parent != TSK_NULL) {\n            self->mutations.parent[j] = mutation_map[parent];\n        }\n    }\n\n    ret = tsk_migration_table_copy(&self->migrations, &migrations, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_migration_table_clear(&self->migrations);\n    if (ret != 0) {\n        goto out;\n    }\n    for (j = 0; j < (tsk_id_t) migrations.num_rows; j++) {\n        tsk_migration_table_get_row_unsafe(&migrations, j, &migration);\n        if (migration.time < time) {\n            ret_id = tsk_migration_table_add_row(&self->migrations, 
migration.left,\n                migration.right, migration.node, migration.source, migration.dest,\n                migration.time, migration.metadata, migration.metadata_length);\n            if (ret_id < 0) {\n                ret = (int) ret_id;\n                goto out;\n            }\n        }\n    }\n    tsk_migration_table_free(&migrations);\nout:\n    tsk_edge_table_free(&edges);\n    tsk_mutation_table_free(&mutations);\n    tsk_migration_table_free(&migrations);\n    tsk_safe_free(mutation_map);\n    return ret;\n}\n\nint\ntsk_table_collection_record_num_rows(\n    const tsk_table_collection_t *self, tsk_bookmark_t *position)\n{\n    position->individuals = self->individuals.num_rows;\n    position->nodes = self->nodes.num_rows;\n    position->edges = self->edges.num_rows;\n    position->migrations = self->migrations.num_rows;\n    position->sites = self->sites.num_rows;\n    position->mutations = self->mutations.num_rows;\n    position->populations = self->populations.num_rows;\n    position->provenances = self->provenances.num_rows;\n    return 0;\n}\n\nint TSK_WARN_UNUSED\ntsk_table_collection_truncate(tsk_table_collection_t *tables, tsk_bookmark_t *position)\n{\n    int ret = 0;\n\n    ret = tsk_table_collection_drop_index(tables, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_individual_table_truncate(&tables->individuals, position->individuals);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_node_table_truncate(&tables->nodes, position->nodes);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_edge_table_truncate(&tables->edges, position->edges);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_migration_table_truncate(&tables->migrations, position->migrations);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_site_table_truncate(&tables->sites, position->sites);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_mutation_table_truncate(&tables->mutations, 
position->mutations);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_population_table_truncate(&tables->populations, position->populations);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_provenance_table_truncate(&tables->provenances, position->provenances);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_table_collection_clear(tsk_table_collection_t *self, tsk_flags_t options)\n{\n    int ret = 0;\n    bool clear_provenance = !!(options & TSK_CLEAR_PROVENANCE);\n    bool clear_metadata_schemas = !!(options & TSK_CLEAR_METADATA_SCHEMAS);\n    bool clear_ts_metadata = !!(options & TSK_CLEAR_TS_METADATA_AND_SCHEMA);\n    tsk_bookmark_t rows_to_retain\n        = { .provenances = clear_provenance ? 0 : self->provenances.num_rows };\n\n    ret = tsk_table_collection_truncate(self, &rows_to_retain);\n    if (ret != 0) {\n        goto out;\n    }\n\n    if (clear_metadata_schemas) {\n        ret = tsk_individual_table_set_metadata_schema(&self->individuals, \"\", 0);\n        if (ret != 0) {\n            goto out;\n        }\n        ret = tsk_node_table_set_metadata_schema(&self->nodes, \"\", 0);\n        if (ret != 0) {\n            goto out;\n        }\n        ret = tsk_edge_table_set_metadata_schema(&self->edges, \"\", 0);\n        if (ret != 0) {\n            goto out;\n        }\n        ret = tsk_migration_table_set_metadata_schema(&self->migrations, \"\", 0);\n        if (ret != 0) {\n            goto out;\n        }\n        ret = tsk_site_table_set_metadata_schema(&self->sites, \"\", 0);\n        if (ret != 0) {\n            goto out;\n        }\n        ret = tsk_mutation_table_set_metadata_schema(&self->mutations, \"\", 0);\n        if (ret != 0) {\n            goto out;\n        }\n        ret = tsk_population_table_set_metadata_schema(&self->populations, \"\", 0);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n\n    if (clear_ts_metadata) {\n        ret = 
tsk_table_collection_set_metadata(self, \"\", 0);\n        if (ret != 0) {\n            goto out;\n        }\n        ret = tsk_table_collection_set_metadata_schema(self, \"\", 0);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n\nout:\n    return ret;\n}\n\nstatic int\ntsk_table_collection_add_and_remap_node(tsk_table_collection_t *self,\n    const tsk_table_collection_t *other, tsk_id_t node_id, tsk_id_t *individual_map,\n    tsk_id_t *population_map, tsk_id_t *node_map, bool add_populations)\n{\n    int ret = 0;\n    tsk_id_t ret_id, new_ind, new_pop;\n    tsk_node_t node;\n    tsk_individual_t ind;\n    tsk_population_t pop;\n\n    ret = tsk_node_table_get_row(&other->nodes, node_id, &node);\n    if (ret < 0) {\n        goto out;\n    }\n    new_ind = TSK_NULL;\n    if (node.individual != TSK_NULL) {\n        if (individual_map[node.individual] == TSK_NULL) {\n            ret = tsk_individual_table_get_row(\n                &other->individuals, node.individual, &ind);\n            if (ret < 0) {\n                goto out;\n            }\n            ret_id = tsk_individual_table_add_row(&self->individuals, ind.flags,\n                ind.location, ind.location_length, ind.parents, ind.parents_length,\n                ind.metadata, ind.metadata_length);\n            if (ret < 0) {\n                ret = (int) ret_id;\n                goto out;\n            }\n            individual_map[node.individual] = ret_id;\n        }\n        new_ind = individual_map[node.individual];\n    }\n    new_pop = TSK_NULL;\n    if (node.population != TSK_NULL) {\n        // keep same pops if add_populations is False\n        if (!add_populations) {\n            population_map[node.population] = node.population;\n        }\n        if (population_map[node.population] == TSK_NULL) {\n            ret = tsk_population_table_get_row(\n                &other->populations, node.population, &pop);\n            if (ret < 0) {\n                goto out;\n            }\n  
          ret_id = tsk_population_table_add_row(\n                &self->populations, pop.metadata, pop.metadata_length);\n            if (ret_id < 0) {\n                ret = (int) ret_id;\n                goto out;\n            }\n            population_map[node.population] = ret_id;\n        }\n        new_pop = population_map[node.population];\n    }\n    ret_id = tsk_node_table_add_row(&self->nodes, node.flags, node.time, new_pop,\n        new_ind, node.metadata, node.metadata_length);\n    if (ret_id < 0) {\n        ret = (int) ret_id;\n        goto out;\n    }\n    node_map[node.id] = ret_id;\n\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_table_collection_subset(tsk_table_collection_t *self, const tsk_id_t *nodes,\n    tsk_size_t num_nodes, tsk_flags_t options)\n{\n    int ret = 0;\n    tsk_id_t ret_id, j, k, parent_ind, new_parent, new_child, new_node, site_id;\n    tsk_size_t num_parents;\n    tsk_individual_t ind;\n    tsk_edge_t edge;\n    tsk_id_t *node_map = NULL;\n    tsk_id_t *individual_map = NULL;\n    tsk_id_t *population_map = NULL;\n    tsk_id_t *site_map = NULL;\n    tsk_id_t *mutation_map = NULL;\n    tsk_table_collection_t tables;\n    tsk_population_t pop;\n    tsk_site_t site;\n    tsk_mutation_t mut;\n    bool keep_unreferenced = !!(options & TSK_SUBSET_KEEP_UNREFERENCED);\n    bool no_change_populations = !!(options & TSK_SUBSET_NO_CHANGE_POPULATIONS);\n\n    ret = tsk_table_collection_copy(self, &tables, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    /* Not calling TSK_CHECK_TREES so casting to int is safe */\n    ret = (int) tsk_table_collection_check_integrity(self, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_table_collection_clear(self, 0);\n    if (ret != 0) {\n        goto out;\n    }\n\n    node_map = tsk_malloc(tables.nodes.num_rows * sizeof(*node_map));\n    individual_map = tsk_malloc(tables.individuals.num_rows * sizeof(*individual_map));\n    population_map = 
tsk_malloc(tables.populations.num_rows * sizeof(*population_map));\n    site_map = tsk_malloc(tables.sites.num_rows * sizeof(*site_map));\n    mutation_map = tsk_malloc(tables.mutations.num_rows * sizeof(*mutation_map));\n    if (node_map == NULL || individual_map == NULL || population_map == NULL\n        || site_map == NULL || mutation_map == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    tsk_memset(node_map, 0xff, tables.nodes.num_rows * sizeof(*node_map));\n    tsk_memset(\n        individual_map, 0xff, tables.individuals.num_rows * sizeof(*individual_map));\n    tsk_memset(\n        population_map, 0xff, tables.populations.num_rows * sizeof(*population_map));\n    tsk_memset(site_map, 0xff, tables.sites.num_rows * sizeof(*site_map));\n    tsk_memset(mutation_map, 0xff, tables.mutations.num_rows * sizeof(*mutation_map));\n\n    if (no_change_populations) {\n        ret = tsk_population_table_copy(\n            &tables.populations, &self->populations, TSK_NO_INIT);\n        if (ret < 0) {\n            goto out;\n        }\n        for (k = 0; k < (tsk_id_t) tables.populations.num_rows; k++) {\n            population_map[k] = k;\n        }\n    }\n\n    // First do individuals so they stay in the same order.\n    // So we can remap individual parents and not rely on sortedness,\n    // we first check who to keep; then build the individual map, and\n    // finally populate the tables.\n    if (keep_unreferenced) {\n        for (k = 0; k < (tsk_id_t) tables.individuals.num_rows; k++) {\n            // put a non-NULL value here; fill in the actual order next\n            individual_map[k] = 0;\n        }\n    } else {\n        for (k = 0; k < (tsk_id_t) num_nodes; k++) {\n            if (nodes[k] < 0 || nodes[k] >= (tsk_id_t) tables.nodes.num_rows) {\n                ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);\n                goto out;\n            }\n            j = tables.nodes.individual[nodes[k]];\n            
if (j != TSK_NULL) {\n                individual_map[j] = 0;\n            }\n        }\n    }\n    j = 0;\n    for (k = 0; k < (tsk_id_t) tables.individuals.num_rows; k++) {\n        if (individual_map[k] != TSK_NULL) {\n            individual_map[k] = j;\n            j++;\n        }\n    }\n    for (k = 0; k < (tsk_id_t) tables.individuals.num_rows; k++) {\n        if (individual_map[k] != TSK_NULL) {\n            tsk_individual_table_get_row_unsafe(&tables.individuals, k, &ind);\n            num_parents = 0;\n            for (j = 0; j < (tsk_id_t) ind.parents_length; j++) {\n                parent_ind = ind.parents[j];\n                new_parent = parent_ind;\n                if (parent_ind != TSK_NULL) {\n                    new_parent = individual_map[parent_ind];\n                }\n                if ((parent_ind == TSK_NULL) || (new_parent != TSK_NULL)) {\n                    /* Beware: this modifies the parents column of tables.individuals\n                     * in-place! But it's OK as we don't use it again. */\n                    ind.parents[num_parents] = new_parent;\n                    num_parents++;\n                }\n            }\n            ret_id = tsk_individual_table_add_row(&self->individuals, ind.flags,\n                ind.location, ind.location_length, ind.parents, num_parents,\n                ind.metadata, ind.metadata_length);\n            if (ret_id < 0) {\n                ret = (int) ret_id;\n                goto out;\n            }\n            tsk_bug_assert(individual_map[k] == ret_id);\n        }\n    }\n\n    // Nodes and populations\n    for (k = 0; k < (tsk_id_t) num_nodes; k++) {\n        ret = tsk_table_collection_add_and_remap_node(\n            self, &tables, nodes[k], individual_map, population_map, node_map, true);\n        if (ret < 0) {\n            goto out;\n        }\n    }\n\n    /* TODO: Subset the migrations table. 
We would need to make sure\n     * that we don't remove populations that are referenced, so it would\n     * need to be done before the next code block. */\n    if (tables.migrations.num_rows != 0) {\n        ret = tsk_trace_error(TSK_ERR_MIGRATIONS_NOT_SUPPORTED);\n        goto out;\n    }\n\n    if (keep_unreferenced) {\n        // Keep unused populations\n        for (k = 0; k < (tsk_id_t) tables.populations.num_rows; k++) {\n            if (population_map[k] == TSK_NULL) {\n                tsk_population_table_get_row_unsafe(&tables.populations, k, &pop);\n                ret_id = tsk_population_table_add_row(\n                    &self->populations, pop.metadata, pop.metadata_length);\n                if (ret_id < 0) {\n                    ret = (int) ret_id;\n                    goto out;\n                }\n            }\n        }\n    }\n\n    // Edges\n    for (k = 0; k < (tsk_id_t) tables.edges.num_rows; k++) {\n        tsk_edge_table_get_row_unsafe(&tables.edges, k, &edge);\n        new_parent = node_map[edge.parent];\n        new_child = node_map[edge.child];\n        if ((new_parent != TSK_NULL) && (new_child != TSK_NULL)) {\n            ret_id = tsk_edge_table_add_row(&self->edges, edge.left, edge.right,\n                new_parent, new_child, edge.metadata, edge.metadata_length);\n            if (ret_id < 0) {\n                ret = (int) ret_id;\n                goto out;\n            }\n        }\n    }\n\n    // Mutations and sites\n    // Make a first pass through to build the mutation_map so that\n    // mutation parent can be remapped even if the table is not in order.\n    j = 0;\n    for (k = 0; k < (tsk_id_t) tables.mutations.num_rows; k++) {\n        if (node_map[tables.mutations.node[k]] != TSK_NULL) {\n            mutation_map[k] = j;\n            j++;\n            site_id = tables.mutations.site[k];\n            if (site_map[site_id] == TSK_NULL) {\n                // Insert a temporary non-NULL value\n                site_map[site_id] 
= 1;\n            }\n        }\n    }\n    // Keep retained sites in their original order\n    j = 0;\n    for (k = 0; k < (tsk_id_t) tables.sites.num_rows; k++) {\n        if (keep_unreferenced || site_map[k] != TSK_NULL) {\n            tsk_site_table_get_row_unsafe(&tables.sites, k, &site);\n            ret_id = tsk_site_table_add_row(&self->sites, site.position,\n                site.ancestral_state, site.ancestral_state_length, site.metadata,\n                site.metadata_length);\n            if (ret_id < 0) {\n                ret = (int) ret_id;\n                goto out;\n            }\n            site_map[k] = j;\n            j++;\n        }\n    }\n    for (k = 0; k < (tsk_id_t) tables.mutations.num_rows; k++) {\n        tsk_mutation_table_get_row_unsafe(&tables.mutations, k, &mut);\n        new_node = node_map[mut.node];\n        if (new_node != TSK_NULL) {\n            new_parent = TSK_NULL;\n            if (mut.parent != TSK_NULL) {\n                new_parent = mutation_map[mut.parent];\n            }\n            ret_id = tsk_mutation_table_add_row(&self->mutations, site_map[mut.site],\n                new_node, new_parent, mut.time, mut.derived_state,\n                mut.derived_state_length, mut.metadata, mut.metadata_length);\n            if (ret_id < 0) {\n                ret = (int) ret_id;\n                goto out;\n            }\n            tsk_bug_assert(mutation_map[mut.id] == ret_id);\n        }\n        if (ret < 0) {\n            goto out;\n        }\n    }\n\n    ret = 0;\nout:\n    tsk_safe_free(node_map);\n    tsk_safe_free(individual_map);\n    tsk_safe_free(population_map);\n    tsk_safe_free(site_map);\n    tsk_safe_free(mutation_map);\n    tsk_table_collection_free(&tables);\n    return ret;\n}\n\nstatic int\ntsk_check_subset_equality(tsk_table_collection_t *self,\n    const tsk_table_collection_t *other, const tsk_id_t *other_node_mapping,\n    tsk_size_t num_shared_nodes)\n{\n    int ret = 0;\n    tsk_id_t k, i;\n    tsk_id_t 
*self_nodes = NULL;\n    tsk_id_t *other_nodes = NULL;\n    tsk_table_collection_t self_copy;\n    tsk_table_collection_t other_copy;\n\n    tsk_memset(&self_copy, 0, sizeof(self_copy));\n    tsk_memset(&other_copy, 0, sizeof(other_copy));\n    self_nodes = tsk_malloc(num_shared_nodes * sizeof(*self_nodes));\n    other_nodes = tsk_malloc(num_shared_nodes * sizeof(*other_nodes));\n    if (self_nodes == NULL || other_nodes == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    i = 0;\n    for (k = 0; k < (tsk_id_t) other->nodes.num_rows; k++) {\n        if (other_node_mapping[k] != TSK_NULL) {\n            self_nodes[i] = other_node_mapping[k];\n            other_nodes[i] = k;\n            i++;\n        }\n    }\n\n    ret = tsk_table_collection_copy(self, &self_copy, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_table_collection_copy(other, &other_copy, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_table_collection_subset(&self_copy, self_nodes, num_shared_nodes, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_table_collection_subset(&other_copy, other_nodes, num_shared_nodes, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_table_collection_canonicalise(&self_copy, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_table_collection_canonicalise(&other_copy, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    if (!tsk_table_collection_equals(&self_copy, &other_copy,\n            TSK_CMP_IGNORE_TS_METADATA | TSK_CMP_IGNORE_PROVENANCE\n                | TSK_CMP_IGNORE_REFERENCE_SEQUENCE)) {\n        ret = tsk_trace_error(TSK_ERR_UNION_DIFF_HISTORIES);\n        goto out;\n    }\n\nout:\n    tsk_table_collection_free(&self_copy);\n    tsk_table_collection_free(&other_copy);\n    tsk_safe_free(other_nodes);\n    tsk_safe_free(self_nodes);\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_table_collection_union(tsk_table_collection_t *self,\n 
   const tsk_table_collection_t *other, const tsk_id_t *other_node_mapping,\n    tsk_flags_t options)\n{\n    int ret = 0;\n    tsk_id_t ret_id, k, i, new_parent, new_child;\n    tsk_size_t num_shared_nodes = 0;\n    tsk_size_t num_individuals_self = self->individuals.num_rows;\n    tsk_edge_t edge;\n    tsk_mutation_t mut;\n    tsk_site_t site;\n    tsk_id_t *node_map = NULL;\n    tsk_id_t *individual_map = NULL;\n    tsk_id_t *population_map = NULL;\n    tsk_id_t *site_map = NULL;\n    bool add_populations = !(options & TSK_UNION_NO_ADD_POP);\n    bool check_shared_portion = !(options & TSK_UNION_NO_CHECK_SHARED);\n    bool all_edges = !!(options & TSK_UNION_ALL_EDGES);\n    bool all_mutations = !!(options & TSK_UNION_ALL_MUTATIONS);\n\n    /* Not calling TSK_CHECK_TREES so casting to int is safe */\n    ret = (int) tsk_table_collection_check_integrity(self, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = (int) tsk_table_collection_check_integrity(other, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    for (k = 0; k < (tsk_id_t) other->nodes.num_rows; k++) {\n        if (other_node_mapping[k] >= (tsk_id_t) self->nodes.num_rows\n            || other_node_mapping[k] < TSK_NULL) {\n            ret = tsk_trace_error(TSK_ERR_UNION_BAD_MAP);\n            goto out;\n        }\n        if (other_node_mapping[k] != TSK_NULL) {\n            num_shared_nodes++;\n        }\n    }\n\n    if (check_shared_portion) {\n        ret = tsk_check_subset_equality(\n            self, other, other_node_mapping, num_shared_nodes);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n\n    // Maps relating the IDs in other to the new IDs in self.\n    node_map = tsk_malloc(other->nodes.num_rows * sizeof(*node_map));\n    individual_map = tsk_malloc(other->individuals.num_rows * sizeof(*individual_map));\n    population_map = tsk_malloc(other->populations.num_rows * sizeof(*population_map));\n    site_map = tsk_malloc(other->sites.num_rows * 
sizeof(*site_map));\n    if (node_map == NULL || individual_map == NULL || population_map == NULL\n        || site_map == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    tsk_memset(node_map, 0xff, other->nodes.num_rows * sizeof(*node_map));\n    tsk_memset(\n        individual_map, 0xff, other->individuals.num_rows * sizeof(*individual_map));\n    tsk_memset(\n        population_map, 0xff, other->populations.num_rows * sizeof(*population_map));\n    tsk_memset(site_map, 0xff, other->sites.num_rows * sizeof(*site_map));\n\n    /* We have to map the individuals who are linked to nodes in the intersection first\n       as otherwise an individual linked to one node in the intersection and one in\n       `other` would be duplicated. We assume that the individual in `self` takes\n       priority.\n     */\n    for (k = 0; k < (tsk_id_t) other->nodes.num_rows; k++) {\n        if (other_node_mapping[k] != TSK_NULL\n            && other->nodes.individual[k] != TSK_NULL) {\n            individual_map[other->nodes.individual[k]]\n                = self->nodes.individual[other_node_mapping[k]];\n        }\n    }\n    // nodes, individuals, populations\n    for (k = 0; k < (tsk_id_t) other->nodes.num_rows; k++) {\n        if (other_node_mapping[k] != TSK_NULL) {\n            node_map[k] = other_node_mapping[k];\n        } else {\n            ret = tsk_table_collection_add_and_remap_node(self, other, k, individual_map,\n                population_map, node_map, add_populations);\n            if (ret < 0) {\n                goto out;\n            }\n        }\n    }\n\n    /* Now we know the full individual map we can remap the parents of the new\n     * individuals*/\n    for (k = (tsk_id_t) self->individuals.parents_offset[num_individuals_self];\n        k < (tsk_id_t) self->individuals.parents_length; k++) {\n        if (self->individuals.parents[k] != TSK_NULL) {\n            self->individuals.parents[k] = 
individual_map[self->individuals.parents[k]];\n        }\n    }\n\n    // edges\n    for (k = 0; k < (tsk_id_t) other->edges.num_rows; k++) {\n        tsk_edge_table_get_row_unsafe(&other->edges, k, &edge);\n        if (all_edges || (other_node_mapping[edge.parent] == TSK_NULL)\n            || (other_node_mapping[edge.child] == TSK_NULL)) {\n            new_parent = node_map[edge.parent];\n            new_child = node_map[edge.child];\n            ret_id = tsk_edge_table_add_row(&self->edges, edge.left, edge.right,\n                new_parent, new_child, edge.metadata, edge.metadata_length);\n            if (ret_id < 0) {\n                ret = (int) ret_id;\n                goto out;\n            }\n        }\n    }\n\n    // sites\n    // first do the \"disjoint\" (all_mutations) case, where we just add all sites;\n    // otherwise we want to just add sites for new mutations\n    if (all_mutations) {\n        for (k = 0; k < (tsk_id_t) other->sites.num_rows; k++) {\n            tsk_site_table_get_row_unsafe(&other->sites, k, &site);\n            ret_id = tsk_site_table_add_row(&self->sites, site.position,\n                site.ancestral_state, site.ancestral_state_length, site.metadata,\n                site.metadata_length);\n            if (ret_id < 0) {\n                ret = (int) ret_id;\n                goto out;\n            }\n            site_map[site.id] = ret_id;\n        }\n    }\n\n    // mutations (and maybe sites)\n    i = 0;\n    for (k = 0; k < (tsk_id_t) other->sites.num_rows; k++) {\n        tsk_site_table_get_row_unsafe(&other->sites, k, &site);\n        while ((i < (tsk_id_t) other->mutations.num_rows)\n               && (other->mutations.site[i] == site.id)) {\n            tsk_mutation_table_get_row_unsafe(&other->mutations, i, &mut);\n            if (all_mutations || (other_node_mapping[mut.node] == TSK_NULL)) {\n                if (site_map[site.id] == TSK_NULL) {\n                    ret_id = tsk_site_table_add_row(&self->sites, 
site.position,\n                        site.ancestral_state, site.ancestral_state_length, site.metadata,\n                        site.metadata_length);\n                    if (ret_id < 0) {\n                        ret = (int) ret_id;\n                        goto out;\n                    }\n                    site_map[site.id] = ret_id;\n                }\n                // the parents will be recomputed later\n                new_parent = TSK_NULL;\n                ret_id = tsk_mutation_table_add_row(&self->mutations, site_map[site.id],\n                    node_map[mut.node], new_parent, mut.time, mut.derived_state,\n                    mut.derived_state_length, mut.metadata, mut.metadata_length);\n                if (ret_id < 0) {\n                    ret = (int) ret_id;\n                    goto out;\n                }\n            }\n            i++;\n        }\n    }\n\n    /* TODO: Union of the Migrations Table. The only hindrance to performing the\n     * union operation on Migrations Tables is that tsk_table_collection_sort\n     * does not sort migrations by time, and instead throws an error. 
*/\n    if (self->migrations.num_rows != 0 || other->migrations.num_rows != 0) {\n        ret = tsk_trace_error(TSK_ERR_MIGRATIONS_NOT_SUPPORTED);\n        goto out;\n    }\n\n    // sorting, deduplicating, and computing parents\n    ret = tsk_table_collection_sort(self, 0, 0);\n    if (ret < 0) {\n        goto out;\n    }\n\n    ret = tsk_table_collection_deduplicate_sites(self, 0);\n    if (ret < 0) {\n        goto out;\n    }\n\n    // need to sort again since after deduplicating sites, mutations\n    // may not be sorted by time within sites\n    ret = tsk_table_collection_sort(self, 0, 0);\n    if (ret < 0) {\n        goto out;\n    }\n\n    ret = tsk_table_collection_build_index(self, 0);\n    if (ret < 0) {\n        goto out;\n    }\n\n    ret = tsk_table_collection_compute_mutation_parents(self, 0);\n    if (ret < 0) {\n        goto out;\n    }\n\nout:\n    tsk_safe_free(node_map);\n    tsk_safe_free(individual_map);\n    tsk_safe_free(population_map);\n    tsk_safe_free(site_map);\n    return ret;\n}\n\nstatic int\ncmp_edge_cl(const void *a, const void *b)\n{\n    const tsk_edge_t *ia = (const tsk_edge_t *) a;\n    const tsk_edge_t *ib = (const tsk_edge_t *) b;\n    int ret = (ia->parent > ib->parent) - (ia->parent < ib->parent);\n    if (ret == 0) {\n        ret = (ia->child > ib->child) - (ia->child < ib->child);\n        if (ret == 0) {\n            ret = (ia->left > ib->left) - (ia->left < ib->left);\n        }\n    }\n    return ret;\n}\n\n/* Squash the edges in the specified array in place. 
The output edges will\n * be sorted by (parent_id, child_id, left).\n */\n\nint TSK_WARN_UNUSED\ntsk_squash_edges(tsk_edge_t *edges, tsk_size_t num_edges, tsk_size_t *num_output_edges)\n{\n    int ret = 0;\n    tsk_size_t j, k, l;\n\n    if (num_edges < 2) {\n        *num_output_edges = num_edges;\n        return ret;\n    }\n\n    qsort(edges, (size_t) num_edges, sizeof(tsk_edge_t), cmp_edge_cl);\n    j = 0;\n    l = 0;\n    for (k = 1; k < num_edges; k++) {\n        if (edges[k - 1].metadata_length > 0) {\n            ret = tsk_trace_error(TSK_ERR_CANT_PROCESS_EDGES_WITH_METADATA);\n            goto out;\n        }\n\n        /* Check for overlapping edges. */\n        if (edges[k - 1].parent == edges[k].parent\n            && edges[k - 1].child == edges[k].child\n            && edges[k - 1].right > edges[k].left) {\n            ret = tsk_trace_error(TSK_ERR_BAD_EDGES_CONTRADICTORY_CHILDREN);\n            goto out;\n        }\n\n        /* Add squashed edge. */\n        if (edges[k - 1].parent != edges[k].parent || edges[k - 1].right != edges[k].left\n            || edges[j].child != edges[k].child) {\n\n            edges[l].left = edges[j].left;\n            edges[l].right = edges[k - 1].right;\n            edges[l].parent = edges[j].parent;\n            edges[l].child = edges[j].child;\n\n            j = k;\n            l++;\n        }\n    }\n    edges[l].left = edges[j].left;\n    edges[l].right = edges[k - 1].right;\n    edges[l].parent = edges[j].parent;\n    edges[l].child = edges[j].child;\n\n    *num_output_edges = (tsk_size_t) l + 1;\n\nout:\n    return ret;\n}\n"
  },
  {
    "path": "c/tskit/tables.h",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2024 Tskit Developers\n * Copyright (c) 2017-2018 University of Oxford\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n/**\n * @file tables.h\n * @brief Tskit Tables API.\n */\n#ifndef TSK_TABLES_H\n#define TSK_TABLES_H\n\n#ifdef __cplusplus\nextern \"C\" {\n#endif\n\n#include <stdio.h>\n#include <stdbool.h>\n#include <stdint.h>\n\n#include <kastore.h>\n\n#include <tskit/core.h>\n\n/****************************************************************************/\n/* Definitions for the basic objects */\n/****************************************************************************/\n\n/**\n@brief A single individual defined by a row in the individual table.\n\n@rst\nSee the :ref:`data model <sec_data_model_definitions>` section for the definition of\nan individual and its properties.\n@endrst\n*/\ntypedef struct {\n    /** @brief Non-negative ID value corresponding to 
table row. */\n    tsk_id_t id;\n    /** @brief Bitwise flags. */\n    tsk_flags_t flags;\n    /** @brief Spatial location. The number of dimensions is defined by\n     * ``location_length``. */\n    const double *location;\n    /** @brief Number of spatial dimensions. */\n    tsk_size_t location_length;\n    /** @brief IDs of the parents. The number of parents given by ``parents_length``*/\n    tsk_id_t *parents;\n    /** @brief Number of parents. */\n    tsk_size_t parents_length;\n    /** @brief Metadata. */\n    const char *metadata;\n    /** @brief Size of the metadata in bytes. */\n    tsk_size_t metadata_length;\n    /** @brief An array of the nodes associated with this individual */\n    const tsk_id_t *nodes;\n    /** @brief The number of nodes associated with this individual*/\n    tsk_size_t nodes_length;\n} tsk_individual_t;\n\n/**\n@brief A single node defined by a row in the node table.\n\n@rst\nSee the :ref:`data model <sec_data_model_definitions>` section for the definition of\na node and its properties.\n@endrst\n*/\ntypedef struct {\n    /** @brief Non-negative ID value corresponding to table row. */\n    tsk_id_t id;\n    /** @brief Bitwise flags. */\n    tsk_flags_t flags;\n    /** @brief Time. */\n    double time;\n    /** @brief Population ID. */\n    tsk_id_t population;\n    /** @brief Individual ID. */\n    tsk_id_t individual;\n    /** @brief Metadata. */\n    const char *metadata;\n    /** @brief Size of the metadata in bytes. */\n    tsk_size_t metadata_length;\n} tsk_node_t;\n\n/**\n@brief A single edge defined by a row in the edge table.\n\n@rst\nSee the :ref:`data model <sec_data_model_definitions>` section for the definition of\nan edge and its properties.\n@endrst\n*/\ntypedef struct {\n    /** @brief Non-negative ID value corresponding to table row. */\n    tsk_id_t id;\n    /** @brief Parent node ID. */\n    tsk_id_t parent;\n    /** @brief Child node ID. */\n    tsk_id_t child;\n    /** @brief Left coordinate. 
*/\n    double left;\n    /** @brief Right coordinate. */\n    double right;\n    /** @brief Metadata. */\n    const char *metadata;\n    /** @brief Size of the metadata in bytes. */\n    tsk_size_t metadata_length;\n} tsk_edge_t;\n\n/**\n@brief A single mutation defined by a row in the mutation table.\n\n@rst\nSee the :ref:`data model <sec_data_model_definitions>` section for the definition of\na mutation and its properties.\n@endrst\n*/\ntypedef struct {\n    /** @brief Non-negative ID value corresponding to table row. */\n    tsk_id_t id;\n    /** @brief Site ID. */\n    tsk_id_t site;\n    /** @brief Node ID. */\n    tsk_id_t node;\n    /** @brief Parent mutation ID. */\n    tsk_id_t parent;\n    /** @brief Mutation time. */\n    double time;\n    /** @brief Derived state. */\n    const char *derived_state;\n    /** @brief Size of the derived state in bytes. */\n    tsk_size_t derived_state_length;\n    /** @brief Metadata. */\n    const char *metadata;\n    /** @brief Size of the metadata in bytes. */\n    tsk_size_t metadata_length;\n    /** @brief The ID of the edge that this mutation lies on, or TSK_NULL\n      if there is no corresponding edge.*/\n    tsk_id_t edge;\n    /** @brief Inherited state. */\n    const char *inherited_state;\n    /** @brief Size of the inherited state in bytes. */\n    tsk_size_t inherited_state_length;\n} tsk_mutation_t;\n\n/**\n@brief A single site defined by a row in the site table.\n\n@rst\nSee the :ref:`data model <sec_data_model_definitions>` section for the definition of\na site and its properties.\n@endrst\n*/\ntypedef struct {\n    /** @brief Non-negative ID value corresponding to table row. */\n    tsk_id_t id;\n    /** @brief Position coordinate. */\n    double position;\n    /** @brief Ancestral state. */\n    const char *ancestral_state;\n    /** @brief Ancestral state length in bytes. */\n    tsk_size_t ancestral_state_length;\n    /** @brief Metadata. 
*/\n    const char *metadata;\n    /** @brief Metadata length in bytes. */\n    tsk_size_t metadata_length;\n    /** @brief An array of this site's mutations */\n    const tsk_mutation_t *mutations;\n    /** @brief The number of mutations at this site */\n    tsk_size_t mutations_length;\n} tsk_site_t;\n\n/**\n@brief A single migration defined by a row in the migration table.\n\n@rst\nSee the :ref:`data model <sec_data_model_definitions>` section for the definition of\na migration and its properties.\n@endrst\n*/\ntypedef struct {\n    /** @brief Non-negative ID value corresponding to table row. */\n    tsk_id_t id;\n    /** @brief Source population ID. */\n    tsk_id_t source;\n    /** @brief Destination population ID. */\n    tsk_id_t dest;\n    /** @brief Node ID. */\n    tsk_id_t node;\n    /** @brief Left coordinate. */\n    double left;\n    /** @brief Right coordinate. */\n    double right;\n    /** @brief Time. */\n    double time;\n    /** @brief Metadata. */\n    const char *metadata;\n    /** @brief Size of the metadata in bytes. */\n    tsk_size_t metadata_length;\n\n} tsk_migration_t;\n\n/**\n@brief A single population defined by a row in the population table.\n\n@rst\nSee the :ref:`data model <sec_data_model_definitions>` section for the definition of\na population and its properties.\n@endrst\n*/\ntypedef struct {\n    /** @brief Non-negative ID value corresponding to table row. */\n    tsk_id_t id;\n    /** @brief Metadata. */\n    const char *metadata;\n    /** @brief Metadata length in bytes. */\n    tsk_size_t metadata_length;\n} tsk_population_t;\n\n/**\n@brief A single provenance defined by a row in the provenance table.\n\n@rst\nSee the :ref:`data model <sec_data_model_definitions>` section for the definition of\na provenance object and its properties. 
See the :ref:`sec_provenance` section\nfor more information on how provenance records should be structured.\n@endrst\n*/\ntypedef struct {\n    /** @brief Non-negative ID value corresponding to table row. */\n    tsk_id_t id;\n    /** @brief The timestamp. */\n    const char *timestamp;\n    /** @brief The timestamp length in bytes. */\n    tsk_size_t timestamp_length;\n    /** @brief The record. */\n    const char *record;\n    /** @brief The record length in bytes. */\n    tsk_size_t record_length;\n} tsk_provenance_t;\n\n/****************************************************************************/\n/* Table definitions */\n/****************************************************************************/\n\n/**\n@brief The individual table.\n\n@rst\nSee the individual :ref:`table definition <sec_individual_table_definition>` for\ndetails of the columns in this table.\n@endrst\n*/\ntypedef struct {\n    /** @brief The number of rows in this table. */\n    tsk_size_t num_rows;\n    tsk_size_t max_rows;\n    tsk_size_t max_rows_increment;\n    /** @brief The total length of the location column. */\n    tsk_size_t location_length;\n    tsk_size_t max_location_length;\n    tsk_size_t max_location_length_increment;\n    /** @brief The total length of the parent column. */\n    tsk_size_t parents_length;\n    tsk_size_t max_parents_length;\n    tsk_size_t max_parents_length_increment;\n    /** @brief The total length of the metadata column. */\n    tsk_size_t metadata_length;\n    tsk_size_t max_metadata_length;\n    tsk_size_t max_metadata_length_increment;\n    tsk_size_t metadata_schema_length;\n    /** @brief The flags column. */\n    tsk_flags_t *flags;\n    /** @brief The location column. */\n    double *location;\n    /** @brief The location_offset column. */\n    tsk_size_t *location_offset;\n    /** @brief The parents column. */\n    tsk_id_t *parents;\n    /** @brief The parents_offset column. 
*/\n    tsk_size_t *parents_offset;\n    /** @brief The metadata column. */\n    char *metadata;\n    /** @brief The metadata_offset column. */\n    tsk_size_t *metadata_offset;\n    /** @brief The metadata schema */\n    char *metadata_schema;\n} tsk_individual_table_t;\n\n/**\n@brief The node table.\n\n@rst\nSee the node :ref:`table definition <sec_node_table_definition>` for\ndetails of the columns in this table.\n@endrst\n*/\ntypedef struct {\n    /** @brief The number of rows in this table. */\n    tsk_size_t num_rows;\n    tsk_size_t max_rows;\n    tsk_size_t max_rows_increment;\n    /** @brief The total length of the metadata column. */\n    tsk_size_t metadata_length;\n    tsk_size_t max_metadata_length;\n    tsk_size_t max_metadata_length_increment;\n    tsk_size_t metadata_schema_length;\n    /** @brief The flags column. */\n    tsk_flags_t *flags;\n    /** @brief The time column. */\n    double *time;\n    /** @brief The population column. */\n    tsk_id_t *population;\n    /** @brief The individual column. */\n    tsk_id_t *individual;\n    /** @brief The metadata column. */\n    char *metadata;\n    /** @brief The metadata_offset column. */\n    tsk_size_t *metadata_offset;\n    /** @brief The metadata schema */\n    char *metadata_schema;\n} tsk_node_table_t;\n\n/**\n@brief The edge table.\n\n@rst\nSee the edge :ref:`table definition <sec_edge_table_definition>` for\ndetails of the columns in this table.\n@endrst\n*/\ntypedef struct {\n    /** @brief The number of rows in this table. */\n    tsk_size_t num_rows;\n    tsk_size_t max_rows;\n    tsk_size_t max_rows_increment;\n    /** @brief The total length of the metadata column. */\n    tsk_size_t metadata_length;\n    tsk_size_t max_metadata_length;\n    tsk_size_t max_metadata_length_increment;\n    tsk_size_t metadata_schema_length;\n    /** @brief The left column. */\n    double *left;\n    /** @brief The right column. */\n    double *right;\n    /** @brief The parent column. 
*/\n    tsk_id_t *parent;\n    /** @brief The child column. */\n    tsk_id_t *child;\n    /** @brief The metadata column. */\n    char *metadata;\n    /** @brief The metadata_offset column. */\n    tsk_size_t *metadata_offset;\n    /** @brief The metadata schema */\n    char *metadata_schema;\n    /** @brief Flags for this table */\n    tsk_flags_t options;\n} tsk_edge_table_t;\n\n/**\n@brief The migration table.\n\n@rst\nSee the migration :ref:`table definition <sec_migration_table_definition>` for\ndetails of the columns in this table.\n@endrst\n*/\ntypedef struct {\n    /** @brief The number of rows in this table. */\n    tsk_size_t num_rows;\n    tsk_size_t max_rows;\n    tsk_size_t max_rows_increment;\n    /** @brief The total length of the metadata column. */\n    tsk_size_t metadata_length;\n    tsk_size_t max_metadata_length;\n    tsk_size_t max_metadata_length_increment;\n    tsk_size_t metadata_schema_length;\n    /** @brief The source column. */\n    tsk_id_t *source;\n    /** @brief The dest column. */\n    tsk_id_t *dest;\n    /** @brief The node column. */\n    tsk_id_t *node;\n    /** @brief The left column. */\n    double *left;\n    /** @brief The right column. */\n    double *right;\n    /** @brief The time column. */\n    double *time;\n    /** @brief The metadata column. */\n    char *metadata;\n    /** @brief The metadata_offset column. */\n    tsk_size_t *metadata_offset;\n    /** @brief The metadata schema */\n    char *metadata_schema;\n} tsk_migration_table_t;\n\n/**\n@brief The site table.\n\n@rst\nSee the site :ref:`table definition <sec_site_table_definition>` for\ndetails of the columns in this table.\n@endrst\n*/\ntypedef struct {\n    /** @brief The number of rows in this table. 
*/\n    tsk_size_t num_rows;\n    tsk_size_t max_rows;\n    tsk_size_t max_rows_increment;\n    tsk_size_t ancestral_state_length;\n    tsk_size_t max_ancestral_state_length;\n    tsk_size_t max_ancestral_state_length_increment;\n    /** @brief The total length of the metadata column. */\n    tsk_size_t metadata_length;\n    tsk_size_t max_metadata_length;\n    tsk_size_t max_metadata_length_increment;\n    tsk_size_t metadata_schema_length;\n    /** @brief The position column. */\n    double *position;\n    /** @brief The ancestral_state column. */\n    char *ancestral_state;\n    /** @brief The ancestral_state_offset column. */\n    tsk_size_t *ancestral_state_offset;\n    /** @brief The metadata column. */\n    char *metadata;\n    /** @brief The metadata_offset column. */\n    tsk_size_t *metadata_offset;\n    /** @brief The metadata schema */\n    char *metadata_schema;\n} tsk_site_table_t;\n\n/**\n@brief The mutation table.\n\n@rst\nSee the mutation :ref:`table definition <sec_mutation_table_definition>` for\ndetails of the columns in this table.\n@endrst\n*/\ntypedef struct {\n    /** @brief The number of rows in this table. */\n    tsk_size_t num_rows;\n    tsk_size_t max_rows;\n    tsk_size_t max_rows_increment;\n    tsk_size_t derived_state_length;\n    tsk_size_t max_derived_state_length;\n    tsk_size_t max_derived_state_length_increment;\n    /** @brief The total length of the metadata column. */\n    tsk_size_t metadata_length;\n    tsk_size_t max_metadata_length;\n    tsk_size_t max_metadata_length_increment;\n    tsk_size_t metadata_schema_length;\n    /** @brief The node column. */\n    tsk_id_t *node;\n    /** @brief The site column. */\n    tsk_id_t *site;\n    /** @brief The parent column. */\n    tsk_id_t *parent;\n    /** @brief The time column. */\n    double *time;\n    /** @brief The derived_state column. */\n    char *derived_state;\n    /** @brief The derived_state_offset column. 
*/\n    tsk_size_t *derived_state_offset;\n    /** @brief The metadata column. */\n    char *metadata;\n    /** @brief The metadata_offset column. */\n    tsk_size_t *metadata_offset;\n    /** @brief The metadata schema */\n    char *metadata_schema;\n} tsk_mutation_table_t;\n\n/**\n@brief The population table.\n\n@rst\nSee the population :ref:`table definition <sec_population_table_definition>` for\ndetails of the columns in this table.\n@endrst\n*/\ntypedef struct {\n    /** @brief The number of rows in this table. */\n    tsk_size_t num_rows;\n    tsk_size_t max_rows;\n    tsk_size_t max_rows_increment;\n    /** @brief The total length of the metadata column. */\n    tsk_size_t metadata_length;\n    tsk_size_t max_metadata_length;\n    tsk_size_t max_metadata_length_increment;\n    tsk_size_t metadata_schema_length;\n    /** @brief The metadata column. */\n    char *metadata;\n    /** @brief The metadata_offset column. */\n    tsk_size_t *metadata_offset;\n    /** @brief The metadata schema */\n    char *metadata_schema;\n} tsk_population_table_t;\n\n/**\n@brief The provenance table.\n\n@rst\nSee the provenance :ref:`table definition <sec_provenance_table_definition>` for\ndetails of the columns in this table.\n@endrst\n*/\ntypedef struct {\n    /** @brief The number of rows in this table. */\n    tsk_size_t num_rows;\n    tsk_size_t max_rows;\n    tsk_size_t max_rows_increment;\n    /** @brief The total length of the timestamp column. */\n    tsk_size_t timestamp_length;\n    tsk_size_t max_timestamp_length;\n    tsk_size_t max_timestamp_length_increment;\n    /** @brief The total length of the record column. */\n    tsk_size_t record_length;\n    tsk_size_t max_record_length;\n    tsk_size_t max_record_length_increment;\n    /** @brief The timestamp column. */\n    char *timestamp;\n    /** @brief The timestamp_offset column. */\n    tsk_size_t *timestamp_offset;\n    /** @brief The record column. */\n    char *record;\n    /** @brief The record_offset column. 
*/\n    tsk_size_t *record_offset;\n} tsk_provenance_table_t;\n\ntypedef struct {\n    char *data;\n    tsk_size_t data_length;\n    char *url;\n    tsk_size_t url_length;\n    char *metadata;\n    tsk_size_t metadata_length;\n    char *metadata_schema;\n    tsk_size_t metadata_schema_length;\n} tsk_reference_sequence_t;\n\n/**\n@brief A collection of tables defining the data for a tree sequence.\n*/\ntypedef struct {\n    /** @brief The sequence length defining the tree sequence's coordinate space */\n    double sequence_length;\n    char *file_uuid;\n    /** @brief The units of the time dimension */\n    char *time_units;\n    tsk_size_t time_units_length;\n    /** @brief The tree-sequence metadata */\n    char *metadata;\n    tsk_size_t metadata_length;\n    /** @brief The metadata schema */\n    char *metadata_schema;\n    tsk_size_t metadata_schema_length;\n    tsk_reference_sequence_t reference_sequence;\n    /** @brief The individual table */\n    tsk_individual_table_t individuals;\n    /** @brief The node table */\n    tsk_node_table_t nodes;\n    /** @brief The edge table */\n    tsk_edge_table_t edges;\n    /** @brief The migration table */\n    tsk_migration_table_t migrations;\n    /** @brief The site table */\n    tsk_site_table_t sites;\n    /** @brief The mutation table */\n    tsk_mutation_table_t mutations;\n    /** @brief The population table */\n    tsk_population_table_t populations;\n    /** @brief The provenance table */\n    tsk_provenance_table_t provenances;\n    struct {\n        tsk_id_t *edge_insertion_order;\n        tsk_id_t *edge_removal_order;\n        tsk_size_t num_edges;\n    } indexes;\n} tsk_table_collection_t;\n\n/**\n@brief A bookmark recording the position of all the tables in a table collection.\n*/\ntypedef struct {\n    /** @brief The position in the individual table. */\n    tsk_size_t individuals;\n    /** @brief The position in the node table. */\n    tsk_size_t nodes;\n    /** @brief The position in the edge table. 
*/\n    tsk_size_t edges;\n    /** @brief The position in the migration table. */\n    tsk_size_t migrations;\n    /** @brief The position in the site table. */\n    tsk_size_t sites;\n    /** @brief The position in the mutation table. */\n    tsk_size_t mutations;\n    /** @brief The position in the population table. */\n    tsk_size_t populations;\n    /** @brief The position in the provenance table. */\n    tsk_size_t provenances;\n} tsk_bookmark_t;\n\n/**\n@brief Low-level table sorting method.\n*/\ntypedef struct _tsk_table_sorter_t {\n    /** @brief The input tables that are being sorted. */\n    tsk_table_collection_t *tables;\n    /** @brief The edge sorting function. If set to NULL, edges are not sorted. */\n    int (*sort_edges)(struct _tsk_table_sorter_t *self, tsk_size_t start);\n    /** @brief The mutation sorting function. */\n    int (*sort_mutations)(struct _tsk_table_sorter_t *self);\n    /** @brief The individual sorting function. */\n    int (*sort_individuals)(struct _tsk_table_sorter_t *self);\n    /** @brief An opaque pointer for use by client code */\n    void *user_data;\n    /** @brief Mapping from input site IDs to output site IDs */\n    tsk_id_t *site_id_map;\n} tsk_table_sorter_t;\n\n/* Structs for IBD finding.\n * TODO: document properly\n * */\n\n/* Note for tskit developers: it's perhaps a bit confusing/pointless to\n * have the tsk_identity_segment_t struct as well as the internal tsk_segment_t\n * struct (which is identical). 
However, we may want to implement either\n * segment type differently in future, and since the tsk_identity_segment_t\n * is part of the public API we want to allow the freedom for the different\n * structures to evolve over time */\ntypedef struct _tsk_identity_segment_t {\n    double left;\n    double right;\n    struct _tsk_identity_segment_t *next;\n    tsk_id_t node;\n} tsk_identity_segment_t;\n\ntypedef struct {\n    tsk_size_t num_segments;\n    double total_span;\n    tsk_identity_segment_t *head;\n    tsk_identity_segment_t *tail;\n} tsk_identity_segment_list_t;\n\ntypedef struct {\n    tsk_size_t num_nodes;\n    tsk_avl_tree_int_t pair_map;\n    tsk_size_t num_segments;\n    double total_span;\n    tsk_blkalloc_t heap;\n    bool store_segments;\n    bool store_pairs;\n} tsk_identity_segments_t;\n\n/* Diff iterator. */\ntypedef struct _tsk_edge_list_node_t {\n    tsk_edge_t edge;\n    struct _tsk_edge_list_node_t *next;\n    struct _tsk_edge_list_node_t *prev;\n} tsk_edge_list_node_t;\n\ntypedef struct {\n    tsk_edge_list_node_t *head;\n    tsk_edge_list_node_t *tail;\n} tsk_edge_list_t;\n\n/****************************************************************************/\n/* Common function options */\n/****************************************************************************/\n\n/**\n@defgroup API_FLAGS_SIMPLIFY_GROUP :c:func:`tsk_table_collection_simplify` and\n:c:func:`tsk_treeseq_simplify` specific flags.\n@{\n*/\n/** Remove sites from the output if there are no mutations that reference them.*/\n#define TSK_SIMPLIFY_FILTER_SITES (1 << 0)\n/** Remove populations from the output if there are no nodes or migrations that\nreference them. 
*/\n#define TSK_SIMPLIFY_FILTER_POPULATIONS (1 << 1)\n/** Remove individuals from the output if there are no nodes that reference them.*/\n#define TSK_SIMPLIFY_FILTER_INDIVIDUALS (1 << 2)\n/** Do not remove nodes from the output if there are no edges that reference\nthem and do not reorder nodes so that the samples are nodes 0 to num_samples - 1.\nNote that this flag is negated compared to other filtering options because\nthe default behaviour is to filter unreferenced nodes and reorder to put samples\nfirst.\n*/\n#define TSK_SIMPLIFY_NO_FILTER_NODES (1 << 7)\n/**\nDo not update the sample status of nodes as a result of simplification.\n*/\n#define TSK_SIMPLIFY_NO_UPDATE_SAMPLE_FLAGS (1 << 8)\n/**\nReduce the topological information in the tables to the minimum necessary to\nrepresent the trees that contain sites. If there are zero sites this will\nresult in zero output edges. When the number of sites is greater than zero,\nevery tree in the output tree sequence will contain at least one site.\nFor a given site, the topology of the tree containing that site will be\nidentical (up to node ID remapping) to the topology of the corresponding tree\nin the input.\n*/\n#define TSK_SIMPLIFY_REDUCE_TO_SITE_TOPOLOGY (1 << 3)\n/**\nBy default simplify removes unary nodes (i.e., nodes with exactly one child)\nalong the path from samples to root. If this option is specified such unary\nnodes will be preserved in the output.\n*/\n#define TSK_SIMPLIFY_KEEP_UNARY (1 << 4)\n/**\nBy default simplify removes all topology ancestral to the MRCAs of the samples.\nThis option inserts edges from these MRCAs back to the roots of the input\ntrees.\n*/\n#define TSK_SIMPLIFY_KEEP_INPUT_ROOTS (1 << 5)\n/**\n@rst\nThis acts like :c:macro:`TSK_SIMPLIFY_KEEP_UNARY` (and is mutually exclusive with that\nflag). 
It keeps unary nodes, but only if the unary node is referenced from an individual.\n@endrst\n*/\n#define TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS (1 << 6)\n/** @} */\n\n/**\n@defgroup API_FLAGS_SUBSET_GROUP :c:func:`tsk_table_collection_subset` specific flags.\n@{\n*/\n/**If this flag is provided, the population table will not be changed in any way.*/\n#define TSK_SUBSET_NO_CHANGE_POPULATIONS (1 << 0)\n/**\n@rst\nIf this flag is provided, then unreferenced sites, individuals, and populations\nwill not be removed. If so, the site and individual tables will not be changed,\nand (unless :c:macro:`TSK_SUBSET_NO_CHANGE_POPULATIONS` is also provided) unreferenced\npopulations will be placed last, in their original order.\n@endrst\n*/\n#define TSK_SUBSET_KEEP_UNREFERENCED (1 << 1)\n/** @} */\n\n/**\n@defgroup API_FLAGS_CHECK_INTEGRITY_GROUP :c:func:`tsk_table_collection_check_integrity`\nspecific flags.\n@{\n*/\n/** Check edge ordering constraints for a tree sequence. */\n#define TSK_CHECK_EDGE_ORDERING (1 << 0)\n/** Check that sites are in non-decreasing position order. */\n#define TSK_CHECK_SITE_ORDERING (1 << 1)\n/**Check for any duplicate site positions. */\n#define TSK_CHECK_SITE_DUPLICATES (1 << 2)\n/**\nCheck constraints on the ordering of mutations. Any non-null\nmutation parents and known times are checked for ordering\nconstraints.\n*/\n#define TSK_CHECK_MUTATION_ORDERING (1 << 3)\n/**Check individual parents are before children, where specified. */\n#define TSK_CHECK_INDIVIDUAL_ORDERING (1 << 4)\n/**Check migrations are ordered by time. */\n#define TSK_CHECK_MIGRATION_ORDERING (1 << 5)\n/**Check that the table indexes exist, and contain valid edge references. */\n#define TSK_CHECK_INDEXES (1 << 6)\n/**\nAll checks needed to define a valid tree sequence. 
Note that\nthis implies all of the above checks.\n*/\n#define TSK_CHECK_TREES (1 << 7)\n/**\nCheck mutation parents are consistent with topology.\nImplies TSK_CHECK_TREES.\n*/\n#define TSK_CHECK_MUTATION_PARENTS (1 << 8)\n\n/* Leave room for more positive check flags */\n/**\nDo not check integrity of references to populations. This\ncan be safely combined with the other checks.\n*/\n#define TSK_NO_CHECK_POPULATION_REFS (1 << 12)\n/** @} */\n\n/**\n@defgroup API_FLAGS_LOAD_INIT_GROUP Flags used by load and init methods.\n@{\n*/\n/* These flags are for table collection load or init, or used as\n   flags on table collection or individual tables.\n * As flags are passed through from load to init they share a namespace */\n/** Skip reading tables, and only load top-level information. */\n#define TSK_LOAD_SKIP_TABLES (1 << 0)\n/** Do not load reference sequence. */\n#define TSK_LOAD_SKIP_REFERENCE_SEQUENCE (1 << 1)\n/**\n@rst\nDo not allocate space to store metadata in this table. Operations\nattempting to add non-empty metadata to the table will fail\nwith error TSK_ERR_METADATA_DISABLED.\n@endrst\n*/\n#define TSK_TABLE_NO_METADATA (1 << 2)\n/**\n@rst\nDo not allocate space to store metadata in the edge table. Operations\nattempting to add non-empty metadata to the edge table will fail\nwith error TSK_ERR_METADATA_DISABLED.\n@endrst\n*/\n#define TSK_TC_NO_EDGE_METADATA (1 << 3)\n/** @} */\n\n/* Flags for dump tables */\n/* We may not want to document this flag, but it's useful for testing\n * so we put it high up in the bit space, below the common options */\n#define TSK_DUMP_FORCE_OFFSET_64 (1 << 27)\n\n/**\n@defgroup API_FLAGS_COPY_GROUP Flags used by :c:func:`tsk_table_collection_copy`.\n@{\n*/\n/** Copy the file uuid, by default this is not copied. 
*/\n#define TSK_COPY_FILE_UUID (1 << 0)\n/** @} */\n\n/**\n@defgroup API_FLAGS_UNION_GROUP Flags used by :c:func:`tsk_table_collection_union`.\n@{\n*/\n/**\nBy default, union checks that the portion of shared history between\n``self`` and ``other``, as implied by ``other_node_mapping``, are indeed\nequivalent. It does so by subsetting both ``self`` and ``other`` on the\nequivalent nodes specified in ``other_node_mapping``, and then checking for\nequality of the subsets.\n*/\n#define TSK_UNION_NO_CHECK_SHARED (1 << 0)\n/**\nBy default, all nodes new to ``self`` are assigned new populations. If this\noption is specified, nodes that are added to ``self`` will retain the\npopulation IDs they have in ``other``.\n */\n#define TSK_UNION_NO_ADD_POP (1 << 1)\n/**\nBy default, union only adds edges adjacent to a newly added node;\nthis option adds all edges.\n */\n#define TSK_UNION_ALL_EDGES (1 << 2)\n/**\nBy default, union only adds mutations on newly added edges, and\nsites for those mutations; this option adds all mutations and all sites.\n */\n#define TSK_UNION_ALL_MUTATIONS (1 << 3)\n/** @} */\n\n/**\n@defgroup API_FLAGS_CMP_GROUP Flags used by :c:func:`tsk_table_collection_equals`.\n@{\n*/\n/**\nDo not include the top-level tree sequence metadata and metadata schemas\nin the comparison.\n*/\n#define TSK_CMP_IGNORE_TS_METADATA (1 << 0)\n/** Do not include the provenance table in comparison. */\n#define TSK_CMP_IGNORE_PROVENANCE (1 << 1)\n/**\n@rst\nDo not include metadata when comparing the table collections.\nThis includes both the top-level tree sequence metadata as well as the\nmetadata for each of the tables (i.e., :c:macro:`TSK_CMP_IGNORE_TS_METADATA` is implied).\nAll metadata schemas are also ignored.\n@endrst\n*/\n#define TSK_CMP_IGNORE_METADATA (1 << 2)\n/**\n@rst\nDo not include the timestamp information when comparing the provenance\ntables. 
This has no effect if :c:macro:`TSK_CMP_IGNORE_PROVENANCE` is specified.\n@endrst\n*/\n#define TSK_CMP_IGNORE_TIMESTAMPS (1 << 3)\n/**\nDo not include any tables in the comparison, thus comparing only the\ntop-level information of the table collections being compared.\n*/\n#define TSK_CMP_IGNORE_TABLES (1 << 4)\n/** Do not include the reference sequence in the comparison. */\n#define TSK_CMP_IGNORE_REFERENCE_SEQUENCE (1 << 5)\n/** @} */\n\n/**\n@defgroup API_FLAGS_CLEAR_GROUP Flags used by :c:func:`tsk_table_collection_clear`.\n@{\n*/\n/** Additionally clear the table metadata schemas*/\n#define TSK_CLEAR_METADATA_SCHEMAS (1 << 0)\n/** Additionally clear the tree-sequence metadata and schema*/\n#define TSK_CLEAR_TS_METADATA_AND_SCHEMA (1 << 1)\n/** Additionally clear the provenance table*/\n#define TSK_CLEAR_PROVENANCE (1 << 2)\n/** @} */\n\n/* For the edge diff iterator */\n#define TSK_INCLUDE_TERMINAL (1 << 0)\n\n/** @brief Value returned by seeking methods when they have successfully\n    seeked to a non-null tree.\n\n    @ingroup TREE_API_SEEKING_GROUP\n*/\n#define TSK_TREE_OK 1\n\n/****************************************************************************/\n/* Function signatures */\n/****************************************************************************/\n\n/**\n@defgroup INDIVIDUAL_TABLE_API_GROUP Individual table API.\n@{\n*/\n\n/**\n@brief Initialises the table by allocating the internal memory.\n\n@rst\nThis must be called before any operations are performed on the table.\nSee the :ref:`sec_c_api_overview_structure` for details on how objects\nare initialised and freed.\n@endrst\n\n@param self A pointer to an uninitialised tsk_individual_table_t object.\n@param options Allocation time options. 
Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_individual_table_init(tsk_individual_table_t *self, tsk_flags_t options);\n\n/**\n@brief Free the internal memory for the specified table.\n\n@param self A pointer to an initialised tsk_individual_table_t object.\n@return Always returns 0.\n*/\nint tsk_individual_table_free(tsk_individual_table_t *self);\n\n/**\n@brief Adds a row to this individual table.\n\n@rst\nAdd a new individual with the specified ``flags``, ``location``, ``parents`` and\n``metadata`` to the table. Copies of the ``location``, ``parents`` and ``metadata``\nparameters are taken immediately. See the :ref:`table definition\n<sec_individual_table_definition>` for details of the columns in this table.\n@endrst\n\n@param self A pointer to a tsk_individual_table_t object.\n@param flags The bitwise flags for the new individual.\n@param location A pointer to a double array representing the spatial location\n    of the new individual. Can be ``NULL`` if ``location_length`` is 0.\n@param location_length The number of dimensions in the locations position.\n    Note this is the number of elements in the corresponding double array\n    not the number of bytes.\n@param parents A pointer to a ``tsk_id_t`` array representing the parents\n    of the new individual. Can be ``NULL`` if ``parents_length`` is 0.\n@param parents_length The number of parents.\n    Note this is the number of elements in the corresponding ``tsk_id_t`` array\n    not the number of bytes.\n@param metadata The metadata to be associated with the new individual. This\n    is a pointer to arbitrary memory. 
Can be ``NULL`` if ``metadata_length`` is 0.\n@param metadata_length The size of the metadata array in bytes.\n@return Return the ID of the newly added individual on success,\n    or a negative value on failure.\n*/\ntsk_id_t tsk_individual_table_add_row(tsk_individual_table_t *self, tsk_flags_t flags,\n    const double *location, tsk_size_t location_length, const tsk_id_t *parents,\n    tsk_size_t parents_length, const char *metadata, tsk_size_t metadata_length);\n\n/**\n@brief Updates the row at the specified index.\n\n@rst\nRewrite the row at the specified index in this table to use the specified\nvalues. Copies of the ``location``, ``parents`` and ``metadata``\nparameters are taken immediately. See the :ref:`table definition\n<sec_individual_table_definition>` for details of the columns in this table.\n\n.. warning::\n    Because of the way that ragged columns are encoded, this method requires a\n    full rewrite of the internal column memory in the worst case, and would\n    therefore be inefficient for bulk updates for such columns. However, if the\n    sizes of all ragged column values are unchanged in the updated row, this\n    method is guaranteed to only update the memory for the row in question.\n@endrst\n\n@param self A pointer to a tsk_individual_table_t object.\n@param index The row to update.\n@param flags The bitwise flags for the individual.\n@param location A pointer to a double array representing the spatial location\n    of the new individual. Can be ``NULL`` if ``location_length`` is 0.\n@param location_length The number of dimensions in the locations position.\n    Note this is the number of elements in the corresponding double array\n    not the number of bytes.\n@param parents A pointer to a ``tsk_id_t`` array representing the parents\n    of the new individual. 
Can be ``NULL`` if ``parents_length`` is 0.\n@param parents_length The number of parents.\n    Note this is the number of elements in the corresponding ``tsk_id_t`` array\n    not the number of bytes.\n@param metadata The metadata to be associated with the new individual. This\n    is a pointer to arbitrary memory. Can be ``NULL`` if ``metadata_length`` is 0.\n@param metadata_length The size of the metadata array in bytes.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_individual_table_update_row(tsk_individual_table_t *self, tsk_id_t index,\n    tsk_flags_t flags, const double *location, tsk_size_t location_length,\n    const tsk_id_t *parents, tsk_size_t parents_length, const char *metadata,\n    tsk_size_t metadata_length);\n\n/**\n@brief Clears this table, setting the number of rows to zero.\n\n@rst\nNo memory is freed as a result of this operation; please use\n:c:func:`tsk_individual_table_free` to free the table's internal resources. Note that the\nmetadata schema is not cleared.\n@endrst\n\n@param self A pointer to a tsk_individual_table_t object.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_individual_table_clear(tsk_individual_table_t *self);\n\n/**\n@brief Truncates this table so that only the first num_rows are retained.\n\n@param self A pointer to a tsk_individual_table_t object.\n@param num_rows The number of rows to retain in the table.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_individual_table_truncate(tsk_individual_table_t *self, tsk_size_t num_rows);\n\n/**\n@brief Extends this table by appending rows copied from another table.\n\n@rst\nAppends the rows at the specified indexes from the table ``other`` to the end of this\ntable. Row indexes can be repeated and in any order. If ``row_indexes`` is NULL, append\nthe first ``num_rows`` from ``other`` to this table. 
Note that metadata is copied as-is\nand is not checked for compatibility with any existing schema on this table.\n@endrst\n\n@param self A pointer to a tsk_individual_table_t object where rows are to be added.\n@param other A pointer to a tsk_individual_table_t object where rows are copied from.\n@param num_rows The number of rows from ``other`` to append to this table.\n@param row_indexes Array of row indexes in ``other``. If ``NULL`` is passed then the\n    first ``num_rows`` of ``other`` are used.\n@param options Bitwise option flags. Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_individual_table_extend(tsk_individual_table_t *self,\n    const tsk_individual_table_t *other, tsk_size_t num_rows,\n    const tsk_id_t *row_indexes, tsk_flags_t options);\n\n/**\n@brief Subset this table by keeping rows according to a boolean mask.\n\n@rst\nDeletes rows from this table and optionally return the mapping from IDs in\nthe current table to the updated table. Rows are kept or deleted according to\nthe specified boolean array ``keep`` such that for each row ``j`` if\n``keep[j]`` is false (zero) the row is deleted, and otherwise the row is\nretained. Thus, ``keep`` must be an array of at least ``num_rows``\n:c:type:`bool` values.\n\nIf the ``id_map`` argument is non-null, this array will be updated to represent\nthe mapping between IDs before and after row deletion. For row ``j``,\n``id_map[j]`` will contain the new ID for row ``j`` if it is retained, or\n:c:macro:`TSK_NULL` if the row has been removed. Thus, ``id_map`` must be an\narray of at least ``num_rows`` :c:type:`tsk_id_t` values.\n\nThe values in the ``parents`` column are updated according to this map, so that\nreference integrity within the table is maintained. 
As a consequence of this,\nthe values in the ``parents`` column for kept rows are bounds-checked and an\nerror raised if they are not valid. Rows that are deleted are not checked for\nparent ID integrity.\n\nIf an attempt is made to delete rows that are referred to by the ``parents``\ncolumn of rows that are retained, an error is raised.\n\nThese error conditions are checked before any alterations to the table are\nmade.\n\n.. warning::\n    C++ users need to be careful to specify the correct type when\n    passing in values for the ``keep`` array,\n    using ``std::vector<tsk_bool_t>`` and not ``std::vector<bool>``,\n    as the latter may not be correct size.\n\n@endrst\n\n@param self A pointer to a tsk_individual_table_t object.\n@param keep Array of boolean flags describing whether a particular\n    row should be kept or not. Must be at least ``num_rows`` long.\n@param options Bitwise option flags. Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@param id_map An array in which to store the mapping between new\n    and old IDs. If NULL, this will be ignored.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_individual_table_keep_rows(tsk_individual_table_t *self, const tsk_bool_t *keep,\n    tsk_flags_t options, tsk_id_t *id_map);\n\n/**\n@brief Returns true if the data in the specified table is identical to the data\n       in this table.\n\n@rst\n\n**Options**\n\nOptions to control the comparison can be specified by providing one or\nmore of the following bitwise flags. 
By default (options=0) tables are\nconsidered equal if they are byte-wise identical in all columns,\nand their metadata schemas are byte-wise identical.\n\n- :c:macro:`TSK_CMP_IGNORE_METADATA`\n@endrst\n\n@param self A pointer to a tsk_individual_table_t object.\n@param other A pointer to a tsk_individual_table_t object.\n@param options Bitwise comparison options.\n@return Return true if the specified table is equal to this table.\n*/\nbool tsk_individual_table_equals(const tsk_individual_table_t *self,\n    const tsk_individual_table_t *other, tsk_flags_t options);\n\n/**\n@brief Copies the state of this table into the specified destination.\n\n@rst\nBy default the method initialises the specified destination table. If the\ndestination is already initialised, the :c:macro:`TSK_NO_INIT` option should\nbe supplied to avoid leaking memory.\n\nIndexes that are present are also copied to the destination table.\n@endrst\n\n@param self A pointer to a tsk_individual_table_t object.\n@param dest A pointer to a tsk_individual_table_t object. If the TSK_NO_INIT\noption is specified, this must be an initialised individual table. If not, it must be an\nuninitialised individual table.\n@param options Bitwise option flags.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_individual_table_copy(const tsk_individual_table_t *self,\n    tsk_individual_table_t *dest, tsk_flags_t options);\n\n/**\n@brief Get the row at the specified index.\n\n@rst\nUpdates the specified individual struct to reflect the values in the specified row.\nPointers to memory within this struct are handled by the table and should **not**\nbe freed by client code. 
These pointers are guaranteed to be valid until the\nnext operation that modifies the table (e.g., by adding a new row), but not afterwards.\n@endrst\n\n@param self A pointer to a tsk_individual_table_t object.\n@param index The requested table row.\n@param row A pointer to a tsk_individual_t struct that is updated to reflect the\n    values in the specified row.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_individual_table_get_row(\n    const tsk_individual_table_t *self, tsk_id_t index, tsk_individual_t *row);\n\n/**\n@brief Set the metadata schema\n\n@rst\nCopies the metadata schema string to this table, replacing any existing.\n@endrst\n\n@param self A pointer to a tsk_individual_table_t object.\n@param metadata_schema A pointer to a char array.\n@param metadata_schema_length The size of the metadata schema in bytes.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_individual_table_set_metadata_schema(tsk_individual_table_t *self,\n    const char *metadata_schema, tsk_size_t metadata_schema_length);\n\n/**\n@brief Print out the state of this table to the specified stream.\n\nThis method is intended for debugging purposes and should not be used\nin production code. The format of the output should **not** be depended\non and may change arbitrarily between versions.\n\n@param self A pointer to a tsk_individual_table_t object.\n@param out The stream to write the summary to.\n*/\nvoid tsk_individual_table_print_state(const tsk_individual_table_t *self, FILE *out);\n\n/**\n@brief Replace this table's data by copying from a set of column arrays\n\n@rst\nClears the data columns of this table and then copies column data from the specified\nset of arrays. 
The supplied arrays should all contain data on the same number of rows.\nThe metadata schema is not affected.\n@endrst\n\n@param self A pointer to a tsk_individual_table_t object.\n@param num_rows The number of rows to copy from the specified arrays.\n@param flags The array of tsk_flags_t flag values to be copied.\n@param location The array of double location values to be copied.\n@param location_offset The array of tsk_size_t location offset values to be copied.\n@param parents The array of tsk_id_t parent values to be copied.\n@param parents_offset The array of tsk_size_t parent offset values to be copied.\n@param metadata The array of char metadata values to be copied.\n@param metadata_offset The array of tsk_size_t metadata offset values to be copied.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_individual_table_set_columns(tsk_individual_table_t *self, tsk_size_t num_rows,\n    const tsk_flags_t *flags, const double *location, const tsk_size_t *location_offset,\n    const tsk_id_t *parents, const tsk_size_t *parents_offset, const char *metadata,\n    const tsk_size_t *metadata_offset);\n\n/**\n@brief Extends this table by copying from a set of column arrays\n\n@rst\nCopies column data from the specified set of arrays to create new rows at the end of the\ntable. The supplied arrays should all contain data on the same number of rows. 
The\nmetadata schema is not affected.\n@endrst\n\n@param self A pointer to a tsk_individual_table_t object.\n@param num_rows The number of rows to copy from the specified arrays.\n@param flags The array of tsk_flags_t flag values to be copied.\n@param location The array of double location values to be copied.\n@param location_offset The array of tsk_size_t location offset values to be copied.\n@param parents The array of tsk_id_t parent values to be copied.\n@param parents_offset The array of tsk_size_t parent offset values to be copied.\n@param metadata The array of char metadata values to be copied.\n@param metadata_offset The array of tsk_size_t metadata offset values to be copied.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_individual_table_append_columns(tsk_individual_table_t *self,\n    tsk_size_t num_rows, const tsk_flags_t *flags, const double *location,\n    const tsk_size_t *location_offset, const tsk_id_t *parents,\n    const tsk_size_t *parents_offset, const char *metadata,\n    const tsk_size_t *metadata_offset);\n\n/**\n@brief Controls the pre-allocation strategy for this table\n\n@rst\nSet a fixed pre-allocation size, or use the default doubling strategy.\nSee :ref:`sec_c_api_memory_allocation_strategy` for details on the default\npre-allocation strategy.\n@endrst\n\n@param self A pointer to a tsk_individual_table_t object.\n@param max_rows_increment The number of rows to pre-allocate, or zero for the default\n    doubling strategy.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_individual_table_set_max_rows_increment(\n    tsk_individual_table_t *self, tsk_size_t max_rows_increment);\n\n/**\n@brief Controls the pre-allocation strategy for the metadata column\n\n@rst\nSet a fixed pre-allocation size, or use the default doubling strategy.\nSee :ref:`sec_c_api_memory_allocation_strategy` for details on the default\npre-allocation strategy.\n@endrst\n\n@param self A pointer to a 
tsk_individual_table_t object.\n@param max_metadata_length_increment The number of bytes to pre-allocate, or zero for\nthe default doubling strategy.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_individual_table_set_max_metadata_length_increment(\n    tsk_individual_table_t *self, tsk_size_t max_metadata_length_increment);\n\n/**\n@brief Controls the pre-allocation strategy for the location column\n\n@rst\nSet a fixed pre-allocation size, or use the default doubling strategy.\nSee :ref:`sec_c_api_memory_allocation_strategy` for details on the default\npre-allocation strategy,\n@endrst\n\n@param self A pointer to a tsk_individual_table_t object.\n@param max_location_length_increment The number of bytes to pre-allocate, or zero for\nthe default doubling strategy.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_individual_table_set_max_location_length_increment(\n    tsk_individual_table_t *self, tsk_size_t max_location_length_increment);\n\n/**\n@brief Controls the pre-allocation strategy for the parents column\n\n@rst\nSet a fixed pre-allocation size, or use the default doubling strategy.\nSee :ref:`sec_c_api_memory_allocation_strategy` for details on the default\npre-allocation strategy,\n@endrst\n\n@param self A pointer to a tsk_individual_table_t object.\n@param max_parents_length_increment The number of bytes to pre-allocate, or zero for\nthe default doubling strategy.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_individual_table_set_max_parents_length_increment(\n    tsk_individual_table_t *self, tsk_size_t max_parents_length_increment);\n\n/** @} */\n\n/* Undocumented methods */\n\nint tsk_individual_table_dump_text(const tsk_individual_table_t *self, FILE *out);\n/**\n@defgroup NODE_TABLE_API_GROUP Node table API.\n@{\n*/\n\n/**\n@brief Initialises the table by allocating the internal memory.\n\n@rst\nThis must be called before any operations are performed on the table.\nSee 
the :ref:`sec_c_api_overview_structure` for details on how objects\nare initialised and freed.\n@endrst\n\n@param self A pointer to an uninitialised tsk_node_table_t object.\n@param options Allocation time options. Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_node_table_init(tsk_node_table_t *self, tsk_flags_t options);\n\n/**\n@brief Free the internal memory for the specified table.\n\n@param self A pointer to an initialised tsk_node_table_t object.\n@return Always returns 0.\n*/\nint tsk_node_table_free(tsk_node_table_t *self);\n\n/**\n@brief Adds a row to this node table.\n\n@rst\nAdd a new node with the specified ``flags``, ``time``, ``population``,\n``individual`` and ``metadata`` to the table. A copy of the ``metadata`` parameter\nis taken immediately. See the :ref:`table definition <sec_node_table_definition>`\nfor details of the columns in this table.\n@endrst\n\n@param self A pointer to a tsk_node_table_t object.\n@param flags The bitwise flags for the new node.\n@param time The time for the new node.\n@param population The population for the new node. Set to TSK_NULL if not\nknown.\n@param individual The individual for the new node. Set to TSK_NULL if not\nknown.\n@param metadata The metadata to be associated with the new node. This\n    is a pointer to arbitrary memory. Can be ``NULL`` if ``metadata_length`` is 0.\n@param metadata_length The size of the metadata array in bytes.\n@return Return the ID of the newly added node on success,\n    or a negative value on failure.\n*/\ntsk_id_t tsk_node_table_add_row(tsk_node_table_t *self, tsk_flags_t flags, double time,\n    tsk_id_t population, tsk_id_t individual, const char *metadata,\n    tsk_size_t metadata_length);\n\n/**\n@brief Updates the row at the specified index.\n\n@rst\nRewrite the row at the specified index in this table to use the specified\nvalues. 
A copy of the ``metadata`` parameter is taken immediately. See the\n:ref:`table definition <sec_node_table_definition>` for details of the columns\nin this table.\n\n.. warning::\n    Because of the way that ragged columns are encoded, this method requires a\n    full rewrite of the internal column memory in worst case, and would\n    therefore be inefficient for bulk updates for such columns. However, if the\n    sizes of all ragged column values are unchanged in the updated row, this\n    method is guaranteed to only update the memory for the row in question.\n@endrst\n\n@param self A pointer to a tsk_node_table_t object.\n@param index The row to update.\n@param flags The bitwise flags for the node.\n@param time The time for the node.\n@param population The population for the node. Set to TSK_NULL if not known.\n@param individual The individual for the node. Set to TSK_NULL if not known.\n@param metadata The metadata to be associated with the node. This\n    is a pointer to arbitrary memory. Can be ``NULL`` if ``metadata_length`` is 0.\n@param metadata_length The size of the metadata array in bytes.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_node_table_update_row(tsk_node_table_t *self, tsk_id_t index, tsk_flags_t flags,\n    double time, tsk_id_t population, tsk_id_t individual, const char *metadata,\n    tsk_size_t metadata_length);\n\n/**\n@brief Clears this table, setting the number of rows to zero.\n\n@rst\nNo memory is freed as a result of this operation; please use\n:c:func:`tsk_node_table_free` to free the table's internal resources. 
Note that the\nmetadata schema is not cleared.\n@endrst\n\n@param self A pointer to a tsk_node_table_t object.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_node_table_clear(tsk_node_table_t *self);\n\n/**\n@brief Truncates this table so that only the first num_rows are retained.\n\n@param self A pointer to a tsk_node_table_t object.\n@param num_rows The number of rows to retain in the table.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_node_table_truncate(tsk_node_table_t *self, tsk_size_t num_rows);\n\n/**\n@brief Extends this table by appending rows copied from another table.\n\n@rst\nAppends the rows at the specified indexes from the table ``other`` to the end of this\ntable. Row indexes can be repeated and in any order. If ``row_indexes`` is NULL, append\nthe first ``num_rows`` from ``other`` to this table. Note that metadata is copied as-is\nand is not checked for compatibility with any existing schema on this table.\n@endrst\n\n@param self A pointer to a tsk_node_table_t object where rows are to be added.\n@param other A pointer to a tsk_node_table_t object where rows are copied from.\n@param num_rows The number of rows from ``other`` to append to this table.\n@param row_indexes Array of row indexes in ``other``. If ``NULL`` is passed then the\n    first ``num_rows`` of ``other`` are used.\n@param options Bitwise option flags. Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_node_table_extend(tsk_node_table_t *self, const tsk_node_table_t *other,\n    tsk_size_t num_rows, const tsk_id_t *row_indexes, tsk_flags_t options);\n\n/**\n@brief Subset this table by keeping rows according to a boolean mask.\n\n@rst\nDeletes rows from this table and optionally return the mapping from IDs in\nthe current table to the updated table. 
Rows are kept or deleted according to\nthe specified boolean array ``keep`` such that for each row ``j`` if\n``keep[j]`` is false (zero) the row is deleted, and otherwise the row is\nretained. Thus, ``keep`` must be an array of at least ``num_rows``\n:c:type:`bool` values.\n\nIf the ``id_map`` argument is non-null, this array will be updated to represent\nthe mapping between IDs before and after row deletion. For row ``j``,\n``id_map[j]`` will contain the new ID for row ``j`` if it is retained, or\n:c:macro:`TSK_NULL` if the row has been removed. Thus, ``id_map`` must be an\narray of at least ``num_rows`` :c:type:`tsk_id_t` values.\n\n.. warning::\n    C++ users need to be careful to specify the correct type when\n    passing in values for the ``keep`` array,\n    using ``std::vector<tsk_bool_t>`` and not ``std::vector<bool>``,\n    as the latter may not be the correct size.\n\n@endrst\n\n@param self A pointer to a tsk_node_table_t object.\n@param keep Array of boolean flags describing whether a particular\n    row should be kept or not. Must be at least ``num_rows`` long.\n@param options Bitwise option flags. Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@param id_map An array in which to store the mapping between new\n    and old IDs. If NULL, this will be ignored.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_node_table_keep_rows(tsk_node_table_t *self, const tsk_bool_t *keep,\n    tsk_flags_t options, tsk_id_t *id_map);\n\n/**\n@brief Returns true if the data in the specified table is identical to the data\n       in this table.\n\n@rst\n\n**Options**\n\nOptions to control the comparison can be specified by providing one or\nmore of the following bitwise flags. 
By default (options=0) tables are\nconsidered equal if they are byte-wise identical in all columns,\nand their metadata schemas are byte-wise identical.\n\n- :c:macro:`TSK_CMP_IGNORE_METADATA`\n@endrst\n\n@param self A pointer to a tsk_node_table_t object.\n@param other A pointer to a tsk_node_table_t object.\n@param options Bitwise comparison options.\n@return Return true if the specified table is equal to this table.\n*/\nbool tsk_node_table_equals(\n    const tsk_node_table_t *self, const tsk_node_table_t *other, tsk_flags_t options);\n\n/**\n@brief Copies the state of this table into the specified destination.\n\n@rst\nBy default the method initialises the specified destination table. If the\ndestination is already initialised, the TSK_NO_INIT option should\nbe supplied to avoid leaking memory.\n@endrst\n\n@param self A pointer to a tsk_node_table_t object.\n@param dest A pointer to a tsk_node_table_t object. If the TSK_NO_INIT option\n    is specified, this must be an initialised node table. If not, it must\n    be an uninitialised node table.\n@param options Bitwise option flags.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_node_table_copy(\n    const tsk_node_table_t *self, tsk_node_table_t *dest, tsk_flags_t options);\n\n/**\n@brief Get the row at the specified index.\n\n@rst\nUpdates the specified node struct to reflect the values in the specified row.\nPointers to memory within this struct are handled by the table and should **not**\nbe freed by client code. 
These pointers are guaranteed to be valid until the\nnext operation that modifies the table (e.g., by adding a new row), but not afterwards.\n@endrst\n\n@param self A pointer to a tsk_node_table_t object.\n@param index The requested table row.\n@param row A pointer to a tsk_node_t struct that is updated to reflect the\n    values in the specified row.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_node_table_get_row(\n    const tsk_node_table_t *self, tsk_id_t index, tsk_node_t *row);\n\n/**\n@brief Set the metadata schema\n@rst\nCopies the metadata schema string to this table, replacing any existing.\n@endrst\n@param self A pointer to a tsk_node_table_t object.\n@param metadata_schema A pointer to a char array.\n@param metadata_schema_length The size of the metadata schema in bytes.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_node_table_set_metadata_schema(tsk_node_table_t *self,\n    const char *metadata_schema, tsk_size_t metadata_schema_length);\n\n/**\n@brief Print out the state of this table to the specified stream.\n\nThis method is intended for debugging purposes and should not be used\nin production code. The format of the output should **not** be depended\non and may change arbitrarily between versions.\n\n@param self A pointer to a tsk_node_table_t object.\n@param out The stream to write the summary to.\n*/\nvoid tsk_node_table_print_state(const tsk_node_table_t *self, FILE *out);\n\n/**\n@brief Replace this table's data by copying from a set of column arrays\n\n@rst\nClears the data columns of this table and then copies column data from the specified\nset of arrays. 
The supplied arrays should all contain data on the same number of rows.\nThe metadata schema is not affected.\n@endrst\n\n@param self A pointer to a tsk_node_table_t object.\n@param num_rows The number of rows to copy from the specified arrays.\n@param flags The array of tsk_flags_t values to be copied.\n@param time The array of double time values to be copied.\n@param population The array of tsk_id_t population values to be copied.\n@param individual The array of tsk_id_t individual values to be copied.\n@param metadata The array of char metadata values to be copied.\n@param metadata_offset The array of tsk_size_t metadata offset values to be copied.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_node_table_set_columns(tsk_node_table_t *self, tsk_size_t num_rows,\n    const tsk_flags_t *flags, const double *time, const tsk_id_t *population,\n    const tsk_id_t *individual, const char *metadata, const tsk_size_t *metadata_offset);\n\n/**\n@brief Extends this table by copying from a set of column arrays\n\n@rst\nCopies column data from the specified set of arrays to create new rows at the end of the\ntable. The supplied arrays should all contain data on the same number of rows. 
The\nmetadata schema is not affected.\n@endrst\n\n@param self A pointer to a tsk_node_table_t object.\n@param num_rows The number of rows to copy from the specified arrays.\n@param flags The array of tsk_flags_t values to be copied.\n@param time The array of double time values to be copied.\n@param population The array of tsk_id_t population values to be copied.\n@param individual The array of tsk_id_t individual values to be copied.\n@param metadata The array of char metadata values to be copied.\n@param metadata_offset The array of tsk_size_t metadata offset values to be copied.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_node_table_append_columns(tsk_node_table_t *self, tsk_size_t num_rows,\n    const tsk_flags_t *flags, const double *time, const tsk_id_t *population,\n    const tsk_id_t *individual, const char *metadata, const tsk_size_t *metadata_offset);\n\n/**\n@brief Controls the pre-allocation strategy for this table\n\n@rst\nSet a fixed pre-allocation size, or use the default doubling strategy.\nSee :ref:`sec_c_api_memory_allocation_strategy` for details on the default\npre-allocation strategy.\n@endrst\n\n@param self A pointer to a tsk_node_table_t object.\n@param max_rows_increment The number of rows to pre-allocate, or zero for the default\n    doubling strategy.\n@return Return 0 on success or a negative value on failure.\n*/\n\nint tsk_node_table_set_max_rows_increment(\n    tsk_node_table_t *self, tsk_size_t max_rows_increment);\n\n/**\n@brief Controls the pre-allocation strategy for the metadata column\n\n@rst\nSet a fixed pre-allocation size, or use the default doubling strategy.\nSee :ref:`sec_c_api_memory_allocation_strategy` for details on the default\npre-allocation strategy.\n@endrst\n\n@param self A pointer to a tsk_node_table_t object.\n@param max_metadata_length_increment The number of bytes to pre-allocate, or zero for\nthe default doubling strategy.\n@return Return 0 on success or a negative value on 
failure.\n*/\nint tsk_node_table_set_max_metadata_length_increment(\n    tsk_node_table_t *self, tsk_size_t max_metadata_length_increment);\n\n/** @} */\n\n/* Undocumented methods */\n\nint tsk_node_table_dump_text(const tsk_node_table_t *self, FILE *out);\n\n/**\n@defgroup EDGE_TABLE_API_GROUP Edge table API.\n@{\n*/\n\n/**\n@brief Initialises the table by allocating the internal memory.\n\n@rst\nThis must be called before any operations are performed on the table.\nSee the :ref:`sec_c_api_overview_structure` for details on how objects\nare initialised and freed.\n\n**Options**\n\nOptions can be specified by providing one or more of the following bitwise\nflags:\n\n- :c:macro:`TSK_TABLE_NO_METADATA`\n@endrst\n\n@param self A pointer to an uninitialised tsk_edge_table_t object.\n@param options Allocation time options.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_edge_table_init(tsk_edge_table_t *self, tsk_flags_t options);\n\n/**\n@brief Free the internal memory for the specified table.\n\n@param self A pointer to an initialised tsk_edge_table_t object.\n@return Always returns 0.\n*/\nint tsk_edge_table_free(tsk_edge_table_t *self);\n\n/**\n@brief Adds a row to this edge table.\n\n@rst\nAdd a new edge with the specified ``left``, ``right``, ``parent``, ``child`` and\n``metadata`` to the table. See the :ref:`table definition <sec_edge_table_definition>`\nfor details of the columns in this table.\n@endrst\n\n@param self A pointer to a tsk_edge_table_t object.\n@param left The left coordinate for the new edge.\n@param right The right coordinate for the new edge.\n@param parent The parent node for the new edge.\n@param child The child node for the new edge.\n@param metadata The metadata to be associated with the new edge. This\n    is a pointer to arbitrary memory. 
Can be ``NULL`` if ``metadata_length`` is 0.\n@param metadata_length The size of the metadata array in bytes.\n\n@return Return the ID of the newly added edge on success,\n    or a negative value on failure.\n*/\ntsk_id_t tsk_edge_table_add_row(tsk_edge_table_t *self, double left, double right,\n    tsk_id_t parent, tsk_id_t child, const char *metadata, tsk_size_t metadata_length);\n\n/**\n@brief Updates the row at the specified index.\n\n@rst\nRewrite the row at the specified index in this table to use the specified\nvalues. A copy of the ``metadata`` parameter is taken immediately. See the\n:ref:`table definition <sec_edge_table_definition>` for details of the columns\nin this table.\n\n.. warning::\n    Because of the way that ragged columns are encoded, this method requires a\n    full rewrite of the internal column memory in worst case, and would\n    therefore be inefficient for bulk updates for such columns. However, if the\n    sizes of all ragged column values are unchanged in the updated row, this\n    method is guaranteed to only update the memory for the row in question.\n@endrst\n\n@param self A pointer to a tsk_edge_table_t object.\n@param index The row to update.\n@param left The left coordinate for the edge.\n@param right The right coordinate for the edge.\n@param parent The parent node for the edge.\n@param child The child node for the edge.\n@param metadata The metadata to be associated with the edge. This\n    is a pointer to arbitrary memory. 
Can be ``NULL`` if ``metadata_length`` is 0.\n@param metadata_length The size of the metadata array in bytes.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_edge_table_update_row(tsk_edge_table_t *self, tsk_id_t index, double left,\n    double right, tsk_id_t parent, tsk_id_t child, const char *metadata,\n    tsk_size_t metadata_length);\n\n/**\n@brief Clears this table, setting the number of rows to zero.\n\n@rst\nNo memory is freed as a result of this operation; please use\n:c:func:`tsk_edge_table_free` to free the table's internal resources. Note that the\nmetadata schema is not cleared.\n@endrst\n\n@param self A pointer to a tsk_edge_table_t object.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_edge_table_clear(tsk_edge_table_t *self);\n\n/**\n@brief Truncates this table so that only the first num_rows are retained.\n\n@param self A pointer to a tsk_edge_table_t object.\n@param num_rows The number of rows to retain in the table.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_edge_table_truncate(tsk_edge_table_t *self, tsk_size_t num_rows);\n\n/**\n@brief Extends this table by appending rows copied from another table.\n\n@rst\nAppends the rows at the specified indexes from the table ``other`` to the end of this\ntable. Row indexes can be repeated and in any order. If ``row_indexes`` is ``NULL``,\nappend the first ``num_rows`` from ``other`` to this table. Note that metadata is copied\nas-is and is not checked for compatibility with any existing schema on this table.\n@endrst\n\n@param self A pointer to a tsk_edge_table_t object where rows are to be added.\n@param other A pointer to a tsk_edge_table_t object where rows are copied from.\n@param num_rows The number of rows from ``other`` to append to this table.\n@param row_indexes Array of row indexes in ``other``. If ``NULL`` is passed then the\n    first ``num_rows`` of ``other`` are used.\n@param options Bitwise option flags. 
Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_edge_table_extend(tsk_edge_table_t *self, const tsk_edge_table_t *other,\n    tsk_size_t num_rows, const tsk_id_t *row_indexes, tsk_flags_t options);\n\n/**\n@brief Subset this table by keeping rows according to a boolean mask.\n\n@rst\nDeletes rows from this table and optionally returns the mapping from IDs in\nthe current table to the updated table. Rows are kept or deleted according to\nthe specified boolean array ``keep`` such that for each row ``j`` if\n``keep[j]`` is false (zero) the row is deleted, and otherwise the row is\nretained. Thus, ``keep`` must be an array of at least ``num_rows``\n:c:type:`bool` values.\n\nIf the ``id_map`` argument is non-null, this array will be updated to represent\nthe mapping between IDs before and after row deletion. For row ``j``,\n``id_map[j]`` will contain the new ID for row ``j`` if it is retained, or\n:c:macro:`TSK_NULL` if the row has been removed. Thus, ``id_map`` must be an\narray of at least ``num_rows`` :c:type:`tsk_id_t` values.\n\n.. warning::\n    C++ users need to be careful to specify the correct type when\n    passing in values for the ``keep`` array,\n    using ``std::vector<tsk_bool_t>`` and not ``std::vector<bool>``,\n    as the latter may not be the correct size.\n\n@endrst\n\n@param self A pointer to a tsk_edge_table_t object.\n@param keep Array of boolean flags describing whether a particular\n    row should be kept or not. Must be at least ``num_rows`` long.\n@param options Bitwise option flags. Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@param id_map An array in which to store the mapping between new\n    and old IDs. 
If NULL, this will be ignored.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_edge_table_keep_rows(tsk_edge_table_t *self, const tsk_bool_t *keep,\n    tsk_flags_t options, tsk_id_t *id_map);\n\n/**\n@brief Returns true if the data in the specified table is identical to the data\n       in this table.\n\n@rst\n\n**Options**\n\nOptions to control the comparison can be specified by providing one or\nmore of the following bitwise flags. By default (options=0) tables are\nconsidered equal if they are byte-wise identical in all columns,\nand their metadata schemas are byte-wise identical.\n\n- :c:macro:`TSK_CMP_IGNORE_METADATA`\n@endrst\n\n@param self A pointer to a tsk_edge_table_t object.\n@param other A pointer to a tsk_edge_table_t object.\n@param options Bitwise comparison options.\n@return Return true if the specified table is equal to this table.\n*/\nbool tsk_edge_table_equals(\n    const tsk_edge_table_t *self, const tsk_edge_table_t *other, tsk_flags_t options);\n\n/**\n@brief Copies the state of this table into the specified destination.\n\n@rst\nBy default the method initialises the specified destination table. If the\ndestination is already initialised, the :c:macro:`TSK_NO_INIT` option should\nbe supplied to avoid leaking memory.\n@endrst\n\n@param self A pointer to a tsk_edge_table_t object.\n@param dest A pointer to a tsk_edge_table_t object. If the TSK_NO_INIT option\n    is specified, this must be an initialised edge table. If not, it must\n    be an uninitialised edge table.\n@param options Bitwise option flags.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_edge_table_copy(\n    const tsk_edge_table_t *self, tsk_edge_table_t *dest, tsk_flags_t options);\n\n/**\n@brief Get the row at the specified index.\n\n@rst\nUpdates the specified edge struct to reflect the values in the specified row.\nPointers to memory within this struct are handled by the table and should **not**\nbe freed by client code. 
These pointers are guaranteed to be valid until the\nnext operation that modifies the table (e.g., by adding a new row), but not afterwards.\n@endrst\n\n@param self A pointer to a tsk_edge_table_t object.\n@param index The requested table row.\n@param row A pointer to a tsk_edge_t struct that is updated to reflect the\n    values in the specified row.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_edge_table_get_row(\n    const tsk_edge_table_t *self, tsk_id_t index, tsk_edge_t *row);\n\n/**\n@brief Set the metadata schema\n@rst\nCopies the metadata schema string to this table, replacing any existing.\n@endrst\n@param self A pointer to a tsk_edge_table_t object.\n@param metadata_schema A pointer to a char array\n@param metadata_schema_length The size of the metadata schema in bytes.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_edge_table_set_metadata_schema(tsk_edge_table_t *self,\n    const char *metadata_schema, tsk_size_t metadata_schema_length);\n\n/**\n@brief Print out the state of this table to the specified stream.\n\nThis method is intended for debugging purposes and should not be used\nin production code. The format of the output should **not** be depended\non and may change arbitrarily between versions.\n\n@param self A pointer to a tsk_edge_table_t object.\n@param out The stream to write the summary to.\n*/\nvoid tsk_edge_table_print_state(const tsk_edge_table_t *self, FILE *out);\n\n/**\n@brief Replace this table's data by copying from a set of column arrays\n\n@rst\nClears the data columns of this table and then copies column data from the specified\nset of arrays. 
The supplied arrays should all contain data on the same number of rows.\nThe metadata schema is not affected.\n@endrst\n\n@param self A pointer to a tsk_edge_table_t object.\n@param num_rows The number of rows to copy from the specified arrays.\n@param left The array of double left values to be copied.\n@param right The array of double right values to be copied.\n@param parent The array of tsk_id_t parent values to be copied.\n@param child The array of tsk_id_t child values to be copied.\n@param metadata The array of char metadata values to be copied.\n@param metadata_offset The array of tsk_size_t metadata offset values to be copied.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_edge_table_set_columns(tsk_edge_table_t *self, tsk_size_t num_rows,\n    const double *left, const double *right, const tsk_id_t *parent,\n    const tsk_id_t *child, const char *metadata, const tsk_size_t *metadata_offset);\n\n/**\n@brief Extends this table by copying from a set of column arrays\n\n@rst\nCopies column data from the specified set of arrays to create new rows at the end of the\ntable. The supplied arrays should all contain data on the same number of rows. 
The\nmetadata schema is not affected.\n@endrst\n\n@param self A pointer to a tsk_edge_table_t object.\n@param num_rows The number of rows to copy from the specified arrays.\n@param left The array of double left values to be copied.\n@param right The array of double right values to be copied.\n@param parent The array of tsk_id_t parent values to be copied.\n@param child The array of tsk_id_t child values to be copied.\n@param metadata The array of char metadata values to be copied.\n@param metadata_offset The array of tsk_size_t metadata offset values to be copied.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_edge_table_append_columns(tsk_edge_table_t *self, tsk_size_t num_rows,\n    const double *left, const double *right, const tsk_id_t *parent,\n    const tsk_id_t *child, const char *metadata, const tsk_size_t *metadata_offset);\n\n/**\n@brief Controls the pre-allocation strategy for this table\n\n@rst\nSet a fixed pre-allocation size, or use the default doubling strategy.\nSee :ref:`sec_c_api_memory_allocation_strategy` for details on the default\npre-allocation strategy.\n@endrst\n\n@param self A pointer to a tsk_edge_table_t object.\n@param max_rows_increment The number of rows to pre-allocate, or zero for the default\n    doubling strategy.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_edge_table_set_max_rows_increment(\n    tsk_edge_table_t *self, tsk_size_t max_rows_increment);\n\n/**\n@brief Controls the pre-allocation strategy for the metadata column\n\n@rst\nSet a fixed pre-allocation size, or use the default doubling strategy.\nSee :ref:`sec_c_api_memory_allocation_strategy` for details on the default\npre-allocation strategy.\n@endrst\n\n@param self A pointer to a tsk_edge_table_t object.\n@param max_metadata_length_increment The number of bytes to pre-allocate, or zero for\nthe default doubling strategy.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_edge_table_set_max_metadata_length_increment(\n    tsk_edge_table_t *self, 
tsk_size_t max_metadata_length_increment);\n\n/**\n@brief Squash adjacent edges in-place\n\n@rst\nSorts, then condenses the table into the smallest possible number of rows by\ncombining any adjacent edges. A pair of edges is said to be `adjacent` if\nthey have the same parent and child nodes, and if the left coordinate of\none of the edges is equal to the right coordinate of the other edge.\nThis process is performed in-place so that any set of adjacent edges is\nreplaced by a single edge. The new edge will have the same parent and child\nnode, a left coordinate equal to the smallest left coordinate in the set,\nand a right coordinate equal to the largest right coordinate in the set.\nThe new edge table will be sorted in the canonical order (P, C, L, R).\n\n.. note::\n    Note that this method will fail if any edges have non-empty metadata.\n\n@endrst\n\n@param self A pointer to a tsk_edge_table_t object.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_edge_table_squash(tsk_edge_table_t *self);\n\n/** @} */\n\n/* Undocumented methods */\n\nint tsk_edge_table_dump_text(const tsk_edge_table_t *self, FILE *out);\n\n/**\n@defgroup MIGRATION_TABLE_API_GROUP Migration table API.\n@{\n*/\n\n/**\n@brief Initialises the table by allocating the internal memory.\n\n@rst\nThis must be called before any operations are performed on the table.\nSee the :ref:`sec_c_api_overview_structure` for details on how objects\nare initialised and freed.\n@endrst\n\n@param self A pointer to an uninitialised tsk_migration_table_t object.\n@param options Allocation time options. 
Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_migration_table_init(tsk_migration_table_t *self, tsk_flags_t options);\n\n/**\n@brief Free the internal memory for the specified table.\n\n@param self A pointer to an initialised tsk_migration_table_t object.\n@return Always returns 0.\n*/\nint tsk_migration_table_free(tsk_migration_table_t *self);\n\n/**\n@brief Adds a row to this migration table.\n\n@rst\nAdd a new migration with the specified ``left``, ``right``, ``node``,\n``source``, ``dest``, ``time`` and ``metadata`` to the table.\nSee the :ref:`table definition <sec_migration_table_definition>`\nfor details of the columns in this table.\n@endrst\n\n@param self A pointer to a tsk_migration_table_t object.\n@param left The left coordinate for the new migration.\n@param right The right coordinate for the new migration.\n@param node The node ID for the new migration.\n@param source The source population ID for the new migration.\n@param dest The destination population ID for the new migration.\n@param time The time for the new migration.\n@param metadata The metadata to be associated with the new migration. This\n    is a pointer to arbitrary memory. Can be ``NULL`` if ``metadata_length`` is 0.\n@param metadata_length The size of the metadata array in bytes.\n\n@return Return the ID of the newly added migration on success,\n    or a negative value on failure.\n*/\ntsk_id_t tsk_migration_table_add_row(tsk_migration_table_t *self, double left,\n    double right, tsk_id_t node, tsk_id_t source, tsk_id_t dest, double time,\n    const char *metadata, tsk_size_t metadata_length);\n\n/**\n@brief Updates the row at the specified index.\n\n@rst\nRewrite the row at the specified index in this table to use the specified\nvalues. A copy of the ``metadata`` parameter is taken immediately. 
See the\n:ref:`table definition <sec_migration_table_definition>` for details of the columns\nin this table.\n\n.. warning::\n    Because of the way that ragged columns are encoded, this method requires a\n    full rewrite of the internal column memory in worst case, and would\n    therefore be inefficient for bulk updates for such columns. However, if the\n    sizes of all ragged column values are unchanged in the updated row, this\n    method is guaranteed to only update the memory for the row in question.\n@endrst\n\n@param self A pointer to a tsk_migration_table_t object.\n@param index The row to update.\n@param left The left coordinate for the migration.\n@param right The right coordinate for the migration.\n@param node The node ID for the migration.\n@param source The source population ID for the migration.\n@param dest The destination population ID for the migration.\n@param time The time for the migration.\n@param metadata The metadata to be associated with the migration. This\n    is a pointer to arbitrary memory. Can be ``NULL`` if ``metadata_length`` is 0.\n@param metadata_length The size of the metadata array in bytes.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_migration_table_update_row(tsk_migration_table_t *self, tsk_id_t index,\n    double left, double right, tsk_id_t node, tsk_id_t source, tsk_id_t dest,\n    double time, const char *metadata, tsk_size_t metadata_length);\n\n/**\n@brief Clears this table, setting the number of rows to zero.\n\n@rst\nNo memory is freed as a result of this operation; please use\n:c:func:`tsk_migration_table_free` to free the table's internal resources. 
Note that the\nmetadata schema is not cleared.\n@endrst\n\n@param self A pointer to a tsk_migration_table_t object.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_migration_table_clear(tsk_migration_table_t *self);\n\n/**\n@brief Truncates this table so that only the first num_rows are retained.\n\n@param self A pointer to a tsk_migration_table_t object.\n@param num_rows The number of rows to retain in the table.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_migration_table_truncate(tsk_migration_table_t *self, tsk_size_t num_rows);\n\n/**\n@brief Extends this table by appending rows copied from another table.\n\n@rst\nAppends the rows at the specified indexes from the table ``other`` to the end of this\ntable. Row indexes can be repeated and in any order. If ``row_indexes`` is NULL, append\nthe first ``num_rows`` from ``other`` to this table. Note that metadata is copied as-is\nand is not checked for compatibility with any existing schema on this table.\n@endrst\n\n@param self A pointer to a tsk_migration_table_t object where rows are to be added.\n@param other A pointer to a tsk_migration_table_t object where rows are copied from.\n@param num_rows The number of rows from ``other`` to append to this table.\n@param row_indexes Array of row indexes in ``other``. If ``NULL`` is passed then the\n    first ``num_rows`` of ``other`` are used.\n@param options Bitwise option flags. 
Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@return Return 0 on success or a negative value on failure.\n*/\n\nint tsk_migration_table_extend(tsk_migration_table_t *self,\n    const tsk_migration_table_t *other, tsk_size_t num_rows, const tsk_id_t *row_indexes,\n    tsk_flags_t options);\n\n/**\n@brief Subset this table by keeping rows according to a boolean mask.\n\n@rst\nDeletes rows from this table and optionally returns the mapping from IDs in\nthe current table to the updated table. Rows are kept or deleted according to\nthe specified boolean array ``keep`` such that for each row ``j`` if\n``keep[j]`` is false (zero) the row is deleted, and otherwise the row is\nretained. Thus, ``keep`` must be an array of at least ``num_rows``\n:c:type:`bool` values.\n\nIf the ``id_map`` argument is non-null, this array will be updated to represent\nthe mapping between IDs before and after row deletion. For row ``j``,\n``id_map[j]`` will contain the new ID for row ``j`` if it is retained, or\n:c:macro:`TSK_NULL` if the row has been removed. Thus, ``id_map`` must be an\narray of at least ``num_rows`` :c:type:`tsk_id_t` values.\n\n.. warning::\n    C++ users need to be careful to specify the correct type when\n    passing in values for the ``keep`` array,\n    using ``std::vector<tsk_bool_t>`` and not ``std::vector<bool>``,\n    as the latter may not be the correct size.\n\n@endrst\n\n@param self A pointer to a tsk_migration_table_t object.\n@param keep Array of boolean flags describing whether a particular\n    row should be kept or not. Must be at least ``num_rows`` long.\n@param options Bitwise option flags. Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@param id_map An array in which to store the mapping between new\n    and old IDs. 
If NULL, this will be ignored.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_migration_table_keep_rows(tsk_migration_table_t *self, const tsk_bool_t *keep,\n    tsk_flags_t options, tsk_id_t *id_map);\n\n/**\n@brief Returns true if the data in the specified table is identical to the data\n       in this table.\n\n@rst\n\n**Options**\n\nOptions to control the comparison can be specified by providing one or\nmore of the following bitwise flags. By default (options=0) tables are\nconsidered equal if they are byte-wise identical in all columns,\nand their metadata schemas are byte-wise identical.\n\n- :c:macro:`TSK_CMP_IGNORE_METADATA`\n@endrst\n\n@param self A pointer to a tsk_migration_table_t object.\n@param other A pointer to a tsk_migration_table_t object.\n@param options Bitwise comparison options.\n@return Return true if the specified table is equal to this table.\n*/\nbool tsk_migration_table_equals(const tsk_migration_table_t *self,\n    const tsk_migration_table_t *other, tsk_flags_t options);\n\n/**\n@brief Copies the state of this table into the specified destination.\n\n@rst\nBy default the method initialises the specified destination table. If the\ndestination is already initialised, the :c:macro:`TSK_NO_INIT` option should\nbe supplied to avoid leaking memory.\n@endrst\n\n@param self A pointer to a tsk_migration_table_t object.\n@param dest A pointer to a tsk_migration_table_t object. If the TSK_NO_INIT\noption is specified, this must be an initialised migration table. 
If not, it must be an\nuninitialised migration table.\n@param options Bitwise option flags.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_migration_table_copy(\n    const tsk_migration_table_t *self, tsk_migration_table_t *dest, tsk_flags_t options);\n\n/**\n@brief Get the row at the specified index.\n\n@rst\nUpdates the specified migration struct to reflect the values in the specified row.\nPointers to memory within this struct are handled by the table and should **not**\nbe freed by client code. These pointers are guaranteed to be valid until the\nnext operation that modifies the table (e.g., by adding a new row), but not afterwards.\n@endrst\n\n@param self A pointer to a tsk_migration_table_t object.\n@param index The requested table row.\n@param row A pointer to a tsk_migration_t struct that is updated to reflect the\n    values in the specified row.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_migration_table_get_row(\n    const tsk_migration_table_t *self, tsk_id_t index, tsk_migration_t *row);\n\n/**\n@brief Set the metadata schema\n@rst\nCopies the metadata schema string to this table, replacing any existing.\n@endrst\n@param self A pointer to a tsk_migration_table_t object.\n@param metadata_schema A pointer to a char array.\n@param metadata_schema_length The size of the metadata schema in bytes.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_migration_table_set_metadata_schema(tsk_migration_table_t *self,\n    const char *metadata_schema, tsk_size_t metadata_schema_length);\n\n/**\n@brief Print out the state of this table to the specified stream.\n\nThis method is intended for debugging purposes and should not be used\nin production code. 
The format of the output should **not** be depended\non and may change arbitrarily between versions.\n\n@param self A pointer to a tsk_migration_table_t object.\n@param out The stream to write the summary to.\n*/\nvoid tsk_migration_table_print_state(const tsk_migration_table_t *self, FILE *out);\n\n/**\n@brief Replace this table's data by copying from a set of column arrays\n\n@rst\nClears the data columns of this table and then copies column data from the specified\nset of arrays. The supplied arrays should all contain data on the same number of rows.\nThe metadata schema is not affected.\n@endrst\n\n@param self A pointer to a tsk_migration_table_t object.\n@param num_rows The number of rows to copy from the specified arrays.\n@param left The array of double left values to be copied.\n@param right The array of double right values to be copied.\n@param node The array of tsk_id_t node values to be copied.\n@param source The array of tsk_id_t source values to be copied.\n@param dest The array of tsk_id_t dest values to be copied.\n@param time The array of double time values to be copied.\n@param metadata The array of char metadata values to be copied.\n@param metadata_offset The array of tsk_size_t metadata offset values to be copied.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_migration_table_set_columns(tsk_migration_table_t *self, tsk_size_t num_rows,\n    const double *left, const double *right, const tsk_id_t *node,\n    const tsk_id_t *source, const tsk_id_t *dest, const double *time,\n    const char *metadata, const tsk_size_t *metadata_offset);\n\n/**\n@brief Extends this table by copying from a set of column arrays\n\n@rst\nCopies column data from the specified set of arrays to create new rows at the end of the\ntable. The supplied arrays should all contain data on the same number of rows. 
The\nmetadata schema is not affected.\n@endrst\n\n@param self A pointer to a tsk_migration_table_t object.\n@param num_rows The number of rows to copy from the specified arrays.\n@param left The array of double left values to be copied.\n@param right The array of double right values to be copied.\n@param node The array of tsk_id_t node values to be copied.\n@param source The array of tsk_id_t source values to be copied.\n@param dest The array of tsk_id_t dest values to be copied.\n@param time The array of double time values to be copied.\n@param metadata The array of char metadata values to be copied.\n@param metadata_offset The array of tsk_size_t metadata offset values to be copied.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_migration_table_append_columns(tsk_migration_table_t *self, tsk_size_t num_rows,\n    const double *left, const double *right, const tsk_id_t *node,\n    const tsk_id_t *source, const tsk_id_t *dest, const double *time,\n    const char *metadata, const tsk_size_t *metadata_offset);\n\n/**\n@brief Controls the pre-allocation strategy for this table\n\n@rst\nSet a fixed pre-allocation size, or use the default doubling strategy.\nSee :ref:`sec_c_api_memory_allocation_strategy` for details on the default\npre-allocation strategy.\n@endrst\n\n@param self A pointer to a tsk_migration_table_t object.\n@param max_rows_increment The number of rows to pre-allocate, or zero for the default\n    doubling strategy.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_migration_table_set_max_rows_increment(\n    tsk_migration_table_t *self, tsk_size_t max_rows_increment);\n\n/**\n@brief Controls the pre-allocation strategy for the metadata column\n\n@rst\nSet a fixed pre-allocation size, or use the default doubling strategy.\nSee :ref:`sec_c_api_memory_allocation_strategy` for details on the default\npre-allocation strategy.\n@endrst\n\n@param self A pointer to a tsk_migration_table_t object.\n@param 
max_metadata_length_increment The number of bytes to pre-allocate, or zero for\nthe default doubling strategy.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_migration_table_set_max_metadata_length_increment(\n    tsk_migration_table_t *self, tsk_size_t max_metadata_length_increment);\n\n/** @} */\n\n/* Undocumented methods */\n\nint tsk_migration_table_dump_text(const tsk_migration_table_t *self, FILE *out);\n\n/**\n@defgroup SITE_TABLE_API_GROUP Site table API.\n@{\n*/\n\n/**\n@brief Initialises the table by allocating the internal memory.\n\n@rst\nThis must be called before any operations are performed on the table.\nSee the :ref:`sec_c_api_overview_structure` for details on how objects\nare initialised and freed.\n@endrst\n\n@param self A pointer to an uninitialised tsk_site_table_t object.\n@param options Allocation time options. Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_site_table_init(tsk_site_table_t *self, tsk_flags_t options);\n\n/**\n@brief Free the internal memory for the specified table.\n\n@param self A pointer to an initialised tsk_site_table_t object.\n@return Always returns 0.\n*/\nint tsk_site_table_free(tsk_site_table_t *self);\n\n/**\n@brief Adds a row to this site table.\n\n@rst\nAdd a new site with the specified ``position``, ``ancestral_state``\nand ``metadata`` to the table. Copies of ``ancestral_state`` and ``metadata``\nare immediately taken. See the :ref:`table definition <sec_site_table_definition>`\nfor details of the columns in this table.\n@endrst\n\n@param self A pointer to a tsk_site_table_t object.\n@param position The position coordinate for the new site.\n@param ancestral_state The ancestral_state for the new site.\n@param ancestral_state_length The length of the ancestral_state in bytes.\n@param metadata The metadata to be associated with the new site. 
This\n    is a pointer to arbitrary memory. Can be ``NULL`` if ``metadata_length`` is 0.\n@param metadata_length The size of the metadata array in bytes.\n@return Return the ID of the newly added site on success,\n    or a negative value on failure.\n*/\ntsk_id_t tsk_site_table_add_row(tsk_site_table_t *self, double position,\n    const char *ancestral_state, tsk_size_t ancestral_state_length, const char *metadata,\n    tsk_size_t metadata_length);\n\n/**\n@brief Updates the row at the specified index.\n\n@rst\nRewrite the row at the specified index in this table to use the specified\nvalues. Copies of the ``ancestral_state`` and ``metadata`` parameters are taken\nimmediately. See the :ref:`table definition <sec_site_table_definition>` for\ndetails of the columns in this table.\n\n.. warning::\n    Because of the way that ragged columns are encoded, this method requires a\n    full rewrite of the internal column memory in worst case, and would\n    therefore be inefficient for bulk updates for such columns. However, if the\n    sizes of all ragged column values are unchanged in the updated row, this\n    method is guaranteed to only update the memory for the row in question.\n@endrst\n\n@param self A pointer to a tsk_site_table_t object.\n@param index The row to update.\n@param position The position coordinate for the site.\n@param ancestral_state The ancestral_state for the site.\n@param ancestral_state_length The length of the ancestral_state in bytes.\n@param metadata The metadata to be associated with the site. This\n    is a pointer to arbitrary memory. 
Can be ``NULL`` if ``metadata_length`` is 0.\n@param metadata_length The size of the metadata array in bytes.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_site_table_update_row(tsk_site_table_t *self, tsk_id_t index, double position,\n    const char *ancestral_state, tsk_size_t ancestral_state_length, const char *metadata,\n    tsk_size_t metadata_length);\n\n/**\n@brief Clears this table, setting the number of rows to zero.\n\n@rst\nNo memory is freed as a result of this operation; please use\n:c:func:`tsk_site_table_free` to free the table's internal resources. Note that the\nmetadata schema is not cleared.\n@endrst\n\n@param self A pointer to a tsk_site_table_t object.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_site_table_clear(tsk_site_table_t *self);\n\n/**\n@brief Truncates this table so that only the first num_rows are retained.\n\n@param self A pointer to a tsk_site_table_t object.\n@param num_rows The number of rows to retain in the table.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_site_table_truncate(tsk_site_table_t *self, tsk_size_t num_rows);\n\n/**\n@brief Extends this table by appending rows copied from another table.\n\n@rst\nAppends the rows at the specified indexes from the table ``other`` to the end of this\ntable. Row indexes can be repeated and in any order. If ``row_indexes`` is NULL, append\nthe first ``num_rows`` from ``other`` to this table. Note that metadata is copied as-is\nand is not checked for compatibility with any existing schema on this table.\n@endrst\n\n@param self A pointer to a tsk_site_table_t object where rows are to be added.\n@param other A pointer to a tsk_site_table_t object where rows are copied from.\n@param num_rows The number of rows from ``other`` to append to this table.\n@param row_indexes Array of row indexes in ``other``. 
If ``NULL`` is passed then the\n    first ``num_rows`` of ``other`` are used.\n@param options Bitwise option flags. Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_site_table_extend(tsk_site_table_t *self, const tsk_site_table_t *other,\n    tsk_size_t num_rows, const tsk_id_t *row_indexes, tsk_flags_t options);\n\n/**\n@brief Subset this table by keeping rows according to a boolean mask.\n\n@rst\nDeletes rows from this table and optionally return the mapping from IDs in\nthe current table to the updated table. Rows are kept or deleted according to\nthe specified boolean array ``keep`` such that for each row ``j`` if\n``keep[j]`` is false (zero) the row is deleted, and otherwise the row is\nretained. Thus, ``keep`` must be an array of at least ``num_rows``\n:c:type:`bool` values.\n\nIf the ``id_map`` argument is non-null, this array will be updated to represent\nthe mapping between IDs before and after row deletion. For row ``j``,\n``id_map[j]`` will contain the new ID for row ``j`` if it is retained, or\n:c:macro:`TSK_NULL` if the row has been removed. Thus, ``id_map`` must be an\narray of at least ``num_rows`` :c:type:`tsk_id_t` values.\n\n.. warning::\n    C++ users need to be careful to specify the correct type when\n    passing in values for the ``keep`` array,\n    using ``std::vector<tsk_bool_t>`` and not ``std::vector<bool>``,\n    as the latter may not be correct size.\n\n@endrst\n\n@param self A pointer to a tsk_site_table_t object.\n@param keep Array of boolean flags describing whether a particular\n    row should be kept or not. Must be at least ``num_rows`` long.\n@param options Bitwise option flags. Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@param id_map An array in which to store the mapping between new\n    and old IDs. 
If NULL, this will be ignored.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_site_table_keep_rows(tsk_site_table_t *self, const tsk_bool_t *keep,\n    tsk_flags_t options, tsk_id_t *id_map);\n\n/**\n@brief Returns true if the data in the specified table is identical to the data\n       in this table.\n\n@rst\n\n**Options**\n\nOptions to control the comparison can be specified by providing one or\nmore of the following bitwise flags. By default (options=0) tables are\nconsidered equal if they are byte-wise identical in all columns,\nand their metadata schemas are byte-wise identical.\n\n- :c:macro:`TSK_CMP_IGNORE_METADATA`\n@endrst\n\n@param self A pointer to a tsk_site_table_t object.\n@param other A pointer to a tsk_site_table_t object.\n@param options Bitwise comparison options.\n@return Return true if the specified table is equal to this table.\n*/\nbool tsk_site_table_equals(\n    const tsk_site_table_t *self, const tsk_site_table_t *other, tsk_flags_t options);\n\n/**\n@brief Copies the state of this table into the specified destination.\n\n@rst\nBy default the method initialises the specified destination table. If the\ndestination is already initialised, the :c:macro:`TSK_NO_INIT` option should\nbe supplied to avoid leaking memory.\n@endrst\n\n@param self A pointer to a tsk_site_table_t object.\n@param dest A pointer to a tsk_site_table_t object. If the TSK_NO_INIT option\n    is specified, this must be an initialised site table. 
If not, it must\n    be an uninitialised site table.\n@param options Bitwise option flags.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_site_table_copy(\n    const tsk_site_table_t *self, tsk_site_table_t *dest, tsk_flags_t options);\n\n/**\n@brief Get the row at the specified index.\n\n@rst\nUpdates the specified site struct to reflect the values in the specified row.\n\nThis function always sets the ``mutations`` and ``mutations_length``\nfields in the parameter :c:struct:`tsk_site_t` to ``NULL`` and ``0`` respectively.\nTo get access to the mutations for a particular site, please use the\ntree sequence method, :c:func:`tsk_treeseq_get_site`.\n\nPointers to memory within this struct are handled by the table and should **not**\nbe freed by client code. These pointers are guaranteed to be valid until the\nnext operation that modifies the table (e.g., by adding a new row), but not afterwards.\n@endrst\n\n@param self A pointer to a tsk_site_table_t object.\n@param index The requested table row.\n@param row A pointer to a tsk_site_t struct that is updated to reflect the\n    values in the specified row.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_site_table_get_row(\n    const tsk_site_table_t *self, tsk_id_t index, tsk_site_t *row);\n\n/**\n@brief Set the metadata schema\n@rst\nCopies the metadata schema string to this table, replacing any existing.\n@endrst\n@param self A pointer to a tsk_site_table_t object.\n@param metadata_schema A pointer to a char array.\n@param metadata_schema_length The size of the metadata schema in bytes.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_site_table_set_metadata_schema(tsk_site_table_t *self,\n    const char *metadata_schema, tsk_size_t metadata_schema_length);\n\n/**\n@brief Print out the state of this table to the specified stream.\n\nThis method is intended for debugging purposes and should not be used\nin production code. 
The format of the output should **not** be depended\non and may change arbitrarily between versions.\n\n@param self A pointer to a tsk_site_table_t object.\n@param out The stream to write the summary to.\n*/\nvoid tsk_site_table_print_state(const tsk_site_table_t *self, FILE *out);\n\n/**\n@brief Replace this table's data by copying from a set of column arrays\n\n@rst\nClears the data columns of this table and then copies column data from the specified\nset of arrays. The supplied arrays should all contain data on the same number of rows.\nThe metadata schema is not affected.\n@endrst\n\n@param self A pointer to a tsk_site_table_t object.\n@param num_rows The number of rows to copy from the specified arrays.\n@param position The array of double position values to be copied.\n@param ancestral_state The array of char ancestral state values to be copied.\n@param ancestral_state_offset The array of tsk_size_t ancestral state offset values to be\n        copied.\n@param metadata The array of char metadata values to be copied.\n@param metadata_offset The array of tsk_size_t metadata offset values to be copied.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_site_table_set_columns(tsk_site_table_t *self, tsk_size_t num_rows,\n    const double *position, const char *ancestral_state,\n    const tsk_size_t *ancestral_state_offset, const char *metadata,\n    const tsk_size_t *metadata_offset);\n\n/**\n@brief Extends this table by copying from a set of column arrays\n\n@rst\nCopies column data from the specified set of arrays to create new rows at the end of the\ntable. The supplied arrays should all contain data on the same number of rows. 
The\nmetadata schema is not affected.\n@endrst\n\n@param self A pointer to a tsk_site_table_t object.\n@param num_rows The number of rows to copy from the specified arrays.\n@param position The array of double position values to be copied.\n@param ancestral_state The array of char ancestral state values to be copied.\n@param ancestral_state_offset The array of tsk_size_t ancestral state offset values to be\n    copied.\n@param metadata The array of char metadata values to be copied.\n@param metadata_offset The array of tsk_size_t metadata offset values to be copied.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_site_table_append_columns(tsk_site_table_t *self, tsk_size_t num_rows,\n    const double *position, const char *ancestral_state,\n    const tsk_size_t *ancestral_state_offset, const char *metadata,\n    const tsk_size_t *metadata_offset);\n\n/**\n@brief Controls the pre-allocation strategy for this table\n\n@rst\nSet a fixed pre-allocation size, or use the default doubling strategy.\nSee :ref:`sec_c_api_memory_allocation_strategy` for details on the default\npre-allocation strategy.\n@endrst\n\n@param self A pointer to a tsk_site_table_t object.\n@param max_rows_increment The number of rows to pre-allocate, or zero for the default\n    doubling strategy.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_site_table_set_max_rows_increment(\n    tsk_site_table_t *self, tsk_size_t max_rows_increment);\n\n/**\n@brief Controls the pre-allocation strategy for the metadata column\n\n@rst\nSet a fixed pre-allocation size, or use the default doubling strategy.\nSee :ref:`sec_c_api_memory_allocation_strategy` for details on the default\npre-allocation strategy.\n@endrst\n\n@param self A pointer to a tsk_site_table_t object.\n@param max_metadata_length_increment The number of bytes to pre-allocate, or zero for\nthe default doubling strategy.\n@return Return 0 on success or a negative value on failure.\n*/\n\nint 
tsk_site_table_set_max_metadata_length_increment(\n    tsk_site_table_t *self, tsk_size_t max_metadata_length_increment);\n\n/**\n@brief Controls the pre-allocation strategy for the ancestral_state column\n\n@rst\nSet a fixed pre-allocation size, or use the default doubling strategy.\nSee :ref:`sec_c_api_memory_allocation_strategy` for details on the default\npre-allocation strategy,\n@endrst\n\n@param self A pointer to a tsk_site_table_t object.\n@param max_ancestral_state_length_increment The number of bytes to pre-allocate, or zero\nfor the default doubling strategy.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_site_table_set_max_ancestral_state_length_increment(\n    tsk_site_table_t *self, tsk_size_t max_ancestral_state_length_increment);\n\n/** @} */\n\n/* Undocumented methods */\n\nint tsk_site_table_dump_text(const tsk_site_table_t *self, FILE *out);\n\n/**\n@defgroup MUTATION_TABLE_API_GROUP Mutation table API.\n@{\n*/\n\n/**\n@brief Initialises the table by allocating the internal memory.\n\n@rst\nThis must be called before any operations are performed on the table.\nSee the :ref:`sec_c_api_overview_structure` for details on how objects\nare initialised and freed.\n@endrst\n\n@param self A pointer to an uninitialised tsk_mutation_table_t object.\n@param options Allocation time options. Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_mutation_table_init(tsk_mutation_table_t *self, tsk_flags_t options);\n\n/**\n@brief Free the internal memory for the specified table.\n\n@param self A pointer to an initialised tsk_mutation_table_t object.\n@return Always returns 0.\n*/\nint tsk_mutation_table_free(tsk_mutation_table_t *self);\n\n/**\n@brief Adds a row to this mutation table.\n\n@rst\nAdd a new mutation with the specified ``site``, ``parent``, ``derived_state``\nand ``metadata`` to the table. 
Copies of ``derived_state`` and ``metadata``\nare immediately taken. See the :ref:`table definition <sec_mutation_table_definition>`\nfor details of the columns in this table.\n@endrst\n\n@param self A pointer to a tsk_mutation_table_t object.\n@param site The site ID for the new mutation.\n@param node The ID of the node this mutation occurs over.\n@param parent The ID of the parent mutation.\n@param time The time of the mutation.\n@param derived_state The derived_state for the new mutation.\n@param derived_state_length The length of the derived_state in bytes.\n@param metadata The metadata to be associated with the new mutation. This\n    is a pointer to arbitrary memory. Can be ``NULL`` if ``metadata_length`` is 0.\n@param metadata_length The size of the metadata array in bytes.\n@return Return the ID of the newly added mutation on success,\n    or a negative value on failure.\n*/\ntsk_id_t tsk_mutation_table_add_row(tsk_mutation_table_t *self, tsk_id_t site,\n    tsk_id_t node, tsk_id_t parent, double time, const char *derived_state,\n    tsk_size_t derived_state_length, const char *metadata, tsk_size_t metadata_length);\n\n/**\n@brief Updates the row at the specified index.\n\n@rst\nRewrite the row at the specified index in this table to use the specified\nvalues. Copies of the ``derived_state`` and ``metadata`` parameters are taken\nimmediately. See the :ref:`table definition <sec_mutation_table_definition>` for\ndetails of the columns in this table.\n\n.. warning::\n    Because of the way that ragged columns are encoded, this method requires a\n    full rewrite of the internal column memory in worst case, and would\n    therefore be inefficient for bulk updates for such columns. 
However, if the\n    sizes of all ragged column values are unchanged in the updated row, this\n    method is guaranteed to only update the memory for the row in question.\n@endrst\n\n@param self A pointer to a tsk_mutation_table_t object.\n@param index The row to update.\n@param site The site ID for the mutation.\n@param node The ID of the node this mutation occurs over.\n@param parent The ID of the parent mutation.\n@param time The time of the mutation.\n@param derived_state The derived_state for the mutation.\n@param derived_state_length The length of the derived_state in bytes.\n@param metadata The metadata to be associated with the mutation. This\n    is a pointer to arbitrary memory. Can be ``NULL`` if ``metadata_length`` is 0.\n@param metadata_length The size of the metadata array in bytes.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_mutation_table_update_row(tsk_mutation_table_t *self, tsk_id_t index,\n    tsk_id_t site, tsk_id_t node, tsk_id_t parent, double time,\n    const char *derived_state, tsk_size_t derived_state_length, const char *metadata,\n    tsk_size_t metadata_length);\n\n/**\n@brief Clears this table, setting the number of rows to zero.\n\n@rst\nNo memory is freed as a result of this operation; please use\n:c:func:`tsk_mutation_table_free` to free the table's internal resources. 
Note that the\nmetadata schema is not cleared.\n@endrst\n\n@param self A pointer to a tsk_mutation_table_t object.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_mutation_table_clear(tsk_mutation_table_t *self);\n\n/**\n@brief Truncates this table so that only the first num_rows are retained.\n\n@param self A pointer to a tsk_mutation_table_t object.\n@param num_rows The number of rows to retain in the table.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_mutation_table_truncate(tsk_mutation_table_t *self, tsk_size_t num_rows);\n\n/**\n@brief Extends this table by appending rows copied from another table.\n\n@rst\nAppends the rows at the specified indexes from the table ``other`` to the end of this\ntable. Row indexes can be repeated and in any order. If ``row_indexes`` is NULL, append\nthe first ``num_rows`` from ``other`` to this table. Note that metadata is copied as-is\nand is not checked for compatibility with any existing schema on this table.\n@endrst\n\n@param self A pointer to a tsk_mutation_table_t object where rows are to be added.\n@param other A pointer to a tsk_mutation_table_t object where rows are copied from.\n@param num_rows The number of rows from ``other`` to append to this table.\n@param row_indexes Array of row indexes in ``other``. If ``NULL`` is passed then the\n    first ``num_rows`` of ``other`` are used.\n@param options Bitwise option flags. Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_mutation_table_extend(tsk_mutation_table_t *self,\n    const tsk_mutation_table_t *other, tsk_size_t num_rows, const tsk_id_t *row_indexes,\n    tsk_flags_t options);\n\n/**\n@brief Subset this table by keeping rows according to a boolean mask.\n\n@rst\nDeletes rows from this table and optionally return the mapping from IDs in\nthe current table to the updated table. 
Rows are kept or deleted according to\nthe specified boolean array ``keep`` such that for each row ``j`` if\n``keep[j]`` is false (zero) the row is deleted, and otherwise the row is\nretained. Thus, ``keep`` must be an array of at least ``num_rows``\n:c:type:`bool` values.\n\nIf the ``id_map`` argument is non-null, this array will be updated to represent\nthe mapping between IDs before and after row deletion. For row ``j``,\n``id_map[j]`` will contain the new ID for row ``j`` if it is retained, or\n:c:macro:`TSK_NULL` if the row has been removed. Thus, ``id_map`` must be an\narray of at least ``num_rows`` :c:type:`tsk_id_t` values.\n\nThe values in the ``parent`` column are updated according to this map, so that\nreference integrity within the table is maintained. As a consequence of this,\nthe values in the ``parent`` column for kept rows are bounds-checked and an\nerror raised if they are not valid. Rows that are deleted are not checked for\nparent ID integrity.\n\nIf an attempt is made to delete rows that are referred to by the ``parent``\ncolumn of rows that are retained, an error is raised.\n\nThese error conditions are checked before any alterations to the table are\nmade.\n\n.. warning::\n    C++ users need to be careful to specify the correct type when\n    passing in values for the ``keep`` array,\n    using ``std::vector<tsk_bool_t>`` and not ``std::vector<bool>``,\n    as the latter may not be correct size.\n\n@endrst\n\n@param self A pointer to a tsk_mutation_table_t object.\n@param keep Array of boolean flags describing whether a particular\n    row should be kept or not. Must be at least ``num_rows`` long.\n@param options Bitwise option flags. Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@param id_map An array in which to store the mapping between new\n    and old IDs. 
If NULL, this will be ignored.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_mutation_table_keep_rows(tsk_mutation_table_t *self, const tsk_bool_t *keep,\n    tsk_flags_t options, tsk_id_t *id_map);\n\n/**\n@brief Returns true if the data in the specified table is identical to the data\n       in this table.\n\n@rst\n\n**Options**\n\nOptions to control the comparison can be specified by providing one or\nmore of the following bitwise flags. By default (options=0) tables are\nconsidered equal if they are byte-wise identical in all columns,\nand their metadata schemas are byte-wise identical.\n\n- :c:macro:`TSK_CMP_IGNORE_METADATA`\n@endrst\n\n@param self A pointer to a tsk_mutation_table_t object.\n@param other A pointer to a tsk_mutation_table_t object.\n@param options Bitwise comparison options.\n@return Return true if the specified table is equal to this table.\n*/\nbool tsk_mutation_table_equals(const tsk_mutation_table_t *self,\n    const tsk_mutation_table_t *other, tsk_flags_t options);\n\n/**\n@brief Copies the state of this table into the specified destination.\n\n@rst\nBy default the method initialises the specified destination table. If the\ndestination is already initialised, the :c:macro:`TSK_NO_INIT` option should\nbe supplied to avoid leaking memory.\n@endrst\n\n@param self A pointer to a tsk_mutation_table_t object.\n@param dest A pointer to a tsk_mutation_table_t object. If the TSK_NO_INIT\noption is specified, this must be an initialised mutation table. 
If not, it must be an\nuninitialised mutation table.\n@param options Bitwise option flags.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_mutation_table_copy(\n    const tsk_mutation_table_t *self, tsk_mutation_table_t *dest, tsk_flags_t options);\n\n/**\n@brief Get the row at the specified index.\n\n@rst\nUpdates the specified mutation struct to reflect the values in the specified row.\n\nThis function always sets the ``edge`` field in parameter\n:c:struct:`tsk_mutation_t` to ``TSK_NULL``. To determine the ID of\nthe edge associated with a particular mutation, please use the\ntree sequence method, :c:func:`tsk_treeseq_get_mutation`.\n\nPointers to memory within this struct are handled by the table and should **not**\nbe freed by client code. These pointers are guaranteed to be valid until the\nnext operation that modifies the table (e.g., by adding a new row), but not afterwards.\n@endrst\n\n@param self A pointer to a tsk_mutation_table_t object.\n@param index The requested table row.\n@param row A pointer to a tsk_mutation_t struct that is updated to reflect the\n    values in the specified row.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_mutation_table_get_row(\n    const tsk_mutation_table_t *self, tsk_id_t index, tsk_mutation_t *row);\n\n/**\n@brief Set the metadata schema\n@rst\nCopies the metadata schema string to this table, replacing any existing.\n@endrst\n@param self A pointer to a tsk_mutation_table_t object.\n@param metadata_schema A pointer to a char array.\n@param metadata_schema_length The size of the metadata schema in bytes.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_mutation_table_set_metadata_schema(tsk_mutation_table_t *self,\n    const char *metadata_schema, tsk_size_t metadata_schema_length);\n\n/**\n@brief Print out the state of this table to the specified stream.\n\nThis method is intended for debugging purposes and should not be used\nin production 
code. The format of the output should **not** be depended\non and may change arbitrarily between versions.\n\n@param self A pointer to a tsk_mutation_table_t object.\n@param out The stream to write the summary to.\n*/\nvoid tsk_mutation_table_print_state(const tsk_mutation_table_t *self, FILE *out);\n\n/**\n@brief Replace this table's data by copying from a set of column arrays\n\n@rst\nClears the data columns of this table and then copies column data from the specified\nset of arrays. The supplied arrays should all contain data on the same number of rows.\nThe metadata schema is not affected.\n@endrst\n\n@param self A pointer to a tsk_mutation_table_t object.\n@param num_rows The number of rows to copy from the specified arrays.\n@param site The array of tsk_id_t site values to be copied.\n@param node The array of tsk_id_t node values to be copied.\n@param parent The array of tsk_id_t parent values to be copied.\n@param time The array of double time values to be copied.\n@param derived_state The array of char derived_state values to be copied.\n@param derived_state_offset The array of tsk_size_t derived state offset values to be\ncopied.\n@param metadata The array of char metadata values to be copied.\n@param metadata_offset The array of tsk_size_t metadata offset values to be copied.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_mutation_table_set_columns(tsk_mutation_table_t *self, tsk_size_t num_rows,\n    const tsk_id_t *site, const tsk_id_t *node, const tsk_id_t *parent,\n    const double *time, const char *derived_state,\n    const tsk_size_t *derived_state_offset, const char *metadata,\n    const tsk_size_t *metadata_offset);\n\n/**\n@brief Extends this table by copying from a set of column arrays\n\n@rst\nCopies column data from the specified set of arrays to create new rows at the end of the\ntable. The supplied arrays should all contain data on the same number of rows. 
The\nmetadata schema is not affected.\n@endrst\n\n@param self A pointer to a tsk_mutation_table_t object.\n@param num_rows The number of rows to copy from the specified arrays.\n@param site The array of tsk_id_t site values to be copied.\n@param node The array of tsk_id_t node values to be copied.\n@param parent The array of tsk_id_t parent values to be copied.\n@param time The array of double time values to be copied.\n@param derived_state The array of char derived_state values to be copied.\n@param derived_state_offset The array of tsk_size_t derived state offset values to be\n    copied.\n@param metadata The array of char metadata values to be copied.\n@param metadata_offset The array of tsk_size_t metadata offset values to be copied.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_mutation_table_append_columns(tsk_mutation_table_t *self, tsk_size_t num_rows,\n    const tsk_id_t *site, const tsk_id_t *node, const tsk_id_t *parent,\n    const double *time, const char *derived_state,\n    const tsk_size_t *derived_state_offset, const char *metadata,\n    const tsk_size_t *metadata_offset);\n\n/**\n@brief Controls the pre-allocation strategy for this table\n\n@rst\nSet a fixed pre-allocation size, or use the default doubling strategy.\nSee :ref:`sec_c_api_memory_allocation_strategy` for details on the default\npre-allocation strategy.\n@endrst\n\n@param self A pointer to a tsk_mutation_table_t object.\n@param max_rows_increment The number of rows to pre-allocate, or zero for the default\n    doubling strategy.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_mutation_table_set_max_rows_increment(\n    tsk_mutation_table_t *self, tsk_size_t max_rows_increment);\n\n/**\n@brief Controls the pre-allocation strategy for the metadata column\n\n@rst\nSet a fixed pre-allocation size, or use the default doubling strategy.\nSee :ref:`sec_c_api_memory_allocation_strategy` for details on the default\npre-allocation 
strategy,\n@endrst\n\n@param self A pointer to a tsk_mutation_table_t object.\n@param max_metadata_length_increment The number of bytes to pre-allocate, or zero for\nthe default doubling strategy.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_mutation_table_set_max_metadata_length_increment(\n    tsk_mutation_table_t *self, tsk_size_t max_metadata_length_increment);\n\n/**\n@brief Controls the pre-allocation strategy for the derived_state column\n\n@rst\nSet a fixed pre-allocation size, or use the default doubling strategy.\nSee :ref:`sec_c_api_memory_allocation_strategy` for details on the default\npre-allocation strategy,\n@endrst\n\n@param self A pointer to a tsk_mutation_table_t object.\n@param max_derived_state_length_increment The number of bytes to pre-allocate, or zero\nfor the default doubling strategy.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_mutation_table_set_max_derived_state_length_increment(\n    tsk_mutation_table_t *self, tsk_size_t max_derived_state_length_increment);\n\n/** @} */\n\n/* Undocumented methods */\n\nint tsk_mutation_table_dump_text(const tsk_mutation_table_t *self, FILE *out);\n\n/**\n@defgroup POPULATION_TABLE_API_GROUP Population table API.\n@{\n*/\n\n/**\n@brief Initialises the table by allocating the internal memory.\n\n@rst\nThis must be called before any operations are performed on the table.\nSee the :ref:`sec_c_api_overview_structure` for details on how objects\nare initialised and freed.\n@endrst\n\n@param self A pointer to an uninitialised tsk_population_table_t object.\n@param options Allocation time options. 
Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_population_table_init(tsk_population_table_t *self, tsk_flags_t options);\n\n/**\n@brief Free the internal memory for the specified table.\n\n@param self A pointer to an initialised tsk_population_table_t object.\n@return Always returns 0.\n*/\nint tsk_population_table_free(tsk_population_table_t *self);\n\n/**\n@brief Adds a row to this population table.\n\n@rst\nAdd a new population with the specified ``metadata`` to the table. A copy of the\n``metadata`` is immediately taken. See the :ref:`table definition\n<sec_population_table_definition>` for details of the columns in this table.\n@endrst\n\n@param self A pointer to a tsk_population_table_t object.\n@param metadata The metadata to be associated with the new population. This\n    is a pointer to arbitrary memory. Can be ``NULL`` if ``metadata_length`` is 0.\n@param metadata_length The size of the metadata array in bytes.\n@return Return the ID of the newly added population on success,\n    or a negative value on failure.\n*/\ntsk_id_t tsk_population_table_add_row(\n    tsk_population_table_t *self, const char *metadata, tsk_size_t metadata_length);\n\n/**\n@brief Updates the row at the specified index.\n\n@rst\nRewrite the row at the specified index in this table to use the specified\nvalues. A copy of the ``metadata`` parameter is taken immediately. See the\n:ref:`table definition <sec_population_table_definition>` for details of the\ncolumns in this table.\n\n.. warning::\n    Because of the way that ragged columns are encoded, this method requires a\n    full rewrite of the internal column memory in worst case, and would\n    therefore be inefficient for bulk updates for such columns. 
However, if the\n    sizes of all ragged column values are unchanged in the updated row, this\n    method is guaranteed to only update the memory for the row in question.\n@endrst\n\n@param self A pointer to a tsk_population_table_t object.\n@param index The row to update.\n@param metadata The metadata to be associated with the population. This\n    is a pointer to arbitrary memory. Can be ``NULL`` if ``metadata_length`` is 0.\n@param metadata_length The size of the metadata array in bytes.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_population_table_update_row(tsk_population_table_t *self, tsk_id_t index,\n    const char *metadata, tsk_size_t metadata_length);\n\n/**\n@brief Clears this table, setting the number of rows to zero.\n\n@rst\nNo memory is freed as a result of this operation; please use\n:c:func:`tsk_population_table_free` to free the table's internal resources. Note that the\nmetadata schema is not cleared.\n@endrst\n\n@param self A pointer to a tsk_population_table_t object.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_population_table_clear(tsk_population_table_t *self);\n\n/**\n@brief Truncates this table so that only the first num_rows are retained.\n\n@param self A pointer to a tsk_population_table_t object.\n@param num_rows The number of rows to retain in the table.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_population_table_truncate(tsk_population_table_t *self, tsk_size_t num_rows);\n\n/**\n@brief Extends this table by appending rows copied from another table.\n\n@rst\nAppends the rows at the specified indexes from the table ``other`` to the end of this\ntable. Row indexes can be repeated and in any order. If ``row_indexes`` is NULL, append\nthe first ``num_rows`` from ``other`` to this table. 
Note that metadata is copied as-is\nand is not checked for compatibility with any existing schema on this table.\n@endrst\n\n@param self A pointer to a tsk_population_table_t object where rows are to be added.\n@param other A pointer to a tsk_population_table_t object where rows are copied from.\n@param num_rows The number of rows from ``other`` to append to this table.\n@param row_indexes Array of row indexes in ``other``. If ``NULL`` is passed then the\n    first ``num_rows`` of ``other`` are used.\n@param options Bitwise option flags. Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_population_table_extend(tsk_population_table_t *self,\n    const tsk_population_table_t *other, tsk_size_t num_rows,\n    const tsk_id_t *row_indexes, tsk_flags_t options);\n\n/**\n@brief Subset this table by keeping rows according to a boolean mask.\n\n@rst\nDeletes rows from this table and optionally return the mapping from IDs in\nthe current table to the updated table. Rows are kept or deleted according to\nthe specified boolean array ``keep`` such that for each row ``j`` if\n``keep[j]`` is false (zero) the row is deleted, and otherwise the row is\nretained. Thus, ``keep`` must be an array of at least ``num_rows``\n:c:type:`bool` values.\n\nIf the ``id_map`` argument is non-null, this array will be updated to represent\nthe mapping between IDs before and after row deletion. For row ``j``,\n``id_map[j]`` will contain the new ID for row ``j`` if it is retained, or\n:c:macro:`TSK_NULL` if the row has been removed. Thus, ``id_map`` must be an\narray of at least ``num_rows`` :c:type:`tsk_id_t` values.\n\n.. 
warning::\n    C++ users need to be careful to specify the correct type when\n    passing in values for the ``keep`` array,\n    using ``std::vector<tsk_bool_t>`` and not ``std::vector<bool>``,\n    as the latter may not be the correct size.\n\n@endrst\n\n@param self A pointer to a tsk_population_table_t object.\n@param keep Array of boolean flags describing whether a particular\n    row should be kept or not. Must be at least ``num_rows`` long.\n@param options Bitwise option flags. Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@param id_map An array in which to store the mapping between new\n    and old IDs. If NULL, this will be ignored.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_population_table_keep_rows(tsk_population_table_t *self, const tsk_bool_t *keep,\n    tsk_flags_t options, tsk_id_t *id_map);\n\n/**\n@brief Returns true if the data in the specified table is identical to the data\n       in this table.\n\n@rst\n\n**Options**\n\nOptions to control the comparison can be specified by providing one or\nmore of the following bitwise flags. By default (options=0) tables are\nconsidered equal if they are byte-wise identical in all columns,\nand their metadata schemas are byte-wise identical.\n\n- :c:macro:`TSK_CMP_IGNORE_METADATA`\n    Do not include metadata in the comparison. 
Note that as metadata is the\n    only column in the population table, two population tables are considered\n    equal if they have the same number of rows if this flag is specified.\n@endrst\n\n@param self A pointer to a tsk_population_table_t object.\n@param other A pointer to a tsk_population_table_t object.\n@param options Bitwise comparison options.\n@return Return true if the specified table is equal to this table.\n*/\nbool tsk_population_table_equals(const tsk_population_table_t *self,\n    const tsk_population_table_t *other, tsk_flags_t options);\n\n/**\n@brief Copies the state of this table into the specified destination.\n\n@rst\nBy default the method initialises the specified destination table. If the\ndestination is already initialised, the :c:macro:`TSK_NO_INIT` option should\nbe supplied to avoid leaking memory.\n@endrst\n\n@param self A pointer to a tsk_population_table_t object.\n@param dest A pointer to a tsk_population_table_t object. If the TSK_NO_INIT\noption is specified, this must be an initialised population table. If not, it must be an\nuninitialised population table.\n@param options Bitwise option flags.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_population_table_copy(const tsk_population_table_t *self,\n    tsk_population_table_t *dest, tsk_flags_t options);\n\n/**\n@brief Get the row at the specified index.\n\n@rst\nUpdates the specified population struct to reflect the values in the specified row.\nPointers to memory within this struct are handled by the table and should **not**\nbe freed by client code. 
These pointers are guaranteed to be valid until the\nnext operation that modifies the table (e.g., by adding a new row), but not afterwards.\n@endrst\n\n@param self A pointer to a tsk_population_table_t object.\n@param index The requested table row.\n@param row A pointer to a tsk_population_t struct that is updated to reflect the\n    values in the specified row.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_population_table_get_row(\n    const tsk_population_table_t *self, tsk_id_t index, tsk_population_t *row);\n\n/**\n@brief Set the metadata schema\n@rst\nCopies the metadata schema string to this table, replacing any existing.\n@endrst\n@param self A pointer to a tsk_population_table_t object.\n@param metadata_schema A pointer to a char array.\n@param metadata_schema_length The size of the metadata schema in bytes.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_population_table_set_metadata_schema(tsk_population_table_t *self,\n    const char *metadata_schema, tsk_size_t metadata_schema_length);\n\n/**\n@brief Print out the state of this table to the specified stream.\n\nThis method is intended for debugging purposes and should not be used\nin production code. The format of the output should **not** be depended\non and may change arbitrarily between versions.\n\n@param self A pointer to a tsk_population_table_t object.\n@param out The stream to write the summary to.\n*/\nvoid tsk_population_table_print_state(const tsk_population_table_t *self, FILE *out);\n\n/**\n@brief Replace this table's data by copying from a set of column arrays\n\n@rst\nClears the data columns of this table and then copies column data from the specified\nset of arrays. 
The supplied arrays should all contain data on the same number of rows.\nThe metadata schema is not affected.\n@endrst\n\n@param self A pointer to a tsk_population_table_t object.\n@param num_rows The number of rows to copy from the specified arrays.\n@param metadata The array of char metadata values to be copied.\n@param metadata_offset The array of tsk_size_t metadata offset values to be copied.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_population_table_set_columns(tsk_population_table_t *self, tsk_size_t num_rows,\n    const char *metadata, const tsk_size_t *metadata_offset);\n\n/**\n@brief Extends this table by copying from a set of column arrays\n\n@rst\nCopies column data from the specified set of arrays to create new rows at the end of the\ntable. The supplied arrays should all contain data on the same number of rows. The\nmetadata schema is not affected.\n@endrst\n\n@param self A pointer to a tsk_population_table_t object.\n@param num_rows The number of rows to copy from the specified arrays.\n@param metadata The array of char metadata values to be copied.\n@param metadata_offset The array of tsk_size_t metadata offset values to be copied.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_population_table_append_columns(tsk_population_table_t *self,\n    tsk_size_t num_rows, const char *metadata, const tsk_size_t *metadata_offset);\n\n/**\n@brief Controls the pre-allocation strategy for this table\n\n@rst\nSet a fixed pre-allocation size, or use the default doubling strategy.\nSee :ref:`sec_c_api_memory_allocation_strategy` for details on the default\npre-allocation strategy.\n@endrst\n\n@param self A pointer to a tsk_population_table_t object.\n@param max_rows_increment The number of rows to pre-allocate, or zero for the default\n    doubling strategy.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_population_table_set_max_rows_increment(\n    tsk_population_table_t *self, 
tsk_size_t max_rows_increment);\n\n/**\n@brief Controls the pre-allocation strategy for the metadata column\n\n@rst\nSet a fixed pre-allocation size, or use the default doubling strategy.\nSee :ref:`sec_c_api_memory_allocation_strategy` for details on the default\npre-allocation strategy,\n@endrst\n\n@param self A pointer to a tsk_population_table_t object.\n@param max_metadata_length_increment The number of bytes to pre-allocate, or zero for\nthe default doubling strategy.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_population_table_set_max_metadata_length_increment(\n    tsk_population_table_t *self, tsk_size_t max_metadata_length_increment);\n\n/** @} */\n\n/* Undocumented methods */\n\nint tsk_population_table_dump_text(const tsk_population_table_t *self, FILE *out);\n\n/**\n@defgroup PROVENANCE_TABLE_API_GROUP Provenance table API.\n@{\n*/\n\n/**\n@brief Initialises the table by allocating the internal memory.\n\n@rst\nThis must be called before any operations are performed on the table.\nSee the :ref:`sec_c_api_overview_structure` for details on how objects\nare initialised and freed.\n@endrst\n\n@param self A pointer to an uninitialised tsk_provenance_table_t object.\n@param options Allocation time options. 
Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_provenance_table_init(tsk_provenance_table_t *self, tsk_flags_t options);\n\n/**\n@brief Free the internal memory for the specified table.\n\n@param self A pointer to an initialised tsk_provenance_table_t object.\n@return Always returns 0.\n*/\nint tsk_provenance_table_free(tsk_provenance_table_t *self);\n\n/**\n@brief Adds a row to this provenance table.\n\n@rst\nAdd a new provenance with the specified ``timestamp`` and ``record`` to the table.\nCopies of the ``timestamp`` and ``record`` are immediately taken.\nSee the :ref:`table definition <sec_provenance_table_definition>`\nfor details of the columns in this table.\n@endrst\n\n@param self A pointer to a tsk_provenance_table_t object.\n@param timestamp The timestamp to be associated with the new provenance. This\n    is a pointer to arbitrary memory. Can be ``NULL`` if ``timestamp_length`` is 0.\n@param timestamp_length The size of the timestamp array in bytes.\n@param record The record to be associated with the new provenance. This\n    is a pointer to arbitrary memory. Can be ``NULL`` if ``record_length`` is 0.\n@param record_length The size of the record array in bytes.\n@return Return the ID of the newly added provenance on success,\n    or a negative value on failure.\n*/\ntsk_id_t tsk_provenance_table_add_row(tsk_provenance_table_t *self,\n    const char *timestamp, tsk_size_t timestamp_length, const char *record,\n    tsk_size_t record_length);\n\n/**\n@brief Updates the row at the specified index.\n\n@rst\nRewrite the row at the specified index in this table to use the specified\nvalues. Copies of the ``timestamp`` and ``record`` parameters are taken\nimmediately. See the :ref:`table definition <sec_provenance_table_definition>`\nfor details of the columns in this table.\n\n.. 
warning::\n    Because of the way that ragged columns are encoded, this method requires a\n    full rewrite of the internal column memory in worst case, and would\n    therefore be inefficient for bulk updates for such columns. However, if the\n    sizes of all ragged column values are unchanged in the updated row, this\n    method is guaranteed to only update the memory for the row in question.\n@endrst\n\n@param self A pointer to a tsk_provenance_table_t object.\n@param index The row to update.\n@param timestamp The timestamp to be associated with new provenance. This\n    is a pointer to arbitrary memory. Can be ``NULL`` if ``timestamp_length`` is 0.\n@param timestamp_length The size of the timestamp array in bytes.\n@param record The record to be associated with the provenance. This\n    is a pointer to arbitrary memory. Can be ``NULL`` if ``record_length`` is 0.\n@param record_length The size of the record array in bytes.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_provenance_table_update_row(tsk_provenance_table_t *self, tsk_id_t index,\n    const char *timestamp, tsk_size_t timestamp_length, const char *record,\n    tsk_size_t record_length);\n\n/**\n@brief Clears this table, setting the number of rows to zero.\n\n@rst\nNo memory is freed as a result of this operation; please use\n:c:func:`tsk_provenance_table_free` to free the table's internal resources.\n@endrst\n\n@param self A pointer to a tsk_provenance_table_t object.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_provenance_table_clear(tsk_provenance_table_t *self);\n\n/**\n@brief Truncates this table so that only the first num_rows are retained.\n\n@param self A pointer to a tsk_provenance_table_t object.\n@param num_rows The number of rows to retain in the table.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_provenance_table_truncate(tsk_provenance_table_t *self, tsk_size_t num_rows);\n\n/**\n@brief Extends this 
table by appending rows copied from another table.\n\n@rst\nAppends the rows at the specified indexes from the table ``other`` to the end of this\ntable. Row indexes can be repeated and in any order. If ``row_indexes`` is NULL, append\nthe first ``num_rows`` from ``other`` to this table.\n@endrst\n\n@param self A pointer to a tsk_provenance_table_t object where rows are to be added.\n@param other A pointer to a tsk_provenance_table_t object where rows are copied from.\n@param num_rows The number of rows from ``other`` to append to this table.\n@param row_indexes Array of row indexes in ``other``. If ``NULL`` is passed then the\n    first ``num_rows`` of ``other`` are used.\n@param options Bitwise option flags. Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_provenance_table_extend(tsk_provenance_table_t *self,\n    const tsk_provenance_table_t *other, tsk_size_t num_rows,\n    const tsk_id_t *row_indexes, tsk_flags_t options);\n\n/**\n@brief Subset this table by keeping rows according to a boolean mask.\n\n@rst\nDeletes rows from this table and optionally return the mapping from IDs in\nthe current table to the updated table. Rows are kept or deleted according to\nthe specified boolean array ``keep`` such that for each row ``j`` if\n``keep[j]`` is false (zero) the row is deleted, and otherwise the row is\nretained. Thus, ``keep`` must be an array of at least ``num_rows``\n:c:type:`bool` values.\n\nIf the ``id_map`` argument is non-null, this array will be updated to represent\nthe mapping between IDs before and after row deletion. For row ``j``,\n``id_map[j]`` will contain the new ID for row ``j`` if it is retained, or\n:c:macro:`TSK_NULL` if the row has been removed. Thus, ``id_map`` must be an\narray of at least ``num_rows`` :c:type:`tsk_id_t` values.\n\n.. 
warning::\n    C++ users need to be careful to specify the correct type when\n    passing in values for the ``keep`` array,\n    using ``std::vector<tsk_bool_t>`` and not ``std::vector<bool>``,\n    as the latter may not be the correct size.\n\n@endrst\n\n@param self A pointer to a tsk_provenance_table_t object.\n@param keep Array of boolean flags describing whether a particular\n    row should be kept or not. Must be at least ``num_rows`` long.\n@param options Bitwise option flags. Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@param id_map An array in which to store the mapping between new\n    and old IDs. If NULL, this will be ignored.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_provenance_table_keep_rows(tsk_provenance_table_t *self, const tsk_bool_t *keep,\n    tsk_flags_t options, tsk_id_t *id_map);\n\n/**\n@brief Returns true if the data in the specified table is identical to the data\n       in this table.\n\n@rst\n\n**Options**\n\nOptions to control the comparison can be specified by providing one or\nmore of the following bitwise flags. By default (options=0) tables are\nconsidered equal if they are byte-wise identical in all columns.\n\n- :c:macro:`TSK_CMP_IGNORE_TIMESTAMPS`\n@endrst\n\n@param self A pointer to a tsk_provenance_table_t object.\n@param other A pointer to a tsk_provenance_table_t object.\n@param options Bitwise comparison options.\n@return Return true if the specified table is equal to this table.\n*/\nbool tsk_provenance_table_equals(const tsk_provenance_table_t *self,\n    const tsk_provenance_table_t *other, tsk_flags_t options);\n\n/**\n@brief Copies the state of this table into the specified destination.\n\n@rst\nBy default the method initialises the specified destination table. 
If the\ndestination is already initialised, the :c:macro:`TSK_NO_INIT` option should\nbe supplied to avoid leaking memory.\n@endrst\n\n@param self A pointer to a tsk_provenance_table_t object.\n@param dest A pointer to a tsk_provenance_table_t object. If the TSK_NO_INIT\noption is specified, this must be an initialised provenance table. If not, it must be an\nuninitialised provenance table.\n@param options Bitwise option flags.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_provenance_table_copy(const tsk_provenance_table_t *self,\n    tsk_provenance_table_t *dest, tsk_flags_t options);\n\n/**\n@brief Get the row at the specified index.\n\n@rst\nUpdates the specified provenance struct to reflect the values in the specified row.\nPointers to memory within this struct are handled by the table and should **not**\nbe freed by client code. These pointers are guaranteed to be valid until the\nnext operation that modifies the table (e.g., by adding a new row), but not afterwards.\n@endrst\n\n@param self A pointer to a tsk_provenance_table_t object.\n@param index The requested table row.\n@param row A pointer to a tsk_provenance_t struct that is updated to reflect the\n    values in the specified row.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_provenance_table_get_row(\n    const tsk_provenance_table_t *self, tsk_id_t index, tsk_provenance_t *row);\n\n/**\n@brief Print out the state of this table to the specified stream.\n\nThis method is intended for debugging purposes and should not be used\nin production code. 
The format of the output should **not** be depended\non and may change arbitrarily between versions.\n\n@param self A pointer to a tsk_provenance_table_t object.\n@param out The stream to write the summary to.\n*/\nvoid tsk_provenance_table_print_state(const tsk_provenance_table_t *self, FILE *out);\n\n/**\n@brief Replace this table's data by copying from a set of column arrays\n\n@rst\nClears the data columns of this table and then copies column data from the specified\nset of arrays. The supplied arrays should all contain data on the same number of rows.\nThe metadata schema is not affected.\n@endrst\n\n@param self A pointer to a tsk_provenance_table_t object.\n@param num_rows The number of rows to copy from the specified arrays.\n@param timestamp The array of char timestamp values to be copied.\n@param timestamp_offset The array of tsk_size_t timestamp offset values to be copied.\n@param record The array of char record values to be copied.\n@param record_offset The array of tsk_size_t record offset values to be copied.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_provenance_table_set_columns(tsk_provenance_table_t *self, tsk_size_t num_rows,\n    const char *timestamp, const tsk_size_t *timestamp_offset, const char *record,\n    const tsk_size_t *record_offset);\n\n/**\n@brief Extends this table by copying from a set of column arrays\n\n@rst\nCopies column data from the specified set of arrays to create new rows at the end of the\ntable. The supplied arrays should all contain data on the same number of rows. 
The\nmetadata schema is not affected.\n@endrst\n\n@param self A pointer to a tsk_provenance_table_t object.\n@param num_rows The number of rows to copy from the specified arrays.\n@param timestamp The array of char timestamp values to be copied.\n@param timestamp_offset The array of tsk_size_t timestamp offset values to be copied.\n@param record The array of char record values to be copied.\n@param record_offset The array of tsk_size_t record offset values to be copied.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_provenance_table_append_columns(tsk_provenance_table_t *self,\n    tsk_size_t num_rows, const char *timestamp, const tsk_size_t *timestamp_offset,\n    const char *record, const tsk_size_t *record_offset);\n\n/**\n@brief Controls the pre-allocation strategy for this table\n\n@rst\nSet a fixed pre-allocation size, or use the default doubling strategy.\nSee :ref:`sec_c_api_memory_allocation_strategy` for details on the default\npre-allocation strategy.\n@endrst\n\n@param self A pointer to a tsk_provenance_table_t object.\n@param max_rows_increment The number of rows to pre-allocate, or zero for the default\n    doubling strategy.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_provenance_table_set_max_rows_increment(\n    tsk_provenance_table_t *self, tsk_size_t max_rows_increment);\n\n/**\n@brief Controls the pre-allocation strategy for the timestamp column\n\n@rst\nSet a fixed pre-allocation size, or use the default doubling strategy.\nSee :ref:`sec_c_api_memory_allocation_strategy` for details on the default\npre-allocation strategy.\n@endrst\n\n@param self A pointer to a tsk_provenance_table_t object.\n@param max_timestamp_length_increment The number of bytes to pre-allocate, or zero for\nthe default doubling strategy.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_provenance_table_set_max_timestamp_length_increment(\n    tsk_provenance_table_t *self, tsk_size_t 
max_timestamp_length_increment);\n\n/**\n@brief Controls the pre-allocation strategy for the record column\n\n@rst\nSet a fixed pre-allocation size, or use the default doubling strategy.\nSee :ref:`sec_c_api_memory_allocation_strategy` for details on the default\npre-allocation strategy.\n@endrst\n\n@param self A pointer to a tsk_provenance_table_t object.\n@param max_record_length_increment The number of bytes to pre-allocate, or zero for the\ndefault doubling strategy.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_provenance_table_set_max_record_length_increment(\n    tsk_provenance_table_t *self, tsk_size_t max_record_length_increment);\n\n/** @} */\n\n/* Undocumented methods */\nint tsk_provenance_table_dump_text(const tsk_provenance_table_t *self, FILE *out);\n\n/****************************************************************************/\n/* Table collection .*/\n/****************************************************************************/\n\n/**\n@defgroup TABLE_COLLECTION_API_GROUP Table collection API.\n@{\n*/\n\n/**\n@brief Initialises the table collection by allocating the internal memory\n       and initialising all the constituent tables.\n\n@rst\nThis must be called before any operations are performed on the table\ncollection. 
See the :ref:`sec_c_api_overview_structure` for details on how objects\nare initialised and freed.\n\n**Options**\n\nOptions can be specified by providing bitwise flags:\n\n- :c:macro:`TSK_TC_NO_EDGE_METADATA`\n@endrst\n\n@param self A pointer to an uninitialised tsk_table_collection_t object.\n@param options Allocation time options as above.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_table_collection_init(tsk_table_collection_t *self, tsk_flags_t options);\n\n/**\n@brief Free the internal memory for the specified table collection.\n\n@param self A pointer to an initialised tsk_table_collection_t object.\n@return Always returns 0.\n*/\nint tsk_table_collection_free(tsk_table_collection_t *self);\n\n/**\n@brief Clears data tables (and optionally provenances and metadata) in\nthis table collection.\n\n@rst\nBy default this operation clears all tables except the provenance table, retaining\ntable metadata schemas and the tree-sequence level metadata and schema.\n\nNo memory is freed as a result of this operation; please use\n:c:func:`tsk_table_collection_free` to free internal resources.\n\n**Options**\n\nOptions can be specified by providing one or more of the following bitwise\nflags:\n\n- :c:macro:`TSK_CLEAR_PROVENANCE`\n- :c:macro:`TSK_CLEAR_METADATA_SCHEMAS`\n- :c:macro:`TSK_CLEAR_TS_METADATA_AND_SCHEMA`\n@endrst\n\n@param self A pointer to a tsk_table_collection_t object.\n@param options Bitwise clearing options.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_table_collection_clear(tsk_table_collection_t *self, tsk_flags_t options);\n\n/**\n@brief Returns true if the data in the specified table collection is equal\n    to the data in this table collection.\n\n@rst\n\nReturns true if the two table collections are equal. The indexes are\nnot considered as these are derived from the tables. 
We also do not\nconsider the ``file_uuid``, since it is a property of the file that the set\nof tables is stored in.\n\n**Options**\n\nOptions to control the comparison can be specified by providing one or\nmore of the following bitwise flags. By default (options=0) two table\ncollections are considered equal if all of the tables are byte-wise\nidentical, and the sequence lengths, metadata and metadata schemas\nof the two table collections are identical.\n\n- :c:macro:`TSK_CMP_IGNORE_PROVENANCE`\n- :c:macro:`TSK_CMP_IGNORE_METADATA`\n- :c:macro:`TSK_CMP_IGNORE_TS_METADATA`\n- :c:macro:`TSK_CMP_IGNORE_TIMESTAMPS`\n- :c:macro:`TSK_CMP_IGNORE_TABLES`\n- :c:macro:`TSK_CMP_IGNORE_REFERENCE_SEQUENCE`\n@endrst\n\n@param self A pointer to a tsk_table_collection_t object.\n@param other A pointer to a tsk_table_collection_t object.\n@param options Bitwise comparison options.\n@return Return true if the specified table collection is equal to this table collection.\n*/\nbool tsk_table_collection_equals(const tsk_table_collection_t *self,\n    const tsk_table_collection_t *other, tsk_flags_t options);\n\n/**\n@brief Copies the state of this table collection into the specified destination.\n\n@rst\nBy default the method initialises the specified destination table collection. If the\ndestination is already initialised, the :c:macro:`TSK_NO_INIT` option should\nbe supplied to avoid leaking memory.\n\n**Options**\n\nOptions can be specified by providing bitwise flags:\n\n:c:macro:`TSK_COPY_FILE_UUID`\n@endrst\n\n@param self A pointer to a tsk_table_collection_t object.\n@param dest A pointer to a tsk_table_collection_t object. If the TSK_NO_INIT\noption is specified, this must be an initialised table collection. 
If not, it must be an\nuninitialised table collection.\n@param options Bitwise option flags.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_table_collection_copy(const tsk_table_collection_t *self,\n    tsk_table_collection_t *dest, tsk_flags_t options);\n\n/**\n@brief Print out the state of this table collection to the specified stream.\n\nThis method is intended for debugging purposes and should not be used\nin production code. The format of the output should **not** be depended\non and may change arbitrarily between versions.\n\n@param self A pointer to a tsk_table_collection_t object.\n@param out The stream to write the summary to.\n*/\nvoid tsk_table_collection_print_state(const tsk_table_collection_t *self, FILE *out);\n\n/**\n@brief Load a table collection from a file path.\n\n@rst\nLoads the data from the specified file into this table collection.\nBy default, the table collection is also initialised.\nThe resources allocated must be freed using\n:c:func:`tsk_table_collection_free` even in error conditions.\n\nIf the :c:macro:`TSK_NO_INIT` option is set, the table collection is\nnot initialised, allowing an already initialised table collection to\nbe overwritten with the data from a file.\n\nIf the file contains multiple table collections, this function will load\nthe first. 
Please see the :c:func:`tsk_table_collection_loadf` for details\non how to sequentially load table collections from a stream.\n\nIf the :c:macro:`TSK_LOAD_SKIP_TABLES` option is set, only the non-table information from\nthe table collection will be read, leaving all tables with zero rows and no\nmetadata or schema.\nIf the :c:macro:`TSK_LOAD_SKIP_REFERENCE_SEQUENCE` option is set, the table collection is\nread without loading the reference sequence.\n\n**Options**\n\nOptions can be specified by providing one or more of the following bitwise\nflags:\n\n- :c:macro:`TSK_NO_INIT`\n- :c:macro:`TSK_LOAD_SKIP_TABLES`\n- :c:macro:`TSK_LOAD_SKIP_REFERENCE_SEQUENCE`\n\n**Examples**\n\n.. code-block:: c\n\n    int ret;\n    tsk_table_collection_t tables;\n    ret = tsk_table_collection_load(&tables, \"data.trees\", 0);\n    if (ret != 0) {\n        fprintf(stderr, \"Load error:%s\\n\", tsk_strerror(ret));\n        exit(EXIT_FAILURE);\n    }\n\n@endrst\n\n@param self A pointer to an uninitialised tsk_table_collection_t object\n    if the TSK_NO_INIT option is not set (default), or an initialised\n    tsk_table_collection_t otherwise.\n@param filename A NULL terminated string containing the filename.\n@param options Bitwise options. See above for details.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_table_collection_load(\n    tsk_table_collection_t *self, const char *filename, tsk_flags_t options);\n\n/**\n@brief Load a table collection from a stream.\n\n@rst\nLoads a tables definition from the specified file stream to this table\ncollection. 
By default, the table collection is also initialised.\nThe resources allocated must be freed using\n:c:func:`tsk_table_collection_free` even in error conditions.\n\nIf the :c:macro:`TSK_NO_INIT` option is set, the table collection is\nnot initialised, allowing an already initialised table collection to\nbe overwritten with the data from a file.\n\nThe stream can be an arbitrary file descriptor, for example a network socket.\nNo seek operations are performed.\n\nIf the stream contains multiple table collection definitions, this function\nwill load the next table collection from the stream. If the stream contains no\nmore table collection definitions the error value :c:macro:`TSK_ERR_EOF` will\nbe returned. Note that EOF is only returned in the case where zero bytes are\nread from the stream --- malformed files or other errors will result in\ndifferent error conditions. Please see the\n:ref:`sec_c_api_examples_file_streaming` section for an example of how to\nsequentially load tree sequences from a stream.\n\nPlease note that this streaming behaviour is not supported if the\n:c:macro:`TSK_LOAD_SKIP_TABLES` or :c:macro:`TSK_LOAD_SKIP_REFERENCE_SEQUENCE` option is\nset. If the :c:macro:`TSK_LOAD_SKIP_TABLES` option is set, only the non-table information\nfrom the table collection will be read, leaving all tables with zero rows and no metadata\nor schema. If the :c:macro:`TSK_LOAD_SKIP_REFERENCE_SEQUENCE` option is set, the table\ncollection is read without loading the reference sequence. 
When attempting to read from a\nstream with multiple table collection definitions and either of these two options set,\nthe requested information from the first table collection will be read on the first call\nto :c:func:`tsk_table_collection_loadf`, with subsequent calls leading to errors.\n\n**Options**\n\nOptions can be specified by providing one or more of the following bitwise\nflags:\n\n- :c:macro:`TSK_NO_INIT`\n- :c:macro:`TSK_LOAD_SKIP_TABLES`\n- :c:macro:`TSK_LOAD_SKIP_REFERENCE_SEQUENCE`\n@endrst\n\n@param self A pointer to an uninitialised tsk_table_collection_t object\n    if the TSK_NO_INIT option is not set (default), or an initialised\n    tsk_table_collection_t otherwise.\n@param file A FILE stream opened in an appropriate mode for reading (e.g.\n    \"r\", \"r+\" or \"w+\") positioned at the beginning of a table collection\n    definition.\n@param options Bitwise options. See above for details.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_table_collection_loadf(\n    tsk_table_collection_t *self, FILE *file, tsk_flags_t options);\n\n/**\n@brief Write a table collection to file.\n\n@rst\nWrites the data from this table collection to the specified file.\n\nIf an error occurs the file path is deleted, ensuring that only complete\nand well formed files will be written.\n\n**Examples**\n\n.. code-block:: c\n\n    int ret;\n    tsk_table_collection_t tables;\n\n    ret = tsk_table_collection_init(&tables, 0);\n    error_check(ret);\n    tables.sequence_length = 1.0;\n    // Write out the empty tree sequence\n    ret = tsk_table_collection_dump(&tables, \"empty.trees\", 0);\n    error_check(ret);\n\n@endrst\n\n@param self A pointer to an initialised tsk_table_collection_t object.\n@param filename A NULL terminated string containing the filename.\n@param options Bitwise options. 
Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_table_collection_dump(\n    const tsk_table_collection_t *self, const char *filename, tsk_flags_t options);\n\n/**\n@brief Write a table collection to a stream.\n\n@rst\nWrites the data from this table collection to the specified FILE stream.\nSemantics are identical to :c:func:`tsk_table_collection_dump`.\n\nPlease see the :ref:`sec_c_api_examples_file_streaming` section for an example\nof how to sequentially dump and load tree sequences from a stream.\n\n@endrst\n\n@param self A pointer to an initialised tsk_table_collection_t object.\n@param file A FILE stream opened in an appropriate mode for writing (e.g.\n    \"w\", \"a\", \"r+\" or \"w+\").\n@param options Bitwise options. Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_table_collection_dumpf(\n    const tsk_table_collection_t *self, FILE *file, tsk_flags_t options);\n\n/**\n@brief Record the number of rows in each table in the specified tsk_bookmark_t object.\n\n@param self A pointer to an initialised tsk_table_collection_t object.\n@param bookmark A pointer to a tsk_bookmark_t which is updated to contain the number of\n    rows in all tables.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_table_collection_record_num_rows(\n    const tsk_table_collection_t *self, tsk_bookmark_t *bookmark);\n\n/**\n@brief Truncates the tables in this table collection according to the specified bookmark.\n\n@rst\nTruncate the tables in this collection so that each one has the number\nof rows specified in the parameter :c:type:`tsk_bookmark_t`. 
Use the\n:c:func:`tsk_table_collection_record_num_rows` function to record the\nnumber of rows for each table in a table collection at a particular time.\n@endrst\n\n@param self A pointer to a tsk_table_collection_t object.\n@param bookmark The number of rows to retain in each table.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_table_collection_truncate(\n    tsk_table_collection_t *self, tsk_bookmark_t *bookmark);\n\n/**\n@brief Sorts the tables in this collection.\n\n@rst\nSome of the tables in a table collection must satisfy specific sortedness requirements\nin order to define a :ref:`valid tree sequence <sec_valid_tree_sequence_requirements>`.\nThis method sorts the ``edge``, ``site``, ``mutation`` and ``individual`` tables such\nthat these requirements are guaranteed to be fulfilled. The ``node``, ``population``\nand ``provenance`` tables do not have any sortedness requirements, and are therefore\nignored by this method.\n\n.. note:: The current implementation **may** sort in such a way that exceeds\n    these requirements, but this behaviour should not be relied upon and later\n    versions may weaken the level of sortedness. However, the method does **guarantee**\n    that the resulting tables describe a valid tree sequence.\n\n.. warning:: Sorting migrations is currently not supported and an error will be raised\n    if a table collection containing a non-empty migration table is specified.\n\nThe specified :c:type:`tsk_bookmark_t` allows us to specify a start position\nfor sorting in each of the tables; rows before this value are assumed to already be\nin sorted order and this information is used to make sorting more efficient.\nPositions in tables that are not sorted (``node``, ``population``\nand ``provenance``) are ignored and can be set to arbitrary values.\n\n.. 
warning:: The current implementation only supports specifying a start\n    position for the ``edge`` table and in a limited form for the\n    ``site``, ``mutation`` and ``individual`` tables. Specifying a non-zero\n    ``migration`` start position results in an error. The start positions for the\n    ``site``, ``mutation`` and ``individual`` tables can either be 0 or the length of the\n    respective tables, allowing these tables to either be fully sorted, or not sorted at\n    all.\n\nThe table collection will always be unindexed after sort successfully completes.\n\nFor more control over the sorting process, see the :ref:`sec_c_api_low_level_sorting`\nsection.\n\n**Options**\n\nOptions can be specified by providing one or more of the following bitwise\nflags:\n\n:c:macro:`TSK_NO_CHECK_INTEGRITY`\n    Do not run integrity checks using\n    :c:func:`tsk_table_collection_check_integrity` before sorting,\n    potentially leading to a small reduction in execution time. This\n    performance optimisation should not be used unless the calling code can\n    guarantee reference integrity within the table collection. References\n    to rows not in the table or bad offsets will result in undefined\n    behaviour.\n@endrst\n\n@param self A pointer to a tsk_table_collection_t object.\n@param start The position to begin sorting in each table; all rows less than this\n    position must fulfill the tree sequence sortedness requirements. If this is\n    NULL, sort all rows.\n@param options Sort options.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_table_collection_sort(\n    tsk_table_collection_t *self, const tsk_bookmark_t *start, tsk_flags_t options);\n\n/**\n@brief Sorts the individual table in this collection.\n\n@rst\nSorts the individual table in place, so that parents come before children,\nand the parent column is remapped as required. 
Node references to individuals\nare also updated.\n@endrst\n\n@param self A pointer to a tsk_table_collection_t object.\n@param options Sort options. Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_table_collection_individual_topological_sort(\n    tsk_table_collection_t *self, tsk_flags_t options);\n\n/**\n@brief Puts the tables into canonical form.\n\n@rst\nPut tables into canonical form such that randomly reshuffled tables\nare guaranteed to always be sorted in the same order, and redundant\ninformation is removed. The canonical sorting exceeds the usual\ntree sequence sortedness requirements.\n\n**Options**:\n\nOptions can be specified by providing one or more of the following bitwise\nflags:\n\n- :c:macro:`TSK_SUBSET_KEEP_UNREFERENCED`\n\n@endrst\n\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_table_collection_canonicalise(tsk_table_collection_t *self, tsk_flags_t options);\n\n/**\n@brief Simplify the tables to remove redundant information.\n\n@rst\nSimplification transforms the tables to remove redundancy and canonicalise\ntree sequence data. See the :ref:`simplification <sec_simplification>` tutorial for\nmore details.\n\nA mapping from the node IDs in the table before simplification to their equivalent\nvalues after simplification can be obtained via the ``node_map`` argument. If this\nis non NULL, ``node_map[u]`` will contain the new ID for node ``u`` after simplification,\nor :c:macro:`TSK_NULL` if the node has been removed. 
Thus, ``node_map`` must be an array\nof at least ``self->nodes.num_rows`` :c:type:`tsk_id_t` values.\n\nIf the `TSK_SIMPLIFY_NO_FILTER_NODES` option is specified, the node table will be\nunaltered except for changing the sample status of nodes (but see the\n`TSK_SIMPLIFY_NO_UPDATE_SAMPLE_FLAGS` option below) and to update references\nto other tables that may have changed as a result of filtering (see below).\nThe ``node_map`` (if specified) will always be the identity mapping, such that\n``node_map[u] == u`` for all nodes. Note also that the order of the list of\nsamples is not important in this case.\n\nWhen a table is not filtered (i.e., if the `TSK_SIMPLIFY_NO_FILTER_NODES`\noption is provided or the `TSK_SIMPLIFY_FILTER_SITES`,\n`TSK_SIMPLIFY_FILTER_POPULATIONS` or `TSK_SIMPLIFY_FILTER_INDIVIDUALS`\noptions are *not* provided) the corresponding table is modified as\nlittle as possible, and all pointers are guaranteed to remain valid\nafter simplification. The only changes made to an unfiltered table are\nto update any references to tables that may have changed (for example,\nremapping population IDs in the node table if\n`TSK_SIMPLIFY_FILTER_POPULATIONS` was specified) or altering the\nsample status flag of nodes.\n\n.. note:: It is possible for populations and individuals to be filtered\n   even if `TSK_SIMPLIFY_NO_FILTER_NODES` is specified because there\n   may be entirely unreferenced entities in the input tables, which\n   are not affected by whether we filter nodes or not.\n\nBy default, the node sample flags are updated by unsetting the\n:c:macro:`TSK_NODE_IS_SAMPLE` flag for all nodes and subsequently setting it\nfor the nodes provided as input to this function. The\n`TSK_SIMPLIFY_NO_UPDATE_SAMPLE_FLAGS` option will prevent this from occurring,\nmaking it the responsibility of calling code to keep track of the ultimate\nsample status of nodes. 
Using this option in conjunction with\n`TSK_SIMPLIFY_NO_FILTER_NODES` (and without the\n`TSK_SIMPLIFY_FILTER_POPULATIONS` and `TSK_SIMPLIFY_FILTER_INDIVIDUALS`\noptions) guarantees that the node table will not be written to during the\nlifetime of this function.\n\nThe table collection will always be unindexed after simplify successfully completes.\n\n.. note:: Migrations are currently not supported by simplify, and an error will\n    be raised if we attempt to call simplify on a table collection with greater\n    than zero migrations. See `<https://github.com/tskit-dev/tskit/issues/20>`_\n\n**Options**:\n\nOptions can be specified by providing one or more of the following bitwise\nflags:\n\n- :c:macro:`TSK_SIMPLIFY_FILTER_SITES`\n- :c:macro:`TSK_SIMPLIFY_FILTER_POPULATIONS`\n- :c:macro:`TSK_SIMPLIFY_FILTER_INDIVIDUALS`\n- :c:macro:`TSK_SIMPLIFY_NO_FILTER_NODES`\n- :c:macro:`TSK_SIMPLIFY_NO_UPDATE_SAMPLE_FLAGS`\n- :c:macro:`TSK_SIMPLIFY_REDUCE_TO_SITE_TOPOLOGY`\n- :c:macro:`TSK_SIMPLIFY_KEEP_UNARY`\n- :c:macro:`TSK_SIMPLIFY_KEEP_INPUT_ROOTS`\n- :c:macro:`TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS`\n@endrst\n\n@param self A pointer to a tsk_table_collection_t object.\n@param samples Either NULL or an array of num_samples distinct and valid node IDs.\n    If non-null the nodes in this array will be marked as samples in the output.\n    If NULL, the num_samples parameter is ignored and the samples in the output\n    will be the same as the samples in the input. This is equivalent to populating\n    the samples array with all of the sample nodes in the input in increasing\n    order of ID.\n@param num_samples The number of node IDs in the input samples array. 
Ignored\n    if the samples array is NULL.\n@param options Simplify options; see above for the available bitwise flags.\n    For the default behaviour, a value of 0 should be provided.\n@param node_map If not NULL, this array will be filled to define the mapping\n    between nodes IDs in the table collection before and after simplification.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_table_collection_simplify(tsk_table_collection_t *self, const tsk_id_t *samples,\n    tsk_size_t num_samples, tsk_flags_t options, tsk_id_t *node_map);\n\n/**\n@brief Subsets and reorders a table collection according to an array of nodes.\n\n@rst\nReduces the table collection to contain only the entries referring to\nthe provided list of nodes, with nodes reordered according to the order\nthey appear in the ``nodes`` argument. Specifically, this subsets and reorders\neach of the tables as follows (but see options, below):\n\n1. Nodes: if in the list of nodes, and in the order provided.\n2. Individuals: if referred to by a retained node.\n3. Populations: if referred to by a retained node, and in the order first seen\n   when traversing the list of retained nodes.\n4. Edges: if both parent and child are retained nodes.\n5. Mutations: if the mutation's node is a retained node.\n6. Sites: if any mutations remain at the site after removing mutations.\n\nRetained individuals, edges, mutations, and sites appear in the same\norder as in the original tables. Note that only the information *directly*\nassociated with the provided nodes is retained - for instance,\nsubsetting to nodes=[A, B] does not retain nodes ancestral to A and B,\nand only retains the individuals A and B are in, and not their parents.\n\nThis function does *not* require the tables to be sorted.\n\n.. 
note:: Migrations are currently not supported by subset, and an error will\n    be raised if we attempt to call subset on a table collection with greater\n    than zero migrations.\n\n**Options**:\n\nOptions can be specified by providing one or more of the following bitwise\nflags:\n\n- :c:macro:`TSK_SUBSET_NO_CHANGE_POPULATIONS`\n- :c:macro:`TSK_SUBSET_KEEP_UNREFERENCED`\n@endrst\n\n@param self A pointer to a tsk_table_collection_t object.\n@param nodes An array of num_nodes valid node IDs.\n@param num_nodes The number of node IDs in the input nodes array.\n@param options Bitwise option flags.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_table_collection_subset(tsk_table_collection_t *self, const tsk_id_t *nodes,\n    tsk_size_t num_nodes, tsk_flags_t options);\n\n/**\n@brief Forms the node-wise union of two table collections.\n\n@rst\nExpands this table collection by adding the non-shared portions of another table\ncollection to itself. The ``other_node_mapping`` encodes which nodes in ``other`` are\nequivalent to a node in ``self``. The positions in the ``other_node_mapping`` array\ncorrespond to node ids in ``other``, and the elements encode the equivalent\nnode id in ``self`` or :c:macro:`TSK_NULL` if the node is exclusive to ``other``. Nodes\nthat are exclusive to ``other`` are added to ``self``, along with:\n\n1. Individuals which are new to ``self``.\n2. Edges whose parent or child are new to ``self``.\n3. Sites which were not present in ``self``.\n4. 
Mutations whose nodes are new to ``self``.\n\nBy default, populations of newly added nodes are assumed to be new populations,\nand added to the population table as well.\n\nThe behavior can be changed by the flags ``TSK_UNION_ALL_EDGES`` and\n``TSK_UNION_ALL_MUTATIONS``, which will (respectively) add *all* edges\nor *all* sites and mutations instead.\n\nThis operation will also sort the resulting tables, so the tables may change\neven if nothing new is added, if the original tables were not sorted.\n\n.. note:: Migrations are currently not supported by union, and an error will\n    be raised if we attempt to call union on a table collection with migrations.\n\n**Options**:\n\nOptions can be specified by providing one or more of the following bitwise\nflags:\n\n- :c:macro:`TSK_UNION_NO_CHECK_SHARED`\n- :c:macro:`TSK_UNION_NO_ADD_POP`\n@endrst\n\n@param self A pointer to a tsk_table_collection_t object.\n@param other A pointer to a tsk_table_collection_t object.\n@param other_node_mapping An array of node IDs that relate nodes in other to nodes in\nself: the k-th element of other_node_mapping should be the index of the equivalent\nnode in self, or TSK_NULL if the node is not present in self (in which case it\nwill be added to self).\n@param options Union options; see above for the available bitwise flags.\n    For the default behaviour, a value of 0 should be provided.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_table_collection_union(tsk_table_collection_t *self,\n    const tsk_table_collection_t *other, const tsk_id_t *other_node_mapping,\n    tsk_flags_t options);\n\n/**\n@brief Set the time_units\n@rst\nCopies the time_units string to this table collection, replacing any existing.\n@endrst\n@param self A pointer to a tsk_table_collection_t object.\n@param time_units A pointer to a char array.\n@param time_units_length The size of the time units string in bytes.\n@return Return 0 on success or a negative value on failure.\n*/\nint 
tsk_table_collection_set_time_units(\n    tsk_table_collection_t *self, const char *time_units, tsk_size_t time_units_length);\n\n/**\n@brief Set the metadata\n@rst\nCopies the metadata string to this table collection, replacing any existing.\n@endrst\n@param self A pointer to a tsk_table_collection_t object.\n@param metadata A pointer to a char array.\n@param metadata_length The size of the metadata in bytes.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_table_collection_set_metadata(\n    tsk_table_collection_t *self, const char *metadata, tsk_size_t metadata_length);\n\n/**\n@brief Set the metadata schema\n@rst\nCopies the metadata schema string to this table collection, replacing any existing.\n@endrst\n@param self A pointer to a tsk_table_collection_t object.\n@param metadata_schema A pointer to a char array.\n@param metadata_schema_length The size of the metadata schema in bytes.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_table_collection_set_metadata_schema(tsk_table_collection_t *self,\n    const char *metadata_schema, tsk_size_t metadata_schema_length);\n\n/**\n@brief Returns true if this table collection is indexed.\n\n@rst\nThis method returns true if the table collection has an index\nfor the edge table. It guarantees that the index exists, and that\nit is for the same number of edges that are in the edge table. It\ndoes *not* guarantee that the index is valid (i.e., if the rows\nin the edge have been permuted in some way since the index was built).\n\nSee the :ref:`sec_c_api_table_indexes` section for details on the index\nlife-cycle.\n@endrst\n\n@param self A pointer to a tsk_table_collection_t object.\n@param options Bitwise options. 
Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@return Return true if there is an index present for this table collection.\n*/\nbool tsk_table_collection_has_index(\n    const tsk_table_collection_t *self, tsk_flags_t options);\n\n/**\n@brief Deletes the indexes for this table collection.\n\n@rst\nUnconditionally drop the indexes that may be present for this table collection. It\nis not an error to call this method on an unindexed table collection.\nSee the :ref:`sec_c_api_table_indexes` section for details on the index\nlife-cycle.\n@endrst\n\n@param self A pointer to a tsk_table_collection_t object.\n@param options Bitwise options. Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@return Always returns 0.\n*/\nint tsk_table_collection_drop_index(tsk_table_collection_t *self, tsk_flags_t options);\n\n/**\n@brief Builds indexes for this table collection.\n\n@rst\nBuilds the tree traversal :ref:`indexes <sec_table_indexes>` for this table\ncollection. Any existing index is first dropped using\n:c:func:`tsk_table_collection_drop_index`. See the\n:ref:`sec_c_api_table_indexes` section for details on the index life-cycle.\n@endrst\n\n@param self A pointer to a tsk_table_collection_t object.\n@param options Bitwise options. Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_table_collection_build_index(tsk_table_collection_t *self, tsk_flags_t options);\n\n/**\n@brief Runs integrity checks on this table collection.\n\n@rst\n\nChecks the integrity of this table collection. The default checks (i.e., with\noptions = 0) guarantee the integrity of memory and entity references within the\ntable collection. All positions along the genome are checked\nto see if they are finite values and within the required bounds. 
Time values\nare checked to see if they are finite or marked as unknown.\nConsistency of the direction of inheritance is also checked: whether\nparents are more recent than children, mutations are not more recent\nthan their nodes or their mutation parents, etcetera.\n\nTo check if a set of tables fulfills the :ref:`requirements\n<sec_valid_tree_sequence_requirements>` needed for a valid tree sequence, use\nthe :c:macro:`TSK_CHECK_TREES` option. When this method is called with\n:c:macro:`TSK_CHECK_TREES`, the number of trees in the tree sequence is returned. Thus,\nto check for errors client code should verify that the return value is less than zero.\nAll other options will return zero on success and a negative value on failure.\n\nMore fine-grained checks can be achieved using bitwise combinations of the\nother options.\n\n**Options**:\n\nOptions can be specified by providing one or more of the following bitwise\nflags:\n\n- :c:macro:`TSK_CHECK_EDGE_ORDERING`\n- :c:macro:`TSK_CHECK_SITE_ORDERING`\n- :c:macro:`TSK_CHECK_SITE_DUPLICATES`\n- :c:macro:`TSK_CHECK_MUTATION_ORDERING`\n- :c:macro:`TSK_CHECK_INDIVIDUAL_ORDERING`\n- :c:macro:`TSK_CHECK_MIGRATION_ORDERING`\n- :c:macro:`TSK_CHECK_INDEXES`\n- :c:macro:`TSK_CHECK_TREES`\n- :c:macro:`TSK_NO_CHECK_POPULATION_REFS`\n@endrst\n\n@param self A pointer to a tsk_table_collection_t object.\n@param options Bitwise options.\n@return Return a negative error value if any problems are detected\n   in the tree sequence. 
If the TSK_CHECK_TREES option is provided,\n   the number of trees in the tree sequence will be returned, on\n   success.\n*/\ntsk_id_t tsk_table_collection_check_integrity(\n    const tsk_table_collection_t *self, tsk_flags_t options);\n\n/** @} */\n\n/* Undocumented methods */\n\n/* Flags for ibd_segments */\n#define TSK_IBD_STORE_PAIRS    (1 << 0)\n#define TSK_IBD_STORE_SEGMENTS (1 << 1)\n\n/* TODO be systematic about where \"result\" should be in the params\n * list, different here and in link_ancestors. */\n/* FIXME the order of num_samples and samples needs to be reversed in within.\n * This should be done as part of documenting, I guess. */\nint tsk_table_collection_ibd_within(const tsk_table_collection_t *self,\n    tsk_identity_segments_t *result, const tsk_id_t *samples, tsk_size_t num_samples,\n    double min_span, double max_time, tsk_flags_t options);\n\nint tsk_table_collection_ibd_between(const tsk_table_collection_t *self,\n    tsk_identity_segments_t *result, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, double min_span,\n    double max_time, tsk_flags_t options);\n\nint tsk_table_collection_link_ancestors(tsk_table_collection_t *self, tsk_id_t *samples,\n    tsk_size_t num_samples, tsk_id_t *ancestors, tsk_size_t num_ancestors,\n    tsk_flags_t options, tsk_edge_table_t *result);\nint tsk_table_collection_deduplicate_sites(\n    tsk_table_collection_t *tables, tsk_flags_t options);\nint tsk_table_collection_compute_mutation_parents(\n    tsk_table_collection_t *self, tsk_flags_t options);\nint tsk_table_collection_compute_mutation_times(\n    tsk_table_collection_t *self, double *random, tsk_flags_t options);\nint tsk_table_collection_delete_older(\n    tsk_table_collection_t *self, double time, tsk_flags_t options);\n\nint tsk_table_collection_set_indexes(tsk_table_collection_t *self,\n    tsk_id_t *edge_insertion_order, tsk_id_t *edge_removal_order);\n\nint 
tsk_table_collection_takeset_metadata(\n    tsk_table_collection_t *self, char *metadata, tsk_size_t metadata_length);\nint tsk_table_collection_takeset_indexes(tsk_table_collection_t *self,\n    tsk_id_t *edge_insertion_order, tsk_id_t *edge_removal_order);\nint tsk_individual_table_takeset_columns(tsk_individual_table_t *self,\n    tsk_size_t num_rows, tsk_flags_t *flags, double *location,\n    tsk_size_t *location_offset, tsk_id_t *parents, tsk_size_t *parents_offset,\n    char *metadata, tsk_size_t *metadata_offset);\nint tsk_node_table_takeset_columns(tsk_node_table_t *self, tsk_size_t num_rows,\n    tsk_flags_t *flags, double *time, tsk_id_t *population, tsk_id_t *individual,\n    char *metadata, tsk_size_t *metadata_offset);\nint tsk_edge_table_takeset_columns(tsk_edge_table_t *self, tsk_size_t num_rows,\n    double *left, double *right, tsk_id_t *parent, tsk_id_t *child, char *metadata,\n    tsk_size_t *metadata_offset);\nint tsk_migration_table_takeset_columns(tsk_migration_table_t *self, tsk_size_t num_rows,\n    double *left, double *right, tsk_id_t *node, tsk_id_t *source, tsk_id_t *dest,\n    double *time, char *metadata, tsk_size_t *metadata_offset);\nint tsk_site_table_takeset_columns(tsk_site_table_t *self, tsk_size_t num_rows,\n    double *position, char *ancestral_state, tsk_size_t *ancestral_state_offset,\n    char *metadata, tsk_size_t *metadata_offset);\nint tsk_mutation_table_takeset_columns(tsk_mutation_table_t *self, tsk_size_t num_rows,\n    tsk_id_t *site, tsk_id_t *node, tsk_id_t *parent, double *time, char *derived_state,\n    tsk_size_t *derived_state_offset, char *metadata, tsk_size_t *metadata_offset);\nint tsk_population_table_takeset_columns(tsk_population_table_t *self,\n    tsk_size_t num_rows, char *metadata, tsk_size_t *metadata_offset);\nint tsk_provenance_table_takeset_columns(tsk_provenance_table_t *self,\n    tsk_size_t num_rows, char *timestamp, tsk_size_t *timestamp_offset, char *record,\n    tsk_size_t 
*record_offset);\n\nbool tsk_table_collection_has_reference_sequence(const tsk_table_collection_t *self);\n\nint tsk_reference_sequence_init(tsk_reference_sequence_t *self, tsk_flags_t options);\nint tsk_reference_sequence_free(tsk_reference_sequence_t *self);\nbool tsk_reference_sequence_is_null(const tsk_reference_sequence_t *self);\nbool tsk_reference_sequence_equals(const tsk_reference_sequence_t *self,\n    const tsk_reference_sequence_t *other, tsk_flags_t options);\nint tsk_reference_sequence_copy(const tsk_reference_sequence_t *self,\n    tsk_reference_sequence_t *dest, tsk_flags_t options);\nint tsk_reference_sequence_set_data(\n    tsk_reference_sequence_t *self, const char *data, tsk_size_t data_length);\nint tsk_reference_sequence_set_url(\n    tsk_reference_sequence_t *self, const char *url, tsk_size_t url_length);\nint tsk_reference_sequence_set_metadata(\n    tsk_reference_sequence_t *self, const char *metadata, tsk_size_t metadata_length);\nint tsk_reference_sequence_set_metadata_schema(tsk_reference_sequence_t *self,\n    const char *metadata_schema, tsk_size_t metadata_schema_length);\nint tsk_reference_sequence_takeset_data(\n    tsk_reference_sequence_t *self, char *data, tsk_size_t data_length);\nint tsk_reference_sequence_takeset_metadata(\n    tsk_reference_sequence_t *self, char *metadata, tsk_size_t metadata_length);\n\n/**\n@defgroup TABLE_SORTER_API_GROUP Low-level table sorter API.\n@{\n*/\n\n/* NOTE: We use the \"struct _tsk_table_sorter_t\" form here\n * rather than the usual tsk_table_sorter_t alias because\n * of problems with Doxygen. This was the only way I could\n * get it to work - ideally, we'd use the usual typedefs\n * to avoid confusing people.\n */\n\n/**\n@brief Initialises the memory for the sorter object.\n\n@rst\nThis must be called before any operations are performed on the\ntable sorter and initialises all fields. The ``sort_edges`` function\nis set to the default method using qsort. 
The ``user_data``\nfield is set to NULL.\nThis method supports the same options as\n:c:func:`tsk_table_collection_sort`.\n\n@endrst\n\n@param self A pointer to an uninitialised tsk_table_sorter_t object.\n@param tables The table collection to sort.\n@param options Sorting options.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_table_sorter_init(struct _tsk_table_sorter_t *self,\n    tsk_table_collection_t *tables, tsk_flags_t options);\n\n/**\n@brief Runs the sort using the configured functions.\n\n@rst\nRuns the sorting process:\n\n1. Drop the table indexes.\n2. If the ``sort_edges`` function pointer is not NULL, run it. The\n   first parameter to the called function will be a pointer to this\n   table_sorter_t object. The second parameter will be the value\n   ``start.edges``. This specifies the offset at which sorting should\n   start in the edge table. This offset is guaranteed to be within the\n   bounds of the edge table.\n3. Sort the site table, building the mapping between site IDs in the\n   current and sorted tables.\n4. Sort the mutation table, using the ``sort_mutations`` pointer.\n\nIf an error occurs during the execution of a user-supplied\nsorting function a non-zero value must be returned. This value\nwill then be returned by ``tsk_table_sorter_run``. 
The error\nreturn value should be chosen to avoid conflicts with tskit error\ncodes.\n\nSee :c:func:`tsk_table_collection_sort` for details on the ``start`` parameter.\n\n@endrst\n\n@param self A pointer to a tsk_table_sorter_t object.\n@param start The position in the tables at which sorting starts.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_table_sorter_run(struct _tsk_table_sorter_t *self, const tsk_bookmark_t *start);\n\n/**\n@brief Free the internal memory for the specified table sorter.\n\n@param self A pointer to an initialised tsk_table_sorter_t object.\n@return Always returns 0.\n*/\nint tsk_table_sorter_free(struct _tsk_table_sorter_t *self);\n\n/** @} */\n\nint tsk_squash_edges(\n    tsk_edge_t *edges, tsk_size_t num_edges, tsk_size_t *num_output_edges);\n\n/* IBD segments API. This is experimental and the interface may change. */\n\ntsk_size_t tsk_identity_segments_get_num_segments(const tsk_identity_segments_t *self);\ndouble tsk_identity_segments_get_total_span(const tsk_identity_segments_t *self);\ntsk_size_t tsk_identity_segments_get_num_pairs(const tsk_identity_segments_t *self);\nint tsk_identity_segments_get_keys(\n    const tsk_identity_segments_t *result, tsk_id_t *pairs);\nint tsk_identity_segments_get_items(const tsk_identity_segments_t *self, tsk_id_t *pairs,\n    tsk_identity_segment_list_t **lists);\nint tsk_identity_segments_get(const tsk_identity_segments_t *self, tsk_id_t a,\n    tsk_id_t b, tsk_identity_segment_list_t **ret_list);\nvoid tsk_identity_segments_print_state(tsk_identity_segments_t *self, FILE *out);\nint tsk_identity_segments_free(tsk_identity_segments_t *self);\n\n#ifdef __cplusplus\n}\n#endif\n#endif\n"
  },
  {
    "path": "c/tskit/trees.c",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2025 Tskit Developers\n * Copyright (c) 2015-2018 University of Oxford\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n#include <stdio.h>\n#include <string.h>\n#include <stdbool.h>\n#include <stdlib.h>\n#include <math.h>\n#include <assert.h>\n\n#include <tskit/trees.h>\n#include <tskit/genotypes.h>\n\nstatic inline bool\nis_discrete(double x)\n{\n    return trunc(x) == x;\n}\n\n/* ======================================================== *\n * tree sequence\n * ======================================================== */\n\nstatic void\ntsk_treeseq_check_state(const tsk_treeseq_t *self)\n{\n    tsk_size_t j;\n    tsk_size_t k, l;\n    tsk_site_t site;\n    tsk_id_t site_id = 0;\n\n    for (j = 0; j < self->num_trees; j++) {\n        for (k = 0; k < self->tree_sites_length[j]; k++) {\n            site = self->tree_sites[j][k];\n            tsk_bug_assert(site.id == 
site_id);\n            site_id++;\n            for (l = 0; l < site.mutations_length; l++) {\n                tsk_bug_assert(site.mutations[l].site == site.id);\n            }\n        }\n    }\n}\n\nvoid\ntsk_treeseq_print_state(const tsk_treeseq_t *self, FILE *out)\n{\n    tsk_size_t j;\n    tsk_size_t k, l, m;\n    tsk_site_t site;\n\n    fprintf(out, \"tree_sequence state\\n\");\n    fprintf(out, \"num_trees = %lld\\n\", (long long) self->num_trees);\n    fprintf(out, \"samples = (%lld)\\n\", (long long) self->num_samples);\n    for (j = 0; j < self->num_samples; j++) {\n        fprintf(out, \"\\t%lld\\n\", (long long) self->samples[j]);\n    }\n    tsk_table_collection_print_state(self->tables, out);\n    fprintf(out, \"tree_sites = \\n\");\n    for (j = 0; j < self->num_trees; j++) {\n        fprintf(out, \"tree %lld\\t%lld sites\\n\", (long long) j,\n            (long long) self->tree_sites_length[j]);\n        for (k = 0; k < self->tree_sites_length[j]; k++) {\n            site = self->tree_sites[j][k];\n            fprintf(out, \"\\tsite %lld pos = %f ancestral state = \", (long long) site.id,\n                site.position);\n            for (l = 0; l < site.ancestral_state_length; l++) {\n                fprintf(out, \"%c\", site.ancestral_state[l]);\n            }\n            fprintf(out, \" %lld mutations\\n\", (long long) site.mutations_length);\n            for (l = 0; l < site.mutations_length; l++) {\n                fprintf(out, \"\\t\\tmutation %lld node = %lld derived_state = \",\n                    (long long) site.mutations[l].id,\n                    (long long) site.mutations[l].node);\n                for (m = 0; m < site.mutations[l].derived_state_length; m++) {\n                    fprintf(out, \"%c\", site.mutations[l].derived_state[m]);\n                }\n                fprintf(out, \"\\n\");\n            }\n        }\n    }\n    tsk_treeseq_check_state(self);\n}\n\nint\ntsk_treeseq_free(tsk_treeseq_t *self)\n{\n    if (self->tables 
!= NULL) {\n        tsk_table_collection_free(self->tables);\n    }\n    tsk_safe_free(self->tables);\n    tsk_safe_free(self->samples);\n    tsk_safe_free(self->sample_index_map);\n    tsk_safe_free(self->breakpoints);\n    tsk_safe_free(self->tree_sites);\n    tsk_safe_free(self->tree_sites_length);\n    tsk_safe_free(self->tree_sites_mem);\n    tsk_safe_free(self->site_mutations_mem);\n    tsk_safe_free(self->site_mutations_length);\n    tsk_safe_free(self->site_mutations);\n    tsk_safe_free(self->individual_nodes_mem);\n    tsk_safe_free(self->individual_nodes_length);\n    tsk_safe_free(self->individual_nodes);\n    return 0;\n}\n\nstatic int\ntsk_treeseq_init_sites(tsk_treeseq_t *self)\n{\n    tsk_id_t j, k;\n    int ret = 0;\n    tsk_size_t offset = 0;\n    const tsk_size_t num_mutations = self->tables->mutations.num_rows;\n    const tsk_size_t num_sites = self->tables->sites.num_rows;\n    const tsk_id_t *restrict mutation_site = self->tables->mutations.site;\n    const double *restrict site_position = self->tables->sites.position;\n    bool discrete_sites = true;\n    tsk_mutation_t *mutation;\n\n    self->site_mutations_mem\n        = tsk_malloc(num_mutations * sizeof(*self->site_mutations_mem));\n    self->site_mutations_length\n        = tsk_malloc(num_sites * sizeof(*self->site_mutations_length));\n    self->site_mutations = tsk_malloc(num_sites * sizeof(*self->site_mutations));\n    self->tree_sites_mem = tsk_malloc(num_sites * sizeof(*self->tree_sites_mem));\n    if (self->site_mutations_mem == NULL || self->site_mutations_length == NULL\n        || self->site_mutations == NULL || self->tree_sites_mem == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    for (k = 0; k < (tsk_id_t) num_mutations; k++) {\n        mutation = self->site_mutations_mem + k;\n        ret = tsk_treeseq_get_mutation(self, k, mutation);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    k = 0;\n    for (j = 0; j < 
(tsk_id_t) num_sites; j++) {\n        discrete_sites = discrete_sites && is_discrete(site_position[j]);\n        self->site_mutations[j] = self->site_mutations_mem + offset;\n        self->site_mutations_length[j] = 0;\n        /* Go through all mutations for this site */\n        while (k < (tsk_id_t) num_mutations && mutation_site[k] == j) {\n            self->site_mutations_length[j]++;\n            offset++;\n            k++;\n        }\n        ret = tsk_treeseq_get_site(self, j, self->tree_sites_mem + j);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    self->discrete_genome = self->discrete_genome && discrete_sites;\nout:\n    return ret;\n}\n\nstatic int\ntsk_treeseq_init_individuals(tsk_treeseq_t *self)\n{\n    int ret = 0;\n    tsk_id_t node;\n    tsk_id_t ind;\n    tsk_size_t offset = 0;\n    tsk_size_t total_node_refs = 0;\n    tsk_size_t *node_count = NULL;\n    tsk_id_t *node_array;\n    const tsk_size_t num_inds = self->tables->individuals.num_rows;\n    const tsk_size_t num_nodes = self->tables->nodes.num_rows;\n    const tsk_id_t *restrict node_individual = self->tables->nodes.individual;\n\n    // First find number of nodes per individual\n    self->individual_nodes_length\n        = tsk_calloc(TSK_MAX(1, num_inds), sizeof(*self->individual_nodes_length));\n    node_count = tsk_calloc(TSK_MAX(1, num_inds), sizeof(*node_count));\n\n    if (self->individual_nodes_length == NULL || node_count == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    for (node = 0; node < (tsk_id_t) num_nodes; node++) {\n        ind = node_individual[node];\n        if (ind != TSK_NULL) {\n            self->individual_nodes_length[ind]++;\n            total_node_refs++;\n        }\n    }\n\n    self->individual_nodes_mem\n        = tsk_malloc(TSK_MAX(1, total_node_refs) * sizeof(tsk_node_t));\n    self->individual_nodes = tsk_malloc(TSK_MAX(1, num_inds) * sizeof(tsk_node_t *));\n    if 
(self->individual_nodes_mem == NULL || self->individual_nodes == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    /* Now fill in the node IDs */\n    for (ind = 0; ind < (tsk_id_t) num_inds; ind++) {\n        self->individual_nodes[ind] = self->individual_nodes_mem + offset;\n        offset += self->individual_nodes_length[ind];\n    }\n    for (node = 0; node < (tsk_id_t) num_nodes; node++) {\n        ind = node_individual[node];\n        if (ind != TSK_NULL) {\n            node_array = self->individual_nodes[ind];\n            tsk_bug_assert(node_array - self->individual_nodes_mem\n                           < (tsk_id_t) (total_node_refs - node_count[ind]));\n            node_array[node_count[ind]] = node;\n            node_count[ind] += 1;\n        }\n    }\nout:\n    tsk_safe_free(node_count);\n    return ret;\n}\n\n/* Initialises memory associated with the trees.\n */\nstatic int\ntsk_treeseq_init_trees(tsk_treeseq_t *self)\n{\n    int ret = TSK_ERR_GENERIC;\n    tsk_size_t j, k, tree_index;\n    tsk_id_t site_id, edge_id, mutation_id;\n    double tree_left, tree_right;\n    const double sequence_length = self->tables->sequence_length;\n    const tsk_id_t num_sites = (tsk_id_t) self->tables->sites.num_rows;\n    const tsk_id_t num_mutations = (tsk_id_t) self->tables->mutations.num_rows;\n    const tsk_size_t num_edges = self->tables->edges.num_rows;\n    const tsk_size_t num_nodes = self->tables->nodes.num_rows;\n    const double *restrict site_position = self->tables->sites.position;\n    const tsk_id_t *restrict mutation_site = self->tables->mutations.site;\n    const tsk_id_t *restrict mutation_parent = self->tables->mutations.parent;\n    const char *restrict sites_ancestral_state = self->tables->sites.ancestral_state;\n    const tsk_size_t *restrict sites_ancestral_state_offset\n        = self->tables->sites.ancestral_state_offset;\n    const char *restrict mutations_derived_state = 
self->tables->mutations.derived_state;\n    const tsk_size_t *restrict mutations_derived_state_offset\n        = self->tables->mutations.derived_state_offset;\n    const tsk_id_t *restrict I = self->tables->indexes.edge_insertion_order;\n    const tsk_id_t *restrict O = self->tables->indexes.edge_removal_order;\n    const double *restrict edge_right = self->tables->edges.right;\n    const double *restrict edge_left = self->tables->edges.left;\n    const tsk_id_t *restrict edge_child = self->tables->edges.child;\n    tsk_size_t num_trees_alloc = self->num_trees + 1;\n    bool discrete_breakpoints = true;\n    tsk_id_t *node_edge_map = tsk_malloc(num_nodes * sizeof(*node_edge_map));\n    tsk_mutation_t *mutation;\n    tsk_id_t parent_id;\n\n    self->tree_sites_length\n        = tsk_malloc(num_trees_alloc * sizeof(*self->tree_sites_length));\n    self->tree_sites = tsk_malloc(num_trees_alloc * sizeof(*self->tree_sites));\n    self->breakpoints = tsk_malloc(num_trees_alloc * sizeof(*self->breakpoints));\n    if (node_edge_map == NULL || self->tree_sites == NULL\n        || self->tree_sites_length == NULL || self->breakpoints == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    tsk_memset(\n        self->tree_sites_length, 0, self->num_trees * sizeof(*self->tree_sites_length));\n    tsk_memset(self->tree_sites, 0, self->num_trees * sizeof(*self->tree_sites));\n    tsk_memset(node_edge_map, TSK_NULL, num_nodes * sizeof(*node_edge_map));\n\n    tree_left = 0;\n    tree_right = sequence_length;\n    tree_index = 0;\n    site_id = 0;\n    mutation_id = 0;\n    j = 0;\n    k = 0;\n    while (j < num_edges || tree_left < sequence_length) {\n        discrete_breakpoints = discrete_breakpoints && is_discrete(tree_left);\n        self->breakpoints[tree_index] = tree_left;\n        while (k < num_edges && edge_right[O[k]] == tree_left) {\n            edge_id = O[k];\n            node_edge_map[edge_child[edge_id]] = TSK_NULL;\n            
k++;\n        }\n        while (j < num_edges && edge_left[I[j]] == tree_left) {\n            edge_id = I[j];\n            node_edge_map[edge_child[edge_id]] = edge_id;\n            j++;\n        }\n        tree_right = sequence_length;\n        if (j < num_edges) {\n            tree_right = TSK_MIN(tree_right, edge_left[I[j]]);\n        }\n        if (k < num_edges) {\n            tree_right = TSK_MIN(tree_right, edge_right[O[k]]);\n        }\n        self->tree_sites[tree_index] = self->tree_sites_mem + site_id;\n        while (site_id < num_sites && site_position[site_id] < tree_right) {\n            self->tree_sites_length[tree_index]++;\n            while (\n                mutation_id < num_mutations && mutation_site[mutation_id] == site_id) {\n                mutation = self->site_mutations_mem + mutation_id;\n                mutation->edge = node_edge_map[mutation->node];\n\n                /* Compute inherited state */\n                if (mutation_parent[mutation_id] == TSK_NULL) {\n                    /* No parent: inherited state is the site's ancestral state */\n                    mutation->inherited_state\n                        = sites_ancestral_state + sites_ancestral_state_offset[site_id];\n                    mutation->inherited_state_length\n                        = sites_ancestral_state_offset[site_id + 1]\n                          - sites_ancestral_state_offset[site_id];\n                } else {\n                    /* Has parent: inherited state is parent's derived state */\n                    parent_id = mutation_parent[mutation_id];\n                    mutation->inherited_state\n                        = mutations_derived_state\n                          + mutations_derived_state_offset[parent_id];\n                    mutation->inherited_state_length\n                        = mutations_derived_state_offset[parent_id + 1]\n                          - mutations_derived_state_offset[parent_id];\n                }\n\n                
mutation_id++;\n            }\n            site_id++;\n        }\n        tree_left = tree_right;\n        tree_index++;\n    }\n    tsk_bug_assert(site_id == num_sites);\n    tsk_bug_assert(tree_index == self->num_trees);\n    self->breakpoints[tree_index] = tree_right;\n    discrete_breakpoints = discrete_breakpoints && is_discrete(tree_right);\n    self->discrete_genome = self->discrete_genome && discrete_breakpoints;\n    ret = 0;\nout:\n    tsk_safe_free(node_edge_map);\n    return ret;\n}\n\nstatic void\ntsk_treeseq_init_migrations(tsk_treeseq_t *self)\n{\n    tsk_size_t j;\n    tsk_size_t num_migrations = self->tables->migrations.num_rows;\n    const double *restrict left = self->tables->migrations.left;\n    const double *restrict right = self->tables->migrations.right;\n    const double *restrict time = self->tables->migrations.time;\n    bool discrete_breakpoints = true;\n    bool discrete_times = true;\n\n    for (j = 0; j < num_migrations; j++) {\n        discrete_breakpoints\n            = discrete_breakpoints && is_discrete(left[j]) && is_discrete(right[j]);\n        discrete_times\n            = discrete_times && (is_discrete(time[j]) || tsk_is_unknown_time(time[j]));\n    }\n    self->discrete_genome = self->discrete_genome && discrete_breakpoints;\n    self->discrete_time = self->discrete_time && discrete_times;\n}\n\nstatic void\ntsk_treeseq_init_mutations(tsk_treeseq_t *self)\n{\n    tsk_size_t j;\n    tsk_size_t num_mutations = self->tables->mutations.num_rows;\n    const double *restrict time = self->tables->mutations.time;\n    bool discrete_times = true;\n\n    for (j = 0; j < num_mutations; j++) {\n        discrete_times\n            = discrete_times && (is_discrete(time[j]) || tsk_is_unknown_time(time[j]));\n    }\n    self->discrete_time = self->discrete_time && discrete_times;\n\n    for (j = 0; j < num_mutations; j++) {\n        if (!tsk_is_unknown_time(time[j])) {\n            self->min_time = TSK_MIN(self->min_time, time[j]);\n         
   self->max_time = TSK_MAX(self->max_time, time[j]);\n        }\n    }\n}\n\nstatic int\ntsk_treeseq_init_nodes(tsk_treeseq_t *self)\n{\n    tsk_size_t j, k;\n    tsk_size_t num_nodes = self->tables->nodes.num_rows;\n    const tsk_flags_t *restrict node_flags = self->tables->nodes.flags;\n    const double *restrict time = self->tables->nodes.time;\n    int ret = 0;\n    bool discrete_times = true;\n\n    /* Determine the sample size */\n    self->num_samples = 0;\n    for (j = 0; j < num_nodes; j++) {\n        if (!!(node_flags[j] & TSK_NODE_IS_SAMPLE)) {\n            self->num_samples++;\n        }\n    }\n    /* TODO raise an error if < 2 samples?? */\n    self->samples = tsk_malloc(self->num_samples * sizeof(tsk_id_t));\n    self->sample_index_map = tsk_malloc(num_nodes * sizeof(tsk_id_t));\n    if (self->samples == NULL || self->sample_index_map == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    k = 0;\n    for (j = 0; j < num_nodes; j++) {\n        self->sample_index_map[j] = -1;\n        if (!!(node_flags[j] & TSK_NODE_IS_SAMPLE)) {\n            self->samples[k] = (tsk_id_t) j;\n            self->sample_index_map[j] = (tsk_id_t) k;\n            k++;\n        }\n    }\n    tsk_bug_assert(k == self->num_samples);\n\n    for (j = 0; j < num_nodes; j++) {\n        discrete_times\n            = discrete_times && (is_discrete(time[j]) || tsk_is_unknown_time(time[j]));\n    }\n    self->discrete_time = self->discrete_time && discrete_times;\n\n    for (j = 0; j < num_nodes; j++) {\n        if (!tsk_is_unknown_time(time[j])) {\n            self->min_time = TSK_MIN(self->min_time, time[j]);\n            self->max_time = TSK_MAX(self->max_time, time[j]);\n        }\n    }\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_treeseq_init(\n    tsk_treeseq_t *self, tsk_table_collection_t *tables, tsk_flags_t options)\n{\n    int ret = 0;\n    tsk_id_t num_trees;\n\n    tsk_memset(self, 0, sizeof(*self));\n    if (options & 
TSK_TAKE_OWNERSHIP) {\n        self->tables = tables;\n        if (tables->edges.options & TSK_TABLE_NO_METADATA) {\n            ret = tsk_trace_error(TSK_ERR_CANT_TAKE_OWNERSHIP_NO_EDGE_METADATA);\n            goto out;\n        }\n    } else {\n        self->tables = tsk_malloc(sizeof(*self->tables));\n        if (self->tables == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n\n        /* Note that this copy reinstates metadata for a table collection with\n         * TSK_TC_NO_EDGE_METADATA. Otherwise a table without metadata would\n         * crash tsk_diff_iter_next. */\n        ret = tsk_table_collection_copy(tables, self->tables, TSK_COPY_FILE_UUID);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    if (options & TSK_TS_INIT_BUILD_INDEXES) {\n        ret = tsk_table_collection_build_index(self->tables, 0);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n\n    if (options & TSK_TS_INIT_COMPUTE_MUTATION_PARENTS) {\n        /* As tsk_table_collection_compute_mutation_parents performs an\n           integrity check, and we don't wish to do that twice we perform\n           our own check here */\n        num_trees = tsk_table_collection_check_integrity(self->tables, TSK_CHECK_TREES);\n        if (num_trees < 0) {\n            ret = (int) num_trees;\n            goto out;\n        }\n\n        ret = tsk_table_collection_compute_mutation_parents(\n            self->tables, TSK_NO_CHECK_INTEGRITY);\n        if (ret != 0) {\n            goto out;\n        }\n    } else {\n        num_trees = tsk_table_collection_check_integrity(\n            self->tables, TSK_CHECK_TREES | TSK_CHECK_MUTATION_PARENTS);\n        if (num_trees < 0) {\n            ret = (int) num_trees;\n            goto out;\n        }\n    }\n    self->num_trees = (tsk_size_t) num_trees;\n    self->discrete_genome = true;\n    self->discrete_time = true;\n    self->min_time = INFINITY;\n    self->max_time = 
-INFINITY;\n    ret = tsk_treeseq_init_nodes(self);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_treeseq_init_sites(self);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_treeseq_init_individuals(self);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_treeseq_init_trees(self);\n    if (ret != 0) {\n        goto out;\n    }\n    tsk_treeseq_init_migrations(self);\n    tsk_treeseq_init_mutations(self);\n\n    if (tsk_treeseq_get_time_units_length(self) == strlen(TSK_TIME_UNITS_UNCALIBRATED)\n        && !strncmp(tsk_treeseq_get_time_units(self), TSK_TIME_UNITS_UNCALIBRATED,\n            strlen(TSK_TIME_UNITS_UNCALIBRATED))) {\n        self->time_uncalibrated = true;\n    }\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_treeseq_copy_tables(\n    const tsk_treeseq_t *self, tsk_table_collection_t *tables, tsk_flags_t options)\n{\n    return tsk_table_collection_copy(self->tables, tables, options);\n}\n\nint TSK_WARN_UNUSED\ntsk_treeseq_load(tsk_treeseq_t *self, const char *filename, tsk_flags_t options)\n{\n    int ret = 0;\n    tsk_table_collection_t *tables = malloc(sizeof(*tables));\n\n    /* Need to make sure that we're zero'd out in case of error */\n    tsk_memset(self, 0, sizeof(*self));\n\n    if (tables == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    ret = tsk_table_collection_load(tables, filename, options);\n    if (ret != 0) {\n        tsk_table_collection_free(tables);\n        tsk_safe_free(tables);\n        goto out;\n    }\n    /* TSK_TAKE_OWNERSHIP takes immediate ownership of the tables, regardless\n     * of error conditions. 
*/\n    ret = tsk_treeseq_init(self, tables, TSK_TAKE_OWNERSHIP);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_treeseq_loadf(tsk_treeseq_t *self, FILE *file, tsk_flags_t options)\n{\n    int ret = 0;\n    tsk_table_collection_t *tables = malloc(sizeof(*tables));\n\n    /* Need to make sure that we're zero'd out in case of error */\n    tsk_memset(self, 0, sizeof(*self));\n\n    if (tables == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    ret = tsk_table_collection_loadf(tables, file, options);\n    if (ret != 0) {\n        tsk_table_collection_free(tables);\n        tsk_safe_free(tables);\n        goto out;\n    }\n    /* TSK_TAKE_OWNERSHIP takes immediate ownership of the tables, regardless\n     * of error conditions. */\n    ret = tsk_treeseq_init(self, tables, TSK_TAKE_OWNERSHIP);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_treeseq_dump(const tsk_treeseq_t *self, const char *filename, tsk_flags_t options)\n{\n    return tsk_table_collection_dump(self->tables, filename, options);\n}\n\nint TSK_WARN_UNUSED\ntsk_treeseq_dumpf(const tsk_treeseq_t *self, FILE *file, tsk_flags_t options)\n{\n    return tsk_table_collection_dumpf(self->tables, file, options);\n}\n\n/* Simple attribute getters */\n\nconst char *\ntsk_treeseq_get_metadata(const tsk_treeseq_t *self)\n{\n    return self->tables->metadata;\n}\n\ntsk_size_t\ntsk_treeseq_get_metadata_length(const tsk_treeseq_t *self)\n{\n    return self->tables->metadata_length;\n}\n\nconst char *\ntsk_treeseq_get_metadata_schema(const tsk_treeseq_t *self)\n{\n    return self->tables->metadata_schema;\n}\n\ntsk_size_t\ntsk_treeseq_get_metadata_schema_length(const tsk_treeseq_t *self)\n{\n    return self->tables->metadata_schema_length;\n}\n\nconst char *\ntsk_treeseq_get_time_units(const tsk_treeseq_t *self)\n{\n    return 
self->tables->time_units;\n}\n\ntsk_size_t\ntsk_treeseq_get_time_units_length(const tsk_treeseq_t *self)\n{\n    return self->tables->time_units_length;\n}\n\ndouble\ntsk_treeseq_get_sequence_length(const tsk_treeseq_t *self)\n{\n    return self->tables->sequence_length;\n}\n\nconst char *\ntsk_treeseq_get_file_uuid(const tsk_treeseq_t *self)\n{\n    return self->tables->file_uuid;\n}\n\ntsk_size_t\ntsk_treeseq_get_num_samples(const tsk_treeseq_t *self)\n{\n    return self->num_samples;\n}\n\ntsk_size_t\ntsk_treeseq_get_num_nodes(const tsk_treeseq_t *self)\n{\n    return self->tables->nodes.num_rows;\n}\n\ntsk_size_t\ntsk_treeseq_get_num_edges(const tsk_treeseq_t *self)\n{\n    return self->tables->edges.num_rows;\n}\n\ntsk_size_t\ntsk_treeseq_get_num_migrations(const tsk_treeseq_t *self)\n{\n    return self->tables->migrations.num_rows;\n}\n\ntsk_size_t\ntsk_treeseq_get_num_sites(const tsk_treeseq_t *self)\n{\n    return self->tables->sites.num_rows;\n}\n\ntsk_size_t\ntsk_treeseq_get_num_mutations(const tsk_treeseq_t *self)\n{\n    return self->tables->mutations.num_rows;\n}\n\ntsk_size_t\ntsk_treeseq_get_num_populations(const tsk_treeseq_t *self)\n{\n    return self->tables->populations.num_rows;\n}\n\ntsk_size_t\ntsk_treeseq_get_num_individuals(const tsk_treeseq_t *self)\n{\n    return self->tables->individuals.num_rows;\n}\n\ntsk_size_t\ntsk_treeseq_get_num_provenances(const tsk_treeseq_t *self)\n{\n    return self->tables->provenances.num_rows;\n}\n\ntsk_size_t\ntsk_treeseq_get_num_trees(const tsk_treeseq_t *self)\n{\n    return self->num_trees;\n}\n\nconst double *\ntsk_treeseq_get_breakpoints(const tsk_treeseq_t *self)\n{\n    return self->breakpoints;\n}\n\nconst tsk_id_t *\ntsk_treeseq_get_samples(const tsk_treeseq_t *self)\n{\n    return self->samples;\n}\n\nconst tsk_id_t *\ntsk_treeseq_get_sample_index_map(const tsk_treeseq_t *self)\n{\n    return self->sample_index_map;\n}\n\nbool\ntsk_treeseq_is_sample(const tsk_treeseq_t *self, tsk_id_t u)\n{\n    
bool ret = false;\n\n    if (u >= 0 && u < (tsk_id_t) self->tables->nodes.num_rows) {\n        ret = !!(self->tables->nodes.flags[u] & TSK_NODE_IS_SAMPLE);\n    }\n    return ret;\n}\n\nbool\ntsk_treeseq_get_discrete_genome(const tsk_treeseq_t *self)\n{\n    return self->discrete_genome;\n}\n\nbool\ntsk_treeseq_get_discrete_time(const tsk_treeseq_t *self)\n{\n    return self->discrete_time;\n}\n\ndouble\ntsk_treeseq_get_min_time(const tsk_treeseq_t *self)\n{\n    return self->min_time;\n}\n\ndouble\ntsk_treeseq_get_max_time(const tsk_treeseq_t *self)\n{\n    return self->max_time;\n}\n\nbool\ntsk_treeseq_has_reference_sequence(const tsk_treeseq_t *self)\n{\n    return tsk_table_collection_has_reference_sequence(self->tables);\n}\n\nint\ntsk_treeseq_get_individuals_population(const tsk_treeseq_t *self, tsk_id_t *output)\n{\n    int ret = 0;\n    tsk_size_t i, j;\n    tsk_individual_t ind;\n    tsk_id_t ind_pop;\n    const tsk_id_t *node_population = self->tables->nodes.population;\n    const tsk_size_t num_individuals = self->tables->individuals.num_rows;\n\n    tsk_memset(output, TSK_NULL, num_individuals * sizeof(*output));\n\n    for (i = 0; i < num_individuals; i++) {\n        ret = tsk_treeseq_get_individual(self, (tsk_id_t) i, &ind);\n        tsk_bug_assert(ret == 0);\n        if (ind.nodes_length > 0) {\n            ind_pop = -2;\n            for (j = 0; j < ind.nodes_length; j++) {\n                if (ind_pop == -2) {\n                    ind_pop = node_population[ind.nodes[j]];\n                } else if (ind_pop != node_population[ind.nodes[j]]) {\n                    ret = tsk_trace_error(TSK_ERR_INDIVIDUAL_POPULATION_MISMATCH);\n                    goto out;\n                }\n            }\n            output[ind.id] = ind_pop;\n        }\n    }\nout:\n    return ret;\n}\n\nint\ntsk_treeseq_get_individuals_time(const tsk_treeseq_t *self, double *output)\n{\n    int ret = 0;\n    tsk_size_t i, j;\n    tsk_individual_t ind;\n    double ind_time;\n    
const double *node_time = self->tables->nodes.time;\n    const tsk_size_t num_individuals = self->tables->individuals.num_rows;\n\n    for (i = 0; i < num_individuals; i++) {\n        ret = tsk_treeseq_get_individual(self, (tsk_id_t) i, &ind);\n        tsk_bug_assert(ret == 0);\n        /* the default is UNKNOWN_TIME, but nodes cannot have\n         * UNKNOWN_TIME so this is safe. */\n        ind_time = TSK_UNKNOWN_TIME;\n        for (j = 0; j < ind.nodes_length; j++) {\n            if (j == 0) {\n                ind_time = node_time[ind.nodes[j]];\n            } else if (ind_time != node_time[ind.nodes[j]]) {\n                ret = tsk_trace_error(TSK_ERR_INDIVIDUAL_TIME_MISMATCH);\n                goto out;\n            }\n        }\n        output[ind.id] = ind_time;\n    }\nout:\n    return ret;\n}\n\n/* Stats functions */\n\n#define GET_2D_ROW(array, row_len, row) (array + (((size_t) (row_len)) * (size_t) (row)))\n\nstatic inline double *\nGET_3D_ROW(double *base, tsk_size_t num_nodes, tsk_size_t output_dim,\n    tsk_size_t window_index, tsk_id_t u)\n{\n    tsk_size_t offset\n        = window_index * num_nodes * output_dim + ((tsk_size_t) u) * output_dim;\n    return base + offset;\n}\n\n/* Increments the n-dimensional array with the specified shape by the specified value at\n * the specified coordinate. */\nstatic inline void\nincrement_nd_array_value(double *array, tsk_size_t n, const tsk_size_t *shape,\n    const tsk_size_t *coordinate, double value)\n{\n    tsk_size_t offset = 0;\n    tsk_size_t product = 1;\n    int k;\n\n    for (k = (int) n - 1; k >= 0; k--) {\n        tsk_bug_assert(coordinate[k] < shape[k]);\n        offset += coordinate[k] * product;\n        product *= shape[k];\n    }\n    array[offset] += value;\n}\n\n/* TODO flatten the reference sets input here and follow the same pattern used\n * in diversity, divergence, etc. 
*/\nint TSK_WARN_UNUSED\ntsk_treeseq_genealogical_nearest_neighbours(const tsk_treeseq_t *self,\n    const tsk_id_t *focal, tsk_size_t num_focal, const tsk_id_t *const *reference_sets,\n    const tsk_size_t *reference_set_size, tsk_size_t num_reference_sets,\n    tsk_flags_t TSK_UNUSED(options), double *ret_array)\n{\n    int ret = 0;\n    tsk_id_t u, v, p;\n    tsk_size_t j;\n    /* TODO It's probably not worth bothering with the int16_t here. */\n    int16_t k, focal_reference_set;\n    /* We use the K'th element of the array for the total. */\n    const int16_t K = (int16_t) (num_reference_sets + 1);\n    tsk_size_t num_nodes = self->tables->nodes.num_rows;\n    const tsk_id_t num_edges = (tsk_id_t) self->tables->edges.num_rows;\n    const tsk_id_t *restrict I = self->tables->indexes.edge_insertion_order;\n    const tsk_id_t *restrict O = self->tables->indexes.edge_removal_order;\n    const double *restrict edge_left = self->tables->edges.left;\n    const double *restrict edge_right = self->tables->edges.right;\n    const tsk_id_t *restrict edge_parent = self->tables->edges.parent;\n    const tsk_id_t *restrict edge_child = self->tables->edges.child;\n    const double sequence_length = self->tables->sequence_length;\n    tsk_id_t tj, tk, h;\n    double left, right, *A_row, scale, tree_length;\n    tsk_id_t *restrict parent = tsk_malloc(num_nodes * sizeof(*parent));\n    double *restrict length = tsk_calloc(num_focal, sizeof(*length));\n    uint32_t *restrict ref_count\n        = tsk_calloc(((tsk_size_t) K) * num_nodes, sizeof(*ref_count));\n    int16_t *restrict reference_set_map\n        = tsk_malloc(num_nodes * sizeof(*reference_set_map));\n    uint32_t *restrict row = NULL;\n    uint32_t *restrict child_row = NULL;\n    uint32_t total, delta;\n\n    /* We support a max of INT16_MAX - 1 reference sets */\n    if (num_reference_sets == 0 || num_reference_sets > (INT16_MAX - 1)) {\n        /* TODO: more specific error */\n        ret = 
tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    if (parent == NULL || ref_count == NULL || reference_set_map == NULL\n        || length == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    tsk_memset(parent, 0xff, num_nodes * sizeof(*parent));\n    tsk_memset(reference_set_map, 0xff, num_nodes * sizeof(*reference_set_map));\n    tsk_memset(ret_array, 0, num_focal * num_reference_sets * sizeof(*ret_array));\n\n    total = 0; /* keep the compiler happy */\n\n    /* Set the initial conditions and check the input. */\n    for (k = 0; k < (int16_t) num_reference_sets; k++) {\n        for (j = 0; j < reference_set_size[k]; j++) {\n            u = reference_sets[k][j];\n            if (u < 0 || u >= (tsk_id_t) num_nodes) {\n                ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);\n                goto out;\n            }\n            if (reference_set_map[u] != TSK_NULL) {\n                /* FIXME Technically inaccurate here: duplicate focal not sample */\n                ret = tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE);\n                goto out;\n            }\n            reference_set_map[u] = k;\n            row = GET_2D_ROW(ref_count, K, u);\n            row[k] = 1;\n            /* Also set the count for the total among all sets */\n            row[K - 1] = 1;\n        }\n    }\n    for (j = 0; j < num_focal; j++) {\n        u = focal[j];\n        if (u < 0 || u >= (tsk_id_t) num_nodes) {\n            ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);\n            goto out;\n        }\n    }\n\n    /* Iterate over the trees */\n    tj = 0;\n    tk = 0;\n    left = 0;\n    while (tj < num_edges || left < sequence_length) {\n        while (tk < num_edges && edge_right[O[tk]] == left) {\n            h = O[tk];\n            tk++;\n            u = edge_child[h];\n            v = edge_parent[h];\n            parent[u] = TSK_NULL;\n            child_row = GET_2D_ROW(ref_count, K, u);\n            
while (v != TSK_NULL) {\n                row = GET_2D_ROW(ref_count, K, v);\n                for (k = 0; k < K; k++) {\n                    row[k] -= child_row[k];\n                }\n                v = parent[v];\n            }\n        }\n        while (tj < num_edges && edge_left[I[tj]] == left) {\n            h = I[tj];\n            tj++;\n            u = edge_child[h];\n            v = edge_parent[h];\n            parent[u] = v;\n            child_row = GET_2D_ROW(ref_count, K, u);\n            while (v != TSK_NULL) {\n                row = GET_2D_ROW(ref_count, K, v);\n                for (k = 0; k < K; k++) {\n                    row[k] += child_row[k];\n                }\n                v = parent[v];\n            }\n        }\n        right = sequence_length;\n        if (tj < num_edges) {\n            right = TSK_MIN(right, edge_left[I[tj]]);\n        }\n        if (tk < num_edges) {\n            right = TSK_MIN(right, edge_right[O[tk]]);\n        }\n\n        tree_length = right - left;\n        /* Process this tree */\n        for (j = 0; j < num_focal; j++) {\n            u = focal[j];\n            focal_reference_set = reference_set_map[u];\n            delta = focal_reference_set != -1;\n            p = u;\n            while (p != TSK_NULL) {\n                row = GET_2D_ROW(ref_count, K, p);\n                total = row[K - 1];\n                if (total > delta) {\n                    break;\n                }\n                p = parent[p];\n            }\n            if (p != TSK_NULL) {\n                length[j] += tree_length;\n                scale = tree_length / (total - delta);\n                A_row = GET_2D_ROW(ret_array, num_reference_sets, j);\n                for (k = 0; k < K - 1; k++) {\n                    A_row[k] += row[k] * scale;\n                }\n                if (focal_reference_set != -1) {\n                    /* Remove the contribution for the reference set u belongs to and\n                     * insert the correct 
value. The long-hand version is\n                     * A_row[k] = A_row[k] - row[k] * scale + (row[k] - 1) * scale;\n                     * which cancels to give: */\n                    A_row[focal_reference_set] -= scale;\n                }\n            }\n        }\n\n        /* Move on to the next tree */\n        left = right;\n    }\n\n    /* Divide by the accumulated length for each node to normalise */\n    for (j = 0; j < num_focal; j++) {\n        A_row = GET_2D_ROW(ret_array, num_reference_sets, j);\n        if (length[j] > 0) {\n            for (k = 0; k < K - 1; k++) {\n                A_row[k] /= length[j];\n            }\n        }\n    }\nout:\n    /* Can't use tsk_safe_free here because of restrict */\n    if (parent != NULL) {\n        free(parent);\n    }\n    if (ref_count != NULL) {\n        free(ref_count);\n    }\n    if (reference_set_map != NULL) {\n        free(reference_set_map);\n    }\n    if (length != NULL) {\n        free(length);\n    }\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_treeseq_mean_descendants(const tsk_treeseq_t *self,\n    const tsk_id_t *const *reference_sets, const tsk_size_t *reference_set_size,\n    tsk_size_t num_reference_sets, tsk_flags_t TSK_UNUSED(options), double *ret_array)\n{\n    int ret = 0;\n    tsk_id_t u, v;\n    tsk_size_t j;\n    int32_t k;\n    /* We use the K'th element of the array for the total. 
*/\n    const int32_t K = (int32_t) (num_reference_sets + 1);\n    tsk_size_t num_nodes = self->tables->nodes.num_rows;\n    const tsk_id_t num_edges = (tsk_id_t) self->tables->edges.num_rows;\n    const tsk_id_t *restrict I = self->tables->indexes.edge_insertion_order;\n    const tsk_id_t *restrict O = self->tables->indexes.edge_removal_order;\n    const double *restrict edge_left = self->tables->edges.left;\n    const double *restrict edge_right = self->tables->edges.right;\n    const tsk_id_t *restrict edge_parent = self->tables->edges.parent;\n    const tsk_id_t *restrict edge_child = self->tables->edges.child;\n    const double sequence_length = self->tables->sequence_length;\n    tsk_id_t tj, tk, h;\n    double left, right, length, *restrict C_row;\n    tsk_id_t *restrict parent = tsk_malloc(num_nodes * sizeof(*parent));\n    uint32_t *restrict ref_count\n        = tsk_calloc(num_nodes * ((size_t) K), sizeof(*ref_count));\n    double *restrict last_update = tsk_calloc(num_nodes, sizeof(*last_update));\n    double *restrict total_length = tsk_calloc(num_nodes, sizeof(*total_length));\n    uint32_t *restrict row, *restrict child_row;\n\n    if (num_reference_sets == 0 || num_reference_sets > (INT32_MAX - 1)) {\n        /* TODO: more specific error */\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    if (parent == NULL || ref_count == NULL || last_update == NULL\n        || total_length == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    /* TODO add check for duplicate values in the reference sets */\n\n    tsk_memset(parent, 0xff, num_nodes * sizeof(*parent));\n    tsk_memset(ret_array, 0, num_nodes * num_reference_sets * sizeof(*ret_array));\n\n    /* Set the initial conditions and check the input. 
*/\n    for (k = 0; k < (int32_t) num_reference_sets; k++) {\n        for (j = 0; j < reference_set_size[k]; j++) {\n            u = reference_sets[k][j];\n            if (u < 0 || u >= (tsk_id_t) num_nodes) {\n                ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);\n                goto out;\n            }\n            row = GET_2D_ROW(ref_count, K, u);\n            row[k] = 1;\n            /* Also set the count for the total among all sets */\n            row[K - 1] = 1;\n        }\n    }\n\n    /* Iterate over the trees */\n    tj = 0;\n    tk = 0;\n    left = 0;\n    while (tj < num_edges || left < sequence_length) {\n        while (tk < num_edges && edge_right[O[tk]] == left) {\n            h = O[tk];\n            tk++;\n            u = edge_child[h];\n            v = edge_parent[h];\n            parent[u] = TSK_NULL;\n            child_row = GET_2D_ROW(ref_count, K, u);\n            while (v != TSK_NULL) {\n                row = GET_2D_ROW(ref_count, K, v);\n                if (last_update[v] != left) {\n                    if (row[K - 1] > 0) {\n                        length = left - last_update[v];\n                        C_row = GET_2D_ROW(ret_array, num_reference_sets, v);\n                        for (k = 0; k < (int32_t) num_reference_sets; k++) {\n                            C_row[k] += length * row[k];\n                        }\n                        total_length[v] += length;\n                    }\n                    last_update[v] = left;\n                }\n                for (k = 0; k < K; k++) {\n                    row[k] -= child_row[k];\n                }\n                v = parent[v];\n            }\n        }\n        while (tj < num_edges && edge_left[I[tj]] == left) {\n            h = I[tj];\n            tj++;\n            u = edge_child[h];\n            v = edge_parent[h];\n            parent[u] = v;\n            child_row = GET_2D_ROW(ref_count, K, u);\n            while (v != TSK_NULL) {\n                row = 
GET_2D_ROW(ref_count, K, v);\n                if (last_update[v] != left) {\n                    if (row[K - 1] > 0) {\n                        length = left - last_update[v];\n                        C_row = GET_2D_ROW(ret_array, num_reference_sets, v);\n                        for (k = 0; k < (int32_t) num_reference_sets; k++) {\n                            C_row[k] += length * row[k];\n                        }\n                        total_length[v] += length;\n                    }\n                    last_update[v] = left;\n                }\n                for (k = 0; k < K; k++) {\n                    row[k] += child_row[k];\n                }\n                v = parent[v];\n            }\n        }\n        right = sequence_length;\n        if (tj < num_edges) {\n            right = TSK_MIN(right, edge_left[I[tj]]);\n        }\n        if (tk < num_edges) {\n            right = TSK_MIN(right, edge_right[O[tk]]);\n        }\n        left = right;\n    }\n\n    /* Add the stats for the last tree and divide by the total length that\n     * each node was an ancestor to > 0 of the reference nodes. 
*/\n    for (v = 0; v < (tsk_id_t) num_nodes; v++) {\n        row = GET_2D_ROW(ref_count, K, v);\n        C_row = GET_2D_ROW(ret_array, num_reference_sets, v);\n        if (row[K - 1] > 0) {\n            length = sequence_length - last_update[v];\n            total_length[v] += length;\n            for (k = 0; k < (int32_t) num_reference_sets; k++) {\n                C_row[k] += length * row[k];\n            }\n        }\n        if (total_length[v] > 0) {\n            length = total_length[v];\n            for (k = 0; k < (int32_t) num_reference_sets; k++) {\n                C_row[k] /= length;\n            }\n        }\n    }\n\nout:\n    /* Can't use tsk_safe_free here because of restrict */\n    if (parent != NULL) {\n        free(parent);\n    }\n    if (ref_count != NULL) {\n        free(ref_count);\n    }\n    if (last_update != NULL) {\n        free(last_update);\n    }\n    if (total_length != NULL) {\n        free(total_length);\n    }\n    return ret;\n}\n\n/***********************************\n * General stats framework\n ***********************************/\n\n#define TSK_REQUIRE_FULL_SPAN 1\n\nstatic int\ntsk_treeseq_check_windows(const tsk_treeseq_t *self, tsk_size_t num_windows,\n    const double *windows, tsk_flags_t options)\n{\n    int ret = 0;\n    tsk_size_t j;\n\n    if (num_windows < 1) {\n        ret = tsk_trace_error(TSK_ERR_BAD_NUM_WINDOWS);\n        goto out;\n    }\n    if (options & TSK_REQUIRE_FULL_SPAN) {\n        /* TODO the general stat code currently requires that we include the\n         * entire tree sequence span. 
This should be relaxed, so hopefully\n         * this branch (and the option) can be removed at some point */\n        if (windows[0] != 0) {\n            ret = tsk_trace_error(TSK_ERR_BAD_WINDOWS);\n            goto out;\n        }\n        if (windows[num_windows] != self->tables->sequence_length) {\n            ret = tsk_trace_error(TSK_ERR_BAD_WINDOWS);\n            goto out;\n        }\n    } else {\n        if (windows[0] < 0) {\n            ret = tsk_trace_error(TSK_ERR_BAD_WINDOWS);\n            goto out;\n        }\n        if (windows[num_windows] > self->tables->sequence_length) {\n            ret = tsk_trace_error(TSK_ERR_BAD_WINDOWS);\n            goto out;\n        }\n    }\n    for (j = 0; j < num_windows; j++) {\n        if (windows[j] >= windows[j + 1]) {\n            ret = tsk_trace_error(TSK_ERR_BAD_WINDOWS);\n            goto out;\n        }\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic int\ntsk_treeseq_check_time_windows(tsk_size_t num_windows, const double *windows)\n{\n    // This does not check the last window ends at infinity,\n    // which is required for some time window functions.\n    int ret = TSK_ERR_BAD_TIME_WINDOWS;\n    tsk_size_t j;\n\n    if (num_windows < 1) {\n        ret = TSK_ERR_BAD_TIME_WINDOWS_DIM;\n        goto out;\n    }\n\n    if (windows[0] != 0.0) {\n        goto out;\n    }\n\n    for (j = 0; j < num_windows; j++) {\n        if (windows[j] >= windows[j + 1]) {\n            goto out;\n        }\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\n/* TODO make these functions more consistent in how the arguments are ordered */\n\nstatic inline void\nupdate_state(double *X, tsk_size_t state_dim, tsk_id_t dest, tsk_id_t source, int sign)\n{\n    tsk_size_t k;\n    double *X_dest = GET_2D_ROW(X, state_dim, dest);\n    double *X_source = GET_2D_ROW(X, state_dim, source);\n\n    for (k = 0; k < state_dim; k++) {\n        X_dest[k] += sign * X_source[k];\n    }\n}\n\nstatic inline int\nupdate_node_summary(tsk_id_t 
u, tsk_size_t result_dim, double *node_summary, double *X,\n    tsk_size_t state_dim, general_stat_func_t *f, void *f_params)\n{\n    double *X_u = GET_2D_ROW(X, state_dim, u);\n    double *summary_u = GET_2D_ROW(node_summary, result_dim, u);\n\n    return f(state_dim, X_u, result_dim, summary_u, f_params);\n}\n\nstatic inline void\nupdate_running_sum(tsk_id_t u, double sign, const double *restrict branch_length,\n    const double *summary, tsk_size_t result_dim, double *running_sum)\n{\n    const double *summary_u = GET_2D_ROW(summary, result_dim, u);\n    const double x = sign * branch_length[u];\n    tsk_size_t m;\n\n    for (m = 0; m < result_dim; m++) {\n        running_sum[m] += x * summary_u[m];\n    }\n}\n\nstatic int\ntsk_treeseq_branch_general_stat(const tsk_treeseq_t *self, tsk_size_t state_dim,\n    const double *sample_weights, tsk_size_t result_dim, general_stat_func_t *f,\n    void *f_params, tsk_size_t num_windows, const double *windows, tsk_flags_t options,\n    double *result)\n{\n    int ret = 0;\n    tsk_id_t u, v;\n    tsk_size_t j, k, window_index;\n    tsk_size_t num_nodes = self->tables->nodes.num_rows;\n    const tsk_id_t num_edges = (tsk_id_t) self->tables->edges.num_rows;\n    const tsk_id_t *restrict I = self->tables->indexes.edge_insertion_order;\n    const tsk_id_t *restrict O = self->tables->indexes.edge_removal_order;\n    const double *restrict edge_left = self->tables->edges.left;\n    const double *restrict edge_right = self->tables->edges.right;\n    const tsk_id_t *restrict edge_parent = self->tables->edges.parent;\n    const tsk_id_t *restrict edge_child = self->tables->edges.child;\n    const double *restrict time = self->tables->nodes.time;\n    const double sequence_length = self->tables->sequence_length;\n    tsk_id_t *restrict parent = tsk_malloc(num_nodes * sizeof(*parent));\n    double *restrict branch_length = tsk_calloc(num_nodes, sizeof(*branch_length));\n    tsk_id_t tj, tk, h;\n    double t_left, t_right, w_left, 
w_right, left, right, scale;\n    const double *weight_u;\n    double *state_u, *result_row, *summary_u;\n    double *state = tsk_calloc(num_nodes * state_dim, sizeof(*state));\n    double *summary = tsk_calloc(num_nodes * result_dim, sizeof(*summary));\n    double *running_sum = tsk_calloc(result_dim, sizeof(*running_sum));\n    double *zero_state = tsk_calloc(state_dim, sizeof(*zero_state));\n    double *zero_summary = tsk_calloc(result_dim, sizeof(*zero_state));\n\n    if (self->time_uncalibrated && !(options & TSK_STAT_ALLOW_TIME_UNCALIBRATED)) {\n        ret = tsk_trace_error(TSK_ERR_TIME_UNCALIBRATED);\n        goto out;\n    }\n\n    if (parent == NULL || branch_length == NULL || state == NULL || running_sum == NULL\n        || summary == NULL || zero_state == NULL || zero_summary == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    tsk_memset(parent, 0xff, num_nodes * sizeof(*parent));\n\n    /* If f is not strict, we may need to set conditions for non-sample nodes as well. 
*/\n    ret = f(state_dim, zero_state, result_dim, zero_summary, f_params);\n    if (ret != 0) {\n        goto out;\n    }\n    for (j = 0; j < num_nodes; j++) { // we could skip this if zero_summary is zero\n        summary_u = GET_2D_ROW(summary, result_dim, j);\n        tsk_memcpy(summary_u, zero_summary, result_dim * sizeof(*zero_summary));\n    }\n    /* Set the initial conditions */\n    for (j = 0; j < self->num_samples; j++) {\n        u = self->samples[j];\n        state_u = GET_2D_ROW(state, state_dim, u);\n        weight_u = GET_2D_ROW(sample_weights, state_dim, j);\n        tsk_memcpy(state_u, weight_u, state_dim * sizeof(*state_u));\n        summary_u = GET_2D_ROW(summary, result_dim, u);\n        ret = f(state_dim, state_u, result_dim, summary_u, f_params);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n\n    tsk_memset(result, 0, num_windows * result_dim * sizeof(*result));\n\n    /* Iterate over the trees */\n    tj = 0;\n    tk = 0;\n    t_left = 0;\n    window_index = 0;\n    while (tj < num_edges || t_left < sequence_length) {\n        while (tk < num_edges && edge_right[O[tk]] == t_left) {\n            h = O[tk];\n            tk++;\n\n            u = edge_child[h];\n            update_running_sum(u, -1, branch_length, summary, result_dim, running_sum);\n            parent[u] = TSK_NULL;\n            branch_length[u] = 0;\n\n            u = edge_parent[h];\n            while (u != TSK_NULL) {\n                update_running_sum(\n                    u, -1, branch_length, summary, result_dim, running_sum);\n                update_state(state, state_dim, u, edge_child[h], -1);\n                ret = update_node_summary(\n                    u, result_dim, summary, state, state_dim, f, f_params);\n                if (ret != 0) {\n                    goto out;\n                }\n                update_running_sum(\n                    u, +1, branch_length, summary, result_dim, running_sum);\n                u = parent[u];\n       
     }\n        }\n\n        while (tj < num_edges && edge_left[I[tj]] == t_left) {\n            h = I[tj];\n            tj++;\n\n            u = edge_child[h];\n            v = edge_parent[h];\n            parent[u] = v;\n            branch_length[u] = time[v] - time[u];\n            update_running_sum(u, +1, branch_length, summary, result_dim, running_sum);\n\n            u = v;\n            while (u != TSK_NULL) {\n                update_running_sum(\n                    u, -1, branch_length, summary, result_dim, running_sum);\n                update_state(state, state_dim, u, edge_child[h], +1);\n                ret = update_node_summary(\n                    u, result_dim, summary, state, state_dim, f, f_params);\n                if (ret != 0) {\n                    goto out;\n                }\n                update_running_sum(\n                    u, +1, branch_length, summary, result_dim, running_sum);\n                u = parent[u];\n            }\n        }\n\n        t_right = sequence_length;\n        if (tj < num_edges) {\n            t_right = TSK_MIN(t_right, edge_left[I[tj]]);\n        }\n        if (tk < num_edges) {\n            t_right = TSK_MIN(t_right, edge_right[O[tk]]);\n        }\n\n        while (windows[window_index] < t_right) {\n            tsk_bug_assert(window_index < num_windows);\n            w_left = windows[window_index];\n            w_right = windows[window_index + 1];\n            left = TSK_MAX(t_left, w_left);\n            right = TSK_MIN(t_right, w_right);\n            scale = (right - left);\n            tsk_bug_assert(scale > 0);\n            result_row = GET_2D_ROW(result, result_dim, window_index);\n            for (k = 0; k < result_dim; k++) {\n                result_row[k] += running_sum[k] * scale;\n            }\n\n            if (w_right <= t_right) {\n                window_index++;\n            } else {\n                /* This interval crosses a tree boundary, so we update it again in the */\n                /* 
loop iteration for the next tree */\n                break;\n            }\n        }\n        /* Move to the next tree */\n        t_left = t_right;\n    }\n    tsk_bug_assert(window_index == num_windows);\nout:\n    /* Can't use tsk_safe_free here because of restrict */\n    if (parent != NULL) {\n        free(parent);\n    }\n    if (branch_length != NULL) {\n        free(branch_length);\n    }\n    tsk_safe_free(state);\n    tsk_safe_free(summary);\n    tsk_safe_free(running_sum);\n    tsk_safe_free(zero_state);\n    tsk_safe_free(zero_summary);\n    return ret;\n}\n\nstatic int\nget_allele_weights(const tsk_site_t *site, const double *state, tsk_size_t state_dim,\n    const double *total_weight, tsk_size_t *ret_num_alleles, double **ret_allele_states)\n{\n    int ret = 0;\n    tsk_size_t k;\n    tsk_mutation_t mutation, parent_mut;\n    tsk_size_t mutation_index, allele, num_alleles, alt_allele_length;\n    /* The allele table */\n    tsk_size_t max_alleles = site->mutations_length + 1;\n    const char **alleles = tsk_malloc(max_alleles * sizeof(*alleles));\n    tsk_size_t *allele_lengths = tsk_calloc(max_alleles, sizeof(*allele_lengths));\n    double *allele_states = tsk_calloc(max_alleles * state_dim, sizeof(*allele_states));\n    double *allele_row;\n    const double *state_row;\n    const char *alt_allele;\n\n    if (alleles == NULL || allele_lengths == NULL || allele_states == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    tsk_bug_assert(state != NULL);\n    alleles[0] = site->ancestral_state;\n    allele_lengths[0] = site->ancestral_state_length;\n    tsk_memcpy(allele_states, total_weight, state_dim * sizeof(*allele_states));\n    num_alleles = 1;\n\n    for (mutation_index = 0; mutation_index < site->mutations_length; mutation_index++) {\n        mutation = site->mutations[mutation_index];\n        /* Compute the allele index for this derived state value. 
*/\n        allele = 0;\n        while (allele < num_alleles) {\n            if (mutation.derived_state_length == allele_lengths[allele]\n                && tsk_memcmp(\n                       mutation.derived_state, alleles[allele], allele_lengths[allele])\n                       == 0) {\n                break;\n            }\n            allele++;\n        }\n        if (allele == num_alleles) {\n            tsk_bug_assert(allele < max_alleles);\n            alleles[allele] = mutation.derived_state;\n            allele_lengths[allele] = mutation.derived_state_length;\n            num_alleles++;\n        }\n\n        /* Add the state for the mutation's node to this allele */\n        state_row = GET_2D_ROW(state, state_dim, mutation.node);\n        allele_row = GET_2D_ROW(allele_states, state_dim, allele);\n        for (k = 0; k < state_dim; k++) {\n            allele_row[k] += state_row[k];\n        }\n\n        /* Get the index for the alternate allele that we must subtract from */\n        alt_allele = site->ancestral_state;\n        alt_allele_length = site->ancestral_state_length;\n        if (mutation.parent != TSK_NULL) {\n            parent_mut = site->mutations[mutation.parent - site->mutations[0].id];\n            alt_allele = parent_mut.derived_state;\n            alt_allele_length = parent_mut.derived_state_length;\n        }\n        allele = 0;\n        while (allele < num_alleles) {\n            if (alt_allele_length == allele_lengths[allele]\n                && tsk_memcmp(alt_allele, alleles[allele], allele_lengths[allele])\n                       == 0) {\n                break;\n            }\n            allele++;\n        }\n        tsk_bug_assert(allele < num_alleles);\n\n        allele_row = GET_2D_ROW(allele_states, state_dim, allele);\n        for (k = 0; k < state_dim; k++) {\n            allele_row[k] -= state_row[k];\n        }\n    }\n    *ret_num_alleles = num_alleles;\n    *ret_allele_states = allele_states;\n    allele_states = 
NULL;\nout:\n    tsk_safe_free(alleles);\n    tsk_safe_free(allele_lengths);\n    tsk_safe_free(allele_states);\n    return ret;\n}\n\nstatic int\ncompute_general_stat_site_result(tsk_site_t *site, double *state, tsk_size_t state_dim,\n    tsk_size_t result_dim, general_stat_func_t *f, void *f_params, double *total_weight,\n    bool polarised, double *result)\n{\n    int ret = 0;\n    tsk_size_t k;\n    tsk_size_t allele, num_alleles;\n    double *allele_states;\n    double *result_tmp = tsk_calloc(result_dim, sizeof(*result_tmp));\n\n    if (result_tmp == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    tsk_memset(result, 0, result_dim * sizeof(*result));\n\n    ret = get_allele_weights(\n        site, state, state_dim, total_weight, &num_alleles, &allele_states);\n    if (ret != 0) {\n        goto out;\n    }\n    /* Sum over the allele weights. Skip the ancestral state if this is a polarised stat\n     */\n    for (allele = polarised ? 1 : 0; allele < num_alleles; allele++) {\n        ret = f(state_dim, GET_2D_ROW(allele_states, state_dim, allele), result_dim,\n            result_tmp, f_params);\n        if (ret != 0) {\n            goto out;\n        }\n        for (k = 0; k < result_dim; k++) {\n            result[k] += result_tmp[k];\n        }\n    }\nout:\n    tsk_safe_free(result_tmp);\n    tsk_safe_free(allele_states);\n    return ret;\n}\n\nstatic int\ntsk_treeseq_site_general_stat(const tsk_treeseq_t *self, tsk_size_t state_dim,\n    const double *sample_weights, tsk_size_t result_dim, general_stat_func_t *f,\n    void *f_params, tsk_size_t num_windows, const double *windows, tsk_flags_t options,\n    double *result)\n{\n    int ret = 0;\n    tsk_id_t u, v;\n    tsk_size_t j, k, tree_site, tree_index, window_index;\n    tsk_size_t num_nodes = self->tables->nodes.num_rows;\n    const tsk_id_t num_edges = (tsk_id_t) self->tables->edges.num_rows;\n    const tsk_id_t *restrict I = 
self->tables->indexes.edge_insertion_order;\n    const tsk_id_t *restrict O = self->tables->indexes.edge_removal_order;\n    const double *restrict edge_left = self->tables->edges.left;\n    const double *restrict edge_right = self->tables->edges.right;\n    const tsk_id_t *restrict edge_parent = self->tables->edges.parent;\n    const tsk_id_t *restrict edge_child = self->tables->edges.child;\n    const double sequence_length = self->tables->sequence_length;\n    tsk_id_t *restrict parent = tsk_malloc(num_nodes * sizeof(*parent));\n    tsk_site_t *site;\n    tsk_id_t tj, tk, h;\n    double t_left, t_right;\n    const double *weight_u;\n    double *state_u, *result_row;\n    double *state = tsk_calloc(num_nodes * state_dim, sizeof(*state));\n    double *total_weight = tsk_calloc(state_dim, sizeof(*total_weight));\n    double *site_result = tsk_calloc(result_dim, sizeof(*site_result));\n    bool polarised = false;\n\n    if (parent == NULL || state == NULL || total_weight == NULL || site_result == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    tsk_memset(parent, 0xff, num_nodes * sizeof(*parent));\n\n    if (options & TSK_STAT_POLARISED) {\n        polarised = true;\n    }\n\n    /* Set the initial conditions */\n    for (j = 0; j < self->num_samples; j++) {\n        u = self->samples[j];\n        state_u = GET_2D_ROW(state, state_dim, u);\n        weight_u = GET_2D_ROW(sample_weights, state_dim, j);\n        tsk_memcpy(state_u, weight_u, state_dim * sizeof(*state_u));\n        for (k = 0; k < state_dim; k++) {\n            total_weight[k] += weight_u[k];\n        }\n    }\n    tsk_memset(result, 0, num_windows * result_dim * sizeof(*result));\n\n    /* Iterate over the trees */\n    tj = 0;\n    tk = 0;\n    t_left = 0;\n    tree_index = 0;\n    window_index = 0;\n    while (tj < num_edges || t_left < sequence_length) {\n        while (tk < num_edges && edge_right[O[tk]] == t_left) {\n            h = O[tk];\n            
tk++;\n            u = edge_child[h];\n            v = edge_parent[h];\n            while (v != TSK_NULL) {\n                update_state(state, state_dim, v, u, -1);\n                v = parent[v];\n            }\n            parent[u] = TSK_NULL;\n        }\n\n        while (tj < num_edges && edge_left[I[tj]] == t_left) {\n            h = I[tj];\n            tj++;\n            u = edge_child[h];\n            v = edge_parent[h];\n            parent[u] = v;\n            while (v != TSK_NULL) {\n                update_state(state, state_dim, v, u, +1);\n                v = parent[v];\n            }\n        }\n        t_right = sequence_length;\n        if (tj < num_edges) {\n            t_right = TSK_MIN(t_right, edge_left[I[tj]]);\n        }\n        if (tk < num_edges) {\n            t_right = TSK_MIN(t_right, edge_right[O[tk]]);\n        }\n\n        /* Update the sites */\n        for (tree_site = 0; tree_site < self->tree_sites_length[tree_index];\n            tree_site++) {\n            site = self->tree_sites[tree_index] + tree_site;\n            ret = compute_general_stat_site_result(site, state, state_dim, result_dim, f,\n                f_params, total_weight, polarised, site_result);\n            if (ret != 0) {\n                goto out;\n            }\n\n            while (windows[window_index + 1] <= site->position) {\n                window_index++;\n                tsk_bug_assert(window_index < num_windows);\n            }\n            tsk_bug_assert(windows[window_index] <= site->position);\n            tsk_bug_assert(site->position < windows[window_index + 1]);\n            result_row = GET_2D_ROW(result, result_dim, window_index);\n            for (k = 0; k < result_dim; k++) {\n                result_row[k] += site_result[k];\n            }\n        }\n        tree_index++;\n        t_left = t_right;\n    }\nout:\n    /* Can't use tsk_safe_free here because of restrict */\n    if (parent != NULL) {\n        free(parent);\n    }\n    
tsk_safe_free(state);\n    tsk_safe_free(total_weight);\n    tsk_safe_free(site_result);\n    return ret;\n}\n\nstatic inline void\nincrement_row(tsk_size_t length, double multiplier, double *source, double *dest)\n{\n    tsk_size_t j;\n\n    for (j = 0; j < length; j++) {\n        dest[j] += multiplier * source[j];\n    }\n}\n\nstatic int\ntsk_treeseq_node_general_stat(const tsk_treeseq_t *self, tsk_size_t state_dim,\n    const double *sample_weights, tsk_size_t result_dim, general_stat_func_t *f,\n    void *f_params, tsk_size_t num_windows, const double *windows,\n    tsk_flags_t TSK_UNUSED(options), double *result)\n{\n    int ret = 0;\n    tsk_id_t u, v;\n    tsk_size_t j, window_index;\n    tsk_size_t num_nodes = self->tables->nodes.num_rows;\n    const tsk_id_t num_edges = (tsk_id_t) self->tables->edges.num_rows;\n    const tsk_id_t *restrict I = self->tables->indexes.edge_insertion_order;\n    const tsk_id_t *restrict O = self->tables->indexes.edge_removal_order;\n    const double *restrict edge_left = self->tables->edges.left;\n    const double *restrict edge_right = self->tables->edges.right;\n    const tsk_id_t *restrict edge_parent = self->tables->edges.parent;\n    const tsk_id_t *restrict edge_child = self->tables->edges.child;\n    const double sequence_length = self->tables->sequence_length;\n    tsk_id_t *restrict parent = tsk_malloc(num_nodes * sizeof(*parent));\n    tsk_id_t tj, tk, h;\n    const double *weight_u;\n    double *state_u;\n    double *state = tsk_calloc(num_nodes * state_dim, sizeof(*state));\n    double *node_summary = tsk_calloc(num_nodes * result_dim, sizeof(*node_summary));\n    double *last_update = tsk_calloc(num_nodes, sizeof(*last_update));\n    double t_left, t_right, w_right;\n\n    if (parent == NULL || state == NULL || node_summary == NULL || last_update == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    tsk_memset(parent, 0xff, num_nodes * sizeof(*parent));\n    
tsk_memset(result, 0, num_windows * num_nodes * result_dim * sizeof(*result));\n\n    /* Set the initial conditions */\n    for (j = 0; j < self->num_samples; j++) {\n        u = self->samples[j];\n        state_u = GET_2D_ROW(state, state_dim, u);\n        weight_u = GET_2D_ROW(sample_weights, state_dim, j);\n        tsk_memcpy(state_u, weight_u, state_dim * sizeof(*state_u));\n    }\n    for (u = 0; u < (tsk_id_t) num_nodes; u++) {\n        ret = update_node_summary(\n            u, result_dim, node_summary, state, state_dim, f, f_params);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n\n    /* Iterate over the trees */\n    tj = 0;\n    tk = 0;\n    t_left = 0;\n    window_index = 0;\n    while (tj < num_edges || t_left < sequence_length) {\n        tsk_bug_assert(window_index < num_windows);\n        while (tk < num_edges && edge_right[O[tk]] == t_left) {\n            h = O[tk];\n            tk++;\n            u = edge_child[h];\n            v = edge_parent[h];\n            while (v != TSK_NULL) {\n                increment_row(result_dim, t_left - last_update[v],\n                    GET_2D_ROW(node_summary, result_dim, v),\n                    GET_3D_ROW(result, num_nodes, result_dim, window_index, v));\n                last_update[v] = t_left;\n                update_state(state, state_dim, v, u, -1);\n                ret = update_node_summary(\n                    v, result_dim, node_summary, state, state_dim, f, f_params);\n                if (ret != 0) {\n                    goto out;\n                }\n                v = parent[v];\n            }\n            parent[u] = TSK_NULL;\n        }\n\n        while (tj < num_edges && edge_left[I[tj]] == t_left) {\n            h = I[tj];\n            tj++;\n            u = edge_child[h];\n            v = edge_parent[h];\n            parent[u] = v;\n            while (v != TSK_NULL) {\n                increment_row(result_dim, t_left - last_update[v],\n                    
GET_2D_ROW(node_summary, result_dim, v),\n                    GET_3D_ROW(result, num_nodes, result_dim, window_index, v));\n                last_update[v] = t_left;\n                update_state(state, state_dim, v, u, +1);\n                ret = update_node_summary(\n                    v, result_dim, node_summary, state, state_dim, f, f_params);\n                if (ret != 0) {\n                    goto out;\n                }\n                v = parent[v];\n            }\n        }\n\n        t_right = sequence_length;\n        if (tj < num_edges) {\n            t_right = TSK_MIN(t_right, edge_left[I[tj]]);\n        }\n        if (tk < num_edges) {\n            t_right = TSK_MIN(t_right, edge_right[O[tk]]);\n        }\n\n        while (window_index < num_windows && windows[window_index + 1] <= t_right) {\n            w_right = windows[window_index + 1];\n            /* Flush the contributions of all nodes to the current window */\n            for (u = 0; u < (tsk_id_t) num_nodes; u++) {\n                tsk_bug_assert(last_update[u] < w_right);\n                increment_row(result_dim, w_right - last_update[u],\n                    GET_2D_ROW(node_summary, result_dim, u),\n                    GET_3D_ROW(result, num_nodes, result_dim, window_index, u));\n                last_update[u] = w_right;\n            }\n            window_index++;\n        }\n\n        t_left = t_right;\n    }\nout:\n    /* Can't use tsk_safe_free here because of restrict */\n    if (parent != NULL) {\n        free(parent);\n    }\n    tsk_safe_free(state);\n    tsk_safe_free(node_summary);\n    tsk_safe_free(last_update);\n    return ret;\n}\n\nstatic void\nspan_normalise(\n    tsk_size_t num_windows, const double *windows, tsk_size_t row_size, double *array)\n{\n    tsk_size_t window_index, k;\n    double span, *row;\n\n    for (window_index = 0; window_index < num_windows; window_index++) {\n        span = windows[window_index + 1] - windows[window_index];\n        row = 
GET_2D_ROW(array, row_size, window_index);\n        for (k = 0; k < row_size; k++) {\n            row[k] /= span;\n        }\n    }\n}\n\ntypedef struct {\n    general_stat_func_t *f;\n    void *f_params;\n    double *total_weight;\n    double *total_minus_state;\n    double *result_tmp;\n} unpolarised_summary_func_args;\n\nstatic int\nunpolarised_summary_func(tsk_size_t state_dim, const double *state,\n    tsk_size_t result_dim, double *result, void *params)\n{\n    int ret = 0;\n    unpolarised_summary_func_args *upargs = (unpolarised_summary_func_args *) params;\n    const double *total_weight = upargs->total_weight;\n    double *total_minus_state = upargs->total_minus_state;\n    double *result_tmp = upargs->result_tmp;\n    tsk_size_t k, m;\n\n    ret = upargs->f(state_dim, state, result_dim, result, upargs->f_params);\n    if (ret != 0) {\n        goto out;\n    }\n    for (k = 0; k < state_dim; k++) {\n        total_minus_state[k] = total_weight[k] - state[k];\n    }\n    ret = upargs->f(\n        state_dim, total_minus_state, result_dim, result_tmp, upargs->f_params);\n    if (ret != 0) {\n        goto out;\n    }\n    for (m = 0; m < result_dim; m++) {\n        result[m] += result_tmp[m];\n    }\nout:\n    return ret;\n}\n\n/* Abstracts the running of node and branch stats where the summary function\n * is run twice when non-polarised. 
We replace the call to the input summary\n * function with a call of the required form when non-polarised, simplifying\n * the implementation and memory management for the node and branch stats.\n */\nstatic int\ntsk_polarisable_func_general_stat(const tsk_treeseq_t *self, tsk_size_t state_dim,\n    const double *sample_weights, tsk_size_t result_dim, general_stat_func_t *f,\n    void *f_params, tsk_size_t num_windows, const double *windows, tsk_flags_t options,\n    double *result)\n{\n    int ret = 0;\n    bool stat_branch = !!(options & TSK_STAT_BRANCH);\n    bool polarised = options & TSK_STAT_POLARISED;\n    general_stat_func_t *wrapped_f = f;\n    void *wrapped_f_params = f_params;\n    const double *weight_u;\n    unpolarised_summary_func_args upargs;\n    tsk_size_t j, k;\n\n    tsk_memset(&upargs, 0, sizeof(upargs));\n    if (!polarised) {\n        upargs.f = f;\n        upargs.f_params = f_params;\n        upargs.total_weight = tsk_calloc(state_dim, sizeof(double));\n        upargs.total_minus_state = tsk_calloc(state_dim, sizeof(double));\n        upargs.result_tmp = tsk_calloc(result_dim, sizeof(double));\n\n        if (upargs.total_weight == NULL || upargs.total_minus_state == NULL\n            || upargs.result_tmp == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n\n        /* Compute the total weight */\n        for (j = 0; j < self->num_samples; j++) {\n            weight_u = GET_2D_ROW(sample_weights, state_dim, j);\n            for (k = 0; k < state_dim; k++) {\n                upargs.total_weight[k] += weight_u[k];\n            }\n        }\n\n        wrapped_f = unpolarised_summary_func;\n        wrapped_f_params = &upargs;\n    }\n\n    if (stat_branch) {\n        ret = tsk_treeseq_branch_general_stat(self, state_dim, sample_weights,\n            result_dim, wrapped_f, wrapped_f_params, num_windows, windows, options,\n            result);\n    } else {\n        ret = 
tsk_treeseq_node_general_stat(self, state_dim, sample_weights, result_dim,\n            wrapped_f, wrapped_f_params, num_windows, windows, options, result);\n    }\nout:\n    tsk_safe_free(upargs.total_weight);\n    tsk_safe_free(upargs.total_minus_state);\n    tsk_safe_free(upargs.result_tmp);\n    return ret;\n}\n\nint\ntsk_treeseq_general_stat(const tsk_treeseq_t *self, tsk_size_t state_dim,\n    const double *sample_weights, tsk_size_t result_dim, general_stat_func_t *f,\n    void *f_params, tsk_size_t num_windows, const double *windows, tsk_flags_t options,\n    double *result)\n{\n    int ret = 0;\n    bool stat_site = !!(options & TSK_STAT_SITE);\n    bool stat_branch = !!(options & TSK_STAT_BRANCH);\n    bool stat_node = !!(options & TSK_STAT_NODE);\n    double default_windows[] = { 0, self->tables->sequence_length };\n    tsk_size_t row_size;\n\n    /* If no mode is specified, we default to site mode */\n    if (!(stat_site || stat_branch || stat_node)) {\n        stat_site = true;\n    }\n    /* It's an error to specify more than one mode */\n    if (stat_site + stat_branch + stat_node > 1) {\n        ret = tsk_trace_error(TSK_ERR_MULTIPLE_STAT_MODES);\n        goto out;\n    }\n\n    if (state_dim < 1) {\n        ret = tsk_trace_error(TSK_ERR_BAD_STATE_DIMS);\n        goto out;\n    }\n    if (result_dim < 1) {\n        ret = tsk_trace_error(TSK_ERR_BAD_RESULT_DIMS);\n        goto out;\n    }\n    if (windows == NULL) {\n        num_windows = 1;\n        windows = default_windows;\n    } else {\n        ret = tsk_treeseq_check_windows(\n            self, num_windows, windows, TSK_REQUIRE_FULL_SPAN);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n\n    if (stat_site) {\n        ret = tsk_treeseq_site_general_stat(self, state_dim, sample_weights, result_dim,\n            f, f_params, num_windows, windows, options, result);\n    } else {\n        ret = tsk_polarisable_func_general_stat(self, state_dim, sample_weights,\n            
result_dim, f, f_params, num_windows, windows, options, result);\n    }\n\n    if (options & TSK_STAT_SPAN_NORMALISE) {\n        row_size = result_dim;\n        if (stat_node) {\n            row_size = result_dim * tsk_treeseq_get_num_nodes(self);\n        }\n        span_normalise(num_windows, windows, row_size, result);\n    }\n\nout:\n    return ret;\n}\n\nstatic int\ncheck_set_indexes(\n    tsk_size_t num_sets, tsk_size_t num_set_indexes, const tsk_id_t *set_indexes)\n{\n    int ret = 0;\n    tsk_size_t j;\n\n    for (j = 0; j < num_set_indexes; j++) {\n        if (set_indexes[j] < 0 || set_indexes[j] >= (tsk_id_t) num_sets) {\n            ret = tsk_trace_error(TSK_ERR_BAD_SAMPLE_SET_INDEX);\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int\ntsk_treeseq_check_sample_sets(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets)\n{\n    int ret = 0;\n    tsk_size_t j, k, l;\n    const tsk_id_t num_nodes = (tsk_id_t) self->tables->nodes.num_rows;\n    tsk_id_t u, sample_index;\n\n    if (num_sample_sets == 0) {\n        ret = tsk_trace_error(TSK_ERR_INSUFFICIENT_SAMPLE_SETS);\n        goto out;\n    }\n    j = 0;\n    for (k = 0; k < num_sample_sets; k++) {\n        if (sample_set_sizes[k] == 0) {\n            ret = tsk_trace_error(TSK_ERR_EMPTY_SAMPLE_SET);\n            goto out;\n        }\n        for (l = 0; l < sample_set_sizes[k]; l++) {\n            u = sample_sets[j];\n            if (u < 0 || u >= num_nodes) {\n                ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);\n                goto out;\n            }\n            sample_index = self->sample_index_map[u];\n            if (sample_index == TSK_NULL) {\n                ret = tsk_trace_error(TSK_ERR_BAD_SAMPLES);\n                goto out;\n            }\n            j++;\n        }\n    }\nout:\n    return ret;\n}\n\ntypedef struct {\n    tsk_size_t num_samples;\n} 
weight_stat_params_t;\n\ntypedef struct {\n    tsk_size_t num_samples;\n    tsk_size_t num_covariates;\n    double *V;\n} covariates_stat_params_t;\n\ntypedef struct {\n    const tsk_id_t *sample_sets;\n    tsk_size_t num_sample_sets;\n    const tsk_size_t *sample_set_sizes;\n    const tsk_id_t *set_indexes;\n} sample_count_stat_params_t;\n\ntypedef struct {\n    tsk_size_t num_samples;\n    double *total_weights;\n    const tsk_id_t *index_tuples;\n} indexed_weight_stat_params_t;\n\nstatic int\ntsk_treeseq_sample_count_stat(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t result_dim, const tsk_id_t *set_indexes, general_stat_func_t *f,\n    tsk_size_t num_windows, const double *windows, tsk_flags_t options, double *result)\n{\n    int ret = 0;\n    const tsk_size_t num_samples = self->num_samples;\n    tsk_size_t j, k, l;\n    tsk_id_t u, sample_index;\n    double *weights = NULL;\n    double *weight_row;\n    sample_count_stat_params_t args = { .sample_sets = sample_sets,\n        .num_sample_sets = num_sample_sets,\n        .sample_set_sizes = sample_set_sizes,\n        .set_indexes = set_indexes };\n\n    ret = tsk_treeseq_check_sample_sets(\n        self, num_sample_sets, sample_set_sizes, sample_sets);\n    if (ret != 0) {\n        goto out;\n    }\n    weights = tsk_calloc(num_samples * num_sample_sets, sizeof(*weights));\n    if (weights == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    j = 0;\n    for (k = 0; k < num_sample_sets; k++) {\n        for (l = 0; l < sample_set_sizes[k]; l++) {\n            u = sample_sets[j];\n            sample_index = self->sample_index_map[u];\n            weight_row = GET_2D_ROW(weights, num_sample_sets, sample_index);\n            if (weight_row[k] != 0) {\n                ret = tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE);\n                goto out;\n            }\n            weight_row[k] = 
1;\n            j++;\n        }\n    }\n    ret = tsk_treeseq_general_stat(self, num_sample_sets, weights, result_dim, f, &args,\n        num_windows, windows, options, result);\nout:\n    tsk_safe_free(weights);\n    return ret;\n}\n\n/***********************************\n * Two Locus Statistics\n ***********************************/\n\nstatic int\nget_allele_samples(const tsk_site_t *site, tsk_size_t site_offset,\n    const tsk_bitset_t *state, tsk_bitset_t *out_allele_samples,\n    tsk_size_t *out_num_alleles)\n{\n    int ret = 0;\n    tsk_mutation_t mutation, parent_mut;\n    tsk_size_t mutation_index, allele, alt_allele, alt_allele_length;\n    /* The allele table */\n    tsk_size_t max_alleles = site->mutations_length + 1;\n    const char **alleles = tsk_malloc(max_alleles * sizeof(*alleles));\n    tsk_size_t *allele_lengths = tsk_calloc(max_alleles, sizeof(*allele_lengths));\n    const char *alt_allele_state;\n    tsk_size_t num_alleles = 1;\n\n    if (alleles == NULL || allele_lengths == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    tsk_bug_assert(state != NULL);\n    alleles[0] = site->ancestral_state;\n    allele_lengths[0] = site->ancestral_state_length;\n\n    for (mutation_index = 0; mutation_index < site->mutations_length; mutation_index++) {\n        mutation = site->mutations[mutation_index];\n        /* Compute the allele index for this derived state value. 
*/\n        for (allele = 0; allele < num_alleles; allele++) {\n            if (mutation.derived_state_length == allele_lengths[allele]\n                && tsk_memcmp(\n                       mutation.derived_state, alleles[allele], allele_lengths[allele])\n                       == 0) {\n                break;\n            }\n        }\n        if (allele == num_alleles) {\n            tsk_bug_assert(allele < max_alleles);\n            alleles[allele] = mutation.derived_state;\n            allele_lengths[allele] = mutation.derived_state_length;\n            num_alleles++;\n        }\n\n        /* Add the mutation's samples to this allele */\n        tsk_bitset_union(\n            out_allele_samples, allele + site_offset, state, mutation_index);\n\n        /* Get the index for the alternate allele that we must subtract from */\n        alt_allele_state = site->ancestral_state;\n        alt_allele_length = site->ancestral_state_length;\n        if (mutation.parent != TSK_NULL) {\n            parent_mut = site->mutations[mutation.parent - site->mutations[0].id];\n            alt_allele_state = parent_mut.derived_state;\n            alt_allele_length = parent_mut.derived_state_length;\n        }\n        for (alt_allele = 0; alt_allele < num_alleles; alt_allele++) {\n            if (alt_allele_length == allele_lengths[alt_allele]\n                && tsk_memcmp(\n                       alt_allele_state, alleles[alt_allele], allele_lengths[alt_allele])\n                       == 0) {\n                break;\n            }\n        }\n        tsk_bug_assert(allele < num_alleles);\n\n        tsk_bitset_subtract(out_allele_samples, alt_allele + site_offset,\n            out_allele_samples, allele + site_offset);\n    }\n    *out_num_alleles = num_alleles;\nout:\n    tsk_safe_free(alleles);\n    tsk_safe_free(allele_lengths);\n    return ret;\n}\n\nstatic int\nnorm_hap_weighted(tsk_size_t result_dim, const double *hap_weights,\n    tsk_size_t TSK_UNUSED(n_a), tsk_size_t 
TSK_UNUSED(n_b), double *result, void *params)\n{\n    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;\n    const double *weight_row;\n    double n;\n    tsk_size_t k;\n\n    for (k = 0; k < result_dim; k++) {\n        weight_row = GET_2D_ROW(hap_weights, 3, k);\n        n = (double) args.sample_set_sizes[k];\n        result[k] = weight_row[0] / n;\n    }\n    return 0;\n}\n\nstatic int\nnorm_hap_weighted_ij(tsk_size_t result_dim, const double *hap_weights,\n    tsk_size_t TSK_UNUSED(n_a), tsk_size_t TSK_UNUSED(n_b), double *result, void *params)\n{\n    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;\n    const double *weight_row;\n    double ni, nj, wAB_i, wAB_j;\n    tsk_id_t i, j;\n    tsk_size_t k;\n\n    for (k = 0; k < result_dim; k++) {\n        i = args.set_indexes[2 * k];\n        j = args.set_indexes[2 * k + 1];\n        ni = (double) args.sample_set_sizes[i];\n        nj = (double) args.sample_set_sizes[j];\n        weight_row = GET_2D_ROW(hap_weights, 3, i);\n        wAB_i = weight_row[0];\n        weight_row = GET_2D_ROW(hap_weights, 3, j);\n        wAB_j = weight_row[0];\n\n        result[k] = (wAB_i + wAB_j) / (ni + nj);\n    }\n\n    return 0;\n}\n\nstatic int\nnorm_total_weighted(tsk_size_t result_dim, const double *TSK_UNUSED(hap_weights),\n    tsk_size_t n_a, tsk_size_t n_b, double *result, void *TSK_UNUSED(params))\n{\n    tsk_size_t k;\n    double norm = 1 / (double) (n_a * n_b);\n\n    for (k = 0; k < result_dim; k++) {\n        result[k] = norm;\n    }\n    return 0;\n}\n\nstatic void\nget_all_samples_bits(tsk_bitset_t *all_samples, tsk_size_t n)\n{\n    tsk_size_t i;\n    const tsk_bitset_val_t all = ~((tsk_bitset_val_t) 0);\n    const tsk_bitset_val_t remainder_samples = n % TSK_BITSET_BITS;\n\n    all_samples->data[all_samples->row_len - 1]\n        = remainder_samples ? 
~(all << remainder_samples) : all;\n    for (i = 0; i < all_samples->row_len - 1; i++) {\n        all_samples->data[i] = all;\n    }\n}\n\n// Stores the intermediate values for computing two-locus statistics.\ntypedef struct {\n    double *weights;\n    double *norm;\n    double *result_tmp;\n    tsk_bitset_t AB_samples;\n} two_locus_work_t;\n\nstatic int\ntwo_locus_work_init(tsk_size_t max_alleles, tsk_size_t num_samples,\n    tsk_size_t result_dim, tsk_size_t state_dim, two_locus_work_t *out)\n{\n    int ret = 0;\n\n    out->weights = tsk_malloc(3 * state_dim * sizeof(*out->weights));\n    out->norm = tsk_malloc(result_dim * sizeof(*out->norm));\n    out->result_tmp\n        = tsk_malloc(result_dim * max_alleles * max_alleles * sizeof(*out->result_tmp));\n    tsk_memset(&out->AB_samples, 0, sizeof(out->AB_samples));\n    if (out->weights == NULL || out->norm == NULL || out->result_tmp == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    ret = tsk_bitset_init(&out->AB_samples, num_samples, 1);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nstatic void\ntwo_locus_work_free(two_locus_work_t *work)\n{\n    tsk_safe_free(work->weights);\n    tsk_safe_free(work->norm);\n    tsk_safe_free(work->result_tmp);\n    tsk_bitset_free(&work->AB_samples);\n}\n\nstatic int\ncompute_general_normed_two_site_stat_result(const tsk_bitset_t *state,\n    const tsk_size_t *allele_counts, tsk_size_t a_off, tsk_size_t b_off,\n    tsk_size_t num_a_alleles, tsk_size_t num_b_alleles, tsk_size_t state_dim,\n    tsk_size_t result_dim, general_stat_func_t *f, sample_count_stat_params_t *f_params,\n    norm_func_t *norm_f, bool polarised, two_locus_work_t *restrict work, double *result)\n{\n    int ret = 0;\n    // Sample sets and b sites are rows, a sites are columns\n    //       b1           b2           b3\n    // a1   [s1, s2, s3] [s1, s2, s3] [s1, s2, s3]\n    // a2   [s1, s2, s3] [s1, s2, s3] [s1, s2, s3]\n    // a3   
[s1, s2, s3] [s1, s2, s3] [s1, s2, s3]\n    tsk_size_t k, mut_a, mut_b, result_row_len = num_b_alleles * result_dim;\n    uint8_t is_polarised = polarised ? 1 : 0;\n    double *restrict hap_row, *restrict result_tmp_row;\n    double *restrict norm = work->norm;\n    double *restrict weights = work->weights;\n    double *restrict result_tmp = work->result_tmp;\n    tsk_bitset_t AB_samples = work->AB_samples;\n\n    for (mut_a = is_polarised; mut_a < num_a_alleles; mut_a++) {\n        result_tmp_row = GET_2D_ROW(result_tmp, result_row_len, mut_a);\n        for (mut_b = is_polarised; mut_b < num_b_alleles; mut_b++) {\n            for (k = 0; k < state_dim; k++) {\n                tsk_bitset_intersect(state, a_off + (mut_a * state_dim) + k, state,\n                    b_off + (mut_b * state_dim) + k, &AB_samples);\n                hap_row = GET_2D_ROW(weights, 3, k);\n                hap_row[0] = (double) tsk_bitset_count(&AB_samples, 0);\n                hap_row[1] = (double) allele_counts[a_off + (mut_a * state_dim) + k]\n                             - hap_row[0];\n                hap_row[2] = (double) allele_counts[b_off + (mut_b * state_dim) + k]\n                             - hap_row[0];\n            }\n            ret = f(state_dim, weights, result_dim, result_tmp_row, f_params);\n            if (ret != 0) {\n                goto out;\n            }\n            ret = norm_f(result_dim, weights, num_a_alleles - is_polarised,\n                num_b_alleles - is_polarised, norm, f_params);\n            if (ret != 0) {\n                goto out;\n            }\n            for (k = 0; k < result_dim; k++) {\n                result[k] += result_tmp_row[k] * norm[k];\n            }\n            result_tmp_row += result_dim; // Advance to the next column\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int\ncompute_general_two_site_stat_result(const tsk_bitset_t *state,\n    const tsk_size_t *allele_counts, tsk_size_t a_off, tsk_size_t b_off,\n    tsk_size_t 
state_dim, tsk_size_t result_dim, general_stat_func_t *f,\n    sample_count_stat_params_t *f_params, two_locus_work_t *restrict work,\n    double *result)\n{\n    int ret = 0;\n    tsk_size_t k;\n    tsk_bitset_t AB_samples = work->AB_samples;\n    tsk_size_t mut_a = 1, mut_b = 1;\n    double *restrict hap_row, *restrict weights = work->weights;\n\n    for (k = 0; k < state_dim; k++) {\n        tsk_bitset_intersect(state, a_off + (mut_a * state_dim) + k, state,\n            b_off + (mut_b * state_dim) + k, &AB_samples);\n        hap_row = GET_2D_ROW(weights, 3, k);\n        hap_row[0] = (double) tsk_bitset_count(&AB_samples, 0);\n        hap_row[1]\n            = (double) allele_counts[a_off + (mut_a * state_dim) + k] - hap_row[0];\n        hap_row[2]\n            = (double) allele_counts[b_off + (mut_b * state_dim) + k] - hap_row[0];\n    }\n    ret = f(state_dim, weights, result_dim, result, f_params);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nstatic void\nget_site_row_col_indices(tsk_size_t n_rows, const tsk_id_t *row_sites, tsk_size_t n_cols,\n    const tsk_id_t *col_sites, tsk_id_t *sites, tsk_size_t *n_sites, tsk_size_t *row_idx,\n    tsk_size_t *col_idx)\n{\n    tsk_size_t r = 0, c = 0, s = 0;\n\n    // Iterate rows and columns until we've exhausted one of the lists\n    while ((r < n_rows) && (c < n_cols)) {\n        if (row_sites[r] < col_sites[c]) {\n            sites[s] = row_sites[r];\n            row_idx[r] = s;\n            s++;\n            r++;\n        } else if (col_sites[c] < row_sites[r]) {\n            sites[s] = col_sites[c];\n            col_idx[c] = s;\n            s++;\n            c++;\n        } else { // row == col\n            sites[s] = row_sites[r];\n            col_idx[c] = s;\n            row_idx[r] = s;\n            s++;\n            r++;\n            c++;\n        }\n    }\n\n    // If there are any items remaining in the other list, drain it\n    while (r < n_rows) {\n        sites[s] = 
row_sites[r];\n        row_idx[r] = s;\n        s++;\n        r++;\n    }\n    while (c < n_cols) {\n        sites[s] = col_sites[c];\n        col_idx[c] = s;\n        s++;\n        c++;\n    }\n    *n_sites = s;\n}\n\nstatic int\nget_mutation_samples(const tsk_treeseq_t *ts, const tsk_id_t *sites, tsk_size_t n_sites,\n    tsk_size_t *num_alleles, tsk_bitset_t *allele_samples)\n{\n    int ret = 0;\n    const tsk_flags_t *restrict flags = ts->tables->nodes.flags;\n    const tsk_size_t num_samples = tsk_treeseq_get_num_samples(ts);\n    const tsk_size_t *restrict site_muts_len = ts->site_mutations_length;\n    tsk_site_t site;\n    tsk_tree_t tree;\n    tsk_bitset_t all_samples_bits, mut_samples;\n    tsk_size_t max_muts_len, site_offset, num_nodes, site_idx, s, m, n;\n    tsk_id_t node, *nodes = NULL;\n    void *tmp_nodes;\n\n    tsk_memset(&mut_samples, 0, sizeof(mut_samples));\n    tsk_memset(&all_samples_bits, 0, sizeof(all_samples_bits));\n\n    max_muts_len = 0;\n    for (s = 0; s < n_sites; s++) {\n        max_muts_len = TSK_MAX(site_muts_len[sites[s]], max_muts_len);\n    }\n    // Allocate a bit array of size max alleles for all sites\n    ret = tsk_bitset_init(&mut_samples, num_samples, max_muts_len);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_bitset_init(&all_samples_bits, num_samples, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    get_all_samples_bits(&all_samples_bits, num_samples);\n    ret = tsk_tree_init(&tree, ts, TSK_NO_SAMPLE_COUNTS);\n    if (ret != 0) {\n        goto out;\n    }\n\n    // For each mutation within each site, perform one preorder traversal to gather\n    // the samples under each mutation's node.\n    site_offset = 0;\n    for (site_idx = 0; site_idx < n_sites; site_idx++) {\n        tsk_treeseq_get_site(ts, sites[site_idx], &site);\n        ret = tsk_tree_seek(&tree, site.position, 0);\n        if (ret != 0) {\n            goto out;\n        }\n        tmp_nodes = tsk_realloc(nodes, 
tsk_tree_get_size_bound(&tree) * sizeof(*nodes));\n        if (tmp_nodes == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n        nodes = tmp_nodes;\n        tsk_bitset_union(allele_samples, site_offset, &all_samples_bits, 0);\n        // Zero out results before the start of each iteration\n        tsk_memset(mut_samples.data, 0,\n            mut_samples.row_len * max_muts_len * sizeof(tsk_bitset_val_t));\n        for (m = 0; m < site.mutations_length; m++) {\n            node = site.mutations[m].node;\n            ret = tsk_tree_preorder_from(&tree, node, nodes, &num_nodes);\n            if (ret != 0) {\n                goto out;\n            }\n            for (n = 0; n < num_nodes; n++) {\n                node = nodes[n];\n                if (flags[node] & TSK_NODE_IS_SAMPLE) {\n                    tsk_bitset_set_bit(\n                        &mut_samples, m, (tsk_bitset_val_t) ts->sample_index_map[node]);\n                }\n            }\n        }\n        get_allele_samples(\n            &site, site_offset, &mut_samples, allele_samples, &(num_alleles[site_idx]));\n        site_offset += site.mutations_length + 1;\n    }\n// if adding code below, check ret before continuing\nout:\n    tsk_safe_free(nodes);\n    tsk_tree_free(&tree);\n    tsk_bitset_free(&mut_samples);\n    tsk_bitset_free(&all_samples_bits);\n    return ret == TSK_TREE_OK ? 0 : ret;\n}\n\n// Given the samples under each allele's node and the sample sets, get the samples under\n// each allele's node for each sample set. We pack this data into a bitset\n// (`allele_sample_sets`) that is size m x n, where m is (n_alleles * num_sample_sets)\n// and n is the size of the largest sample set. 
In addition, we compute the number of\n// samples contained in the intersection of each allele's samples and each sample set in\n// an array (`allele_sample_set_counts`) of length (n_alleles * num_sample_sets).\nstatic void\nget_mutation_sample_sets(const tsk_bitset_t *allele_samples, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    const tsk_id_t *sample_index_map, tsk_bitset_t *allele_sample_sets,\n    tsk_size_t *allele_sample_set_counts)\n{\n    tsk_bitset_val_t k, sample;\n    tsk_size_t i, j, ss_off;\n\n    for (i = 0; i < allele_samples->len; i++) {\n        ss_off = 0;\n        for (j = 0; j < num_sample_sets; j++) {\n            for (k = 0; k < sample_set_sizes[j]; k++) {\n                sample = (tsk_bitset_val_t) sample_index_map[sample_sets[k + ss_off]];\n                if (tsk_bitset_contains(allele_samples, i, sample)) {\n                    tsk_bitset_set_bit(allele_sample_sets, j + i * num_sample_sets, k);\n                    allele_sample_set_counts[j + i * num_sample_sets]++;\n                }\n            }\n            ss_off += sample_set_sizes[j];\n        }\n    }\n}\n\nstatic int\ntsk_treeseq_two_site_count_stat(const tsk_treeseq_t *self, tsk_size_t state_dim,\n    tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,\n    const tsk_id_t *sample_sets, tsk_size_t result_dim, general_stat_func_t *f,\n    sample_count_stat_params_t *f_params, norm_func_t *norm_f, tsk_size_t n_rows,\n    const tsk_id_t *row_sites, tsk_size_t n_cols, const tsk_id_t *col_sites,\n    tsk_flags_t options, double *result)\n{\n    int ret = 0;\n    tsk_bitset_t allele_samples, allele_sample_sets;\n    bool polarised = options & TSK_STAT_POLARISED;\n    tsk_id_t *sites;\n    tsk_size_t i, j, n_sites, *row_idx, *col_idx;\n    double *result_row;\n    const tsk_size_t num_samples = self->num_samples;\n    tsk_size_t *num_alleles = NULL, *site_offsets = NULL, *allele_counts = NULL;\n    tsk_size_t 
result_row_len = n_cols * result_dim;\n    tsk_size_t max_ss_size = 0, max_alleles = 0, n_alleles = 0;\n    two_locus_work_t work;\n\n    tsk_memset(&work, 0, sizeof(work));\n    tsk_memset(&allele_samples, 0, sizeof(allele_samples));\n    tsk_memset(&allele_sample_sets, 0, sizeof(allele_sample_sets));\n    sites = tsk_malloc(self->tables->sites.num_rows * sizeof(*sites));\n    row_idx = tsk_malloc(self->tables->sites.num_rows * sizeof(*row_idx));\n    col_idx = tsk_malloc(self->tables->sites.num_rows * sizeof(*col_idx));\n    if (sites == NULL || row_idx == NULL || col_idx == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    get_site_row_col_indices(\n        n_rows, row_sites, n_cols, col_sites, sites, &n_sites, row_idx, col_idx);\n    // depends on n_sites\n    num_alleles = tsk_malloc(n_sites * sizeof(*num_alleles));\n    site_offsets = tsk_malloc(n_sites * sizeof(*site_offsets));\n    if (num_alleles == NULL || site_offsets == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    for (i = 0; i < n_sites; i++) {\n        site_offsets[i] = n_alleles * num_sample_sets;\n        n_alleles += self->site_mutations_length[sites[i]] + 1;\n        max_alleles = TSK_MAX(self->site_mutations_length[sites[i]], max_alleles);\n    }\n    max_alleles++; // add 1 for the ancestral allele\n    // depends on n_alleles\n    ret = tsk_bitset_init(&allele_samples, num_samples, n_alleles);\n    if (ret != 0) {\n        goto out;\n    }\n    for (i = 0; i < num_sample_sets; i++) {\n        max_ss_size = TSK_MAX(sample_set_sizes[i], max_ss_size);\n    }\n    // depend on n_alleles and max_ss_size\n    ret = tsk_bitset_init(&allele_sample_sets, max_ss_size, n_alleles * num_sample_sets);\n    if (ret != 0) {\n        goto out;\n    }\n    allele_counts = tsk_calloc(n_alleles * num_sample_sets, sizeof(*allele_counts));\n    if (allele_counts == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        
goto out;\n    }\n    // depends on max_ss_size and max_alleles\n    ret = two_locus_work_init(max_alleles, max_ss_size, result_dim, state_dim, &work);\n    if (ret != 0) {\n        goto out;\n    }\n    // we track the number of alleles to account for backmutations\n    ret = get_mutation_samples(self, sites, n_sites, num_alleles, &allele_samples);\n    if (ret != 0) {\n        goto out;\n    }\n    get_mutation_sample_sets(&allele_samples, num_sample_sets, sample_set_sizes,\n        sample_sets, self->sample_index_map, &allele_sample_sets, allele_counts);\n    // For each row/column pair, fill in the sample set in the result matrix.\n    for (i = 0; i < n_rows; i++) {\n        result_row = GET_2D_ROW(result, result_row_len, i);\n        for (j = 0; j < n_cols; j++) {\n            if (num_alleles[row_idx[i]] == 2 && num_alleles[col_idx[j]] == 2) {\n                // both sites are biallelic\n                ret = compute_general_two_site_stat_result(&allele_sample_sets,\n                    allele_counts, site_offsets[row_idx[i]], site_offsets[col_idx[j]],\n                    state_dim, result_dim, f, f_params, &work,\n                    &(result_row[j * result_dim]));\n            } else {\n                // at least one site is multiallelic\n                ret = compute_general_normed_two_site_stat_result(&allele_sample_sets,\n                    allele_counts, site_offsets[row_idx[i]], site_offsets[col_idx[j]],\n                    num_alleles[row_idx[i]], num_alleles[col_idx[j]], state_dim,\n                    result_dim, f, f_params, norm_f, polarised, &work,\n                    &(result_row[j * result_dim]));\n            }\n            if (ret != 0) {\n                goto out;\n            }\n        }\n    }\n\nout:\n    tsk_safe_free(sites);\n    tsk_safe_free(row_idx);\n    tsk_safe_free(col_idx);\n    tsk_safe_free(num_alleles);\n    tsk_safe_free(site_offsets);\n    tsk_safe_free(allele_counts);\n    two_locus_work_free(&work);\n    
tsk_bitset_free(&allele_samples);\n    tsk_bitset_free(&allele_sample_sets);\n    return ret;\n}\n\nstatic int\nsample_sets_to_bitset(const tsk_treeseq_t *self, const tsk_size_t *sample_set_sizes,\n    const tsk_id_t *sample_sets, tsk_size_t num_sample_sets,\n    tsk_bitset_t *sample_sets_bits)\n{\n    int ret;\n    tsk_size_t j, k, l;\n    tsk_id_t u, sample_index;\n\n    ret = tsk_bitset_init(sample_sets_bits, self->num_samples, num_sample_sets);\n    if (ret != 0) {\n        return ret;\n    }\n    j = 0;\n    for (k = 0; k < num_sample_sets; k++) {\n        for (l = 0; l < sample_set_sizes[k]; l++) {\n            u = sample_sets[j];\n            sample_index = self->sample_index_map[u];\n            if (tsk_bitset_contains(\n                    sample_sets_bits, k, (tsk_bitset_val_t) sample_index)) {\n                ret = tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE);\n                goto out;\n            }\n            tsk_bitset_set_bit(sample_sets_bits, k, (tsk_bitset_val_t) sample_index);\n            j++;\n        }\n    }\n\nout:\n    return ret;\n}\n\nstatic int\ncheck_sites(const tsk_id_t *sites, tsk_size_t num_sites, tsk_size_t num_site_rows)\n{\n    int ret = 0;\n    tsk_size_t i;\n\n    if (num_sites == 0) {\n        return ret; // No need to verify sites if there aren't any\n    }\n\n    for (i = 0; i < num_sites - 1; i++) {\n        if (sites[i] < 0 || sites[i] >= (tsk_id_t) num_site_rows) {\n            ret = tsk_trace_error(TSK_ERR_SITE_OUT_OF_BOUNDS);\n            goto out;\n        }\n        if (sites[i] > sites[i + 1]) {\n            ret = tsk_trace_error(TSK_ERR_STAT_UNSORTED_SITES);\n            goto out;\n        }\n        if (sites[i] == sites[i + 1]) {\n            ret = tsk_trace_error(TSK_ERR_STAT_DUPLICATE_SITES);\n            goto out;\n        }\n    }\n    // check the last value\n    if (sites[i] < 0 || sites[i] >= (tsk_id_t) num_site_rows) {\n        ret = tsk_trace_error(TSK_ERR_SITE_OUT_OF_BOUNDS);\n        goto out;\n    
}\nout:\n    return ret;\n}\n\nstatic int\ncheck_positions(\n    const double *positions, tsk_size_t num_positions, double sequence_length)\n{\n    int ret = 0;\n    tsk_size_t i;\n\n    if (num_positions == 0) {\n        return ret; // No need to verify positions if there aren't any\n    }\n\n    for (i = 0; i < num_positions - 1; i++) {\n        if (positions[i] < 0 || positions[i] >= sequence_length) {\n            ret = tsk_trace_error(TSK_ERR_POSITION_OUT_OF_BOUNDS);\n            goto out;\n        }\n        if (positions[i] > positions[i + 1]) {\n            ret = tsk_trace_error(TSK_ERR_STAT_UNSORTED_POSITIONS);\n            goto out;\n        }\n        if (positions[i] == positions[i + 1]) {\n            ret = tsk_trace_error(TSK_ERR_STAT_DUPLICATE_POSITIONS);\n            goto out;\n        }\n    }\n    // check bounds of last value\n    if (positions[i] < 0 || positions[i] >= sequence_length) {\n        ret = tsk_trace_error(TSK_ERR_POSITION_OUT_OF_BOUNDS);\n        goto out;\n    }\nout:\n    return ret;\n}\n\nstatic int\npositions_to_tree_indexes(const tsk_treeseq_t *ts, const double *positions,\n    tsk_size_t num_positions, tsk_id_t **tree_indexes)\n{\n    int ret = 0;\n    tsk_id_t tree_index = 0;\n    tsk_size_t i, num_trees = ts->num_trees;\n\n    // This is tricky. 
If there are 0 positions, we calloc a size of 1\n    // we must calloc, because memset will have no effect when called with size 0\n    *tree_indexes = tsk_calloc(num_positions, sizeof(*tree_indexes));\n    if (tree_indexes == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    tsk_memset(*tree_indexes, TSK_NULL, num_positions * sizeof(**tree_indexes));\n    for (i = 0; i < num_positions; i++) {\n        while (ts->breakpoints[tree_index + 1] <= positions[i]) {\n            tree_index++;\n        }\n        (*tree_indexes)[i] = tree_index;\n    }\n    tsk_bug_assert(tree_index <= (tsk_id_t) (num_trees - 1));\n\nout:\n    return ret;\n}\n\nstatic int\nget_index_counts(\n    const tsk_id_t *indexes, tsk_size_t num_indexes, tsk_size_t **out_counts)\n{\n    int ret = 0;\n    tsk_id_t index = indexes[0];\n    tsk_size_t count, i;\n    tsk_size_t *counts = tsk_calloc(\n        (tsk_size_t) (indexes[num_indexes ? num_indexes - 1 : 0] - indexes[0] + 1),\n        sizeof(*counts));\n    if (counts == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    count = 1;\n    for (i = 1; i < num_indexes; i++) {\n        if (indexes[i] == indexes[i - 1]) {\n            count++;\n        } else {\n            counts[index - indexes[0]] = count;\n            count = 1;\n            index = indexes[i];\n        }\n    }\n    counts[index - indexes[0]] = count;\n    *out_counts = counts;\nout:\n    return ret;\n}\n\ntypedef struct {\n    tsk_tree_t tree;\n    tsk_bitset_t *node_samples;\n    tsk_id_t *parent;\n    tsk_id_t *edges_out;\n    tsk_id_t *edges_in;\n    double *branch_len;\n    tsk_size_t n_edges_out;\n    tsk_size_t n_edges_in;\n} iter_state;\n\nstatic int\niter_state_init(iter_state *self, const tsk_treeseq_t *ts, tsk_size_t state_dim)\n{\n    int ret = 0;\n    const tsk_size_t num_nodes = ts->tables->nodes.num_rows;\n\n    ret = tsk_tree_init(&self->tree, ts, TSK_NO_SAMPLE_COUNTS);\n    if (ret != 
0) {\n        goto out;\n    }\n    self->node_samples = tsk_calloc(1, sizeof(*self->node_samples));\n    if (self->node_samples == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    ret = tsk_bitset_init(self->node_samples, ts->num_samples, state_dim * num_nodes);\n    if (ret != 0) {\n        goto out;\n    }\n    self->parent = tsk_malloc(num_nodes * sizeof(*self->parent));\n    self->edges_out = tsk_malloc(num_nodes * sizeof(*self->edges_out));\n    self->edges_in = tsk_malloc(num_nodes * sizeof(*self->edges_in));\n    self->branch_len = tsk_calloc(num_nodes, sizeof(*self->branch_len));\n    if (self->parent == NULL || self->edges_out == NULL || self->edges_in == NULL\n        || self->branch_len == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\nout:\n    return ret;\n}\n\nstatic int\nget_node_samples(const tsk_treeseq_t *ts, tsk_size_t state_dim,\n    const tsk_bitset_t *sample_sets, tsk_bitset_t *node_samples)\n{\n    int ret = 0;\n    tsk_size_t n, k;\n    tsk_size_t num_nodes = ts->tables->nodes.num_rows;\n    tsk_bitset_val_t sample;\n    const tsk_id_t *restrict sample_index_map = ts->sample_index_map;\n    const tsk_flags_t *restrict flags = ts->tables->nodes.flags;\n\n    ret = tsk_bitset_init(node_samples, ts->num_samples, num_nodes * state_dim);\n    if (ret != 0) {\n        goto out;\n    }\n    for (k = 0; k < state_dim; k++) {\n        for (n = 0; n < num_nodes; n++) {\n            if (flags[n] & TSK_NODE_IS_SAMPLE) {\n                sample = (tsk_bitset_val_t) sample_index_map[n];\n                if (tsk_bitset_contains(sample_sets, k, sample)) {\n                    tsk_bitset_set_bit(node_samples, (state_dim * n) + k, sample);\n                }\n            }\n        }\n    }\nout:\n    return ret;\n}\n\nstatic void\niter_state_clear(iter_state *self, tsk_size_t state_dim, tsk_size_t num_nodes,\n    const tsk_bitset_t *node_samples)\n{\n    self->n_edges_out = 0;\n 
   self->n_edges_in = 0;\n    tsk_tree_clear(&self->tree);\n    tsk_memset(self->parent, TSK_NULL, num_nodes * sizeof(*self->parent));\n    tsk_memset(self->edges_out, TSK_NULL, num_nodes * sizeof(*self->edges_out));\n    tsk_memset(self->edges_in, TSK_NULL, num_nodes * sizeof(*self->edges_in));\n    tsk_memset(self->branch_len, 0, num_nodes * sizeof(*self->branch_len));\n    tsk_memcpy(self->node_samples->data, node_samples->data,\n        node_samples->row_len * state_dim * num_nodes * sizeof(*node_samples->data));\n}\n\nstatic void\niter_state_free(iter_state *self)\n{\n    tsk_tree_free(&self->tree);\n    tsk_bitset_free(self->node_samples);\n    tsk_safe_free(self->node_samples);\n    tsk_safe_free(self->parent);\n    tsk_safe_free(self->edges_out);\n    tsk_safe_free(self->edges_in);\n    tsk_safe_free(self->branch_len);\n}\n\nstatic int\nadvance_collect_edges(iter_state *s, tsk_id_t index)\n{\n    int ret = 0;\n    tsk_id_t j, e;\n    tsk_size_t i;\n    double left, right;\n    tsk_tree_position_t pos;\n    tsk_tree_t *tree = &s->tree;\n    const double *restrict edge_left = tree->tree_sequence->tables->edges.left;\n    const double *restrict edge_right = tree->tree_sequence->tables->edges.right;\n\n    // Either we're seeking forward one step from some nonzero position in the tree, or\n    // from the beginning of the tree sequence.\n    if (tree->index != TSK_NULL || index == 0) {\n        ret = tsk_tree_next(tree);\n        if (ret < 0) {\n            goto out;\n        }\n        pos = tree->tree_pos;\n        i = 0;\n        for (j = pos.out.start; j != pos.out.stop; j++) {\n            s->edges_out[i] = pos.out.order[j];\n            i++;\n        }\n        s->n_edges_out = i;\n        i = 0;\n        for (j = pos.in.start; j != pos.in.stop; j++) {\n            s->edges_in[i] = pos.in.order[j];\n            i++;\n        }\n        s->n_edges_in = i;\n    } else {\n        // Seek from an arbitrary nonzero position from an uninitialized tree.\n        
tsk_bug_assert(tree->index == -1);\n        ret = tsk_tree_seek_index(tree, index, 0);\n        if (ret < 0) {\n            goto out;\n        }\n        pos = tree->tree_pos;\n        i = 0;\n        if (pos.direction == TSK_DIR_FORWARD) {\n            left = pos.interval.left;\n            for (j = pos.in.start; j != pos.in.stop; j++) {\n                e = pos.in.order[j];\n                if (edge_left[e] <= left && left < edge_right[e]) {\n                    s->edges_in[i] = pos.in.order[j];\n                    i++;\n                }\n            }\n        } else {\n            right = pos.interval.right;\n            for (j = pos.in.start; j != pos.in.stop; j--) {\n                e = pos.in.order[j];\n                if (edge_right[e] >= right && right > edge_left[e]) {\n                    s->edges_in[i] = pos.in.order[j];\n                    i++;\n                }\n            }\n        }\n        s->n_edges_out = 0;\n        s->n_edges_in = i;\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic int\ncompute_two_tree_branch_state_update(const tsk_treeseq_t *ts, tsk_id_t c,\n    const iter_state *A_state, const iter_state *B_state, tsk_size_t state_dim,\n    tsk_size_t result_dim, int sign, general_stat_func_t *f,\n    sample_count_stat_params_t *f_params, two_locus_work_t *restrict work,\n    double *result)\n{\n    int ret = 0;\n    double a_len, b_len;\n    double *restrict B_branch_len = B_state->branch_len;\n    double *weights_row;\n    tsk_size_t n, k, a_row, b_row;\n    const double *restrict A_branch_len = A_state->branch_len;\n    const tsk_bitset_t *restrict A_state_samples = A_state->node_samples;\n    const tsk_bitset_t *restrict B_state_samples = B_state->node_samples;\n    tsk_size_t num_nodes = ts->tables->nodes.num_rows;\n    double *weights = work->weights;\n    double *result_tmp = work->result_tmp;\n    tsk_bitset_t AB_samples = work->AB_samples;\n\n    b_len = B_branch_len[c] * sign;\n    if (b_len == 0) {\n        return 
ret;\n    }\n    for (n = 0; n < num_nodes; n++) {\n        a_len = A_branch_len[n];\n        if (a_len == 0) {\n            continue;\n        }\n        for (k = 0; k < state_dim; k++) {\n            a_row = (state_dim * n) + k;\n            b_row = (state_dim * (tsk_size_t) c) + k;\n            weights_row = GET_2D_ROW(weights, 3, k);\n            tsk_bitset_intersect(\n                A_state_samples, a_row, B_state_samples, b_row, &AB_samples);\n            weights_row[0] = (double) tsk_bitset_count(&AB_samples, 0);\n            weights_row[1]\n                = (double) tsk_bitset_count(A_state_samples, a_row) - weights_row[0];\n            weights_row[2]\n                = (double) tsk_bitset_count(B_state_samples, b_row) - weights_row[0];\n        }\n        ret = f(state_dim, weights, result_dim, result_tmp, f_params);\n        if (ret != 0) {\n            goto out;\n        }\n        for (k = 0; k < result_dim; k++) {\n            result[k] += result_tmp[k] * a_len * b_len;\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int\ncompute_two_tree_branch_stat(const tsk_treeseq_t *ts, const iter_state *l_state,\n    iter_state *r_state, general_stat_func_t *f, sample_count_stat_params_t *f_params,\n    tsk_size_t result_dim, tsk_size_t state_dim, double *result)\n{\n    int ret = 0;\n    tsk_id_t e, c, ec, p, *updated_nodes = NULL;\n    tsk_size_t j, k, n_updates;\n    const double *restrict time = ts->tables->nodes.time;\n    const tsk_id_t *restrict edges_child = ts->tables->edges.child;\n    const tsk_id_t *restrict edges_parent = ts->tables->edges.parent;\n    const tsk_size_t num_nodes = ts->tables->nodes.num_rows;\n    tsk_bitset_t updates, *r_samples = r_state->node_samples;\n    two_locus_work_t work;\n\n    tsk_memset(&work, 0, sizeof(work));\n    tsk_memset(&updates, 0, sizeof(updates));\n    // only two alleles are possible for branch stats\n    ret = two_locus_work_init(2, ts->num_samples, result_dim, state_dim, &work);\n    if (ret != 0) {\n 
       goto out;\n    }\n    ret = tsk_bitset_init(&updates, num_nodes, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    updated_nodes = tsk_calloc(num_nodes, sizeof(*updated_nodes));\n    if (updated_nodes == NULL) {\n        ret = TSK_ERR_NO_MEMORY;\n        goto out;\n    }\n    // Identify modified nodes both added and removed\n    for (j = 0; j < r_state->n_edges_out + r_state->n_edges_in; j++) {\n        e = j < r_state->n_edges_out ? r_state->edges_out[j]\n                                     : r_state->edges_in[j - r_state->n_edges_out];\n        p = edges_parent[e];\n        c = edges_child[e];\n        // Identify affected nodes above child\n        while (p != TSK_NULL) {\n            tsk_bitset_set_bit(&updates, 0, (tsk_bitset_val_t) c);\n            c = p;\n            p = r_state->parent[p];\n        }\n    }\n    // Subtract the whole contribution from the child node\n    tsk_bitset_get_items(&updates, 0, updated_nodes, &n_updates);\n    while (n_updates != 0) {\n        n_updates--;\n        c = updated_nodes[n_updates];\n        compute_two_tree_branch_state_update(ts, c, l_state, r_state, state_dim,\n            result_dim, -1, f, f_params, &work, result);\n    }\n    // Remove samples under nodes from removed edges to parent nodes\n    for (j = 0; j < r_state->n_edges_out; j++) {\n        e = r_state->edges_out[j];\n        p = edges_parent[e];\n        ec = edges_child[e]; // edge child\n        while (p != TSK_NULL) {\n            for (k = 0; k < state_dim; k++) {\n                tsk_bitset_subtract(r_samples, (state_dim * (tsk_size_t) p) + k,\n                    r_samples, (state_dim * (tsk_size_t) ec) + k);\n            }\n            p = r_state->parent[p];\n        }\n        r_state->branch_len[ec] = 0;\n        r_state->parent[ec] = TSK_NULL;\n    }\n    // Add samples under nodes from added edges\n    for (j = 0; j < r_state->n_edges_in; j++) {\n        e = r_state->edges_in[j];\n        p = edges_parent[e];\n        ec = c = 
edges_child[e];\n        r_state->branch_len[c] = time[p] - time[c];\n        r_state->parent[c] = p;\n        while (p != TSK_NULL) {\n            tsk_bitset_set_bit(&updates, 0, (tsk_bitset_val_t) c);\n            for (k = 0; k < state_dim; k++) {\n                tsk_bitset_union(r_samples, (state_dim * (tsk_size_t) p) + k, r_samples,\n                    (state_dim * (tsk_size_t) ec) + k);\n            }\n            c = p;\n            p = r_state->parent[p];\n        }\n    }\n    // Update all affected child nodes (fully subtracted, deferred from addition)\n    n_updates = 0;\n    tsk_bitset_get_items(&updates, 0, updated_nodes, &n_updates);\n    while (n_updates != 0) {\n        n_updates--;\n        c = updated_nodes[n_updates];\n        compute_two_tree_branch_state_update(ts, c, l_state, r_state, state_dim,\n            result_dim, +1, f, f_params, &work, result);\n    }\nout:\n    tsk_safe_free(updated_nodes);\n    two_locus_work_free(&work);\n    tsk_bitset_free(&updates);\n    return ret;\n}\n\nstatic int\ntsk_treeseq_two_branch_count_stat(const tsk_treeseq_t *self, tsk_size_t state_dim,\n    tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,\n    const tsk_id_t *sample_sets, tsk_size_t result_dim, general_stat_func_t *f,\n    sample_count_stat_params_t *f_params, norm_func_t *TSK_UNUSED(norm_f),\n    tsk_size_t n_rows, const double *row_positions, tsk_size_t n_cols,\n    const double *col_positions, tsk_flags_t TSK_UNUSED(options), double *result)\n{\n    int ret = 0;\n    int r, c;\n    tsk_id_t *row_indexes = NULL, *col_indexes = NULL;\n    tsk_size_t i, j, k, row, col, *row_repeats = NULL, *col_repeats = NULL;\n    tsk_bitset_t node_samples, sample_sets_bits;\n    iter_state l_state, r_state;\n    double *result_tmp = NULL, *result_row;\n    const tsk_size_t num_nodes = self->tables->nodes.num_rows;\n\n    tsk_memset(&sample_sets_bits, 0, sizeof(sample_sets_bits));\n    tsk_memset(&node_samples, 0, sizeof(node_samples));\n    
tsk_memset(&l_state, 0, sizeof(l_state));\n    tsk_memset(&r_state, 0, sizeof(r_state));\n    result_tmp = tsk_malloc(result_dim * sizeof(*result_tmp));\n    if (result_tmp == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    ret = iter_state_init(&l_state, self, state_dim);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = iter_state_init(&r_state, self, state_dim);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = sample_sets_to_bitset(\n        self, sample_set_sizes, sample_sets, num_sample_sets, &sample_sets_bits);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = positions_to_tree_indexes(self, row_positions, n_rows, &row_indexes);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = positions_to_tree_indexes(self, col_positions, n_cols, &col_indexes);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = get_index_counts(row_indexes, n_rows, &row_repeats);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = get_index_counts(col_indexes, n_cols, &col_repeats);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = get_node_samples(self, state_dim, &sample_sets_bits, &node_samples);\n    if (ret != 0) {\n        goto out;\n    }\n    iter_state_clear(&l_state, state_dim, num_nodes, &node_samples);\n    row = 0;\n    for (r = 0; r < (row_indexes[n_rows ? 
n_rows - 1U : 0] - row_indexes[0] + 1); r++) {\n        tsk_memset(result_tmp, 0, result_dim * sizeof(*result_tmp));\n        iter_state_clear(&r_state, state_dim, num_nodes, &node_samples);\n        ret = advance_collect_edges(&l_state, (tsk_id_t) r + row_indexes[0]);\n        if (ret != 0) {\n            goto out;\n        }\n        result_row = GET_2D_ROW(result, result_dim * n_cols, row);\n        ret = compute_two_tree_branch_stat(\n            self, &r_state, &l_state, f, f_params, result_dim, state_dim, result_tmp);\n        if (ret != 0) {\n            goto out;\n        }\n        col = 0;\n        for (c = 0; c < (col_indexes[n_cols ? n_cols - 1 : 0] - col_indexes[0] + 1);\n            c++) {\n            ret = advance_collect_edges(&r_state, (tsk_id_t) c + col_indexes[0]);\n            if (ret != 0) {\n                goto out;\n            }\n            ret = compute_two_tree_branch_stat(self, &l_state, &r_state, f, f_params,\n                result_dim, state_dim, result_tmp);\n            if (ret != 0) {\n                goto out;\n            }\n            for (i = 0; i < row_repeats[r]; i++) {\n                for (j = 0; j < col_repeats[c]; j++) {\n                    result_row = GET_2D_ROW(result, result_dim * n_cols, row + i);\n                    for (k = 0; k < result_dim; k++) {\n                        result_row[col + (j * result_dim) + k] = result_tmp[k];\n                    }\n                }\n            }\n            col += (col_repeats[c] * result_dim);\n        }\n        row += row_repeats[r];\n    }\nout:\n    tsk_safe_free(result_tmp);\n    tsk_safe_free(row_indexes);\n    tsk_safe_free(col_indexes);\n    tsk_safe_free(row_repeats);\n    tsk_safe_free(col_repeats);\n    iter_state_free(&l_state);\n    iter_state_free(&r_state);\n    tsk_bitset_free(&node_samples);\n    tsk_bitset_free(&sample_sets_bits);\n    return ret;\n}\n\nstatic int\ncheck_sample_set_dups(tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,\n 
   const tsk_id_t *sample_sets, const tsk_id_t *restrict sample_index_map,\n    tsk_size_t num_samples)\n{\n    int ret;\n    tsk_size_t j, k, l;\n    tsk_id_t u, sample_index;\n    tsk_bitset_t tmp;\n\n    tsk_memset(&tmp, 0, sizeof(tmp));\n    ret = tsk_bitset_init(&tmp, num_samples, 1);\n    if (ret != 0) {\n        goto out;\n    }\n    j = 0;\n    for (k = 0; k < num_sample_sets; k++) {\n        tsk_memset(tmp.data, 0, sizeof(*tmp.data) * tmp.row_len);\n        for (l = 0; l < sample_set_sizes[k]; l++) {\n            u = sample_sets[j];\n            sample_index = sample_index_map[u];\n            if (tsk_bitset_contains(&tmp, 0, (tsk_bitset_val_t) sample_index)) {\n                ret = tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE);\n                goto out;\n            }\n            tsk_bitset_set_bit(&tmp, 0, (tsk_bitset_val_t) sample_index);\n            j++;\n        }\n    }\nout:\n    tsk_bitset_free(&tmp);\n    return ret;\n}\n\nint\ntsk_treeseq_two_locus_count_stat(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t result_dim, const tsk_id_t *set_indexes, general_stat_func_t *f,\n    norm_func_t *norm_f, tsk_size_t out_rows, const tsk_id_t *row_sites,\n    const double *row_positions, tsk_size_t out_cols, const tsk_id_t *col_sites,\n    const double *col_positions, tsk_flags_t options, double *result)\n{\n    // TODO: generalize this function if we ever decide to do weighted two_locus stats.\n    //       We only implement count stats and therefore we don't handle weights.\n    int ret = 0;\n    bool stat_site = !!(options & TSK_STAT_SITE);\n    bool stat_branch = !!(options & TSK_STAT_BRANCH);\n    tsk_size_t state_dim = num_sample_sets;\n    sample_count_stat_params_t f_params = { .sample_sets = sample_sets,\n        .num_sample_sets = num_sample_sets,\n        .sample_set_sizes = sample_set_sizes,\n        .set_indexes = set_indexes };\n\n    // We do not 
support two-locus node stats\n    if (!!(options & TSK_STAT_NODE)) {\n        ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_STAT_MODE);\n        goto out;\n    }\n    // If no mode is specified, we default to site mode\n    if (!(stat_site || stat_branch)) {\n        stat_site = true;\n    }\n    // It's an error to specify more than one mode\n    if (stat_site + stat_branch > 1) {\n        ret = tsk_trace_error(TSK_ERR_MULTIPLE_STAT_MODES);\n        goto out;\n    }\n    ret = tsk_treeseq_check_sample_sets(\n        self, num_sample_sets, sample_set_sizes, sample_sets);\n    if (ret != 0) {\n        goto out;\n    }\n    if (result_dim < 1) {\n        ret = tsk_trace_error(TSK_ERR_BAD_RESULT_DIMS);\n        goto out;\n    }\n    if (stat_site) {\n        ret = check_sites(row_sites, out_rows, self->tables->sites.num_rows);\n        if (ret != 0) {\n            goto out;\n        }\n        ret = check_sites(col_sites, out_cols, self->tables->sites.num_rows);\n        if (ret != 0) {\n            goto out;\n        }\n        ret = check_sample_set_dups(num_sample_sets, sample_set_sizes, sample_sets,\n            self->sample_index_map, self->num_samples);\n        if (ret != 0) {\n            goto out;\n        }\n        ret = tsk_treeseq_two_site_count_stat(self, state_dim, num_sample_sets,\n            sample_set_sizes, sample_sets, result_dim, f, &f_params, norm_f, out_rows,\n            row_sites, out_cols, col_sites, options, result);\n    } else if (stat_branch) {\n        ret = check_positions(\n            row_positions, out_rows, tsk_treeseq_get_sequence_length(self));\n        if (ret != 0) {\n            goto out;\n        }\n        ret = check_positions(\n            col_positions, out_cols, tsk_treeseq_get_sequence_length(self));\n        if (ret != 0) {\n            goto out;\n        }\n        ret = tsk_treeseq_two_branch_count_stat(self, state_dim, num_sample_sets,\n            sample_set_sizes, sample_sets, result_dim, f, &f_params, norm_f, 
out_rows,\n            row_positions, out_cols, col_positions, options, result);\n    }\nout:\n    return ret;\n}\n\n/***********************************\n * Allele frequency spectrum\n ***********************************/\n\nstatic inline void\nfold(tsk_size_t *restrict coordinate, const tsk_size_t *restrict dims,\n    tsk_size_t num_dims)\n{\n    tsk_size_t k;\n    double n = 0;\n    int s = 0;\n\n    for (k = 0; k < num_dims; k++) {\n        tsk_bug_assert(coordinate[k] < dims[k]);\n        n += (double) dims[k] - 1;\n        s += (int) coordinate[k];\n    }\n    n /= 2;\n    k = num_dims;\n    while (s == n && k > 0) {\n        k--;\n        n -= ((double) (dims[k] - 1)) / 2;\n        s -= (int) coordinate[k];\n    }\n    if (s > n) {\n        for (k = 0; k < num_dims; k++) {\n            s = (int) (dims[k] - 1 - coordinate[k]);\n            tsk_bug_assert(s >= 0);\n            coordinate[k] = (tsk_size_t) s;\n        }\n    }\n}\n\nstatic int\ntsk_treeseq_update_site_afs(const tsk_treeseq_t *self, const tsk_site_t *site,\n    const double *total_counts, const double *counts, tsk_size_t num_sample_sets,\n    tsk_size_t window_index, tsk_size_t *result_dims, tsk_flags_t options,\n    double *result)\n{\n    int ret = 0;\n    tsk_size_t afs_size;\n    tsk_size_t k, allele, num_alleles, all_samples;\n    double increment, *afs, *allele_counts, *allele_count;\n    tsk_size_t *coordinate = tsk_malloc(num_sample_sets * sizeof(*coordinate));\n    bool polarised = !!(options & TSK_STAT_POLARISED);\n    const tsk_size_t K = num_sample_sets + 1;\n\n    if (coordinate == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    ret = get_allele_weights(\n        site, counts, K, total_counts, &num_alleles, &allele_counts);\n    if (ret != 0) {\n        goto out;\n    }\n\n    afs_size = result_dims[num_sample_sets];\n    afs = result + afs_size * window_index;\n\n    increment = polarised ? 1 : 0.5;\n    /* Sum over the allele weights. 
Skip the ancestral state if polarised. */\n    for (allele = polarised ? 1 : 0; allele < num_alleles; allele++) {\n        allele_count = GET_2D_ROW(allele_counts, K, allele);\n        all_samples = (tsk_size_t) allele_count[num_sample_sets];\n        if (all_samples > 0 && all_samples < self->num_samples) {\n            for (k = 0; k < num_sample_sets; k++) {\n                coordinate[k] = (tsk_size_t) allele_count[k];\n            }\n            if (!polarised) {\n                fold(coordinate, result_dims, num_sample_sets);\n            }\n            increment_nd_array_value(\n                afs, num_sample_sets, result_dims, coordinate, increment);\n        }\n    }\nout:\n    tsk_safe_free(coordinate);\n    tsk_safe_free(allele_counts);\n    return ret;\n}\n\nstatic int\ntsk_treeseq_site_allele_frequency_spectrum(const tsk_treeseq_t *self,\n    tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes, double *counts,\n    tsk_size_t num_windows, const double *windows, tsk_size_t *result_dims,\n    tsk_flags_t options, double *result)\n{\n    int ret = 0;\n    tsk_id_t u, v;\n    tsk_size_t tree_site, tree_index, window_index;\n    tsk_size_t num_nodes = self->tables->nodes.num_rows;\n    const tsk_id_t num_edges = (tsk_id_t) self->tables->edges.num_rows;\n    const tsk_id_t *restrict I = self->tables->indexes.edge_insertion_order;\n    const tsk_id_t *restrict O = self->tables->indexes.edge_removal_order;\n    const double *restrict edge_left = self->tables->edges.left;\n    const double *restrict edge_right = self->tables->edges.right;\n    const tsk_id_t *restrict edge_parent = self->tables->edges.parent;\n    const tsk_id_t *restrict edge_child = self->tables->edges.child;\n    const double sequence_length = self->tables->sequence_length;\n    tsk_id_t *restrict parent = tsk_malloc(num_nodes * sizeof(*parent));\n    tsk_site_t *site;\n    tsk_id_t tj, tk, h;\n    tsk_size_t j;\n    const tsk_size_t K = num_sample_sets + 1;\n    double t_left, 
t_right;\n    double *total_counts = tsk_malloc((1 + num_sample_sets) * sizeof(*total_counts));\n\n    if (parent == NULL || total_counts == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    tsk_memset(parent, 0xff, num_nodes * sizeof(*parent));\n\n    for (j = 0; j < num_sample_sets; j++) {\n        total_counts[j] = (double) sample_set_sizes[j];\n    }\n    total_counts[num_sample_sets] = (double) self->num_samples;\n\n    /* Iterate over the trees */\n    tj = 0;\n    tk = 0;\n    t_left = 0;\n    tree_index = 0;\n    window_index = 0;\n    while (tj < num_edges || t_left < sequence_length) {\n        while (tk < num_edges && edge_right[O[tk]] == t_left) {\n            h = O[tk];\n            tk++;\n            u = edge_child[h];\n            v = edge_parent[h];\n            while (v != TSK_NULL) {\n                update_state(counts, K, v, u, -1);\n                v = parent[v];\n            }\n            parent[u] = TSK_NULL;\n        }\n\n        while (tj < num_edges && edge_left[I[tj]] == t_left) {\n            h = I[tj];\n            tj++;\n            u = edge_child[h];\n            v = edge_parent[h];\n            parent[u] = v;\n            while (v != TSK_NULL) {\n                update_state(counts, K, v, u, +1);\n                v = parent[v];\n            }\n        }\n        t_right = sequence_length;\n        if (tj < num_edges) {\n            t_right = TSK_MIN(t_right, edge_left[I[tj]]);\n        }\n        if (tk < num_edges) {\n            t_right = TSK_MIN(t_right, edge_right[O[tk]]);\n        }\n\n        /* Update the sites */\n        for (tree_site = 0; tree_site < self->tree_sites_length[tree_index];\n            tree_site++) {\n            site = self->tree_sites[tree_index] + tree_site;\n            while (windows[window_index + 1] <= site->position) {\n                window_index++;\n                tsk_bug_assert(window_index < num_windows);\n            }\n            ret = 
tsk_treeseq_update_site_afs(self, site, total_counts, counts,\n                num_sample_sets, window_index, result_dims, options, result);\n            if (ret != 0) {\n                goto out;\n            }\n            tsk_bug_assert(windows[window_index] <= site->position);\n            tsk_bug_assert(site->position < windows[window_index + 1]);\n        }\n        tree_index++;\n        t_left = t_right;\n    }\nout:\n    /* Can't use msp_safe_free here because of restrict */\n    if (parent != NULL) {\n        free(parent);\n    }\n    tsk_safe_free(total_counts);\n    return ret;\n}\n\nstatic void\ntsk_treeseq_update_branch_afs(const tsk_treeseq_t *self, tsk_id_t u, double right,\n    double *restrict last_update, const double *restrict time, tsk_id_t *restrict parent,\n    tsk_size_t *restrict coordinate, const double *counts, tsk_size_t num_sample_sets,\n    tsk_size_t num_time_windows, const double *time_windows, tsk_size_t window_index,\n    const tsk_size_t *result_dims, tsk_flags_t options, double *result)\n{\n    tsk_size_t afs_size;\n    tsk_size_t k;\n    tsk_size_t time_window_index;\n    double *afs;\n    bool polarised = !!(options & TSK_STAT_POLARISED);\n    const double *count_row = GET_2D_ROW(counts, num_sample_sets + 1, u);\n    double x = 0;\n    double t_u, t_v;\n    double tw_branch_length = 0;\n    const tsk_size_t all_samples = (tsk_size_t) count_row[num_sample_sets];\n    if (parent[u] != TSK_NULL) {\n        t_u = time[u];\n        t_v = time[parent[u]];\n        if (0 < all_samples && all_samples < self->num_samples) {\n            time_window_index = 0;\n            afs_size = result_dims[num_sample_sets];\n            while (time_window_index < num_time_windows\n                   && time_windows[time_window_index] < t_v) {\n                afs = result\n                      + afs_size * (window_index * num_time_windows + time_window_index);\n                for (k = 0; k < num_sample_sets; k++) {\n                    
coordinate[k] = (tsk_size_t) count_row[k];\n                }\n                if (!polarised) {\n                    fold(coordinate, result_dims, num_sample_sets);\n                }\n                tw_branch_length\n                    = TSK_MAX(0.0, TSK_MIN(time_windows[time_window_index + 1], t_v)\n                                       - TSK_MAX(time_windows[time_window_index], t_u));\n                x = (right - last_update[u]) * tw_branch_length;\n                increment_nd_array_value(\n                    afs, num_sample_sets, result_dims, coordinate, x);\n                time_window_index++;\n            }\n        }\n    }\n    last_update[u] = right;\n}\n\nstatic int\ntsk_treeseq_branch_allele_frequency_spectrum(const tsk_treeseq_t *self,\n    tsk_size_t num_sample_sets, double *counts, tsk_size_t num_windows,\n    const double *windows, tsk_size_t num_time_windows, const double *time_windows,\n    const tsk_size_t *result_dims, tsk_flags_t options, double *result)\n{\n    int ret = 0;\n    tsk_id_t u, v;\n    tsk_size_t window_index;\n    tsk_size_t num_nodes = self->tables->nodes.num_rows;\n    const tsk_id_t num_edges = (tsk_id_t) self->tables->edges.num_rows;\n    const tsk_id_t *restrict I = self->tables->indexes.edge_insertion_order;\n    const tsk_id_t *restrict O = self->tables->indexes.edge_removal_order;\n    const double *restrict edge_left = self->tables->edges.left;\n    const double *restrict edge_right = self->tables->edges.right;\n    const tsk_id_t *restrict edge_parent = self->tables->edges.parent;\n    const tsk_id_t *restrict edge_child = self->tables->edges.child;\n    const double *restrict node_time = self->tables->nodes.time;\n    const double sequence_length = self->tables->sequence_length;\n    tsk_id_t *restrict parent = tsk_malloc(num_nodes * sizeof(*parent));\n    double *restrict last_update = tsk_calloc(num_nodes, sizeof(*last_update));\n    double *restrict branch_length = tsk_calloc(num_nodes, 
sizeof(*branch_length));\n    tsk_size_t *restrict coordinate = tsk_malloc(num_sample_sets * sizeof(*coordinate));\n    tsk_id_t tj, tk, h;\n    double t_left, t_right, w_right;\n    const tsk_size_t K = num_sample_sets + 1;\n\n    if (self->time_uncalibrated && !(options & TSK_STAT_ALLOW_TIME_UNCALIBRATED)) {\n        ret = tsk_trace_error(TSK_ERR_TIME_UNCALIBRATED);\n        goto out;\n    }\n\n    if (parent == NULL || last_update == NULL || coordinate == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    tsk_memset(parent, 0xff, num_nodes * sizeof(*parent));\n\n    /* Iterate over the trees */\n    tj = 0;\n    tk = 0;\n    t_left = 0;\n    window_index = 0;\n    while (tj < num_edges || t_left < sequence_length) {\n        tsk_bug_assert(window_index < num_windows);\n        while (tk < num_edges && edge_right[O[tk]] == t_left) {\n            h = O[tk];\n            tk++;\n            u = edge_child[h];\n            v = edge_parent[h];\n            tsk_treeseq_update_branch_afs(self, u, t_left, last_update, node_time,\n                parent, coordinate, counts, num_sample_sets, num_time_windows,\n                time_windows, window_index, result_dims, options, result);\n            while (v != TSK_NULL) {\n                tsk_treeseq_update_branch_afs(self, v, t_left, last_update, node_time,\n                    parent, coordinate, counts, num_sample_sets, num_time_windows,\n                    time_windows, window_index, result_dims, options, result);\n                update_state(counts, K, v, u, -1);\n                v = parent[v];\n            }\n            parent[u] = TSK_NULL;\n            branch_length[u] = 0;\n        }\n\n        while (tj < num_edges && edge_left[I[tj]] == t_left) {\n            h = I[tj];\n            tj++;\n            u = edge_child[h];\n            v = edge_parent[h];\n            parent[u] = v;\n            branch_length[u] = node_time[v] - node_time[u];\n            while (v != 
TSK_NULL) {\n                tsk_treeseq_update_branch_afs(self, v, t_left, last_update, node_time,\n                    parent, coordinate, counts, num_sample_sets, num_time_windows,\n                    time_windows, window_index, result_dims, options, result);\n                update_state(counts, K, v, u, +1);\n                v = parent[v];\n            }\n        }\n\n        t_right = sequence_length;\n        if (tj < num_edges) {\n            t_right = TSK_MIN(t_right, edge_left[I[tj]]);\n        }\n        if (tk < num_edges) {\n            t_right = TSK_MIN(t_right, edge_right[O[tk]]);\n        }\n\n        while (window_index < num_windows && windows[window_index + 1] <= t_right) {\n            w_right = windows[window_index + 1];\n            /* Flush the contributions of all nodes to the current window */\n            for (u = 0; u < (tsk_id_t) num_nodes; u++) {\n                tsk_bug_assert(last_update[u] < w_right);\n                tsk_treeseq_update_branch_afs(self, u, w_right, last_update, node_time,\n                    parent, coordinate, counts, num_sample_sets, num_time_windows,\n                    time_windows, window_index, result_dims, options, result);\n            }\n            window_index++;\n        }\n\n        t_left = t_right;\n    }\nout:\n    /* Can't use msp_safe_free here because of restrict */\n    if (parent != NULL) {\n        free(parent);\n    }\n    if (last_update != NULL) {\n        free(last_update);\n    }\n    if (branch_length != NULL) {\n        free(branch_length);\n    }\n    if (coordinate != NULL) {\n        free(coordinate);\n    }\n    return ret;\n}\n\nint\ntsk_treeseq_allele_frequency_spectrum(const tsk_treeseq_t *self,\n    tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,\n    const tsk_id_t *sample_sets, tsk_size_t num_windows, const double *windows,\n    tsk_size_t num_time_windows, const double *time_windows, tsk_flags_t options,\n    double *result)\n{\n    int ret = 0;\n    bool 
stat_site = !!(options & TSK_STAT_SITE);\n    bool stat_branch = !!(options & TSK_STAT_BRANCH);\n    bool stat_node = !!(options & TSK_STAT_NODE);\n    const double default_windows[] = { 0, self->tables->sequence_length };\n    const double default_time_windows[] = { 0, INFINITY };\n    const tsk_size_t num_nodes = self->tables->nodes.num_rows;\n    const tsk_size_t K = num_sample_sets + 1;\n    tsk_size_t j, k, l, afs_size;\n    tsk_id_t u;\n    tsk_size_t *result_dims = NULL;\n    /* These counts should really be ints, but we use doubles so that we can\n     * reuse code from the general_stats code paths. */\n    double *counts = NULL;\n    double *count_row;\n    if (stat_node) {\n        ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_STAT_MODE);\n        goto out;\n    }\n    /* If no mode is specified, we default to site mode */\n    if (!(stat_site || stat_branch)) {\n        stat_site = true;\n    }\n    /* It's an error to specify more than one mode */\n    if (stat_site + stat_branch > 1) {\n        ret = tsk_trace_error(TSK_ERR_MULTIPLE_STAT_MODES);\n        goto out;\n    }\n    if (windows == NULL) {\n        num_windows = 1;\n        windows = default_windows;\n    } else {\n        ret = tsk_treeseq_check_windows(\n            self, num_windows, windows, TSK_REQUIRE_FULL_SPAN);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    if (time_windows == NULL) {\n        num_time_windows = 1;\n        time_windows = default_time_windows;\n    } else {\n        ret = tsk_treeseq_check_time_windows(num_time_windows, time_windows);\n        if (ret != 0) {\n            goto out;\n        }\n        // Site mode does not support time windows\n        if (stat_site && !(time_windows[0] == 0.0 && isinf((float) time_windows[1]))) {\n            ret = TSK_ERR_UNSUPPORTED_STAT_MODE;\n            goto out;\n        }\n    }\n    ret = tsk_treeseq_check_sample_sets(\n        self, num_sample_sets, sample_set_sizes, sample_sets);\n    if (ret != 0) {\n    
    goto out;\n    }\n\n    /* the last element of result_dims stores the total size of the dimensions */\n    result_dims = tsk_malloc((num_sample_sets + 1) * sizeof(*result_dims));\n    counts = tsk_calloc(num_nodes * K, sizeof(*counts));\n    if (counts == NULL || result_dims == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    afs_size = 1;\n    j = 0;\n    for (k = 0; k < num_sample_sets; k++) {\n        result_dims[k] = 1 + sample_set_sizes[k];\n        afs_size *= result_dims[k];\n        for (l = 0; l < sample_set_sizes[k]; l++) {\n            u = sample_sets[j];\n            count_row = GET_2D_ROW(counts, K, u);\n            if (count_row[k] != 0) {\n                ret = tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE);\n                goto out;\n            }\n            count_row[k] = 1;\n            j++;\n        }\n    }\n    for (j = 0; j < self->num_samples; j++) {\n        u = self->samples[j];\n        count_row = GET_2D_ROW(counts, K, u);\n        count_row[num_sample_sets] = 1;\n    }\n    result_dims[num_sample_sets] = (tsk_size_t) afs_size;\n    tsk_memset(result, 0, num_windows * num_time_windows * afs_size * sizeof(*result));\n\n    if (stat_site) {\n        ret = tsk_treeseq_site_allele_frequency_spectrum(self, num_sample_sets,\n            sample_set_sizes, counts, num_windows, windows, result_dims, options,\n            result);\n    } else {\n        ret = tsk_treeseq_branch_allele_frequency_spectrum(self, num_sample_sets, counts,\n            num_windows, windows, num_time_windows, time_windows, result_dims, options,\n            result);\n    }\n\n    if (options & TSK_STAT_SPAN_NORMALISE) {\n        span_normalise(num_windows, windows, afs_size * num_time_windows, result);\n    }\nout:\n    tsk_safe_free(counts);\n    tsk_safe_free(result_dims);\n    return ret;\n}\n\n/***********************************\n * One way stats\n ***********************************/\n\nstatic 
int\ndiversity_summary_func(tsk_size_t state_dim, const double *state,\n    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)\n{\n    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;\n    const double *x = state;\n    double n;\n    tsk_size_t j;\n\n    for (j = 0; j < state_dim; j++) {\n        n = (double) args.sample_set_sizes[j];\n        result[j] = x[j] * (n - x[j]) / (n * (n - 1));\n    }\n    return 0;\n}\n\nint\ntsk_treeseq_diversity(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_windows, const double *windows, tsk_flags_t options, double *result)\n{\n    return tsk_treeseq_sample_count_stat(self, num_sample_sets, sample_set_sizes,\n        sample_sets, num_sample_sets, NULL, diversity_summary_func, num_windows, windows,\n        options, result);\n}\n\nstatic int\ntrait_covariance_summary_func(tsk_size_t state_dim, const double *state,\n    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)\n{\n    weight_stat_params_t args = *(weight_stat_params_t *) params;\n    const double n = (double) args.num_samples;\n    const double *x = state;\n    tsk_size_t j;\n\n    for (j = 0; j < state_dim; j++) {\n        result[j] = (x[j] * x[j]) / (2 * (n - 1) * (n - 1));\n    }\n    return 0;\n}\n\nint\ntsk_treeseq_trait_covariance(const tsk_treeseq_t *self, tsk_size_t num_weights,\n    const double *weights, tsk_size_t num_windows, const double *windows,\n    tsk_flags_t options, double *result)\n{\n    tsk_size_t num_samples = self->num_samples;\n    tsk_size_t j, k;\n    int ret;\n    const double *row;\n    double *new_row;\n    double *means = tsk_calloc(num_weights, sizeof(double));\n    double *new_weights = tsk_malloc((num_weights + 1) * num_samples * sizeof(double));\n    weight_stat_params_t args = { num_samples = self->num_samples };\n\n    if (new_weights == NULL || means == NULL) {\n        ret = 
tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    if (num_weights == 0) {\n        ret = tsk_trace_error(TSK_ERR_INSUFFICIENT_WEIGHTS);\n        goto out;\n    }\n\n    // center weights\n    for (j = 0; j < num_samples; j++) {\n        row = GET_2D_ROW(weights, num_weights, j);\n        for (k = 0; k < num_weights; k++) {\n            means[k] += row[k];\n        }\n    }\n    for (k = 0; k < num_weights; k++) {\n        means[k] /= (double) num_samples;\n    }\n    for (j = 0; j < num_samples; j++) {\n        row = GET_2D_ROW(weights, num_weights, j);\n        new_row = GET_2D_ROW(new_weights, num_weights, j);\n        for (k = 0; k < num_weights; k++) {\n            new_row[k] = row[k] - means[k];\n        }\n    }\n\n    ret = tsk_treeseq_general_stat(self, num_weights, new_weights, num_weights,\n        trait_covariance_summary_func, &args, num_windows, windows, options, result);\n\nout:\n    tsk_safe_free(means);\n    tsk_safe_free(new_weights);\n    return ret;\n}\n\nstatic int\ntrait_correlation_summary_func(tsk_size_t state_dim, const double *state,\n    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)\n{\n    weight_stat_params_t args = *(weight_stat_params_t *) params;\n    const double n = (double) args.num_samples;\n    const double *x = state;\n    double p;\n    tsk_size_t j;\n\n    p = x[state_dim - 1];\n    for (j = 0; j < state_dim - 1; j++) {\n        if ((p > 0.0) && (p < 1.0)) {\n            result[j] = (x[j] * x[j]) / (2 * (p * (1 - p)) * n * (n - 1));\n        } else {\n            result[j] = 0.0;\n        }\n    }\n    return 0;\n}\n\nint\ntsk_treeseq_trait_correlation(const tsk_treeseq_t *self, tsk_size_t num_weights,\n    const double *weights, tsk_size_t num_windows, const double *windows,\n    tsk_flags_t options, double *result)\n{\n    tsk_size_t num_samples = self->num_samples;\n    tsk_size_t j, k;\n    int ret;\n    double *means = tsk_calloc(num_weights, sizeof(double));\n    double *meansqs = 
tsk_calloc(num_weights, sizeof(double));\n    double *sds = tsk_calloc(num_weights, sizeof(double));\n    const double *row;\n    double *new_row;\n    double *new_weights = tsk_malloc((num_weights + 1) * num_samples * sizeof(double));\n    weight_stat_params_t args = { num_samples = self->num_samples };\n\n    if (new_weights == NULL || means == NULL || meansqs == NULL || sds == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    if (num_weights < 1) {\n        ret = tsk_trace_error(TSK_ERR_INSUFFICIENT_WEIGHTS);\n        goto out;\n    }\n\n    // center and scale weights\n    for (j = 0; j < num_samples; j++) {\n        row = GET_2D_ROW(weights, num_weights, j);\n        for (k = 0; k < num_weights; k++) {\n            means[k] += row[k];\n            meansqs[k] += row[k] * row[k];\n        }\n    }\n    for (k = 0; k < num_weights; k++) {\n        means[k] /= (double) num_samples;\n        meansqs[k] -= means[k] * means[k] * (double) num_samples;\n        meansqs[k] /= (double) (num_samples - 1);\n        sds[k] = sqrt(meansqs[k]);\n    }\n    for (j = 0; j < num_samples; j++) {\n        row = GET_2D_ROW(weights, num_weights, j);\n        new_row = GET_2D_ROW(new_weights, num_weights + 1, j);\n        for (k = 0; k < num_weights; k++) {\n            new_row[k] = (row[k] - means[k]) / sds[k];\n        }\n        // set final row to 1/n to compute frequency\n        new_row[num_weights] = 1.0 / (double) num_samples;\n    }\n\n    ret = tsk_treeseq_general_stat(self, num_weights + 1, new_weights, num_weights,\n        trait_correlation_summary_func, &args, num_windows, windows, options, result);\n\nout:\n    tsk_safe_free(means);\n    tsk_safe_free(meansqs);\n    tsk_safe_free(sds);\n    tsk_safe_free(new_weights);\n    return ret;\n}\n\nstatic int\ntrait_linear_model_summary_func(tsk_size_t state_dim, const double *state,\n    tsk_size_t result_dim, double *result, void *params)\n{\n    covariates_stat_params_t args = 
*(covariates_stat_params_t *) params;\n    const double num_samples = (double) args.num_samples;\n    const tsk_size_t k = args.num_covariates;\n    const double *V = args.V;\n    ;\n    const double *x = state;\n    const double *v;\n    double m, a, denom, z;\n    tsk_size_t i, j;\n    // x[0], ..., x[result_dim - 1] contains the traits, W\n    // x[result_dim], ..., x[state_dim - 2] contains the covariates, Z\n    // x[state_dim - 1] has the number of samples below the node\n\n    m = x[state_dim - 1];\n    for (i = 0; i < result_dim; i++) {\n        if ((m > 0.0) && (m < num_samples)) {\n            v = GET_2D_ROW(V, k, i);\n            a = x[i];\n            denom = m;\n            for (j = 0; j < k; j++) {\n                z = x[result_dim + j];\n                a -= z * v[j];\n                denom -= z * z;\n            }\n            // denom is the length of projection of the trait onto the subspace\n            // spanned by the covariates, so if it is zero then the system is\n            // singular and the solution is nonunique. 
This numerical tolerance\n            // could be smaller without hitting floating-point error, but being\n            // a tiny bit conservative about when the trait is almost in the\n            // span of the covariates is probably good.\n            if (denom < 1e-8) {\n                result[i] = 0.0;\n            } else {\n                result[i] = (a * a) / (2 * denom * denom);\n            }\n        } else {\n            result[i] = 0.0;\n        }\n    }\n    return 0;\n}\n\nint\ntsk_treeseq_trait_linear_model(const tsk_treeseq_t *self, tsk_size_t num_weights,\n    const double *weights, tsk_size_t num_covariates, const double *covariates,\n    tsk_size_t num_windows, const double *windows, tsk_flags_t options, double *result)\n{\n    tsk_size_t num_samples = self->num_samples;\n    tsk_size_t i, j, k;\n    int ret;\n    const double *w, *z;\n    double *v, *new_row;\n    double *V = tsk_calloc(num_covariates * num_weights, sizeof(double));\n    double *new_weights\n        = tsk_malloc((num_weights + num_covariates + 1) * num_samples * sizeof(double));\n\n    covariates_stat_params_t args\n        = { .num_samples = self->num_samples, .num_covariates = num_covariates, .V = V };\n\n    // We assume that the covariates have been *already standardised*,\n    // so that (a) 1 is in the span of the columns, and\n    // (b) their crossproduct is the identity.\n    // We could do this instead here with gsl linalg.\n\n    if (new_weights == NULL || V == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    if (num_weights < 1) {\n        ret = tsk_trace_error(TSK_ERR_INSUFFICIENT_WEIGHTS);\n        goto out;\n    }\n\n    // V = weights^T (matrix mult) covariates\n    for (k = 0; k < num_samples; k++) {\n        w = GET_2D_ROW(weights, num_weights, k);\n        z = GET_2D_ROW(covariates, num_covariates, k);\n        for (i = 0; i < num_weights; i++) {\n            v = GET_2D_ROW(V, num_covariates, i);\n            for (j = 
0; j < num_covariates; j++) {\n                v[j] += w[i] * z[j];\n            }\n        }\n    }\n\n    for (k = 0; k < num_samples; k++) {\n        w = GET_2D_ROW(weights, num_weights, k);\n        z = GET_2D_ROW(covariates, num_covariates, k);\n        new_row = GET_2D_ROW(new_weights, num_covariates + num_weights + 1, k);\n        for (i = 0; i < num_weights; i++) {\n            new_row[i] = w[i];\n        }\n        for (i = 0; i < num_covariates; i++) {\n            new_row[i + num_weights] = z[i];\n        }\n        // set final row to 1 to count alleles\n        new_row[num_weights + num_covariates] = 1.0;\n    }\n\n    ret = tsk_treeseq_general_stat(self, num_weights + num_covariates + 1, new_weights,\n        num_weights, trait_linear_model_summary_func, &args, num_windows, windows,\n        options, result);\n\nout:\n    tsk_safe_free(V);\n    tsk_safe_free(new_weights);\n    return ret;\n}\n\nstatic int\nsegregating_sites_summary_func(tsk_size_t state_dim, const double *state,\n    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)\n{\n    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;\n    const double *x = state;\n    double n;\n    tsk_size_t j;\n\n    // this works because sum_{i=1}^k (1-p_i) = k-1\n    for (j = 0; j < state_dim; j++) {\n        n = (double) args.sample_set_sizes[j];\n        result[j] = (x[j] > 0) * (1 - x[j] / n);\n    }\n    return 0;\n}\n\nint\ntsk_treeseq_segregating_sites(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_windows, const double *windows, tsk_flags_t options, double *result)\n{\n    return tsk_treeseq_sample_count_stat(self, num_sample_sets, sample_set_sizes,\n        sample_sets, num_sample_sets, NULL, segregating_sites_summary_func, num_windows,\n        windows, options, result);\n}\n\nstatic int\nY1_summary_func(tsk_size_t TSK_UNUSED(state_dim), const double *state,\n    
tsk_size_t result_dim, double *result, void *params)\n{\n    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;\n    const double *x = state;\n    double ni, denom, numer;\n    tsk_size_t i;\n\n    for (i = 0; i < result_dim; i++) {\n        ni = (double) args.sample_set_sizes[i];\n        denom = ni * (ni - 1) * (ni - 2);\n        numer = x[i] * (ni - x[i]) * (ni - x[i] - 1);\n        result[i] = numer / denom;\n    }\n    return 0;\n}\n\nint\ntsk_treeseq_Y1(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_windows, const double *windows, tsk_flags_t options, double *result)\n{\n    return tsk_treeseq_sample_count_stat(self, num_sample_sets, sample_set_sizes,\n        sample_sets, num_sample_sets, NULL, Y1_summary_func, num_windows, windows,\n        options, result);\n}\n\nstatic int\nD_summary_func(tsk_size_t state_dim, const double *state,\n    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)\n{\n    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;\n    double n;\n    const double *state_row;\n    tsk_size_t j;\n\n    for (j = 0; j < state_dim; j++) {\n        n = (double) args.sample_set_sizes[j];\n        state_row = GET_2D_ROW(state, 3, j);\n        double p_AB = state_row[0] / n;\n        double p_Ab = state_row[1] / n;\n        double p_aB = state_row[2] / n;\n\n        double p_A = p_AB + p_Ab;\n        double p_B = p_AB + p_aB;\n        result[j] = p_AB - (p_A * p_B);\n    }\n\n    return 0;\n}\n\nint\ntsk_treeseq_D(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,\n    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,\n    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,\n    double *result)\n{\n    options |= TSK_STAT_POLARISED; // TODO: allow user 
to pick?\n    return tsk_treeseq_two_locus_count_stat(self, num_sample_sets, sample_set_sizes,\n        sample_sets, num_sample_sets, NULL, D_summary_func, norm_total_weighted,\n        num_rows, row_sites, row_positions, num_cols, col_sites, col_positions, options,\n        result);\n}\n\nstatic int\nD2_summary_func(tsk_size_t state_dim, const double *state,\n    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)\n{\n    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;\n    double n;\n    const double *state_row;\n    tsk_size_t j;\n\n    for (j = 0; j < state_dim; j++) {\n        n = (double) args.sample_set_sizes[j];\n        state_row = GET_2D_ROW(state, 3, j);\n        double p_AB = state_row[0] / n;\n        double p_Ab = state_row[1] / n;\n        double p_aB = state_row[2] / n;\n\n        double p_A = p_AB + p_Ab;\n        double p_B = p_AB + p_aB;\n        result[j] = p_AB - (p_A * p_B);\n        result[j] *= result[j];\n    }\n\n    return 0;\n}\n\nint\ntsk_treeseq_D2(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,\n    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,\n    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,\n    double *result)\n{\n    return tsk_treeseq_two_locus_count_stat(self, num_sample_sets, sample_set_sizes,\n        sample_sets, num_sample_sets, NULL, D2_summary_func, norm_total_weighted,\n        num_rows, row_sites, row_positions, num_cols, col_sites, col_positions, options,\n        result);\n}\n\nstatic int\nr2_summary_func(tsk_size_t state_dim, const double *state,\n    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)\n{\n    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;\n    double n;\n    const double *state_row;\n    tsk_size_t j;\n\n    for (j = 0; j < state_dim; j++) {\n        n = (double) 
args.sample_set_sizes[j];\n        state_row = GET_2D_ROW(state, 3, j);\n        double p_AB = state_row[0] / n;\n        double p_Ab = state_row[1] / n;\n        double p_aB = state_row[2] / n;\n\n        double p_A = p_AB + p_Ab;\n        double p_B = p_AB + p_aB;\n\n        double D = p_AB - (p_A * p_B);\n        double denom = p_A * p_B * (1 - p_A) * (1 - p_B);\n\n        result[j] = (D * D) / denom;\n    }\n    return 0;\n}\n\nint\ntsk_treeseq_r2(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,\n    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,\n    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,\n    double *result)\n{\n    return tsk_treeseq_two_locus_count_stat(self, num_sample_sets, sample_set_sizes,\n        sample_sets, num_sample_sets, NULL, r2_summary_func, norm_hap_weighted, num_rows,\n        row_sites, row_positions, num_cols, col_sites, col_positions, options, result);\n}\n\nstatic int\nD_prime_summary_func(tsk_size_t state_dim, const double *state,\n    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)\n{\n    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;\n    double n;\n    const double *state_row;\n    tsk_size_t j;\n\n    for (j = 0; j < state_dim; j++) {\n        n = (double) args.sample_set_sizes[j];\n        state_row = GET_2D_ROW(state, 3, j);\n        double p_AB = state_row[0] / n;\n        double p_Ab = state_row[1] / n;\n        double p_aB = state_row[2] / n;\n\n        double p_A = p_AB + p_Ab;\n        double p_B = p_AB + p_aB;\n\n        double D = p_AB - (p_A * p_B);\n\n        if (D >= 0) {\n            result[j] = D / TSK_MIN(p_A * (1 - p_B), (1 - p_A) * p_B);\n        } else if (D < 0) {\n            result[j] = D / TSK_MIN(p_A * p_B, (1 - p_A) * (1 - p_B));\n        }\n    }\n    return 0;\n}\n\nint\ntsk_treeseq_D_prime(const 
tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,\n    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,\n    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,\n    double *result)\n{\n    options |= TSK_STAT_POLARISED; // TODO: allow user to pick?\n    return tsk_treeseq_two_locus_count_stat(self, num_sample_sets, sample_set_sizes,\n        sample_sets, num_sample_sets, NULL, D_prime_summary_func, norm_total_weighted,\n        num_rows, row_sites, row_positions, num_cols, col_sites, col_positions, options,\n        result);\n}\n\nstatic int\nr_summary_func(tsk_size_t state_dim, const double *state,\n    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)\n{\n    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;\n    double n;\n    const double *state_row;\n    tsk_size_t j;\n\n    for (j = 0; j < state_dim; j++) {\n        n = (double) args.sample_set_sizes[j];\n        state_row = GET_2D_ROW(state, 3, j);\n        double p_AB = state_row[0] / n;\n        double p_Ab = state_row[1] / n;\n        double p_aB = state_row[2] / n;\n\n        double p_A = p_AB + p_Ab;\n        double p_B = p_AB + p_aB;\n\n        double D = p_AB - (p_A * p_B);\n        double denom = p_A * p_B * (1 - p_A) * (1 - p_B);\n\n        result[j] = D / sqrt(denom);\n    }\n    return 0;\n}\n\nint\ntsk_treeseq_r(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,\n    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,\n    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,\n    double *result)\n{\n    options |= TSK_STAT_POLARISED; // TODO: allow user to pick?\n    return tsk_treeseq_two_locus_count_stat(self, num_sample_sets, sample_set_sizes,\n        sample_sets, 
num_sample_sets, NULL, r_summary_func, norm_total_weighted,\n        num_rows, row_sites, row_positions, num_cols, col_sites, col_positions, options,\n        result);\n}\n\nstatic int\nDz_summary_func(tsk_size_t state_dim, const double *state,\n    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)\n{\n    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;\n    double n;\n    const double *state_row;\n    tsk_size_t j;\n\n    for (j = 0; j < state_dim; j++) {\n        n = (double) args.sample_set_sizes[j];\n        state_row = GET_2D_ROW(state, 3, j);\n        double p_AB = state_row[0] / n;\n        double p_Ab = state_row[1] / n;\n        double p_aB = state_row[2] / n;\n\n        double p_A = p_AB + p_Ab;\n        double p_B = p_AB + p_aB;\n\n        double D = p_AB - (p_A * p_B);\n\n        result[j] = D * (1 - 2 * p_A) * (1 - 2 * p_B);\n    }\n    return 0;\n}\n\nint\ntsk_treeseq_Dz(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,\n    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,\n    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,\n    double *result)\n{\n    return tsk_treeseq_two_locus_count_stat(self, num_sample_sets, sample_set_sizes,\n        sample_sets, num_sample_sets, NULL, Dz_summary_func, norm_total_weighted,\n        num_rows, row_sites, row_positions, num_cols, col_sites, col_positions, options,\n        result);\n}\n\nstatic int\npi2_summary_func(tsk_size_t state_dim, const double *state,\n    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)\n{\n    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;\n    double n;\n    const double *state_row;\n    tsk_size_t j;\n\n    for (j = 0; j < state_dim; j++) {\n        n = (double) args.sample_set_sizes[j];\n        state_row = GET_2D_ROW(state, 3, j);\n        double p_AB 
= state_row[0] / n;\n        double p_Ab = state_row[1] / n;\n        double p_aB = state_row[2] / n;\n\n        double p_A = p_AB + p_Ab;\n        double p_B = p_AB + p_aB;\n        result[j] = p_A * (1 - p_A) * p_B * (1 - p_B);\n    }\n    return 0;\n}\n\nint\ntsk_treeseq_pi2(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,\n    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,\n    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,\n    double *result)\n{\n    return tsk_treeseq_two_locus_count_stat(self, num_sample_sets, sample_set_sizes,\n        sample_sets, num_sample_sets, NULL, pi2_summary_func, norm_total_weighted,\n        num_rows, row_sites, row_positions, num_cols, col_sites, col_positions, options,\n        result);\n}\n\nstatic int\nD2_unbiased_summary_func(tsk_size_t state_dim, const double *state,\n    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)\n{\n    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;\n    double n;\n    const double *state_row;\n    tsk_size_t j;\n\n    for (j = 0; j < state_dim; j++) {\n        n = (double) args.sample_set_sizes[j];\n        state_row = GET_2D_ROW(state, 3, j);\n        double w_AB = state_row[0];\n        double w_Ab = state_row[1];\n        double w_aB = state_row[2];\n        double w_ab = n - (w_AB + w_Ab + w_aB);\n        result[j] = (1 / (n * (n - 1) * (n - 2) * (n - 3)))\n                    * ((w_aB * w_aB * (w_Ab - 1) * w_Ab)\n                        + ((w_ab - 1) * w_ab * (w_AB - 1) * w_AB)\n                        - (w_aB * w_Ab * (w_Ab + (2 * w_ab * w_AB) - 1)));\n    }\n    return 0;\n}\n\nint\ntsk_treeseq_D2_unbiased(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,\n    const tsk_id_t *row_sites, const double 
*row_positions, tsk_size_t num_cols,\n    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,\n    double *result)\n{\n    return tsk_treeseq_two_locus_count_stat(self, num_sample_sets, sample_set_sizes,\n        sample_sets, num_sample_sets, NULL, D2_unbiased_summary_func,\n        norm_total_weighted, num_rows, row_sites, row_positions, num_cols, col_sites,\n        col_positions, options, result);\n}\n\nstatic int\nDz_unbiased_summary_func(tsk_size_t state_dim, const double *state,\n    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)\n{\n    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;\n    double n;\n    const double *state_row;\n    tsk_size_t j;\n\n    for (j = 0; j < state_dim; j++) {\n        n = (double) args.sample_set_sizes[j];\n        state_row = GET_2D_ROW(state, 3, j);\n        double w_AB = state_row[0];\n        double w_Ab = state_row[1];\n        double w_aB = state_row[2];\n        double w_ab = n - (w_AB + w_Ab + w_aB);\n        result[j] = (1 / (n * (n - 1) * (n - 2) * (n - 3)))\n                    * ((((w_AB * w_ab) - (w_Ab * w_aB)) * (w_aB + w_ab - w_AB - w_Ab)\n                           * (w_Ab + w_ab - w_AB - w_aB))\n                        - ((w_AB * w_ab) * (w_AB + w_ab - w_Ab - w_aB - 2))\n                        - ((w_Ab * w_aB) * (w_Ab + w_aB - w_AB - w_ab - 2)));\n    }\n    return 0;\n}\n\nint\ntsk_treeseq_Dz_unbiased(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,\n    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,\n    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,\n    double *result)\n{\n    return tsk_treeseq_two_locus_count_stat(self, num_sample_sets, sample_set_sizes,\n        sample_sets, num_sample_sets, NULL, Dz_unbiased_summary_func,\n        norm_total_weighted, num_rows, row_sites, 
row_positions, num_cols, col_sites,\n        col_positions, options, result);\n}\n\nstatic int\npi2_unbiased_summary_func(tsk_size_t state_dim, const double *state,\n    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)\n{\n    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;\n    double n;\n    const double *state_row;\n    tsk_size_t j;\n\n    for (j = 0; j < state_dim; j++) {\n        n = (double) args.sample_set_sizes[j];\n        state_row = GET_2D_ROW(state, 3, j);\n        double w_AB = state_row[0];\n        double w_Ab = state_row[1];\n        double w_aB = state_row[2];\n        double w_ab = n - (w_AB + w_Ab + w_aB);\n        result[j] = (1 / (n * (n - 1) * (n - 2) * (n - 3)))\n                    * (((w_AB + w_Ab) * (w_aB + w_ab) * (w_AB + w_aB) * (w_Ab + w_ab))\n                        - ((w_AB * w_ab) * (w_AB + w_ab + (3 * w_Ab) + (3 * w_aB) - 1))\n                        - ((w_Ab * w_aB) * (w_Ab + w_aB + (3 * w_AB) + (3 * w_ab) - 1)));\n    }\n    return 0;\n}\n\nint\ntsk_treeseq_pi2_unbiased(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,\n    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,\n    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,\n    double *result)\n{\n    return tsk_treeseq_two_locus_count_stat(self, num_sample_sets, sample_set_sizes,\n        sample_sets, num_sample_sets, NULL, pi2_unbiased_summary_func,\n        norm_total_weighted, num_rows, row_sites, row_positions, num_cols, col_sites,\n        col_positions, options, result);\n}\n\n/***********************************\n * Two way stats\n ***********************************/\n\nstatic int\ncheck_sample_stat_inputs(tsk_size_t num_sample_sets, tsk_size_t tuple_size,\n    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples)\n{\n    int ret = 0;\n\n    if (num_sample_sets < 1) {\n     
   ret = tsk_trace_error(TSK_ERR_INSUFFICIENT_SAMPLE_SETS);\n        goto out;\n    }\n    if (num_index_tuples < 1) {\n        ret = tsk_trace_error(TSK_ERR_INSUFFICIENT_INDEX_TUPLES);\n        goto out;\n    }\n    ret = check_set_indexes(\n        num_sample_sets, tuple_size * num_index_tuples, index_tuples);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nstatic int\ndivergence_summary_func(tsk_size_t TSK_UNUSED(state_dim), const double *state,\n    tsk_size_t result_dim, double *result, void *params)\n{\n    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;\n    const double *x = state;\n    double ni, nj, denom;\n    tsk_id_t i, j;\n    tsk_size_t k;\n\n    for (k = 0; k < result_dim; k++) {\n        i = args.set_indexes[2 * k];\n        j = args.set_indexes[2 * k + 1];\n        ni = (double) args.sample_set_sizes[i];\n        nj = (double) args.sample_set_sizes[j];\n        denom = ni * (nj - (i == j));\n        result[k] = x[i] * (nj - x[j]) / denom;\n    }\n    return 0;\n}\n\nint\ntsk_treeseq_divergence(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,\n    const double *windows, tsk_flags_t options, double *result)\n{\n    int ret = 0;\n    ret = check_sample_stat_inputs(num_sample_sets, 2, num_index_tuples, index_tuples);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_treeseq_sample_count_stat(self, num_sample_sets, sample_set_sizes,\n        sample_sets, num_index_tuples, index_tuples, divergence_summary_func,\n        num_windows, windows, options, result);\nout:\n    return ret;\n}\n\nstatic int\ngenetic_relatedness_summary_func(tsk_size_t state_dim, const double *state,\n    tsk_size_t result_dim, double *result, void *params)\n{\n    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;\n    const 
double *x = state;\n    tsk_id_t i, j;\n    tsk_size_t k;\n    double sumx = 0;\n    double meanx, ni, nj;\n\n    for (k = 0; k < state_dim; k++) {\n        sumx += x[k] / (double) args.sample_set_sizes[k];\n    }\n\n    meanx = sumx / (double) state_dim;\n    for (k = 0; k < result_dim; k++) {\n        i = args.set_indexes[2 * k];\n        j = args.set_indexes[2 * k + 1];\n        ni = (double) args.sample_set_sizes[i];\n        nj = (double) args.sample_set_sizes[j];\n        result[k] = (x[i] / ni - meanx) * (x[j] / nj - meanx);\n    }\n    return 0;\n}\n\nstatic int\ngenetic_relatedness_noncentred_summary_func(tsk_size_t TSK_UNUSED(state_dim),\n    const double *state, tsk_size_t result_dim, double *result, void *params)\n{\n    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;\n    const double *x = state;\n    tsk_id_t i, j;\n    tsk_size_t k;\n    double ni, nj;\n\n    for (k = 0; k < result_dim; k++) {\n        i = args.set_indexes[2 * k];\n        j = args.set_indexes[2 * k + 1];\n        ni = (double) args.sample_set_sizes[i];\n        nj = (double) args.sample_set_sizes[j];\n        result[k] = x[i] * x[j] / (ni * nj);\n    }\n    return 0;\n}\n\nint\ntsk_treeseq_genetic_relatedness(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,\n    const double *windows, tsk_flags_t options, double *result)\n{\n    int ret = 0;\n    ret = check_sample_stat_inputs(num_sample_sets, 2, num_index_tuples, index_tuples);\n    if (ret != 0) {\n        goto out;\n    }\n    if (!(options & TSK_STAT_NONCENTRED)) {\n        ret = tsk_treeseq_sample_count_stat(self, num_sample_sets, sample_set_sizes,\n            sample_sets, num_index_tuples, index_tuples,\n            genetic_relatedness_summary_func, num_windows, windows, options, result);\n    } else {\n        ret = 
tsk_treeseq_sample_count_stat(self, num_sample_sets, sample_set_sizes,\n            sample_sets, num_index_tuples, index_tuples,\n            genetic_relatedness_noncentred_summary_func, num_windows, windows, options,\n            result);\n    }\nout:\n    return ret;\n}\n\nstatic int\ngenetic_relatedness_weighted_summary_func(tsk_size_t state_dim, const double *state,\n    tsk_size_t result_dim, double *result, void *params)\n{\n    indexed_weight_stat_params_t args = *(indexed_weight_stat_params_t *) params;\n    const double *x = state;\n    tsk_id_t i, j;\n    tsk_size_t k;\n    double pn, ni, nj;\n\n    pn = state[state_dim - 1];\n    for (k = 0; k < result_dim; k++) {\n        i = args.index_tuples[2 * k];\n        j = args.index_tuples[2 * k + 1];\n        ni = args.total_weights[i];\n        nj = args.total_weights[j];\n        result[k] = (x[i] - ni * pn) * (x[j] - nj * pn);\n    }\n    return 0;\n}\n\nstatic int\ngenetic_relatedness_weighted_noncentred_summary_func(tsk_size_t TSK_UNUSED(state_dim),\n    const double *state, tsk_size_t result_dim, double *result, void *params)\n{\n    indexed_weight_stat_params_t args = *(indexed_weight_stat_params_t *) params;\n    const double *x = state;\n    tsk_id_t i, j;\n    tsk_size_t k;\n\n    for (k = 0; k < result_dim; k++) {\n        i = args.index_tuples[2 * k];\n        j = args.index_tuples[2 * k + 1];\n        result[k] = x[i] * x[j];\n    }\n    return 0;\n}\n\nint\ntsk_treeseq_genetic_relatedness_weighted(const tsk_treeseq_t *self,\n    tsk_size_t num_weights, const double *weights, tsk_size_t num_index_tuples,\n    const tsk_id_t *index_tuples, tsk_size_t num_windows, const double *windows,\n    double *result, tsk_flags_t options)\n{\n    int ret = 0;\n    tsk_size_t num_samples = self->num_samples;\n    size_t j, k;\n    indexed_weight_stat_params_t args;\n    const double *row;\n    double *new_row;\n    double *total_weights = tsk_calloc((num_weights + 1), sizeof(*total_weights));\n    double 
*new_weights\n        = tsk_malloc((num_weights + 1) * num_samples * sizeof(*new_weights));\n\n    if (total_weights == NULL || new_weights == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    if (num_weights == 0) {\n        ret = tsk_trace_error(TSK_ERR_INSUFFICIENT_WEIGHTS);\n        goto out;\n    }\n\n    // Add a column of ones to W\n    for (j = 0; j < num_samples; j++) {\n        row = GET_2D_ROW(weights, num_weights, j);\n        new_row = GET_2D_ROW(new_weights, num_weights + 1, j);\n        for (k = 0; k < num_weights; k++) {\n            new_row[k] = row[k];\n            total_weights[k] += row[k];\n        }\n        new_row[num_weights] = 1.0 / (double) num_samples;\n    }\n    total_weights[num_weights] = 1.0;\n\n    args.total_weights = total_weights;\n    args.index_tuples = index_tuples;\n    if (!(options & TSK_STAT_NONCENTRED)) {\n        ret = tsk_treeseq_general_stat(self, num_weights + 1, new_weights,\n            num_index_tuples, genetic_relatedness_weighted_summary_func, &args,\n            num_windows, windows, options, result);\n        if (ret != 0) {\n            goto out;\n        }\n    } else {\n        ret = tsk_treeseq_general_stat(self, num_weights + 1, new_weights,\n            num_index_tuples, genetic_relatedness_weighted_noncentred_summary_func,\n            &args, num_windows, windows, options, result);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n\nout:\n    tsk_safe_free(total_weights);\n    tsk_safe_free(new_weights);\n    return ret;\n}\n\nstatic int\nY2_summary_func(tsk_size_t TSK_UNUSED(state_dim), const double *state,\n    tsk_size_t result_dim, double *result, void *params)\n{\n    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;\n    const double *x = state;\n    double ni, nj, denom;\n    tsk_id_t i, j;\n    tsk_size_t k;\n\n    for (k = 0; k < result_dim; k++) {\n        i = args.set_indexes[2 * k];\n        j = 
args.set_indexes[2 * k + 1];\n        ni = (double) args.sample_set_sizes[i];\n        nj = (double) args.sample_set_sizes[j];\n        denom = ni * nj * (nj - 1);\n        result[k] = x[i] * (nj - x[j]) * (nj - x[j] - 1) / denom;\n    }\n    return 0;\n}\n\nint\ntsk_treeseq_Y2(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,\n    const double *windows, tsk_flags_t options, double *result)\n{\n    int ret = 0;\n    ret = check_sample_stat_inputs(num_sample_sets, 2, num_index_tuples, index_tuples);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_treeseq_sample_count_stat(self, num_sample_sets, sample_set_sizes,\n        sample_sets, num_index_tuples, index_tuples, Y2_summary_func, num_windows,\n        windows, options, result);\nout:\n    return ret;\n}\n\nstatic int\nf2_summary_func(tsk_size_t TSK_UNUSED(state_dim), const double *state,\n    tsk_size_t result_dim, double *result, void *params)\n{\n    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;\n    const double *x = state;\n    double ni, nj, denom, numer;\n    tsk_id_t i, j;\n    tsk_size_t k;\n\n    for (k = 0; k < result_dim; k++) {\n        i = args.set_indexes[2 * k];\n        j = args.set_indexes[2 * k + 1];\n        ni = (double) args.sample_set_sizes[i];\n        nj = (double) args.sample_set_sizes[j];\n        denom = ni * (ni - 1) * nj * (nj - 1);\n        numer = x[i] * (x[i] - 1) * (nj - x[j]) * (nj - x[j] - 1)\n                - x[i] * (ni - x[i]) * (nj - x[j]) * x[j];\n        result[k] = numer / denom;\n    }\n    return 0;\n}\n\nint\ntsk_treeseq_f2(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,\n    const double *windows, 
tsk_flags_t options, double *result)\n{\n    int ret = 0;\n    ret = check_sample_stat_inputs(num_sample_sets, 2, num_index_tuples, index_tuples);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_treeseq_sample_count_stat(self, num_sample_sets, sample_set_sizes,\n        sample_sets, num_index_tuples, index_tuples, f2_summary_func, num_windows,\n        windows, options, result);\nout:\n    return ret;\n}\n\nstatic int\nD2_ij_summary_func(tsk_size_t TSK_UNUSED(state_dim), const double *state,\n    tsk_size_t result_dim, double *result, void *params)\n{\n    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;\n    const double *state_row;\n    double n;\n    tsk_size_t k;\n    tsk_id_t i, j;\n    double p_A, p_B, p_AB, p_Ab, p_aB, D_i, D_j;\n\n    for (k = 0; k < result_dim; k++) {\n        i = args.set_indexes[2 * k];\n        j = args.set_indexes[2 * k + 1];\n\n        n = (double) args.sample_set_sizes[i];\n        state_row = GET_2D_ROW(state, 3, i);\n        p_AB = state_row[0] / n;\n        p_Ab = state_row[1] / n;\n        p_aB = state_row[2] / n;\n        p_A = p_AB + p_Ab;\n        p_B = p_AB + p_aB;\n        D_i = p_AB - (p_A * p_B);\n\n        n = (double) args.sample_set_sizes[j];\n        state_row = GET_2D_ROW(state, 3, j);\n        p_AB = state_row[0] / n;\n        p_Ab = state_row[1] / n;\n        p_aB = state_row[2] / n;\n        p_A = p_AB + p_Ab;\n        p_B = p_AB + p_aB;\n        D_j = p_AB - (p_A * p_B);\n\n        result[k] = D_i * D_j;\n    }\n\n    return 0;\n}\n\nint\ntsk_treeseq_D2_ij(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_rows,\n    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,\n    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,\n    double *result)\n{\n    int ret = 0;\n    ret = 
check_sample_stat_inputs(num_sample_sets, 2, num_index_tuples, index_tuples);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_treeseq_two_locus_count_stat(self, num_sample_sets, sample_set_sizes,\n        sample_sets, num_index_tuples, index_tuples, D2_ij_summary_func,\n        norm_total_weighted, num_rows, row_sites, row_positions, num_cols, col_sites,\n        col_positions, options, result);\nout:\n    return ret;\n}\n\nstatic int\nD2_ij_unbiased_summary_func(tsk_size_t TSK_UNUSED(state_dim), const double *state,\n    tsk_size_t result_dim, double *result, void *params)\n{\n    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;\n    const double *state_row;\n    tsk_size_t k;\n    tsk_id_t i, j;\n    double n_i, n_j;\n    double w_AB_i, w_Ab_i, w_aB_i, w_ab_i;\n    double w_AB_j, w_Ab_j, w_aB_j, w_ab_j;\n\n    for (k = 0; k < result_dim; k++) {\n        i = args.set_indexes[2 * k];\n        j = args.set_indexes[2 * k + 1];\n        if (i == j) {\n            // We require disjoint sample sets because we test equality here\n            n_i = (double) args.sample_set_sizes[i];\n            state_row = GET_2D_ROW(state, 3, i);\n            w_AB_i = state_row[0];\n            w_Ab_i = state_row[1];\n            w_aB_i = state_row[2];\n            w_ab_i = n_i - (w_AB_i + w_Ab_i + w_aB_i);\n            result[k] = (w_AB_i * (w_AB_i - 1) * w_ab_i * (w_ab_i - 1)\n                            + w_Ab_i * (w_Ab_i - 1) * w_aB_i * (w_aB_i - 1)\n                            - 2 * w_AB_i * w_Ab_i * w_aB_i * w_ab_i)\n                        / n_i / (n_i - 1) / (n_i - 2) / (n_i - 3);\n        }\n\n        else {\n            n_i = (double) args.sample_set_sizes[i];\n            state_row = GET_2D_ROW(state, 3, i);\n            w_AB_i = state_row[0];\n            w_Ab_i = state_row[1];\n            w_aB_i = state_row[2];\n            w_ab_i = n_i - (w_AB_i + w_Ab_i + w_aB_i);\n\n            n_j = (double) args.sample_set_sizes[j];\n            
state_row = GET_2D_ROW(state, 3, j);\n            w_AB_j = state_row[0];\n            w_Ab_j = state_row[1];\n            w_aB_j = state_row[2];\n            w_ab_j = n_j - (w_AB_j + w_Ab_j + w_aB_j);\n\n            result[k] = (w_Ab_i * w_aB_i - w_AB_i * w_ab_i)\n                        * (w_Ab_j * w_aB_j - w_AB_j * w_ab_j) / n_i / (n_i - 1) / n_j\n                        / (n_j - 1);\n        }\n    }\n\n    return 0;\n}\n\nint\ntsk_treeseq_D2_ij_unbiased(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_rows,\n    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,\n    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,\n    double *result)\n{\n    int ret = 0;\n    ret = check_sample_stat_inputs(num_sample_sets, 2, num_index_tuples, index_tuples);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_treeseq_two_locus_count_stat(self, num_sample_sets, sample_set_sizes,\n        sample_sets, num_index_tuples, index_tuples, D2_ij_unbiased_summary_func,\n        norm_total_weighted, num_rows, row_sites, row_positions, num_cols, col_sites,\n        col_positions, options, result);\nout:\n    return ret;\n}\n\nstatic int\nr2_ij_summary_func(tsk_size_t TSK_UNUSED(state_dim), const double *state,\n    tsk_size_t result_dim, double *result, void *params)\n{\n    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;\n    const double *state_row;\n    tsk_size_t k;\n    tsk_id_t i, j;\n    double n, pAB, pAb, paB, pA, pB, D_i, D_j, denom_i, denom_j;\n\n    for (k = 0; k < result_dim; k++) {\n        i = args.set_indexes[2 * k];\n        j = args.set_indexes[2 * k + 1];\n\n        n = (double) args.sample_set_sizes[i];\n        state_row = GET_2D_ROW(state, 3, i);\n        pAB = state_row[0] / n;\n        pAb = state_row[1] / n;\n        paB = 
state_row[2] / n;\n        pA = pAB + pAb;\n        pB = pAB + paB;\n        D_i = pAB - (pA * pB);\n        denom_i = sqrt(pA * (1 - pA) * pB * (1 - pB));\n\n        n = (double) args.sample_set_sizes[j];\n        state_row = GET_2D_ROW(state, 3, j);\n        pAB = state_row[0] / n;\n        pAb = state_row[1] / n;\n        paB = state_row[2] / n;\n        pA = pAB + pAb;\n        pB = pAB + paB;\n        D_j = pAB - (pA * pB);\n        denom_j = sqrt(pA * (1 - pA) * pB * (1 - pB));\n\n        result[k] = (D_i * D_j) / (denom_i * denom_j);\n    }\n    return 0;\n}\n\nint\ntsk_treeseq_r2_ij(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_rows,\n    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,\n    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,\n    double *result)\n{\n    int ret = 0;\n    ret = check_sample_stat_inputs(num_sample_sets, 2, num_index_tuples, index_tuples);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_treeseq_two_locus_count_stat(self, num_sample_sets, sample_set_sizes,\n        sample_sets, num_index_tuples, index_tuples, r2_ij_summary_func,\n        norm_hap_weighted_ij, num_rows, row_sites, row_positions, num_cols, col_sites,\n        col_positions, options, result);\nout:\n    return ret;\n}\n\n/***********************************\n * Three way stats\n ***********************************/\n\nstatic int\nY3_summary_func(tsk_size_t TSK_UNUSED(state_dim), const double *state,\n    tsk_size_t result_dim, double *result, void *params)\n{\n    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;\n    const double *x = state;\n    double ni, nj, nk, denom, numer;\n    tsk_id_t i, j, k;\n    tsk_size_t tuple_index;\n\n    for (tuple_index = 0; tuple_index < result_dim; tuple_index++) {\n        i = 
args.set_indexes[3 * tuple_index];\n        j = args.set_indexes[3 * tuple_index + 1];\n        k = args.set_indexes[3 * tuple_index + 2];\n        ni = (double) args.sample_set_sizes[i];\n        nj = (double) args.sample_set_sizes[j];\n        nk = (double) args.sample_set_sizes[k];\n        denom = ni * nj * nk;\n        numer = x[i] * (nj - x[j]) * (nk - x[k]);\n        result[tuple_index] = numer / denom;\n    }\n    return 0;\n}\n\nint\ntsk_treeseq_Y3(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,\n    const double *windows, tsk_flags_t options, double *result)\n{\n    int ret = 0;\n    ret = check_sample_stat_inputs(num_sample_sets, 3, num_index_tuples, index_tuples);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_treeseq_sample_count_stat(self, num_sample_sets, sample_set_sizes,\n        sample_sets, num_index_tuples, index_tuples, Y3_summary_func, num_windows,\n        windows, options, result);\nout:\n    return ret;\n}\n\nstatic int\nf3_summary_func(tsk_size_t TSK_UNUSED(state_dim), const double *state,\n    tsk_size_t result_dim, double *result, void *params)\n{\n    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;\n    const double *x = state;\n    double ni, nj, nk, denom, numer;\n    tsk_id_t i, j, k;\n    tsk_size_t tuple_index;\n\n    for (tuple_index = 0; tuple_index < result_dim; tuple_index++) {\n        i = args.set_indexes[3 * tuple_index];\n        j = args.set_indexes[3 * tuple_index + 1];\n        k = args.set_indexes[3 * tuple_index + 2];\n        ni = (double) args.sample_set_sizes[i];\n        nj = (double) args.sample_set_sizes[j];\n        nk = (double) args.sample_set_sizes[k];\n        denom = ni * (ni - 1) * nj * nk;\n        numer = x[i] * (x[i] - 1) * (nj - x[j]) * (nk - x[k])\n                - x[i] * (ni - x[i]) * (nj - x[j]) 
* x[k];\n        result[tuple_index] = numer / denom;\n    }\n    return 0;\n}\n\nint\ntsk_treeseq_f3(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,\n    const double *windows, tsk_flags_t options, double *result)\n{\n    int ret = 0;\n    ret = check_sample_stat_inputs(num_sample_sets, 3, num_index_tuples, index_tuples);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_treeseq_sample_count_stat(self, num_sample_sets, sample_set_sizes,\n        sample_sets, num_index_tuples, index_tuples, f3_summary_func, num_windows,\n        windows, options, result);\nout:\n    return ret;\n}\n\n/***********************************\n * Four way stats\n ***********************************/\n\nstatic int\nf4_summary_func(tsk_size_t TSK_UNUSED(state_dim), const double *state,\n    tsk_size_t result_dim, double *result, void *params)\n{\n    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;\n    const double *x = state;\n    double ni, nj, nk, nl, denom, numer;\n    tsk_id_t i, j, k, l;\n    tsk_size_t tuple_index;\n\n    for (tuple_index = 0; tuple_index < result_dim; tuple_index++) {\n        i = args.set_indexes[4 * tuple_index];\n        j = args.set_indexes[4 * tuple_index + 1];\n        k = args.set_indexes[4 * tuple_index + 2];\n        l = args.set_indexes[4 * tuple_index + 3];\n        ni = (double) args.sample_set_sizes[i];\n        nj = (double) args.sample_set_sizes[j];\n        nk = (double) args.sample_set_sizes[k];\n        nl = (double) args.sample_set_sizes[l];\n        denom = ni * nj * nk * nl;\n        numer = x[i] * x[k] * (nj - x[j]) * (nl - x[l])\n                - x[i] * x[l] * (nj - x[j]) * (nk - x[k]);\n        result[tuple_index] = numer / denom;\n    }\n    return 0;\n}\n\nint\ntsk_treeseq_f4(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,\n    const double *windows, tsk_flags_t options, double *result)\n{\n    int ret = 0;\n    ret = check_sample_stat_inputs(num_sample_sets, 4, num_index_tuples, index_tuples);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_treeseq_sample_count_stat(self, num_sample_sets, sample_set_sizes,\n        sample_sets, num_index_tuples, index_tuples, f4_summary_func, num_windows,\n        windows, options, result);\nout:\n    return ret;\n}\n\n/* Error-raising getter functions */\n\nint TSK_WARN_UNUSED\ntsk_treeseq_get_node(const tsk_treeseq_t *self, tsk_id_t index, tsk_node_t *node)\n{\n    return tsk_node_table_get_row(&self->tables->nodes, index, node);\n}\n\nint TSK_WARN_UNUSED\ntsk_treeseq_get_edge(const tsk_treeseq_t *self, tsk_id_t index, tsk_edge_t *edge)\n{\n    return tsk_edge_table_get_row(&self->tables->edges, index, edge);\n}\n\nint TSK_WARN_UNUSED\ntsk_treeseq_get_migration(\n    const tsk_treeseq_t *self, tsk_id_t index, tsk_migration_t *migration)\n{\n    return tsk_migration_table_get_row(&self->tables->migrations, index, migration);\n}\n\nint TSK_WARN_UNUSED\ntsk_treeseq_get_mutation(\n    const tsk_treeseq_t *self, tsk_id_t index, tsk_mutation_t *mutation)\n{\n    int ret = 0;\n\n    ret = tsk_mutation_table_get_row(&self->tables->mutations, index, mutation);\n    if (ret != 0) {\n        goto out;\n    }\n    mutation->edge = self->site_mutations_mem[index].edge;\n    mutation->inherited_state = self->site_mutations_mem[index].inherited_state;\n    mutation->inherited_state_length\n        = self->site_mutations_mem[index].inherited_state_length;\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_treeseq_get_site(const tsk_treeseq_t *self, tsk_id_t index, tsk_site_t *site)\n{\n    int ret = 0;\n\n    ret = tsk_site_table_get_row(&self->tables->sites, index, site);\n    if (ret != 0) {\n  
      goto out;\n    }\n    site->mutations = self->site_mutations[index];\n    site->mutations_length = self->site_mutations_length[index];\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_treeseq_get_individual(\n    const tsk_treeseq_t *self, tsk_id_t index, tsk_individual_t *individual)\n{\n    int ret = 0;\n\n    ret = tsk_individual_table_get_row(&self->tables->individuals, index, individual);\n    if (ret != 0) {\n        goto out;\n    }\n    individual->nodes = self->individual_nodes[index];\n    individual->nodes_length = self->individual_nodes_length[index];\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_treeseq_get_population(\n    const tsk_treeseq_t *self, tsk_id_t index, tsk_population_t *population)\n{\n    return tsk_population_table_get_row(&self->tables->populations, index, population);\n}\n\nint TSK_WARN_UNUSED\ntsk_treeseq_get_provenance(\n    const tsk_treeseq_t *self, tsk_id_t index, tsk_provenance_t *provenance)\n{\n    return tsk_provenance_table_get_row(&self->tables->provenances, index, provenance);\n}\n\nint TSK_WARN_UNUSED\ntsk_treeseq_simplify(const tsk_treeseq_t *self, const tsk_id_t *samples,\n    tsk_size_t num_samples, tsk_flags_t options, tsk_treeseq_t *output,\n    tsk_id_t *node_map)\n{\n    int ret = 0;\n    tsk_table_collection_t *tables = tsk_malloc(sizeof(*tables));\n\n    if (tables == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    ret = tsk_treeseq_copy_tables(self, tables, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_table_collection_simplify(tables, samples, num_samples, options, node_map);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_treeseq_init(\n        output, tables, TSK_TS_INIT_BUILD_INDEXES | TSK_TAKE_OWNERSHIP);\n    /* Once tsk_treeseq_init has returned ownership of tables is transferred */\n    tables = NULL;\nout:\n    if (tables != NULL) {\n        tsk_table_collection_free(tables);\n        tsk_safe_free(tables);\n  
  }\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_treeseq_split_edges(const tsk_treeseq_t *self, double time, tsk_flags_t flags,\n    tsk_id_t population, const char *metadata, tsk_size_t metadata_length,\n    tsk_flags_t TSK_UNUSED(options), tsk_treeseq_t *output)\n{\n    int ret = 0;\n    tsk_table_collection_t *tables = tsk_malloc(sizeof(*tables));\n    const double *restrict node_time = self->tables->nodes.time;\n    const tsk_size_t num_edges = self->tables->edges.num_rows;\n    const tsk_size_t num_mutations = self->tables->mutations.num_rows;\n    tsk_id_t *split_edge = tsk_malloc(num_edges * sizeof(*split_edge));\n    tsk_id_t j, u, mapped_node, ret_id;\n    double mutation_time;\n    tsk_edge_t edge;\n    tsk_mutation_t mutation;\n    tsk_bookmark_t sort_start;\n\n    memset(output, 0, sizeof(*output));\n    if (split_edge == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    ret = tsk_treeseq_copy_tables(self, tables, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    if (tables->migrations.num_rows > 0) {\n        ret = tsk_trace_error(TSK_ERR_MIGRATIONS_NOT_SUPPORTED);\n        goto out;\n    }\n    /* We could catch this below in add_row, but it's simpler to guarantee\n     * that we always catch the error in corner cases where the values\n     * aren't used. 
*/\n    if (population < -1 || population >= (tsk_id_t) self->tables->populations.num_rows) {\n        ret = tsk_trace_error(TSK_ERR_POPULATION_OUT_OF_BOUNDS);\n        goto out;\n    }\n    if (!tsk_isfinite(time)) {\n        ret = tsk_trace_error(TSK_ERR_TIME_NONFINITE);\n        goto out;\n    }\n\n    tsk_edge_table_clear(&tables->edges);\n    tsk_memset(split_edge, TSK_NULL, num_edges * sizeof(*split_edge));\n\n    for (j = 0; j < (tsk_id_t) num_edges; j++) {\n        /* Would prefer to use tsk_edge_table_get_row_unsafe, but it's\n         * currently static to tables.c */\n        ret = tsk_edge_table_get_row(&self->tables->edges, j, &edge);\n        tsk_bug_assert(ret == 0);\n        if (node_time[edge.child] < time && time < node_time[edge.parent]) {\n            u = tsk_node_table_add_row(&tables->nodes, flags, time, population, TSK_NULL,\n                metadata, metadata_length);\n            if (u < 0) {\n                ret = (int) u;\n                goto out;\n            }\n            ret_id = tsk_edge_table_add_row(&tables->edges, edge.left, edge.right, u,\n                edge.child, edge.metadata, edge.metadata_length);\n            if (ret_id < 0) {\n                ret = (int) ret_id;\n                goto out;\n            }\n            edge.child = u;\n            split_edge[j] = u;\n        }\n        ret_id = tsk_edge_table_add_row(&tables->edges, edge.left, edge.right,\n            edge.parent, edge.child, edge.metadata, edge.metadata_length);\n        if (ret_id < 0) {\n            ret = (int) ret_id;\n            goto out;\n        }\n    }\n\n    for (j = 0; j < (tsk_id_t) num_mutations; j++) {\n        /* Note: we could speed this up a bit by accessing the local\n         * memory for mutations directly. 
*/\n        ret = tsk_treeseq_get_mutation(self, j, &mutation);\n        tsk_bug_assert(ret == 0);\n        mapped_node = TSK_NULL;\n        if (mutation.edge != TSK_NULL) {\n            mapped_node = split_edge[mutation.edge];\n        }\n        mutation_time = tsk_is_unknown_time(mutation.time) ? node_time[mutation.node]\n                                                           : mutation.time;\n        if (mapped_node != TSK_NULL && mutation_time >= time) {\n            /* Update the column in-place to save a bit of time. */\n            tables->mutations.node[j] = mapped_node;\n        }\n    }\n\n    /* Skip mutations and sites as they haven't been altered */\n    /* Note we can probably optimise the edge sort a bit here also by\n     * reasoning about when the first edge gets altered in the table.\n     */\n    memset(&sort_start, 0, sizeof(sort_start));\n    sort_start.sites = tables->sites.num_rows;\n    sort_start.mutations = tables->mutations.num_rows;\n    ret = tsk_table_collection_sort(tables, &sort_start, 0);\n    if (ret != 0) {\n        goto out;\n    }\n\n    ret = tsk_treeseq_init(\n        output, tables, TSK_TS_INIT_BUILD_INDEXES | TSK_TAKE_OWNERSHIP);\n    tables = NULL;\nout:\n    if (tables != NULL) {\n        tsk_table_collection_free(tables);\n        tsk_safe_free(tables);\n    }\n    tsk_safe_free(split_edge);\n    return ret;\n}\n\n/* ======================================================== *\n * tree_position\n * ======================================================== */\n\nstatic void\ntsk_tree_position_set_null(tsk_tree_position_t *self)\n{\n    self->index = -1;\n    self->interval.left = 0;\n    self->interval.right = 0;\n}\n\nint\ntsk_tree_position_init(tsk_tree_position_t *self, const tsk_treeseq_t *tree_sequence,\n    tsk_flags_t TSK_UNUSED(options))\n{\n    memset(self, 0, sizeof(*self));\n    self->tree_sequence = tree_sequence;\n    tsk_tree_position_set_null(self);\n    return 
0;\n}\n\nint\ntsk_tree_position_free(tsk_tree_position_t *TSK_UNUSED(self))\n{\n    return 0;\n}\n\nint\ntsk_tree_position_print_state(const tsk_tree_position_t *self, FILE *out)\n{\n    fprintf(out, \"Tree position state\\n\");\n    fprintf(out, \"index = %d\\n\", (int) self->index);\n    fprintf(out, \"interval = [%f,\\t%f)\\n\", self->interval.left, self->interval.right);\n    fprintf(\n        out, \"out   = start=%d\\tstop=%d\\n\", (int) self->out.start, (int) self->out.stop);\n    fprintf(\n        out, \"in    = start=%d\\tstop=%d\\n\", (int) self->in.start, (int) self->in.stop);\n    return 0;\n}\n\nbool\ntsk_tree_position_next(tsk_tree_position_t *self)\n{\n    const tsk_table_collection_t *tables = self->tree_sequence->tables;\n    const tsk_id_t M = (tsk_id_t) tables->edges.num_rows;\n    const tsk_id_t num_trees = (tsk_id_t) self->tree_sequence->num_trees;\n    const double *restrict left_coords = tables->edges.left;\n    const tsk_id_t *restrict left_order = tables->indexes.edge_insertion_order;\n    const double *restrict right_coords = tables->edges.right;\n    const tsk_id_t *restrict right_order = tables->indexes.edge_removal_order;\n    const double *restrict breakpoints = self->tree_sequence->breakpoints;\n    tsk_id_t j, left_current_index, right_current_index;\n    double left;\n\n    if (self->index == -1) {\n        self->interval.right = 0;\n        self->in.stop = 0;\n        self->out.stop = 0;\n        self->direction = TSK_DIR_FORWARD;\n    }\n\n    if (self->direction == TSK_DIR_FORWARD) {\n        left_current_index = self->in.stop;\n        right_current_index = self->out.stop;\n    } else {\n        left_current_index = self->out.stop + 1;\n        right_current_index = self->in.stop + 1;\n    }\n\n    left = self->interval.right;\n\n    j = right_current_index;\n    self->out.start = j;\n    while (j < M && right_coords[right_order[j]] == left) {\n        j++;\n    }\n    self->out.stop = j;\n    self->out.order = right_order;\n\n   
 j = left_current_index;\n    self->in.start = j;\n    while (j < M && left_coords[left_order[j]] == left) {\n        j++;\n    }\n    self->in.stop = j;\n    self->in.order = left_order;\n\n    self->direction = TSK_DIR_FORWARD;\n    self->index++;\n    if (self->index == num_trees) {\n        tsk_tree_position_set_null(self);\n    } else {\n        self->interval.left = left;\n        self->interval.right = breakpoints[self->index + 1];\n    }\n    return self->index != -1;\n}\n\nbool\ntsk_tree_position_prev(tsk_tree_position_t *self)\n{\n    const tsk_table_collection_t *tables = self->tree_sequence->tables;\n    const tsk_id_t M = (tsk_id_t) tables->edges.num_rows;\n    const double sequence_length = tables->sequence_length;\n    const tsk_id_t num_trees = (tsk_id_t) self->tree_sequence->num_trees;\n    const double *restrict left_coords = tables->edges.left;\n    const tsk_id_t *restrict left_order = tables->indexes.edge_insertion_order;\n    const double *restrict right_coords = tables->edges.right;\n    const tsk_id_t *restrict right_order = tables->indexes.edge_removal_order;\n    const double *restrict breakpoints = self->tree_sequence->breakpoints;\n    tsk_id_t j, left_current_index, right_current_index;\n    double right;\n\n    if (self->index == -1) {\n        self->index = num_trees;\n        self->interval.left = sequence_length;\n        self->in.stop = M - 1;\n        self->out.stop = M - 1;\n        self->direction = TSK_DIR_REVERSE;\n    }\n\n    if (self->direction == TSK_DIR_REVERSE) {\n        left_current_index = self->out.stop;\n        right_current_index = self->in.stop;\n    } else {\n        left_current_index = self->in.stop - 1;\n        right_current_index = self->out.stop - 1;\n    }\n\n    right = self->interval.left;\n\n    j = left_current_index;\n    self->out.start = j;\n    while (j >= 0 && left_coords[left_order[j]] == right) {\n        j--;\n    }\n    self->out.stop = j;\n    self->out.order = left_order;\n\n    j = 
right_current_index;\n    self->in.start = j;\n    while (j >= 0 && right_coords[right_order[j]] == right) {\n        j--;\n    }\n    self->in.stop = j;\n    self->in.order = right_order;\n\n    self->index--;\n    self->direction = TSK_DIR_REVERSE;\n    if (self->index == -1) {\n        tsk_tree_position_set_null(self);\n    } else {\n        self->interval.left = breakpoints[self->index];\n        self->interval.right = right;\n    }\n    return self->index != -1;\n}\n\nint TSK_WARN_UNUSED\ntsk_tree_position_seek_forward(tsk_tree_position_t *self, tsk_id_t index)\n{\n    int ret = 0;\n    const tsk_table_collection_t *tables = self->tree_sequence->tables;\n    const tsk_id_t M = (tsk_id_t) tables->edges.num_rows;\n    const tsk_id_t num_trees = (tsk_id_t) self->tree_sequence->num_trees;\n    const double *restrict left_coords = tables->edges.left;\n    const tsk_id_t *restrict left_order = tables->indexes.edge_insertion_order;\n    const double *restrict right_coords = tables->edges.right;\n    const tsk_id_t *restrict right_order = tables->indexes.edge_removal_order;\n    const double *restrict breakpoints = self->tree_sequence->breakpoints;\n    tsk_id_t j, left_current_index, right_current_index;\n    double left;\n\n    tsk_bug_assert(index >= self->index && index < num_trees);\n\n    if (self->index == -1) {\n        self->interval.right = 0;\n        self->in.stop = 0;\n        self->out.stop = 0;\n        self->direction = TSK_DIR_FORWARD;\n    }\n\n    if (self->direction == TSK_DIR_FORWARD) {\n        left_current_index = self->in.stop;\n        right_current_index = self->out.stop;\n    } else {\n        left_current_index = self->out.stop + 1;\n        right_current_index = self->in.stop + 1;\n    }\n\n    self->direction = TSK_DIR_FORWARD;\n    left = breakpoints[index];\n\n    j = right_current_index;\n    self->out.start = j;\n    while (j < M && right_coords[right_order[j]] <= left) {\n        j++;\n    }\n    self->out.stop = j;\n\n    if 
(self->index == -1) {\n        self->out.start = self->out.stop;\n    }\n\n    j = left_current_index;\n    while (j < M && right_coords[left_order[j]] <= left) {\n        j++;\n    }\n    self->in.start = j;\n    while (j < M && left_coords[left_order[j]] <= left) {\n        j++;\n    }\n    self->in.stop = j;\n\n    self->interval.left = left;\n    self->interval.right = breakpoints[index + 1];\n    self->out.order = right_order;\n    self->in.order = left_order;\n    self->index = index;\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_tree_position_seek_backward(tsk_tree_position_t *self, tsk_id_t index)\n{\n    int ret = 0;\n    const tsk_table_collection_t *tables = self->tree_sequence->tables;\n    const tsk_id_t M = (tsk_id_t) tables->edges.num_rows;\n    const double sequence_length = tables->sequence_length;\n    const tsk_id_t num_trees = (tsk_id_t) self->tree_sequence->num_trees;\n    const double *restrict left_coords = tables->edges.left;\n    const tsk_id_t *restrict left_order = tables->indexes.edge_insertion_order;\n    const double *restrict right_coords = tables->edges.right;\n    const tsk_id_t *restrict right_order = tables->indexes.edge_removal_order;\n    const double *restrict breakpoints = self->tree_sequence->breakpoints;\n    tsk_id_t j, left_current_index, right_current_index;\n    double right;\n\n    if (self->index == -1) {\n        self->index = num_trees;\n        self->interval.left = sequence_length;\n        self->in.stop = M - 1;\n        self->out.stop = M - 1;\n        self->direction = TSK_DIR_REVERSE;\n    }\n    tsk_bug_assert(index <= self->index);\n\n    if (self->direction == TSK_DIR_REVERSE) {\n        left_current_index = self->out.stop;\n        right_current_index = self->in.stop;\n    } else {\n        left_current_index = self->in.stop - 1;\n        right_current_index = self->out.stop - 1;\n    }\n\n    self->direction = TSK_DIR_REVERSE;\n    right = breakpoints[index + 1];\n\n    j = left_current_index;\n    
self->out.start = j;\n    while (j >= 0 && left_coords[left_order[j]] >= right) {\n        j--;\n    }\n    self->out.stop = j;\n\n    if (self->index == num_trees) {\n        self->out.start = self->out.stop;\n    }\n\n    j = right_current_index;\n    while (j >= 0 && left_coords[right_order[j]] >= right) {\n        j--;\n    }\n    self->in.start = j;\n    while (j >= 0 && right_coords[right_order[j]] >= right) {\n        j--;\n    }\n    self->in.stop = j;\n\n    self->interval.right = right;\n    self->interval.left = breakpoints[index];\n    self->out.order = left_order;\n    self->in.order = right_order;\n    self->index = index;\n\n    return ret;\n}\n\n/* ======================================================== *\n * Tree\n * ======================================================== */\n\n/* Return the root for the specified node.\n * NOTE: no bounds checking is done here.\n */\nstatic tsk_id_t\ntsk_tree_get_node_root(const tsk_tree_t *self, tsk_id_t u)\n{\n    const tsk_id_t *restrict parent = self->parent;\n\n    while (parent[u] != TSK_NULL) {\n        u = parent[u];\n    }\n    return u;\n}\n\nint TSK_WARN_UNUSED\ntsk_tree_init(tsk_tree_t *self, const tsk_treeseq_t *tree_sequence, tsk_flags_t options)\n{\n    int ret = 0;\n    tsk_size_t num_samples, num_nodes, N;\n\n    tsk_memset(self, 0, sizeof(tsk_tree_t));\n    if (tree_sequence == NULL) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    num_nodes = tree_sequence->tables->nodes.num_rows;\n    num_samples = tree_sequence->num_samples;\n    self->num_nodes = num_nodes;\n    self->virtual_root = (tsk_id_t) num_nodes;\n    self->tree_sequence = tree_sequence;\n    self->samples = tree_sequence->samples;\n    self->options = options;\n    self->root_threshold = 1;\n\n    /* Allocate space in the quintuply linked tree for the virtual root */\n    N = num_nodes + 1;\n    self->parent = tsk_malloc(N * sizeof(*self->parent));\n    self->left_child = tsk_malloc(N * 
sizeof(*self->left_child));\n    self->right_child = tsk_malloc(N * sizeof(*self->right_child));\n    self->left_sib = tsk_malloc(N * sizeof(*self->left_sib));\n    self->right_sib = tsk_malloc(N * sizeof(*self->right_sib));\n    self->num_children = tsk_calloc(N, sizeof(*self->num_children));\n    self->edge = tsk_malloc(N * sizeof(*self->edge));\n    if (self->parent == NULL || self->left_child == NULL || self->right_child == NULL\n        || self->left_sib == NULL || self->right_sib == NULL\n        || self->num_children == NULL || self->edge == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    if (!(self->options & TSK_NO_SAMPLE_COUNTS)) {\n        self->num_samples = tsk_calloc(N, sizeof(*self->num_samples));\n        self->num_tracked_samples = tsk_calloc(N, sizeof(*self->num_tracked_samples));\n        if (self->num_samples == NULL || self->num_tracked_samples == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n    }\n    if (self->options & TSK_SAMPLE_LISTS) {\n        self->left_sample = tsk_malloc(N * sizeof(*self->left_sample));\n        self->right_sample = tsk_malloc(N * sizeof(*self->right_sample));\n        self->next_sample = tsk_malloc(num_samples * sizeof(*self->next_sample));\n        if (self->left_sample == NULL || self->right_sample == NULL\n            || self->next_sample == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n    }\n\n    ret = tsk_tree_position_init(&self->tree_pos, tree_sequence, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_tree_clear(self);\nout:\n    return ret;\n}\n\nint\ntsk_tree_set_root_threshold(tsk_tree_t *self, tsk_size_t root_threshold)\n{\n    int ret = 0;\n\n    if (root_threshold == 0) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    /* Don't allow the value to be set when the tree is out of the null\n     * state 
*/\n    if (self->index != -1) {\n        ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_OPERATION);\n        goto out;\n    }\n    self->root_threshold = root_threshold;\n    /* Reset the roots */\n    ret = tsk_tree_clear(self);\nout:\n    return ret;\n}\n\ntsk_size_t\ntsk_tree_get_root_threshold(const tsk_tree_t *self)\n{\n    return self->root_threshold;\n}\n\nint\ntsk_tree_free(tsk_tree_t *self)\n{\n    tsk_safe_free(self->parent);\n    tsk_safe_free(self->left_child);\n    tsk_safe_free(self->right_child);\n    tsk_safe_free(self->left_sib);\n    tsk_safe_free(self->right_sib);\n    tsk_safe_free(self->num_samples);\n    tsk_safe_free(self->num_tracked_samples);\n    tsk_safe_free(self->left_sample);\n    tsk_safe_free(self->right_sample);\n    tsk_safe_free(self->next_sample);\n    tsk_safe_free(self->num_children);\n    tsk_safe_free(self->edge);\n    tsk_tree_position_free(&self->tree_pos);\n    return 0;\n}\n\nbool\ntsk_tree_has_sample_lists(const tsk_tree_t *self)\n{\n    return !!(self->options & TSK_SAMPLE_LISTS);\n}\n\nbool\ntsk_tree_has_sample_counts(const tsk_tree_t *self)\n{\n    return !(self->options & TSK_NO_SAMPLE_COUNTS);\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_tree_reset_tracked_samples(tsk_tree_t *self)\n{\n    int ret = 0;\n\n    if (!tsk_tree_has_sample_counts(self)) {\n        ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_OPERATION);\n        goto out;\n    }\n    tsk_memset(self->num_tracked_samples, 0,\n        (self->num_nodes + 1) * sizeof(*self->num_tracked_samples));\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_tree_set_tracked_samples(\n    tsk_tree_t *self, tsk_size_t num_tracked_samples, const tsk_id_t *tracked_samples)\n{\n    int ret = TSK_ERR_GENERIC;\n    tsk_size_t *tree_num_tracked_samples = self->num_tracked_samples;\n    const tsk_id_t *parent = self->parent;\n    tsk_size_t j;\n    tsk_id_t u;\n\n    /* TODO This is not needed when the tree is new. 
We should use the\n     * state machine to check and only reset the tracked samples when needed.\n     */\n    ret = tsk_tree_reset_tracked_samples(self);\n    if (ret != 0) {\n        goto out;\n    }\n    self->num_tracked_samples[self->virtual_root] = num_tracked_samples;\n    for (j = 0; j < num_tracked_samples; j++) {\n        u = tracked_samples[j];\n        if (u < 0 || u >= (tsk_id_t) self->num_nodes) {\n            ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);\n            goto out;\n        }\n        if (!tsk_treeseq_is_sample(self->tree_sequence, u)) {\n            ret = tsk_trace_error(TSK_ERR_BAD_SAMPLES);\n            goto out;\n        }\n        if (self->num_tracked_samples[u] != 0) {\n            ret = tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE);\n            goto out;\n        }\n        /* Propagate this upwards */\n        while (u != TSK_NULL) {\n            tree_num_tracked_samples[u]++;\n            u = parent[u];\n        }\n    }\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_tree_track_descendant_samples(tsk_tree_t *self, tsk_id_t node)\n{\n    int ret = 0;\n    tsk_id_t *nodes = tsk_malloc(tsk_tree_get_size_bound(self) * sizeof(*nodes));\n    const tsk_id_t *restrict parent = self->parent;\n    const tsk_id_t *restrict left_child = self->left_child;\n    const tsk_id_t *restrict right_sib = self->right_sib;\n    const tsk_flags_t *restrict flags = self->tree_sequence->tables->nodes.flags;\n    tsk_size_t *num_tracked_samples = self->num_tracked_samples;\n    tsk_size_t n, j, num_nodes;\n    tsk_id_t u, v;\n\n    if (nodes == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    ret = tsk_tree_postorder_from(self, node, nodes, &num_nodes);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_tree_reset_tracked_samples(self);\n    if (ret != 0) {\n        goto out;\n    }\n    u = 0; /* keep the compiler happy */\n    for (j = 0; j < num_nodes; j++) {\n        u = nodes[j];\n        
for (v = left_child[u]; v != TSK_NULL; v = right_sib[v]) {\n            num_tracked_samples[u] += num_tracked_samples[v];\n        }\n        num_tracked_samples[u] += flags[u] & TSK_NODE_IS_SAMPLE ? 1 : 0;\n    }\n    n = num_tracked_samples[u];\n    u = parent[u];\n    while (u != TSK_NULL) {\n        num_tracked_samples[u] = n;\n        u = parent[u];\n    }\n    num_tracked_samples[self->virtual_root] = n;\nout:\n    tsk_safe_free(nodes);\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_tree_copy(const tsk_tree_t *self, tsk_tree_t *dest, tsk_flags_t options)\n{\n    int ret = TSK_ERR_GENERIC;\n    tsk_size_t N = self->num_nodes + 1;\n\n    if (!(options & TSK_NO_INIT)) {\n        ret = tsk_tree_init(dest, self->tree_sequence, options);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n    if (self->tree_sequence != dest->tree_sequence) {\n        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);\n        goto out;\n    }\n    dest->interval = self->interval;\n    dest->left_index = self->left_index;\n    dest->right_index = self->right_index;\n    dest->direction = self->direction;\n    dest->index = self->index;\n    dest->sites = self->sites;\n    dest->sites_length = self->sites_length;\n    dest->root_threshold = self->root_threshold;\n    dest->num_edges = self->num_edges;\n    dest->tree_pos = self->tree_pos;\n\n    tsk_memcpy(dest->parent, self->parent, N * sizeof(*self->parent));\n    tsk_memcpy(dest->left_child, self->left_child, N * sizeof(*self->left_child));\n    tsk_memcpy(dest->right_child, self->right_child, N * sizeof(*self->right_child));\n    tsk_memcpy(dest->left_sib, self->left_sib, N * sizeof(*self->left_sib));\n    tsk_memcpy(dest->right_sib, self->right_sib, N * sizeof(*self->right_sib));\n    tsk_memcpy(dest->num_children, self->num_children, N * sizeof(*self->num_children));\n    tsk_memcpy(dest->edge, self->edge, N * sizeof(*self->edge));\n    if (!(dest->options & TSK_NO_SAMPLE_COUNTS)) {\n        if (self->options & 
TSK_NO_SAMPLE_COUNTS) {\n            ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_OPERATION);\n            goto out;\n        }\n        tsk_memcpy(dest->num_samples, self->num_samples, N * sizeof(*self->num_samples));\n        tsk_memcpy(dest->num_tracked_samples, self->num_tracked_samples,\n            N * sizeof(*self->num_tracked_samples));\n    }\n    if (dest->options & TSK_SAMPLE_LISTS) {\n        if (!(self->options & TSK_SAMPLE_LISTS)) {\n            ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_OPERATION);\n            goto out;\n        }\n        tsk_memcpy(dest->left_sample, self->left_sample, N * sizeof(*self->left_sample));\n        tsk_memcpy(\n            dest->right_sample, self->right_sample, N * sizeof(*self->right_sample));\n        tsk_memcpy(dest->next_sample, self->next_sample,\n            self->tree_sequence->num_samples * sizeof(*self->next_sample));\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nbool TSK_WARN_UNUSED\ntsk_tree_equals(const tsk_tree_t *self, const tsk_tree_t *other)\n{\n    bool ret = false;\n\n    if (self->tree_sequence == other->tree_sequence) {\n        ret = self->index == other->index;\n    }\n    return ret;\n}\n\nstatic int\ntsk_tree_check_node(const tsk_tree_t *self, tsk_id_t u)\n{\n    int ret = 0;\n    if (u < 0 || u > (tsk_id_t) self->num_nodes) {\n        ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);\n    }\n    return ret;\n}\n\nbool\ntsk_tree_is_descendant(const tsk_tree_t *self, tsk_id_t u, tsk_id_t v)\n{\n    bool ret = false;\n    tsk_id_t w = u;\n    tsk_id_t *restrict parent = self->parent;\n\n    if (tsk_tree_check_node(self, u) == 0 && tsk_tree_check_node(self, v) == 0) {\n        while (w != v && w != TSK_NULL) {\n            w = parent[w];\n        }\n        ret = w == v;\n    }\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_tree_get_mrca(const tsk_tree_t *self, tsk_id_t u, tsk_id_t v, tsk_id_t *mrca)\n{\n    int ret = 0;\n    double tu, tv;\n    const tsk_id_t *restrict parent = self->parent;\n  
  const double *restrict time = self->tree_sequence->tables->nodes.time;\n\n    ret = tsk_tree_check_node(self, u);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_tree_check_node(self, v);\n    if (ret != 0) {\n        goto out;\n    }\n\n    /* Simplest to make the virtual_root a special case here to avoid\n     * doing the time lookup. */\n    if (u == self->virtual_root || v == self->virtual_root) {\n        *mrca = self->virtual_root;\n        return 0;\n    }\n\n    tu = time[u];\n    tv = time[v];\n    while (u != v) {\n        if (tu < tv) {\n            u = parent[u];\n            if (u == TSK_NULL) {\n                break;\n            }\n            tu = time[u];\n        } else {\n            v = parent[v];\n            if (v == TSK_NULL) {\n                break;\n            }\n            tv = time[v];\n        }\n    }\n    *mrca = u == v ? u : TSK_NULL;\nout:\n    return ret;\n}\n\nstatic int\ntsk_tree_get_num_samples_by_traversal(\n    const tsk_tree_t *self, tsk_id_t u, tsk_size_t *num_samples)\n{\n    int ret = 0;\n    tsk_size_t num_nodes, j;\n    tsk_size_t count = 0;\n    const tsk_flags_t *restrict flags = self->tree_sequence->tables->nodes.flags;\n    tsk_id_t *nodes = tsk_malloc(tsk_tree_get_size_bound(self) * sizeof(*nodes));\n    tsk_id_t v;\n\n    if (nodes == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    ret = tsk_tree_preorder_from(self, u, nodes, &num_nodes);\n    if (ret != 0) {\n        goto out;\n    }\n    for (j = 0; j < num_nodes; j++) {\n        v = nodes[j];\n        if (flags[v] & TSK_NODE_IS_SAMPLE) {\n            count++;\n        }\n    }\n    *num_samples = count;\nout:\n    tsk_safe_free(nodes);\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_tree_get_num_samples(const tsk_tree_t *self, tsk_id_t u, tsk_size_t *num_samples)\n{\n    int ret = 0;\n\n    ret = tsk_tree_check_node(self, u);\n    if (ret != 0) {\n        goto out;\n    }\n\n    if (!(self->options & 
TSK_NO_SAMPLE_COUNTS)) {\n        *num_samples = (tsk_size_t) self->num_samples[u];\n    } else {\n        ret = tsk_tree_get_num_samples_by_traversal(self, u, num_samples);\n    }\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_tree_get_num_tracked_samples(\n    const tsk_tree_t *self, tsk_id_t u, tsk_size_t *num_tracked_samples)\n{\n    int ret = 0;\n\n    ret = tsk_tree_check_node(self, u);\n    if (ret != 0) {\n        goto out;\n    }\n    if (self->options & TSK_NO_SAMPLE_COUNTS) {\n        ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_OPERATION);\n        goto out;\n    }\n    *num_tracked_samples = self->num_tracked_samples[u];\nout:\n    return ret;\n}\n\nbool\ntsk_tree_is_sample(const tsk_tree_t *self, tsk_id_t u)\n{\n    return tsk_treeseq_is_sample(self->tree_sequence, u);\n}\n\ntsk_id_t\ntsk_tree_get_left_root(const tsk_tree_t *self)\n{\n    return self->left_child[self->virtual_root];\n}\n\ntsk_id_t\ntsk_tree_get_right_root(const tsk_tree_t *self)\n{\n    return self->right_child[self->virtual_root];\n}\n\ntsk_size_t\ntsk_tree_get_num_roots(const tsk_tree_t *self)\n{\n    return (tsk_size_t) self->num_children[self->virtual_root];\n}\n\nint TSK_WARN_UNUSED\ntsk_tree_get_parent(const tsk_tree_t *self, tsk_id_t u, tsk_id_t *parent)\n{\n    int ret = 0;\n\n    ret = tsk_tree_check_node(self, u);\n    if (ret != 0) {\n        goto out;\n    }\n    *parent = self->parent[u];\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_tree_get_time(const tsk_tree_t *self, tsk_id_t u, double *t)\n{\n    int ret = 0;\n    tsk_node_t node;\n\n    if (u == self->virtual_root) {\n        *t = INFINITY;\n    } else {\n        ret = tsk_treeseq_get_node(self->tree_sequence, u, &node);\n        if (ret != 0) {\n            goto out;\n        }\n        *t = node.time;\n    }\nout:\n    return ret;\n}\n\nstatic inline double\ntsk_tree_get_branch_length_unsafe(const tsk_tree_t *self, tsk_id_t u)\n{\n    const double *times = self->tree_sequence->tables->nodes.time;\n    
const tsk_id_t parent = self->parent[u];\n\n    return parent == TSK_NULL ? 0 : times[parent] - times[u];\n}\n\nint TSK_WARN_UNUSED\ntsk_tree_get_branch_length(const tsk_tree_t *self, tsk_id_t u, double *ret_branch_length)\n{\n    int ret = 0;\n\n    ret = tsk_tree_check_node(self, u);\n    if (ret != 0) {\n        goto out;\n    }\n    *ret_branch_length = tsk_tree_get_branch_length_unsafe(self, u);\nout:\n    return ret;\n}\n\nint\ntsk_tree_get_total_branch_length(const tsk_tree_t *self, tsk_id_t node, double *ret_tbl)\n{\n    int ret = 0;\n    tsk_size_t j, num_nodes;\n    tsk_id_t u, v;\n    const tsk_id_t *restrict parent = self->parent;\n    const double *restrict time = self->tree_sequence->tables->nodes.time;\n    tsk_id_t *nodes = tsk_malloc(tsk_tree_get_size_bound(self) * sizeof(*nodes));\n    double sum = 0;\n\n    if (nodes == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    ret = tsk_tree_preorder_from(self, node, nodes, &num_nodes);\n    if (ret != 0) {\n        goto out;\n    }\n    /* We always skip the first node because we don't return the branch length\n     * over the input node. */\n    for (j = 1; j < num_nodes; j++) {\n        u = nodes[j];\n        v = parent[u];\n        if (v != TSK_NULL) {\n            sum += time[v] - time[u];\n        }\n    }\n    *ret_tbl = sum;\nout:\n    tsk_safe_free(nodes);\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_tree_get_sites(\n    const tsk_tree_t *self, const tsk_site_t **sites, tsk_size_t *sites_length)\n{\n    *sites = self->sites;\n    *sites_length = self->sites_length;\n    return 0;\n}\n\n/* u must be a valid node in the tree. 
For internal use */\nstatic int\ntsk_tree_get_depth_unsafe(const tsk_tree_t *self, tsk_id_t u)\n{\n    tsk_id_t v;\n    const tsk_id_t *restrict parent = self->parent;\n    int depth = 0;\n\n    if (u == self->virtual_root) {\n        return -1;\n    }\n    for (v = parent[u]; v != TSK_NULL; v = parent[v]) {\n        depth++;\n    }\n    return depth;\n}\n\nint TSK_WARN_UNUSED\ntsk_tree_get_depth(const tsk_tree_t *self, tsk_id_t u, int *depth_ret)\n{\n    int ret = 0;\n\n    ret = tsk_tree_check_node(self, u);\n    if (ret != 0) {\n        goto out;\n    }\n\n    *depth_ret = tsk_tree_get_depth_unsafe(self, u);\nout:\n    return ret;\n}\n\nstatic tsk_id_t\ntsk_tree_node_root(tsk_tree_t *self, tsk_id_t u)\n{\n    tsk_id_t v = u;\n    while (self->parent[v] != TSK_NULL) {\n        v = self->parent[v];\n    }\n\n    return v;\n}\n\nstatic void\ntsk_tree_check_state(const tsk_tree_t *self)\n{\n    tsk_id_t u, v;\n    tsk_size_t j, num_samples;\n    int err, c;\n    tsk_site_t site;\n    tsk_id_t *children = tsk_malloc(self->num_nodes * sizeof(tsk_id_t));\n    bool *is_root = tsk_calloc(self->num_nodes, sizeof(bool));\n\n    tsk_bug_assert(children != NULL);\n\n    /* Check the virtual root properties */\n    tsk_bug_assert(self->parent[self->virtual_root] == TSK_NULL);\n    tsk_bug_assert(self->left_sib[self->virtual_root] == TSK_NULL);\n    tsk_bug_assert(self->right_sib[self->virtual_root] == TSK_NULL);\n\n    for (j = 0; j < self->tree_sequence->num_samples; j++) {\n        u = self->samples[j];\n        while (self->parent[u] != TSK_NULL) {\n            u = self->parent[u];\n        }\n        is_root[u] = true;\n    }\n    if (self->tree_sequence->num_samples == 0) {\n        tsk_bug_assert(self->left_child[self->virtual_root] == TSK_NULL);\n    }\n\n    /* Iterate over the roots and make sure they are set */\n    for (u = tsk_tree_get_left_root(self); u != TSK_NULL; u = self->right_sib[u]) {\n        tsk_bug_assert(is_root[u]);\n        is_root[u] = false;\n    
}\n    for (u = 0; u < (tsk_id_t) self->num_nodes; u++) {\n        tsk_bug_assert(!is_root[u]);\n        c = 0;\n        for (v = self->left_child[u]; v != TSK_NULL; v = self->right_sib[v]) {\n            tsk_bug_assert(self->parent[v] == u);\n            children[c] = v;\n            c++;\n        }\n        for (v = self->right_child[u]; v != TSK_NULL; v = self->left_sib[v]) {\n            tsk_bug_assert(c > 0);\n            c--;\n            tsk_bug_assert(v == children[c]);\n        }\n    }\n    for (j = 0; j < self->sites_length; j++) {\n        site = self->sites[j];\n        tsk_bug_assert(self->interval.left <= site.position);\n        tsk_bug_assert(site.position < self->interval.right);\n    }\n\n    if (!(self->options & TSK_NO_SAMPLE_COUNTS)) {\n        tsk_bug_assert(self->num_samples != NULL);\n        tsk_bug_assert(self->num_tracked_samples != NULL);\n        for (u = 0; u < (tsk_id_t) self->num_nodes; u++) {\n            err = tsk_tree_get_num_samples_by_traversal(self, u, &num_samples);\n            tsk_bug_assert(err == 0);\n            tsk_bug_assert(num_samples == (tsk_size_t) self->num_samples[u]);\n        }\n    } else {\n        tsk_bug_assert(self->num_samples == NULL);\n        tsk_bug_assert(self->num_tracked_samples == NULL);\n    }\n    if (self->options & TSK_SAMPLE_LISTS) {\n        tsk_bug_assert(self->right_sample != NULL);\n        tsk_bug_assert(self->left_sample != NULL);\n        tsk_bug_assert(self->next_sample != NULL);\n    } else {\n        tsk_bug_assert(self->right_sample == NULL);\n        tsk_bug_assert(self->left_sample == NULL);\n        tsk_bug_assert(self->next_sample == NULL);\n    }\n\n    free(children);\n    free(is_root);\n}\n\nvoid\ntsk_tree_print_state(const tsk_tree_t *self, FILE *out)\n{\n    tsk_size_t j;\n    tsk_site_t site;\n\n    fprintf(out, \"Tree state:\\n\");\n    fprintf(out, \"options = %d\\n\", self->options);\n    fprintf(out, \"root_threshold = %lld\\n\", (long long) self->root_threshold);\n  
  fprintf(out, \"left = %f\\n\", self->interval.left);\n    fprintf(out, \"right = %f\\n\", self->interval.right);\n    fprintf(out, \"index = %lld\\n\", (long long) self->index);\n    fprintf(out, \"num_edges = %d\\n\", (int) self->num_edges);\n    fprintf(out, \"node\\tedge\\tparent\\tlchild\\trchild\\tlsib\\trsib\");\n    if (self->options & TSK_SAMPLE_LISTS) {\n        fprintf(out, \"\\thead\\ttail\");\n    }\n    fprintf(out, \"\\n\");\n\n    for (j = 0; j < self->num_nodes + 1; j++) {\n        fprintf(out, \"%lld\\t%lld\\t%lld\\t%lld\\t%lld\\t%lld\\t%lld\", (long long) j,\n            (long long) self->edge[j], (long long) self->parent[j],\n            (long long) self->left_child[j], (long long) self->right_child[j],\n            (long long) self->left_sib[j], (long long) self->right_sib[j]);\n        if (self->options & TSK_SAMPLE_LISTS) {\n            fprintf(out, \"\\t%lld\\t%lld\\t\", (long long) self->left_sample[j],\n                (long long) self->right_sample[j]);\n        }\n        if (!(self->options & TSK_NO_SAMPLE_COUNTS)) {\n            fprintf(out, \"\\t%lld\\t%lld\", (long long) self->num_samples[j],\n                (long long) self->num_tracked_samples[j]);\n        }\n        fprintf(out, \"\\n\");\n    }\n    fprintf(out, \"sites = \\n\");\n    for (j = 0; j < self->sites_length; j++) {\n        site = self->sites[j];\n        fprintf(out, \"\\t%lld\\t%f\\n\", (long long) site.id, site.position);\n    }\n    tsk_tree_check_state(self);\n}\n\n/* Methods for positioning the tree along the sequence */\n\n/* The following methods are performance sensitive and so we use a\n * lot of restrict pointers. Because we are saying that we don't have\n * any aliases to these pointers, we pass around the reference to parent\n * since it's used in all the functions. 
*/\nstatic inline void\ntsk_tree_update_sample_lists(\n    tsk_tree_t *self, tsk_id_t node, const tsk_id_t *restrict parent)\n{\n    tsk_id_t u, v, sample_index;\n    tsk_id_t *restrict left_child = self->left_child;\n    tsk_id_t *restrict right_sib = self->right_sib;\n    tsk_id_t *restrict left = self->left_sample;\n    tsk_id_t *restrict right = self->right_sample;\n    tsk_id_t *restrict next = self->next_sample;\n    const tsk_id_t *restrict sample_index_map = self->tree_sequence->sample_index_map;\n\n    for (u = node; u != TSK_NULL; u = parent[u]) {\n        sample_index = sample_index_map[u];\n        if (sample_index != TSK_NULL) {\n            right[u] = left[u];\n        } else {\n            left[u] = TSK_NULL;\n            right[u] = TSK_NULL;\n        }\n        for (v = left_child[u]; v != TSK_NULL; v = right_sib[v]) {\n            if (left[v] != TSK_NULL) {\n                tsk_bug_assert(right[v] != TSK_NULL);\n                if (left[u] == TSK_NULL) {\n                    left[u] = left[v];\n                    right[u] = right[v];\n                } else {\n                    next[right[u]] = left[v];\n                    right[u] = right[v];\n                }\n            }\n        }\n    }\n}\n\nstatic inline void\ntsk_tree_remove_branch(\n    tsk_tree_t *self, tsk_id_t p, tsk_id_t c, tsk_id_t *restrict parent)\n{\n    tsk_id_t *restrict left_child = self->left_child;\n    tsk_id_t *restrict right_child = self->right_child;\n    tsk_id_t *restrict left_sib = self->left_sib;\n    tsk_id_t *restrict right_sib = self->right_sib;\n    tsk_id_t *restrict num_children = self->num_children;\n    tsk_id_t lsib = left_sib[c];\n    tsk_id_t rsib = right_sib[c];\n\n    if (lsib == TSK_NULL) {\n        left_child[p] = rsib;\n    } else {\n        right_sib[lsib] = rsib;\n    }\n    if (rsib == TSK_NULL) {\n        right_child[p] = lsib;\n    } else {\n        left_sib[rsib] = lsib;\n    }\n    parent[c] = TSK_NULL;\n    left_sib[c] = TSK_NULL;\n    
right_sib[c] = TSK_NULL;\n    num_children[p]--;\n}\n\nstatic inline void\ntsk_tree_insert_branch(\n    tsk_tree_t *self, tsk_id_t p, tsk_id_t c, tsk_id_t *restrict parent)\n{\n    tsk_id_t *restrict left_child = self->left_child;\n    tsk_id_t *restrict right_child = self->right_child;\n    tsk_id_t *restrict left_sib = self->left_sib;\n    tsk_id_t *restrict right_sib = self->right_sib;\n    tsk_id_t *restrict num_children = self->num_children;\n    tsk_id_t u;\n\n    parent[c] = p;\n    u = right_child[p];\n    if (u == TSK_NULL) {\n        left_child[p] = c;\n        left_sib[c] = TSK_NULL;\n        right_sib[c] = TSK_NULL;\n    } else {\n        right_sib[u] = c;\n        left_sib[c] = u;\n        right_sib[c] = TSK_NULL;\n    }\n    right_child[p] = c;\n    num_children[p]++;\n}\n\nstatic inline void\ntsk_tree_insert_root(tsk_tree_t *self, tsk_id_t root, tsk_id_t *restrict parent)\n{\n    tsk_tree_insert_branch(self, self->virtual_root, root, parent);\n    parent[root] = TSK_NULL;\n}\n\nstatic inline void\ntsk_tree_remove_root(tsk_tree_t *self, tsk_id_t root, tsk_id_t *restrict parent)\n{\n    tsk_tree_remove_branch(self, self->virtual_root, root, parent);\n}\n\nstatic void\ntsk_tree_remove_edge(\n    tsk_tree_t *self, tsk_id_t p, tsk_id_t c, tsk_id_t TSK_UNUSED(edge_id))\n{\n    tsk_id_t *restrict parent = self->parent;\n    tsk_size_t *restrict num_samples = self->num_samples;\n    tsk_size_t *restrict num_tracked_samples = self->num_tracked_samples;\n    tsk_id_t *restrict edge = self->edge;\n    const tsk_size_t root_threshold = self->root_threshold;\n    tsk_id_t u;\n    tsk_id_t path_end = TSK_NULL;\n    bool path_end_was_root = false;\n\n#define POTENTIAL_ROOT(U) (num_samples[U] >= root_threshold)\n\n    tsk_tree_remove_branch(self, p, c, parent);\n    self->num_edges--;\n    edge[c] = TSK_NULL;\n\n    if (!(self->options & TSK_NO_SAMPLE_COUNTS)) {\n        u = p;\n        while (u != TSK_NULL) {\n            path_end = u;\n            
path_end_was_root = POTENTIAL_ROOT(u);\n            num_samples[u] -= num_samples[c];\n            num_tracked_samples[u] -= num_tracked_samples[c];\n            u = parent[u];\n        }\n\n        if (path_end_was_root && !POTENTIAL_ROOT(path_end)) {\n            tsk_tree_remove_root(self, path_end, parent);\n        }\n        if (POTENTIAL_ROOT(c)) {\n            tsk_tree_insert_root(self, c, parent);\n        }\n    }\n\n    if (self->options & TSK_SAMPLE_LISTS) {\n        tsk_tree_update_sample_lists(self, p, parent);\n    }\n}\n\nstatic void\ntsk_tree_insert_edge(tsk_tree_t *self, tsk_id_t p, tsk_id_t c, tsk_id_t edge_id)\n{\n    tsk_id_t *restrict parent = self->parent;\n    tsk_size_t *restrict num_samples = self->num_samples;\n    tsk_size_t *restrict num_tracked_samples = self->num_tracked_samples;\n    tsk_id_t *restrict edge = self->edge;\n    const tsk_size_t root_threshold = self->root_threshold;\n    tsk_id_t u;\n    tsk_id_t path_end = TSK_NULL;\n    bool path_end_was_root = false;\n\n#define POTENTIAL_ROOT(U) (num_samples[U] >= root_threshold)\n\n    if (!(self->options & TSK_NO_SAMPLE_COUNTS)) {\n        u = p;\n        while (u != TSK_NULL) {\n            path_end = u;\n            path_end_was_root = POTENTIAL_ROOT(u);\n            num_samples[u] += num_samples[c];\n            num_tracked_samples[u] += num_tracked_samples[c];\n            u = parent[u];\n        }\n\n        if (POTENTIAL_ROOT(c)) {\n            tsk_tree_remove_root(self, c, parent);\n        }\n        if (POTENTIAL_ROOT(path_end) && !path_end_was_root) {\n            tsk_tree_insert_root(self, path_end, parent);\n        }\n    }\n\n    tsk_tree_insert_branch(self, p, c, parent);\n    self->num_edges++;\n    edge[c] = edge_id;\n\n    if (self->options & TSK_SAMPLE_LISTS) {\n        tsk_tree_update_sample_lists(self, p, parent);\n    }\n}\n\nint TSK_WARN_UNUSED\ntsk_tree_first(tsk_tree_t *self)\n{\n    int ret = TSK_TREE_OK;\n\n    ret = tsk_tree_clear(self);\n    if (ret != 
0) {\n        goto out;\n    }\n    ret = tsk_tree_next(self);\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_tree_last(tsk_tree_t *self)\n{\n    int ret = TSK_TREE_OK;\n\n    ret = tsk_tree_clear(self);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_tree_prev(self);\nout:\n    return ret;\n}\n\nstatic void\ntsk_tree_update_index_and_interval(tsk_tree_t *self)\n{\n    tsk_table_collection_t *tables = self->tree_sequence->tables;\n\n    self->index = self->tree_pos.index;\n    self->interval.left = self->tree_pos.interval.left;\n    self->interval.right = self->tree_pos.interval.right;\n\n    if (tables->sites.num_rows > 0) {\n        self->sites = self->tree_sequence->tree_sites[self->index];\n        self->sites_length = self->tree_sequence->tree_sites_length[self->index];\n    }\n}\n\nint TSK_WARN_UNUSED\ntsk_tree_next(tsk_tree_t *self)\n{\n    int ret = 0;\n    tsk_table_collection_t *tables = self->tree_sequence->tables;\n    const tsk_id_t *restrict edge_parent = tables->edges.parent;\n    const tsk_id_t *restrict edge_child = tables->edges.child;\n    tsk_id_t j, e;\n    tsk_tree_position_t tree_pos;\n    bool valid;\n\n    valid = tsk_tree_position_next(&self->tree_pos);\n    tree_pos = self->tree_pos;\n\n    if (valid) {\n        for (j = tree_pos.out.start; j != tree_pos.out.stop; j++) {\n            e = tree_pos.out.order[j];\n            tsk_tree_remove_edge(self, edge_parent[e], edge_child[e], e);\n        }\n\n        for (j = tree_pos.in.start; j != tree_pos.in.stop; j++) {\n            e = tree_pos.in.order[j];\n            tsk_tree_insert_edge(self, edge_parent[e], edge_child[e], e);\n        }\n        ret = TSK_TREE_OK;\n        tsk_tree_update_index_and_interval(self);\n    } else {\n        ret = tsk_tree_clear(self);\n    }\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_tree_prev(tsk_tree_t *self)\n{\n    int ret = 0;\n    tsk_table_collection_t *tables = self->tree_sequence->tables;\n    const tsk_id_t *restrict 
edge_parent = tables->edges.parent;\n    const tsk_id_t *restrict edge_child = tables->edges.child;\n    tsk_id_t j, e;\n    tsk_tree_position_t tree_pos;\n    bool valid;\n\n    valid = tsk_tree_position_prev(&self->tree_pos);\n    tree_pos = self->tree_pos;\n\n    if (valid) {\n        for (j = tree_pos.out.start; j != tree_pos.out.stop; j--) {\n            e = tree_pos.out.order[j];\n            tsk_tree_remove_edge(self, edge_parent[e], edge_child[e], e);\n        }\n\n        for (j = tree_pos.in.start; j != tree_pos.in.stop; j--) {\n            e = tree_pos.in.order[j];\n            tsk_tree_insert_edge(self, edge_parent[e], edge_child[e], e);\n        }\n        ret = TSK_TREE_OK;\n        tsk_tree_update_index_and_interval(self);\n    } else {\n        ret = tsk_tree_clear(self);\n    }\n    return ret;\n}\n\nstatic inline bool\ntsk_tree_position_in_interval(const tsk_tree_t *self, double x)\n{\n    return self->interval.left <= x && x < self->interval.right;\n}\n\nstatic int\ntsk_tree_seek_from_null(tsk_tree_t *self, double x, tsk_flags_t TSK_UNUSED(options))\n{\n    int ret = 0;\n    tsk_table_collection_t *tables = self->tree_sequence->tables;\n    const tsk_id_t *restrict edge_parent = tables->edges.parent;\n    const tsk_id_t *restrict edge_child = tables->edges.child;\n    const double *restrict edge_left = tables->edges.left;\n    const double *restrict edge_right = tables->edges.right;\n    double interval_left, interval_right;\n    const double *restrict breakpoints = self->tree_sequence->breakpoints;\n    const tsk_size_t num_trees = self->tree_sequence->num_trees;\n    const double L = tsk_treeseq_get_sequence_length(self->tree_sequence);\n    tsk_id_t j, e, index;\n    tsk_tree_position_t tree_pos;\n\n    index = (tsk_id_t) tsk_search_sorted(breakpoints, num_trees + 1, x);\n    if (breakpoints[index] > x) {\n        index--;\n    }\n\n    if (x <= L / 2.0) {\n        ret = tsk_tree_position_seek_forward(&self->tree_pos, index);\n        if (ret 
!= 0) {\n            goto out;\n        }\n        // Since we are seeking from null, there are no edges to remove\n        tree_pos = self->tree_pos;\n        interval_left = tree_pos.interval.left;\n        for (j = tree_pos.in.start; j != tree_pos.in.stop; j++) {\n            e = tree_pos.in.order[j];\n            if (edge_left[e] <= interval_left && interval_left < edge_right[e]) {\n                tsk_tree_insert_edge(self, edge_parent[e], edge_child[e], e);\n            }\n        }\n    } else {\n        ret = tsk_tree_position_seek_backward(&self->tree_pos, index);\n        if (ret != 0) {\n            goto out;\n        }\n        tree_pos = self->tree_pos;\n        interval_right = tree_pos.interval.right;\n        for (j = tree_pos.in.start; j != tree_pos.in.stop; j--) {\n            e = tree_pos.in.order[j];\n            if (edge_right[e] >= interval_right && interval_right > edge_left[e]) {\n                tsk_tree_insert_edge(self, edge_parent[e], edge_child[e], e);\n            }\n        }\n    }\n    tsk_tree_update_index_and_interval(self);\nout:\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_tree_seek_forward(tsk_tree_t *self, tsk_id_t index)\n{\n    int ret = 0;\n    tsk_table_collection_t *tables = self->tree_sequence->tables;\n    const tsk_id_t *restrict edge_parent = tables->edges.parent;\n    const tsk_id_t *restrict edge_child = tables->edges.child;\n    const double *restrict edge_left = tables->edges.left;\n    const double *restrict edge_right = tables->edges.right;\n    double interval_left, e_left;\n    const double old_right = self->interval.right;\n    tsk_id_t j, e;\n    tsk_tree_position_t tree_pos;\n\n    ret = tsk_tree_position_seek_forward(&self->tree_pos, index);\n    if (ret != 0) {\n        goto out;\n    }\n    tree_pos = self->tree_pos;\n    interval_left = tree_pos.interval.left;\n\n    for (j = tree_pos.out.start; j != tree_pos.out.stop; j++) {\n        e = tree_pos.out.order[j];\n        e_left = 
edge_left[e];\n        if (e_left < old_right) {\n            tsk_bug_assert(edge_parent[e] != TSK_NULL);\n            tsk_tree_remove_edge(self, edge_parent[e], edge_child[e], e);\n        }\n        tsk_bug_assert(e_left < interval_left);\n    }\n\n    for (j = tree_pos.in.start; j != tree_pos.in.stop; j++) {\n        e = tree_pos.in.order[j];\n        if (edge_left[e] <= interval_left && interval_left < edge_right[e]) {\n            tsk_tree_insert_edge(self, edge_parent[e], edge_child[e], e);\n        }\n    }\n    tsk_tree_update_index_and_interval(self);\nout:\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_tree_seek_backward(tsk_tree_t *self, tsk_id_t index)\n{\n    int ret = 0;\n    tsk_table_collection_t *tables = self->tree_sequence->tables;\n    const tsk_id_t *restrict edge_parent = tables->edges.parent;\n    const tsk_id_t *restrict edge_child = tables->edges.child;\n    const double *restrict edge_left = tables->edges.left;\n    const double *restrict edge_right = tables->edges.right;\n    double interval_right, e_right;\n    const double old_right = self->interval.right;\n    tsk_id_t j, e;\n    tsk_tree_position_t tree_pos;\n\n    ret = tsk_tree_position_seek_backward(&self->tree_pos, index);\n    if (ret != 0) {\n        goto out;\n    }\n    tree_pos = self->tree_pos;\n    interval_right = tree_pos.interval.right;\n\n    for (j = tree_pos.out.start; j != tree_pos.out.stop; j--) {\n        e = tree_pos.out.order[j];\n        e_right = edge_right[e];\n        if (e_right >= old_right) {\n            tsk_bug_assert(edge_parent[e] != TSK_NULL);\n            tsk_tree_remove_edge(self, edge_parent[e], edge_child[e], e);\n        }\n        tsk_bug_assert(e_right > interval_right);\n    }\n\n    for (j = tree_pos.in.start; j != tree_pos.in.stop; j--) {\n        e = tree_pos.in.order[j];\n        if (edge_right[e] >= interval_right && interval_right > edge_left[e]) {\n            tsk_tree_insert_edge(self, edge_parent[e], edge_child[e], e);\n       
 }\n    }\n    tsk_tree_update_index_and_interval(self);\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_tree_seek_index(tsk_tree_t *self, tsk_id_t tree, tsk_flags_t options)\n{\n    int ret = 0;\n    double x;\n\n    if (tree < 0 || tree >= (tsk_id_t) self->tree_sequence->num_trees) {\n        ret = tsk_trace_error(TSK_ERR_SEEK_OUT_OF_BOUNDS);\n        goto out;\n    }\n    x = self->tree_sequence->breakpoints[tree];\n    ret = tsk_tree_seek(self, x, options);\nout:\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_tree_seek_linear(tsk_tree_t *self, double x)\n{\n    const double L = tsk_treeseq_get_sequence_length(self->tree_sequence);\n    const double t_l = self->interval.left;\n    const double t_r = self->interval.right;\n    int ret = 0;\n    double distance_left, distance_right;\n\n    if (x < t_l) {\n        /* |-----|-----|========|---------| */\n        /* 0     x    t_l      t_r        L */\n        distance_left = t_l - x;\n        distance_right = L - t_r + x;\n    } else {\n        /* |------|========|------|-------| */\n        /* 0     t_l      t_r     x       L */\n        distance_right = x - t_r;\n        distance_left = t_l + L - x;\n    }\n    if (distance_right <= distance_left) {\n        while (!tsk_tree_position_in_interval(self, x)) {\n            ret = tsk_tree_next(self);\n            if (ret < 0) {\n                goto out;\n            }\n        }\n    } else {\n        while (!tsk_tree_position_in_interval(self, x)) {\n            ret = tsk_tree_prev(self);\n            if (ret < 0) {\n                goto out;\n            }\n        }\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic int TSK_WARN_UNUSED\ntsk_tree_seek_skip(tsk_tree_t *self, double x)\n{\n    const double t_l = self->interval.left;\n    int ret = 0;\n    tsk_id_t index;\n    const tsk_size_t num_trees = self->tree_sequence->num_trees;\n    const double *restrict breakpoints = self->tree_sequence->breakpoints;\n\n    index = (tsk_id_t) 
tsk_search_sorted(breakpoints, num_trees + 1, x);\n    if (breakpoints[index] > x) {\n        index--;\n    }\n\n    if (x < t_l) {\n        ret = tsk_tree_seek_backward(self, index);\n    } else {\n        ret = tsk_tree_seek_forward(self, index);\n    }\n    tsk_bug_assert(tsk_tree_position_in_interval(self, x));\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_tree_seek(tsk_tree_t *self, double x, tsk_flags_t options)\n{\n    int ret = 0;\n    const double L = tsk_treeseq_get_sequence_length(self->tree_sequence);\n\n    if (x < 0 || x >= L) {\n        ret = tsk_trace_error(TSK_ERR_SEEK_OUT_OF_BOUNDS);\n        goto out;\n    }\n\n    if (self->index == -1) {\n        ret = tsk_tree_seek_from_null(self, x, options);\n    } else {\n        if (options & TSK_SEEK_SKIP) {\n            ret = tsk_tree_seek_skip(self, x);\n        } else {\n            ret = tsk_tree_seek_linear(self, x);\n        }\n    }\n\nout:\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_tree_clear(tsk_tree_t *self)\n{\n    int ret = 0;\n    tsk_size_t j;\n    tsk_id_t u;\n    const tsk_size_t N = self->num_nodes + 1;\n    const tsk_size_t num_samples = self->tree_sequence->num_samples;\n    const bool sample_counts = !(self->options & TSK_NO_SAMPLE_COUNTS);\n    const bool sample_lists = !!(self->options & TSK_SAMPLE_LISTS);\n    const tsk_flags_t *flags = self->tree_sequence->tables->nodes.flags;\n\n    self->interval.left = 0;\n    self->interval.right = 0;\n    self->num_edges = 0;\n    self->index = -1;\n    tsk_tree_position_set_null(&self->tree_pos);\n    /* TODO we should profile this method to see if just doing a single loop over\n     * the nodes would be more efficient than multiple memsets.\n     */\n    tsk_memset(self->parent, 0xff, N * sizeof(*self->parent));\n    tsk_memset(self->left_child, 0xff, N * sizeof(*self->left_child));\n    tsk_memset(self->right_child, 0xff, N * sizeof(*self->right_child));\n    tsk_memset(self->left_sib, 0xff, N * sizeof(*self->left_sib));\n    
tsk_memset(self->right_sib, 0xff, N * sizeof(*self->right_sib));\n    tsk_memset(self->num_children, 0, N * sizeof(*self->num_children));\n    tsk_memset(self->edge, 0xff, N * sizeof(*self->edge));\n\n    if (sample_counts) {\n        tsk_memset(self->num_samples, 0, N * sizeof(*self->num_samples));\n        /* We can't reset the tracked samples via memset because we don't\n         * know where the tracked samples are.\n         */\n        for (j = 0; j < self->num_nodes; j++) {\n            if (!(flags[j] & TSK_NODE_IS_SAMPLE)) {\n                self->num_tracked_samples[j] = 0;\n            }\n        }\n        /* The total tracked_samples gets set in set_tracked_samples */\n        self->num_samples[self->virtual_root] = num_samples;\n    }\n    if (sample_lists) {\n        tsk_memset(self->left_sample, 0xff, N * sizeof(tsk_id_t));\n        tsk_memset(self->right_sample, 0xff, N * sizeof(tsk_id_t));\n        tsk_memset(self->next_sample, 0xff, num_samples * sizeof(tsk_id_t));\n    }\n    /* Set the sample attributes */\n    for (j = 0; j < num_samples; j++) {\n        u = self->samples[j];\n        if (sample_counts) {\n            self->num_samples[u] = 1;\n        }\n        if (sample_lists) {\n            /* We are mapping to *indexes* into the list of samples here */\n            self->left_sample[u] = (tsk_id_t) j;\n            self->right_sample[u] = (tsk_id_t) j;\n        }\n    }\n    if (sample_counts && self->root_threshold == 1 && num_samples > 0) {\n        for (j = 0; j < num_samples; j++) {\n            /* Set initial roots */\n            if (self->root_threshold == 1) {\n                tsk_tree_insert_root(self, self->samples[j], self->parent);\n            }\n        }\n    }\n    return ret;\n}\n\ntsk_size_t\ntsk_tree_get_size_bound(const tsk_tree_t *self)\n{\n    tsk_size_t bound = 0;\n\n    if (self->tree_sequence != NULL) {\n        /* This is a safe upper bound which can be computed cheaply.\n         * We have at most n roots and 
each edge adds at most one new\n         * node to the tree. We also allow space for the virtual root,\n         * to simplify client code.\n         *\n         * In the common case of a binary tree with a single root, we have\n         * 2n - 1 nodes in total, and 2n - 2 edges. Therefore, we return\n         * 3n - 1, which is an over-estimate of 1/2 and we allocate\n         * 1.5 times as much memory as we need.\n         *\n         * Since tracking the exact number of nodes in the tree would require\n         * storing the number of nodes beneath every node and complicate\n         * the tree transition method, this seems like a good compromise\n         * and will result in less memory usage overall in nearly all cases.\n         */\n        bound = 1 + self->tree_sequence->num_samples + self->num_edges;\n    }\n    return bound;\n}\n\n/* Traversal orders */\nstatic tsk_id_t *\ntsk_tree_alloc_node_stack(const tsk_tree_t *self)\n{\n    return tsk_malloc(tsk_tree_get_size_bound(self) * sizeof(tsk_id_t));\n}\n\nint\ntsk_tree_preorder(const tsk_tree_t *self, tsk_id_t *nodes, tsk_size_t *num_nodes_ret)\n{\n    return tsk_tree_preorder_from(self, -1, nodes, num_nodes_ret);\n}\n\nint\ntsk_tree_preorder_from(\n    const tsk_tree_t *self, tsk_id_t root, tsk_id_t *nodes, tsk_size_t *num_nodes_ret)\n{\n    int ret = 0;\n    const tsk_id_t *restrict right_child = self->right_child;\n    const tsk_id_t *restrict left_sib = self->left_sib;\n    tsk_id_t *stack = tsk_tree_alloc_node_stack(self);\n    tsk_size_t num_nodes = 0;\n    tsk_id_t u, v;\n    int stack_top;\n\n    if (stack == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    if ((root == -1 || root == self->virtual_root)\n        && !tsk_tree_has_sample_counts(self)) {\n        ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_OPERATION);\n        goto out;\n    }\n    if (root == -1) {\n        stack_top = -1;\n        for (u = right_child[self->virtual_root]; u != TSK_NULL; u = 
left_sib[u]) {\n            stack_top++;\n            stack[stack_top] = u;\n        }\n    } else {\n        ret = tsk_tree_check_node(self, root);\n        if (ret != 0) {\n            goto out;\n        }\n        stack_top = 0;\n        stack[stack_top] = root;\n    }\n\n    while (stack_top >= 0) {\n        u = stack[stack_top];\n        stack_top--;\n        nodes[num_nodes] = u;\n        num_nodes++;\n        for (v = right_child[u]; v != TSK_NULL; v = left_sib[v]) {\n            stack_top++;\n            stack[stack_top] = v;\n        }\n    }\n    *num_nodes_ret = num_nodes;\nout:\n    tsk_safe_free(stack);\n    return ret;\n}\n\n/* We could implement this using the preorder function, but since it's\n * going to be performance critical we want to avoid the overhead\n * of mallocing the intermediate node list (which will be bigger than\n * the number of samples). */\nint\ntsk_tree_preorder_samples_from(\n    const tsk_tree_t *self, tsk_id_t root, tsk_id_t *nodes, tsk_size_t *num_nodes_ret)\n{\n    int ret = 0;\n    const tsk_id_t *restrict right_child = self->right_child;\n    const tsk_id_t *restrict left_sib = self->left_sib;\n    const tsk_flags_t *restrict flags = self->tree_sequence->tables->nodes.flags;\n    tsk_id_t *stack = tsk_tree_alloc_node_stack(self);\n    tsk_size_t num_nodes = 0;\n    tsk_id_t u, v;\n    int stack_top;\n\n    if (stack == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    /* We could push the virtual_root onto the stack directly to simplify\n     * the code a little, but then we'd have to put an extra check\n     * when looking up the flags array (which isn't defined for virtual_root).\n     */\n    if (root == -1 || root == self->virtual_root) {\n        if (!tsk_tree_has_sample_counts(self)) {\n            ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_OPERATION);\n            goto out;\n        }\n        stack_top = -1;\n        for (u = right_child[self->virtual_root]; u != 
TSK_NULL; u = left_sib[u]) {\n            stack_top++;\n            stack[stack_top] = u;\n        }\n    } else {\n        ret = tsk_tree_check_node(self, root);\n        if (ret != 0) {\n            goto out;\n        }\n        stack_top = 0;\n        stack[stack_top] = root;\n    }\n\n    while (stack_top >= 0) {\n        u = stack[stack_top];\n        stack_top--;\n        if (flags[u] & TSK_NODE_IS_SAMPLE) {\n            nodes[num_nodes] = u;\n            num_nodes++;\n        }\n        for (v = right_child[u]; v != TSK_NULL; v = left_sib[v]) {\n            stack_top++;\n            stack[stack_top] = v;\n        }\n    }\n    *num_nodes_ret = num_nodes;\nout:\n    tsk_safe_free(stack);\n    return ret;\n}\n\nint\ntsk_tree_postorder(const tsk_tree_t *self, tsk_id_t *nodes, tsk_size_t *num_nodes_ret)\n{\n    return tsk_tree_postorder_from(self, -1, nodes, num_nodes_ret);\n}\nint\ntsk_tree_postorder_from(\n    const tsk_tree_t *self, tsk_id_t root, tsk_id_t *nodes, tsk_size_t *num_nodes_ret)\n{\n    int ret = 0;\n    const tsk_id_t *restrict right_child = self->right_child;\n    const tsk_id_t *restrict left_sib = self->left_sib;\n    const tsk_id_t *restrict parent = self->parent;\n    tsk_id_t *stack = tsk_tree_alloc_node_stack(self);\n    tsk_size_t num_nodes = 0;\n    tsk_id_t u, v, postorder_parent;\n    int stack_top;\n    bool is_virtual_root = root == self->virtual_root;\n\n    if (stack == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    if (root == -1 || is_virtual_root) {\n        if (!tsk_tree_has_sample_counts(self)) {\n            ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_OPERATION);\n            goto out;\n        }\n        stack_top = -1;\n        for (u = right_child[self->virtual_root]; u != TSK_NULL; u = left_sib[u]) {\n            stack_top++;\n            stack[stack_top] = u;\n        }\n    } else {\n        ret = tsk_tree_check_node(self, root);\n        if (ret != 0) {\n            goto 
out;\n        }\n        stack_top = 0;\n        stack[stack_top] = root;\n    }\n\n    postorder_parent = TSK_NULL;\n    while (stack_top >= 0) {\n        u = stack[stack_top];\n        if (right_child[u] != TSK_NULL && u != postorder_parent) {\n            for (v = right_child[u]; v != TSK_NULL; v = left_sib[v]) {\n                stack_top++;\n                stack[stack_top] = v;\n            }\n        } else {\n            stack_top--;\n            postorder_parent = parent[u];\n            nodes[num_nodes] = u;\n            num_nodes++;\n        }\n    }\n    if (is_virtual_root) {\n        nodes[num_nodes] = root;\n        num_nodes++;\n    }\n    *num_nodes_ret = num_nodes;\nout:\n    tsk_safe_free(stack);\n    return ret;\n}\n\n/* Balance/imbalance metrics */\n\n/* Result is a tsk_size_t value here because we could imagine the total\n * depth overflowing a 32bit integer for a large tree. */\nint\ntsk_tree_sackin_index(const tsk_tree_t *self, tsk_size_t *result)\n{\n    /* Keep the size of the stack elements to 8 bytes in total in the\n     * standard case. 
A tsk_id_t depth value is always safe, since\n     * depth counts the number of nodes encountered on a path.\n     */\n    struct stack_elem {\n        tsk_id_t node;\n        tsk_id_t depth;\n    };\n    int ret = 0;\n    const tsk_id_t *restrict right_child = self->right_child;\n    const tsk_id_t *restrict left_sib = self->left_sib;\n    struct stack_elem *stack\n        = tsk_malloc(tsk_tree_get_size_bound(self) * sizeof(*stack));\n    int stack_top;\n    tsk_size_t total_depth;\n    tsk_id_t u;\n    struct stack_elem s = { .node = TSK_NULL, .depth = 0 };\n\n    if (stack == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    stack_top = -1;\n    for (u = right_child[self->virtual_root]; u != TSK_NULL; u = left_sib[u]) {\n        stack_top++;\n        s.node = u;\n        stack[stack_top] = s;\n    }\n    total_depth = 0;\n    while (stack_top >= 0) {\n        s = stack[stack_top];\n        stack_top--;\n        u = right_child[s.node];\n        if (u == TSK_NULL) {\n            total_depth += (tsk_size_t) s.depth;\n        } else {\n            s.depth++;\n            while (u != TSK_NULL) {\n                stack_top++;\n                s.node = u;\n                stack[stack_top] = s;\n                u = left_sib[u];\n            }\n        }\n    }\n    *result = total_depth;\nout:\n    tsk_safe_free(stack);\n    return ret;\n}\n\nint\ntsk_tree_colless_index(const tsk_tree_t *self, tsk_size_t *result)\n{\n    int ret = 0;\n    const tsk_id_t *restrict right_child = self->right_child;\n    const tsk_id_t *restrict left_sib = self->left_sib;\n    tsk_id_t *nodes = tsk_malloc(tsk_tree_get_size_bound(self) * sizeof(*nodes));\n    tsk_id_t *num_leaves = tsk_calloc(self->num_nodes, sizeof(*num_leaves));\n    tsk_size_t j, num_nodes, total;\n    tsk_id_t num_children, u, v;\n\n    if (nodes == NULL || num_leaves == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    if 
(tsk_tree_get_num_roots(self) != 1) {\n        ret = tsk_trace_error(TSK_ERR_UNDEFINED_MULTIROOT);\n        goto out;\n    }\n    ret = tsk_tree_postorder(self, nodes, &num_nodes);\n    if (ret != 0) {\n        goto out;\n    }\n\n    total = 0;\n    for (j = 0; j < num_nodes; j++) {\n        u = nodes[j];\n        /* Cheaper to compute this on the fly than to access the num_children array.\n         * since we're already iterating over the children. */\n        num_children = 0;\n        for (v = right_child[u]; v != TSK_NULL; v = left_sib[v]) {\n            num_children++;\n            num_leaves[u] += num_leaves[v];\n        }\n        if (num_children == 0) {\n            num_leaves[u] = 1;\n        } else if (num_children == 2) {\n            v = right_child[u];\n            total += (tsk_size_t) llabs(num_leaves[v] - num_leaves[left_sib[v]]);\n        } else {\n            ret = tsk_trace_error(TSK_ERR_UNDEFINED_NONBINARY);\n            goto out;\n        }\n    }\n    *result = total;\nout:\n    tsk_safe_free(nodes);\n    tsk_safe_free(num_leaves);\n    return ret;\n}\n\nint\ntsk_tree_b1_index(const tsk_tree_t *self, double *result)\n{\n    int ret = 0;\n    const tsk_id_t *restrict parent = self->parent;\n    const tsk_id_t *restrict right_child = self->right_child;\n    const tsk_id_t *restrict left_sib = self->left_sib;\n    tsk_id_t *nodes = tsk_malloc(tsk_tree_get_size_bound(self) * sizeof(*nodes));\n    tsk_size_t *max_path_length = tsk_calloc(self->num_nodes, sizeof(*max_path_length));\n    tsk_size_t j, num_nodes, mpl;\n    double total = 0.0;\n    tsk_id_t u, v;\n\n    if (nodes == NULL || max_path_length == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    ret = tsk_tree_postorder(self, nodes, &num_nodes);\n    if (ret != 0) {\n        goto out;\n    }\n\n    for (j = 0; j < num_nodes; j++) {\n        u = nodes[j];\n        if (parent[u] != TSK_NULL && right_child[u] != TSK_NULL) {\n            mpl = 0;\n     
       for (v = right_child[u]; v != TSK_NULL; v = left_sib[v]) {\n                mpl = TSK_MAX(mpl, max_path_length[v]);\n            }\n            max_path_length[u] = mpl + 1;\n            total += 1 / (double) max_path_length[u];\n        }\n    }\n    *result = total;\nout:\n    tsk_safe_free(nodes);\n    tsk_safe_free(max_path_length);\n    return ret;\n}\n\nstatic double\ngeneral_log(double x, double base)\n{\n    return log(x) / log(base);\n}\n\nint\ntsk_tree_b2_index(const tsk_tree_t *self, double base, double *result)\n{\n    struct stack_elem {\n        tsk_id_t node;\n        double path_product;\n    };\n    int ret = 0;\n    const tsk_id_t *restrict right_child = self->right_child;\n    const tsk_id_t *restrict left_sib = self->left_sib;\n    struct stack_elem *stack\n        = tsk_malloc(tsk_tree_get_size_bound(self) * sizeof(*stack));\n    int stack_top;\n    double total_proba = 0;\n    double num_children;\n    tsk_id_t u;\n    struct stack_elem s = { .node = TSK_NULL, .path_product = 1 };\n\n    if (stack == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    if (tsk_tree_get_num_roots(self) != 1) {\n        ret = tsk_trace_error(TSK_ERR_UNDEFINED_MULTIROOT);\n        goto out;\n    }\n\n    stack_top = 0;\n    s.node = tsk_tree_get_left_root(self);\n    stack[stack_top] = s;\n\n    while (stack_top >= 0) {\n        s = stack[stack_top];\n        stack_top--;\n        u = right_child[s.node];\n        if (u == TSK_NULL) {\n            total_proba -= s.path_product * general_log(s.path_product, base);\n        } else {\n            num_children = 0;\n            for (; u != TSK_NULL; u = left_sib[u]) {\n                num_children++;\n            }\n            s.path_product *= 1 / num_children;\n            for (u = right_child[s.node]; u != TSK_NULL; u = left_sib[u]) {\n                stack_top++;\n                s.node = u;\n                stack[stack_top] = s;\n            }\n        }\n    }\n    
*result = total_proba;\nout:\n    tsk_safe_free(stack);\n    return ret;\n}\n\nint\ntsk_tree_num_lineages(const tsk_tree_t *self, double t, tsk_size_t *result)\n{\n    int ret = 0;\n    const tsk_id_t *restrict right_child = self->right_child;\n    const tsk_id_t *restrict left_sib = self->left_sib;\n    const double *restrict time = self->tree_sequence->tables->nodes.time;\n    tsk_id_t *stack = tsk_tree_alloc_node_stack(self);\n    tsk_size_t num_lineages = 0;\n    int stack_top;\n    tsk_id_t u, v;\n    double child_time, parent_time;\n\n    if (stack == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    if (!tsk_isfinite(t)) {\n        ret = tsk_trace_error(TSK_ERR_TIME_NONFINITE);\n        goto out;\n    }\n    /* Push the roots onto the stack */\n    stack_top = -1;\n    for (u = right_child[self->virtual_root]; u != TSK_NULL; u = left_sib[u]) {\n        stack_top++;\n        stack[stack_top] = u;\n    }\n\n    while (stack_top >= 0) {\n        u = stack[stack_top];\n        parent_time = time[u];\n        stack_top--;\n        for (v = right_child[u]; v != TSK_NULL; v = left_sib[v]) {\n            child_time = time[v];\n            /* Only traverse down the tree as far as we need to */\n            if (child_time > t) {\n                stack_top++;\n                stack[stack_top] = v;\n            } else if (t < parent_time) {\n                num_lineages++;\n            }\n        }\n    }\n    *result = num_lineages;\nout:\n    tsk_safe_free(stack);\n    return ret;\n}\n\n/* Parsimony methods */\n\nstatic inline uint64_t\nset_bit(uint64_t value, int32_t bit)\n{\n    return value | (1ULL << bit);\n}\n\nstatic inline bool\nbit_is_set(uint64_t value, int32_t bit)\n{\n    return (value & (1ULL << bit)) != 0;\n}\n\nstatic inline int8_t\nget_smallest_set_bit(uint64_t v)\n{\n    /* This is an inefficient implementation, there are several better\n     * approaches. 
On GCC we can use\n     * return (uint8_t) (__builtin_ffsll((long long) v) - 1);\n     */\n    uint64_t t = 1;\n    int8_t r = 0;\n\n    assert(v != 0);\n    while ((v & t) == 0) {\n        t <<= 1;\n        r++;\n    }\n    return r;\n}\n\n#define HARTIGAN_MAX_ALLELES 64\n\n/* This interface is experimental. In the future, we should provide the option to\n * use a general cost matrix, in which case we'll use the Sankoff algorithm. For\n * now this is unused.\n *\n * We should also vectorise the function so that several sites can be processed\n * at once.\n *\n * The algorithm used here is Hartigan parsimony, \"Minimum Mutation Fits to a\n * Given Tree\", Biometrics 1973.\n */\nint TSK_WARN_UNUSED\ntsk_tree_map_mutations(tsk_tree_t *self, int32_t *genotypes,\n    double *TSK_UNUSED(cost_matrix), tsk_flags_t options, int32_t *r_ancestral_state,\n    tsk_size_t *r_num_transitions, tsk_state_transition_t **r_transitions)\n{\n    int ret = 0;\n    struct stack_elem {\n        tsk_id_t node;\n        tsk_id_t transition_parent;\n        int32_t state;\n    };\n    const tsk_size_t num_samples = self->tree_sequence->num_samples;\n    const tsk_id_t *restrict left_child = self->left_child;\n    const tsk_id_t *restrict right_sib = self->right_sib;\n    const tsk_size_t N = tsk_treeseq_get_num_nodes(self->tree_sequence);\n    const tsk_flags_t *restrict node_flags = self->tree_sequence->tables->nodes.flags;\n    tsk_id_t *nodes = tsk_malloc(tsk_tree_get_size_bound(self) * sizeof(*nodes));\n    /* Note: to use less memory here and to improve cache performance we should\n     * probably change to allocating exactly the number of nodes returned by\n     * a preorder traversal, and then lay the memory out in this order. So, we'd\n     * need a map from node ID to its index in the preorder traversal, but this\n     * is trivial to compute. 
Probably doesn't matter so much at the moment\n     * when we're doing a single site, but it would make a big difference if\n     * we were vectorising over lots of sites. */\n    uint64_t *restrict optimal_set = tsk_calloc(N + 1, sizeof(*optimal_set));\n    struct stack_elem *restrict preorder_stack\n        = tsk_malloc(tsk_tree_get_size_bound(self) * sizeof(*preorder_stack));\n    tsk_id_t u, v;\n    /* The largest possible number of transitions is one over every sample */\n    tsk_state_transition_t *transitions = tsk_malloc(num_samples * sizeof(*transitions));\n    int32_t allele, ancestral_state;\n    int stack_top;\n    struct stack_elem s;\n    tsk_size_t j, num_transitions, max_allele_count, num_nodes;\n    tsk_size_t allele_count[HARTIGAN_MAX_ALLELES];\n    tsk_size_t non_missing = 0;\n    int32_t num_alleles = 0;\n\n    if (optimal_set == NULL || preorder_stack == NULL || transitions == NULL\n        || nodes == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    for (j = 0; j < num_samples; j++) {\n        if (genotypes[j] >= HARTIGAN_MAX_ALLELES || genotypes[j] < TSK_MISSING_DATA) {\n            ret = tsk_trace_error(TSK_ERR_BAD_GENOTYPE);\n            goto out;\n        }\n        u = self->tree_sequence->samples[j];\n        if (genotypes[j] == TSK_MISSING_DATA) {\n            /* All bits set */\n            optimal_set[u] = UINT64_MAX;\n        } else {\n            optimal_set[u] = set_bit(optimal_set[u], genotypes[j]);\n            num_alleles = TSK_MAX(genotypes[j], num_alleles);\n            non_missing++;\n        }\n    }\n\n    if (non_missing == 0) {\n        ret = tsk_trace_error(TSK_ERR_GENOTYPES_ALL_MISSING);\n        goto out;\n    }\n    num_alleles++;\n\n    ancestral_state = 0; /* keep compiler happy */\n    if (options & TSK_MM_FIXED_ANCESTRAL_STATE) {\n        ancestral_state = *r_ancestral_state;\n        if ((ancestral_state < 0) || (ancestral_state >= HARTIGAN_MAX_ALLELES)) {\n            ret 
= tsk_trace_error(TSK_ERR_BAD_ANCESTRAL_STATE);\n            goto out;\n        } else if (ancestral_state >= num_alleles) {\n            num_alleles = (int32_t) (ancestral_state + 1);\n        }\n    }\n\n    ret = tsk_tree_postorder_from(self, self->virtual_root, nodes, &num_nodes);\n    if (ret != 0) {\n        goto out;\n    }\n    for (j = 0; j < num_nodes; j++) {\n        u = nodes[j];\n        tsk_memset(allele_count, 0, ((size_t) num_alleles) * sizeof(*allele_count));\n        for (v = left_child[u]; v != TSK_NULL; v = right_sib[v]) {\n            for (allele = 0; allele < num_alleles; allele++) {\n                allele_count[allele] += bit_is_set(optimal_set[v], allele);\n            }\n        }\n        /* the virtual root has no flags defined */\n        if (u == (tsk_id_t) N || !(node_flags[u] & TSK_NODE_IS_SAMPLE)) {\n            max_allele_count = 0;\n            for (allele = 0; allele < num_alleles; allele++) {\n                max_allele_count = TSK_MAX(max_allele_count, allele_count[allele]);\n            }\n            for (allele = 0; allele < num_alleles; allele++) {\n                if (allele_count[allele] == max_allele_count) {\n                    optimal_set[u] = set_bit(optimal_set[u], allele);\n                }\n            }\n        }\n    }\n    if (!(options & TSK_MM_FIXED_ANCESTRAL_STATE)) {\n        ancestral_state = get_smallest_set_bit(optimal_set[self->virtual_root]);\n    } else {\n        optimal_set[self->virtual_root] = UINT64_MAX;\n    }\n\n    num_transitions = 0;\n\n    /* Do a preorder traversal */\n    preorder_stack[0].node = self->virtual_root;\n    preorder_stack[0].state = ancestral_state;\n    preorder_stack[0].transition_parent = TSK_NULL;\n    stack_top = 0;\n    while (stack_top >= 0) {\n        s = preorder_stack[stack_top];\n        stack_top--;\n\n        if (!bit_is_set(optimal_set[s.node], s.state)) {\n            s.state = get_smallest_set_bit(optimal_set[s.node]);\n            
transitions[num_transitions].node = s.node;\n            transitions[num_transitions].parent = s.transition_parent;\n            transitions[num_transitions].state = s.state;\n            s.transition_parent = (tsk_id_t) num_transitions;\n            num_transitions++;\n        }\n        for (v = left_child[s.node]; v != TSK_NULL; v = right_sib[v]) {\n            stack_top++;\n            s.node = v;\n            preorder_stack[stack_top] = s;\n        }\n    }\n\n    *r_transitions = transitions;\n    *r_num_transitions = num_transitions;\n    *r_ancestral_state = ancestral_state;\n    transitions = NULL;\nout:\n    tsk_safe_free(transitions);\n    /* Cannot safe_free because of 'restrict' */\n    if (optimal_set != NULL) {\n        free(optimal_set);\n    }\n    if (preorder_stack != NULL) {\n        free(preorder_stack);\n    }\n    if (nodes != NULL) {\n        free(nodes);\n    }\n    return ret;\n}\n\n/* ======================================================== *\n * KC Distance\n * ======================================================== */\n\ntypedef struct {\n    tsk_size_t *m;\n    double *M;\n    tsk_id_t n;\n    tsk_id_t N;\n} kc_vectors;\n\nstatic int\nkc_vectors_alloc(kc_vectors *self, tsk_id_t n)\n{\n    int ret = 0;\n\n    self->n = n;\n    self->N = (n * (n - 1)) / 2;\n    self->m = tsk_calloc((size_t) (self->N + self->n), sizeof(*self->m));\n    self->M = tsk_calloc((size_t) (self->N + self->n), sizeof(*self->M));\n    if (self->m == NULL || self->M == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\nout:\n    return ret;\n}\n\nstatic void\nkc_vectors_free(kc_vectors *self)\n{\n    tsk_safe_free(self->m);\n    tsk_safe_free(self->M);\n}\n\nstatic inline void\nupdate_kc_vectors_single_sample(\n    const tsk_treeseq_t *ts, kc_vectors *kc_vecs, tsk_id_t u, double time)\n{\n    const tsk_id_t *sample_index_map = ts->sample_index_map;\n    tsk_id_t u_index = sample_index_map[u];\n\n    kc_vecs->m[kc_vecs->N + 
u_index] = 1;\n    kc_vecs->M[kc_vecs->N + u_index] = time;\n}\n\nstatic inline void\nupdate_kc_vectors_all_pairs(const tsk_tree_t *tree, kc_vectors *kc_vecs, tsk_id_t u,\n    tsk_id_t v, tsk_size_t depth, double time)\n{\n    tsk_id_t sample1_index, sample2_index, n1, n2, tmp, pair_index;\n    const tsk_id_t *restrict left_sample = tree->left_sample;\n    const tsk_id_t *restrict right_sample = tree->right_sample;\n    const tsk_id_t *restrict next_sample = tree->next_sample;\n    tsk_size_t *restrict kc_m = kc_vecs->m;\n    double *restrict kc_M = kc_vecs->M;\n\n    sample1_index = left_sample[u];\n    while (sample1_index != TSK_NULL) {\n        sample2_index = left_sample[v];\n        while (sample2_index != TSK_NULL) {\n            n1 = sample1_index;\n            n2 = sample2_index;\n            if (n1 > n2) {\n                tmp = n1;\n                n1 = n2;\n                n2 = tmp;\n            }\n\n            /* We spend ~40% of our time here because these accesses\n             * are not in order and gets very poor cache behavior */\n            pair_index = n2 - n1 - 1 + (-1 * n1 * (n1 - 2 * kc_vecs->n + 1)) / 2;\n            kc_m[pair_index] = depth;\n            kc_M[pair_index] = time;\n\n            if (sample2_index == right_sample[v]) {\n                break;\n            }\n            sample2_index = next_sample[sample2_index];\n        }\n        if (sample1_index == right_sample[u]) {\n            break;\n        }\n        sample1_index = next_sample[sample1_index];\n    }\n}\n\nstruct kc_stack_elmt {\n    tsk_id_t node;\n    tsk_size_t depth;\n};\n\nstatic int\nfill_kc_vectors(const tsk_tree_t *t, kc_vectors *kc_vecs)\n{\n    int stack_top;\n    tsk_size_t depth;\n    double time;\n    const double *times;\n    struct kc_stack_elmt *stack;\n    tsk_id_t root, u, c1, c2;\n    int ret = 0;\n    const tsk_treeseq_t *ts = t->tree_sequence;\n\n    stack = tsk_malloc(tsk_tree_get_size_bound(t) * sizeof(*stack));\n    if (stack == NULL) {\n   
     ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    times = t->tree_sequence->tables->nodes.time;\n\n    for (root = tsk_tree_get_left_root(t); root != TSK_NULL; root = t->right_sib[root]) {\n        stack_top = 0;\n        stack[stack_top].node = root;\n        stack[stack_top].depth = 0;\n        while (stack_top >= 0) {\n            u = stack[stack_top].node;\n            depth = stack[stack_top].depth;\n            stack_top--;\n\n            if (tsk_tree_is_sample(t, u)) {\n                time = tsk_tree_get_branch_length_unsafe(t, u);\n                update_kc_vectors_single_sample(ts, kc_vecs, u, time);\n            }\n\n            /* Don't bother going deeper if there are no samples under this node */\n            if (t->left_sample[u] != TSK_NULL) {\n                for (c1 = t->left_child[u]; c1 != TSK_NULL; c1 = t->right_sib[c1]) {\n                    stack_top++;\n                    stack[stack_top].node = c1;\n                    stack[stack_top].depth = depth + 1;\n\n                    for (c2 = t->right_sib[c1]; c2 != TSK_NULL; c2 = t->right_sib[c2]) {\n                        time = times[root] - times[u];\n                        update_kc_vectors_all_pairs(t, kc_vecs, c1, c2, depth, time);\n                    }\n                }\n            }\n        }\n    }\n\nout:\n    tsk_safe_free(stack);\n    return ret;\n}\n\nstatic double\nnorm_kc_vectors(kc_vectors *self, kc_vectors *other, double lambda)\n{\n    double vT1, vT2, distance_sum;\n    tsk_id_t i;\n\n    distance_sum = 0;\n    for (i = 0; i < self->n + self->N; i++) {\n        vT1 = ((double) self->m[i] * (1 - lambda)) + (lambda * self->M[i]);\n        vT2 = ((double) other->m[i] * (1 - lambda)) + (lambda * other->M[i]);\n        distance_sum += (vT1 - vT2) * (vT1 - vT2);\n    }\n\n    return sqrt(distance_sum);\n}\n\nstatic int\ncheck_kc_distance_tree_inputs(const tsk_tree_t *self)\n{\n    tsk_id_t u, num_nodes, left_child;\n    int ret = 0;\n\n    if 
(tsk_tree_get_num_roots(self) != 1) {\n        ret = tsk_trace_error(TSK_ERR_MULTIPLE_ROOTS);\n        goto out;\n    }\n    if (!tsk_tree_has_sample_lists(self)) {\n        ret = tsk_trace_error(TSK_ERR_NO_SAMPLE_LISTS);\n        goto out;\n    }\n\n    num_nodes = (tsk_id_t) tsk_treeseq_get_num_nodes(self->tree_sequence);\n    for (u = 0; u < num_nodes; u++) {\n        left_child = self->left_child[u];\n        if (left_child != TSK_NULL && left_child == self->right_child[u]) {\n            ret = tsk_trace_error(TSK_ERR_UNARY_NODES);\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nstatic int\ncheck_kc_distance_samples_inputs(const tsk_treeseq_t *self, const tsk_treeseq_t *other)\n{\n    const tsk_id_t *samples, *other_samples;\n    tsk_id_t i, n;\n    int ret = 0;\n\n    if (self->num_samples != other->num_samples) {\n        ret = tsk_trace_error(TSK_ERR_SAMPLE_SIZE_MISMATCH);\n        goto out;\n    }\n\n    samples = self->samples;\n    other_samples = other->samples;\n    n = (tsk_id_t) self->num_samples;\n    for (i = 0; i < n; i++) {\n        if (samples[i] != other_samples[i]) {\n            ret = tsk_trace_error(TSK_ERR_SAMPLES_NOT_EQUAL);\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nint\ntsk_tree_kc_distance(\n    const tsk_tree_t *self, const tsk_tree_t *other, double lambda, double *result)\n{\n    tsk_id_t n, i;\n    kc_vectors vecs[2];\n    const tsk_tree_t *trees[2] = { self, other };\n    int ret = 0;\n\n    for (i = 0; i < 2; i++) {\n        tsk_memset(&vecs[i], 0, sizeof(kc_vectors));\n    }\n\n    ret = check_kc_distance_samples_inputs(self->tree_sequence, other->tree_sequence);\n    if (ret != 0) {\n        goto out;\n    }\n    for (i = 0; i < 2; i++) {\n        ret = check_kc_distance_tree_inputs(trees[i]);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n\n    n = (tsk_id_t) self->tree_sequence->num_samples;\n    for (i = 0; i < 2; i++) {\n        ret = 
kc_vectors_alloc(&vecs[i], n);\n        if (ret != 0) {\n            goto out;\n        }\n        ret = fill_kc_vectors(trees[i], &vecs[i]);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n\n    *result = norm_kc_vectors(&vecs[0], &vecs[1], lambda);\nout:\n    for (i = 0; i < 2; i++) {\n        kc_vectors_free(&vecs[i]);\n    }\n    return ret;\n}\n\nstatic int\ncheck_kc_distance_tree_sequence_inputs(\n    const tsk_treeseq_t *self, const tsk_treeseq_t *other)\n{\n    int ret = 0;\n\n    if (self->tables->sequence_length != other->tables->sequence_length) {\n        ret = tsk_trace_error(TSK_ERR_SEQUENCE_LENGTH_MISMATCH);\n        goto out;\n    }\n\n    ret = check_kc_distance_samples_inputs(self, other);\n    if (ret != 0) {\n        goto out;\n    }\n\nout:\n    return ret;\n}\n\nstatic void\nupdate_kc_pair_with_sample(const tsk_tree_t *self, kc_vectors *kc, tsk_id_t sample,\n    tsk_size_t *depths, double root_time)\n{\n    tsk_id_t c, p, sib;\n    double time;\n    tsk_size_t depth;\n    double *times = self->tree_sequence->tables->nodes.time;\n\n    c = sample;\n    for (p = self->parent[sample]; p != TSK_NULL; p = self->parent[p]) {\n        time = root_time - times[p];\n        depth = depths[p];\n        for (sib = self->left_child[p]; sib != TSK_NULL; sib = self->right_sib[sib]) {\n            if (sib != c) {\n                update_kc_vectors_all_pairs(self, kc, sample, sib, depth, time);\n            }\n        }\n        c = p;\n    }\n}\n\nstatic int\nupdate_kc_subtree_state(\n    tsk_tree_t *t, kc_vectors *kc, tsk_id_t u, tsk_size_t *depths, double root_time)\n{\n    int stack_top;\n    tsk_id_t v, c;\n    tsk_id_t *stack = NULL;\n    int ret = 0;\n\n    stack = tsk_malloc(tsk_tree_get_size_bound(t) * sizeof(*stack));\n    if (stack == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    stack_top = 0;\n    stack[stack_top] = u;\n    while (stack_top >= 0) {\n        v = stack[stack_top];\n   
     stack_top--;\n\n        if (tsk_tree_is_sample(t, v)) {\n            update_kc_pair_with_sample(t, kc, v, depths, root_time);\n        }\n        for (c = t->left_child[v]; c != TSK_NULL; c = t->right_sib[c]) {\n            if (depths[c] != 0) {\n                depths[c] = depths[v] + 1;\n                stack_top++;\n                stack[stack_top] = c;\n            }\n        }\n    }\n\nout:\n    tsk_safe_free(stack);\n    return ret;\n}\n\nstatic int\nupdate_kc_incremental(tsk_tree_t *tree, kc_vectors *kc, tsk_size_t *depths)\n{\n    int ret = 0;\n    tsk_id_t u, v, e, j;\n    double root_time, time;\n    const double *restrict times = tree->tree_sequence->tables->nodes.time;\n    const tsk_id_t *restrict edges_child = tree->tree_sequence->tables->edges.child;\n    const tsk_id_t *restrict edges_parent = tree->tree_sequence->tables->edges.parent;\n    tsk_tree_position_t tree_pos = tree->tree_pos;\n\n    /* Update state of detached subtrees */\n    for (j = tree_pos.out.stop - 1; j >= tree_pos.out.start; j--) {\n        e = tree_pos.out.order[j];\n        u = edges_child[e];\n        depths[u] = 0;\n\n        if (tree->parent[u] == TSK_NULL) {\n            root_time = times[tsk_tree_node_root(tree, u)];\n            ret = update_kc_subtree_state(tree, kc, u, depths, root_time);\n            if (ret != 0) {\n                goto out;\n            }\n        }\n    }\n\n    /* Propagate state change down into reattached subtrees. 
*/\n    for (j = tree_pos.in.stop - 1; j >= tree_pos.in.start; j--) {\n        e = tree_pos.in.order[j];\n        u = edges_child[e];\n        v = edges_parent[e];\n\n        tsk_bug_assert(depths[u] == 0);\n        depths[u] = depths[v] + 1;\n\n        root_time = times[tsk_tree_node_root(tree, u)];\n        ret = update_kc_subtree_state(tree, kc, u, depths, root_time);\n        if (ret != 0) {\n            goto out;\n        }\n\n        if (tsk_tree_is_sample(tree, u)) {\n            time = tsk_tree_get_branch_length_unsafe(tree, u);\n            update_kc_vectors_single_sample(tree->tree_sequence, kc, u, time);\n        }\n    }\nout:\n    return ret;\n}\n\nint\ntsk_treeseq_kc_distance(const tsk_treeseq_t *self, const tsk_treeseq_t *other,\n    double lambda_, double *result)\n{\n    int i;\n    tsk_id_t n;\n    tsk_size_t num_nodes;\n    double left, span, total;\n    const tsk_treeseq_t *treeseqs[2] = { self, other };\n    tsk_tree_t trees[2];\n    kc_vectors kcs[2];\n    tsk_size_t *depths[2];\n    int ret = 0;\n\n    for (i = 0; i < 2; i++) {\n        tsk_memset(&trees[i], 0, sizeof(trees[i]));\n        tsk_memset(&kcs[i], 0, sizeof(kcs[i]));\n        depths[i] = NULL;\n    }\n\n    ret = check_kc_distance_tree_sequence_inputs(self, other);\n    if (ret != 0) {\n        goto out;\n    }\n\n    n = (tsk_id_t) self->num_samples;\n    for (i = 0; i < 2; i++) {\n        ret = tsk_tree_init(&trees[i], treeseqs[i], TSK_SAMPLE_LISTS);\n        if (ret != 0) {\n            goto out;\n        }\n        ret = kc_vectors_alloc(&kcs[i], n);\n        if (ret != 0) {\n            goto out;\n        }\n        num_nodes = tsk_treeseq_get_num_nodes(treeseqs[i]);\n        depths[i] = tsk_calloc(num_nodes, sizeof(*depths[i]));\n        if (depths[i] == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n    }\n\n    total = 0;\n    left = 0;\n\n    ret = tsk_tree_first(&trees[0]);\n    if (ret != TSK_TREE_OK) {\n        goto 
out;\n    }\n    ret = check_kc_distance_tree_inputs(&trees[0]);\n    if (ret != 0) {\n        goto out;\n    }\n\n    ret = update_kc_incremental(&trees[0], &kcs[0], depths[0]);\n    if (ret != 0) {\n        goto out;\n    }\n    while ((ret = tsk_tree_next(&trees[1])) == TSK_TREE_OK) {\n        ret = check_kc_distance_tree_inputs(&trees[1]);\n        if (ret != 0) {\n            goto out;\n        }\n\n        ret = update_kc_incremental(&trees[1], &kcs[1], depths[1]);\n        if (ret != 0) {\n            goto out;\n        }\n        while (trees[0].interval.right < trees[1].interval.right) {\n            span = trees[0].interval.right - left;\n            total += norm_kc_vectors(&kcs[0], &kcs[1], lambda_) * span;\n\n            left = trees[0].interval.right;\n            ret = tsk_tree_next(&trees[0]);\n            tsk_bug_assert(ret == TSK_TREE_OK);\n            ret = check_kc_distance_tree_inputs(&trees[0]);\n            if (ret != 0) {\n                goto out;\n            }\n            ret = update_kc_incremental(&trees[0], &kcs[0], depths[0]);\n            if (ret != 0) {\n                goto out;\n            }\n        }\n        span = trees[1].interval.right - left;\n        left = trees[1].interval.right;\n        total += norm_kc_vectors(&kcs[0], &kcs[1], lambda_) * span;\n    }\n    if (ret != 0) {\n        goto out;\n    }\n\n    *result = total / self->tables->sequence_length;\nout:\n    for (i = 0; i < 2; i++) {\n        tsk_tree_free(&trees[i]);\n        kc_vectors_free(&kcs[i]);\n        tsk_safe_free(depths[i]);\n    }\n    return ret;\n}\n\n/*\n * Divergence matrix\n */\n\ntypedef struct {\n    /* Note it's a waste storing the triply linked tree here, but the code\n     * is written on the assumption of 1-based trees and the algorithm is\n     * frighteningly subtle, so it doesn't seem worth messing with it\n     * unless we really need to save some memory */\n    tsk_id_t *parent;\n    tsk_id_t *child;\n    tsk_id_t *sib;\n    
tsk_id_t *lambda;\n    tsk_id_t *pi;\n    tsk_id_t *tau;\n    tsk_id_t *beta;\n    tsk_id_t *alpha;\n} sv_tables_t;\n\n/* Allocate the n-element work arrays. Every pointer is checked because\n * sv_tables_reset() memsets all eight of them. */\nstatic int\nsv_tables_init(sv_tables_t *self, tsk_size_t n)\n{\n    int ret = 0;\n\n    self->parent = tsk_malloc(n * sizeof(*self->parent));\n    self->child = tsk_malloc(n * sizeof(*self->child));\n    self->sib = tsk_malloc(n * sizeof(*self->sib));\n    self->pi = tsk_malloc(n * sizeof(*self->pi));\n    self->lambda = tsk_malloc(n * sizeof(*self->lambda));\n    self->tau = tsk_malloc(n * sizeof(*self->tau));\n    self->beta = tsk_malloc(n * sizeof(*self->beta));\n    self->alpha = tsk_malloc(n * sizeof(*self->alpha));\n    if (self->parent == NULL || self->child == NULL || self->sib == NULL\n        || self->pi == NULL || self->lambda == NULL || self->tau == NULL\n        || self->beta == NULL || self->alpha == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\nout:\n    return ret;\n}\n\nstatic int\nsv_tables_free(sv_tables_t *self)\n{\n    tsk_safe_free(self->parent);\n    tsk_safe_free(self->child);\n    tsk_safe_free(self->sib);\n    tsk_safe_free(self->lambda);\n    tsk_safe_free(self->pi);\n    tsk_safe_free(self->tau);\n    tsk_safe_free(self->beta);\n    tsk_safe_free(self->alpha);\n    return 0;\n}\nstatic void\nsv_tables_reset(sv_tables_t *self, tsk_tree_t *tree)\n{\n    const tsk_size_t n = 1 + tree->num_nodes;\n    tsk_memset(self->parent, 0, n * sizeof(*self->parent));\n    tsk_memset(self->child, 0, n * sizeof(*self->child));\n    tsk_memset(self->sib, 0, n * sizeof(*self->sib));\n    tsk_memset(self->pi, 0, n * sizeof(*self->pi));\n    tsk_memset(self->lambda, 0, n * sizeof(*self->lambda));\n    tsk_memset(self->tau, 0, n * sizeof(*self->tau));\n    tsk_memset(self->beta, 0, n * sizeof(*self->beta));\n    tsk_memset(self->alpha, 0, n * sizeof(*self->alpha));\n}\n\nstatic void\nsv_tables_convert_tree(sv_tables_t *self, tsk_tree_t *tree)\n{\n    const tsk_size_t n = 1 + tree->num_nodes;\n    const tsk_id_t *restrict tsk_parent = tree->parent;\n    tsk_id_t *restrict child = self->child;\n    tsk_id_t *restrict parent = self->parent;\n    tsk_id_t *restrict sib = self->sib;\n    tsk_size_t j;\n    tsk_id_t u, v;\n\n    for (j = 0; j < n - 1; j++) {\n        u = (tsk_id_t) j + 1;\n        v = tsk_parent[j] + 1;\n        sib[u] = child[v];\n        child[v] = u;\n        parent[u] = v;\n    }\n}\n\n#define LAMBDA 0\n\nstatic void\nsv_tables_build_index(sv_tables_t *self)\n{\n    const tsk_id_t *restrict child = self->child;\n    const tsk_id_t *restrict parent = self->parent;\n    const tsk_id_t *restrict sib = self->sib;\n    tsk_id_t *restrict lambda = self->lambda;\n    tsk_id_t *restrict pi = self->pi;\n    tsk_id_t *restrict tau = self->tau;\n    tsk_id_t *restrict beta = self->beta;\n    tsk_id_t *restrict alpha = self->alpha;\n    tsk_id_t a, n, p, h;\n\n    p = child[LAMBDA];\n    n = 0;\n    lambda[0] = -1;\n    while (p != LAMBDA) {\n        while (true) {\n            n++;\n            pi[p] = n;\n            tau[n] = LAMBDA;\n            lambda[n] = 1 + lambda[n >> 1];\n            if (child[p] != LAMBDA) {\n                p = child[p];\n            } else {\n                break;\n            }\n        }\n        beta[p] = n;\n        while (true) {\n            tau[beta[p]] = parent[p];\n            if (sib[p] != LAMBDA) {\n                p = sib[p];\n                break;\n            } else {\n                p = parent[p];\n                if (p != LAMBDA) {\n                    h = lambda[n & -pi[p]];\n                    beta[p] = ((n >> h) | 1) << h;\n                } else {\n                    break;\n                }\n            }\n        }\n    }\n\n    /* Begin the second traversal */\n    lambda[0] = lambda[n];\n    pi[LAMBDA] = 0;\n    beta[LAMBDA] = 0;\n    alpha[LAMBDA] = 0;\n    p = child[LAMBDA];\n    while (p != LAMBDA) {\n        while (true) {\n            a = alpha[parent[p]] | (beta[p] & -beta[p]);\n            alpha[p] = a;\n          
  if (child[p] != LAMBDA) {\n                p = child[p];\n            } else {\n                break;\n            }\n        }\n        while (true) {\n            if (sib[p] != LAMBDA) {\n                p = sib[p];\n                break;\n            } else {\n                p = parent[p];\n                if (p == LAMBDA) {\n                    break;\n                }\n            }\n        }\n    }\n}\n\nstatic void\nsv_tables_build(sv_tables_t *self, tsk_tree_t *tree)\n{\n    sv_tables_reset(self, tree);\n    sv_tables_convert_tree(self, tree);\n    sv_tables_build_index(self);\n}\n\nstatic tsk_id_t\nsv_tables_mrca_one_based(const sv_tables_t *self, tsk_id_t x, tsk_id_t y)\n{\n    const tsk_id_t *restrict lambda = self->lambda;\n    const tsk_id_t *restrict pi = self->pi;\n    const tsk_id_t *restrict tau = self->tau;\n    const tsk_id_t *restrict beta = self->beta;\n    const tsk_id_t *restrict alpha = self->alpha;\n    tsk_id_t h, k, xhat, yhat, ell, j, z;\n\n    if (beta[x] <= beta[y]) {\n        h = lambda[beta[y] & -beta[x]];\n    } else {\n        h = lambda[beta[x] & -beta[y]];\n    }\n    k = alpha[x] & alpha[y] & -(1 << h);\n    h = lambda[k & -k];\n    j = ((beta[x] >> h) | 1) << h;\n    if (j == beta[x]) {\n        xhat = x;\n    } else {\n        ell = lambda[alpha[x] & ((1 << h) - 1)];\n        xhat = tau[((beta[x] >> ell) | 1) << ell];\n    }\n    if (j == beta[y]) {\n        yhat = y;\n    } else {\n        ell = lambda[alpha[y] & ((1 << h) - 1)];\n        yhat = tau[((beta[y] >> ell) | 1) << ell];\n    }\n    if (pi[xhat] <= pi[yhat]) {\n        z = xhat;\n    } else {\n        z = yhat;\n    }\n    return z;\n}\n\nstatic tsk_id_t\nsv_tables_mrca(const sv_tables_t *self, tsk_id_t x, tsk_id_t y)\n{\n    /* Convert to 1-based indexes and back */\n    return sv_tables_mrca_one_based(self, x + 1, y + 1) - 1;\n}\n\nstatic int\ntsk_treeseq_divergence_matrix_branch(const tsk_treeseq_t *self,\n    tsk_size_t num_sample_sets, const tsk_size_t 
*restrict sample_set_sizes,\n    const tsk_id_t *restrict sample_sets, tsk_size_t num_windows,\n    const double *restrict windows, tsk_flags_t options, double *restrict result)\n{\n    int ret = 0;\n    tsk_tree_t tree;\n    const double *restrict nodes_time = self->tables->nodes.time;\n    const tsk_size_t N = num_sample_sets;\n    tsk_size_t i, j, k, offset, sj, sk;\n    tsk_id_t u, v, w, u_root, v_root;\n    double tu, tv, d, span, left, right, span_left, span_right;\n    double *restrict D;\n    sv_tables_t sv;\n    tsk_size_t *ss_offsets = tsk_malloc((num_sample_sets + 1) * sizeof(*ss_offsets));\n\n    memset(&sv, 0, sizeof(sv));\n    ret = tsk_tree_init(&tree, self, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = sv_tables_init(&sv, self->tables->nodes.num_rows + 1);\n    if (ret != 0) {\n        goto out;\n    }\n    if (ss_offsets == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    if (self->time_uncalibrated && !(options & TSK_STAT_ALLOW_TIME_UNCALIBRATED)) {\n        ret = tsk_trace_error(TSK_ERR_TIME_UNCALIBRATED);\n        goto out;\n    }\n\n    ss_offsets[0] = 0;\n    offset = 0;\n    for (j = 0; j < N; j++) {\n        offset += sample_set_sizes[j];\n        ss_offsets[j + 1] = offset;\n    }\n\n    for (i = 0; i < num_windows; i++) {\n        left = windows[i];\n        right = windows[i + 1];\n        D = result + i * N * N;\n        ret = tsk_tree_seek(&tree, left, 0);\n        if (ret != 0) {\n            goto out;\n        }\n        while (tree.interval.left < right && tree.index != -1) {\n            span_left = TSK_MAX(tree.interval.left, left);\n            span_right = TSK_MIN(tree.interval.right, right);\n            span = span_right - span_left;\n            sv_tables_build(&sv, &tree);\n            for (sj = 0; sj < N; sj++) {\n                for (j = ss_offsets[sj]; j < ss_offsets[sj + 1]; j++) {\n                    u = sample_sets[j];\n                    for (sk = sj; sk < N; 
sk++) {\n                        for (k = ss_offsets[sk]; k < ss_offsets[sk + 1]; k++) {\n                            v = sample_sets[k];\n                            if (u == v) {\n                                /* This case contributes zero to divergence, so\n                                 * short-circuit to save time.\n                                 * TODO is there a better way to do this? */\n                                continue;\n                            }\n                            w = sv_tables_mrca(&sv, u, v);\n                            if (w != TSK_NULL) {\n                                u_root = w;\n                                v_root = w;\n                            } else {\n                                /* Slow path - only happens for nodes in disconnected\n                                 * subtrees in a tree with multiple roots */\n                                u_root = tsk_tree_get_node_root(&tree, u);\n                                v_root = tsk_tree_get_node_root(&tree, v);\n                            }\n                            tu = nodes_time[u_root] - nodes_time[u];\n                            tv = nodes_time[v_root] - nodes_time[v];\n                            d = (tu + tv) * span;\n                            D[sj * N + sk] += d;\n                        }\n                    }\n                }\n            }\n            ret = tsk_tree_next(&tree);\n            if (ret < 0) {\n                goto out;\n            }\n        }\n    }\n    ret = 0;\nout:\n    tsk_tree_free(&tree);\n    sv_tables_free(&sv);\n    tsk_safe_free(ss_offsets);\n    return ret;\n}\n\n// FIXME see #2817\n// Just including this here for now as it's the simplest option. 
Everything\n// will probably move to stats.[c,h] in the near future though, and it\n// can pull in ``genotypes.h`` without issues.\n#include <tskit/genotypes.h>\n\nstatic void\nupdate_site_divergence(const tsk_variant_t *var, const tsk_id_t *restrict A,\n    const tsk_size_t *restrict offsets, const tsk_size_t num_sample_sets, double *D)\n\n{\n    const tsk_size_t num_alleles = var->num_alleles;\n    tsk_size_t a, b, j, k;\n    tsk_id_t u, v;\n    double increment;\n\n    for (a = 0; a < num_alleles; a++) {\n        for (b = a + 1; b < num_alleles; b++) {\n            for (j = offsets[a]; j < offsets[a + 1]; j++) {\n                for (k = offsets[b]; k < offsets[b + 1]; k++) {\n                    u = A[j];\n                    v = A[k];\n                    /* Only increment the upper triangle to (hopefully) improve memory\n                     * access patterns */\n                    if (u > v) {\n                        u = A[k];\n                        v = A[j];\n                    }\n                    increment = 1;\n                    if (u == v) {\n                        increment = 2;\n                    }\n                    D[u * (tsk_id_t) num_sample_sets + v] += increment;\n                }\n            }\n        }\n    }\n}\n\nstatic void\ngroup_alleles(const tsk_variant_t *var, tsk_id_t *restrict A, tsk_size_t *offsets)\n{\n    const tsk_size_t n = var->num_samples;\n    const int32_t *restrict genotypes = var->genotypes;\n    tsk_id_t a;\n    tsk_size_t j, k;\n\n    k = 0;\n    offsets[0] = 0;\n    for (a = 0; a < (tsk_id_t) var->num_alleles; a++) {\n        offsets[a + 1] = offsets[a];\n        for (j = 0; j < n; j++) {\n            if (genotypes[j] == a) {\n                offsets[a + 1]++;\n                A[k] = (tsk_id_t) j;\n                k++;\n            }\n        }\n    }\n}\n\nstatic void\nremap_to_sample_sets(const tsk_size_t num_samples, const tsk_id_t *restrict samples,\n    const tsk_id_t *restrict sample_set_index_map, 
tsk_id_t *restrict A)\n{\n    tsk_size_t j;\n    tsk_id_t u;\n    for (j = 0; j < num_samples; j++) {\n        u = samples[A[j]];\n        tsk_bug_assert(u >= 0);\n        tsk_bug_assert(sample_set_index_map[u] >= 0);\n        A[j] = sample_set_index_map[u];\n    }\n}\n\nstatic int\ntsk_treeseq_divergence_matrix_site(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_id_t *restrict sample_set_index_map, const tsk_size_t num_samples,\n    const tsk_id_t *restrict samples, tsk_size_t num_windows,\n    const double *restrict windows, tsk_flags_t TSK_UNUSED(options),\n    double *restrict result)\n{\n    int ret = 0;\n    tsk_size_t i;\n    tsk_id_t site_id;\n    double left, right;\n    double *restrict D;\n    const tsk_id_t num_sites = (tsk_id_t) self->tables->sites.num_rows;\n    const double *restrict sites_position = self->tables->sites.position;\n    tsk_id_t *A = tsk_malloc(num_samples * sizeof(*A));\n    /* Allocate the allele offsets at the first variant */\n    tsk_size_t max_alleles = 0;\n    tsk_size_t *allele_offsets = NULL;\n    tsk_variant_t variant;\n\n    /* FIXME it's not clear that using TSK_ISOLATED_NOT_MISSING is\n     * correct here */\n    ret = tsk_variant_init(\n        &variant, self, samples, num_samples, NULL, TSK_ISOLATED_NOT_MISSING);\n    if (ret != 0) {\n        goto out;\n    }\n    if (A == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    site_id = 0;\n    while (site_id < num_sites && sites_position[site_id] < windows[0]) {\n        site_id++;\n    }\n\n    for (i = 0; i < num_windows; i++) {\n        left = windows[i];\n        right = windows[i + 1];\n        D = result + i * num_sample_sets * num_sample_sets;\n\n        if (site_id < num_sites) {\n            tsk_bug_assert(sites_position[site_id] >= left);\n        }\n        while (site_id < num_sites && sites_position[site_id] < right) {\n            ret = tsk_variant_decode(&variant, site_id, 0);\n            if 
(ret != 0) {\n                goto out;\n            }\n            if (variant.num_alleles > max_alleles) {\n                /* could do some kind of doubling here, but there's no\n                 * point - just keep it simple for testing. */\n                max_alleles = variant.num_alleles;\n                tsk_safe_free(allele_offsets);\n                allele_offsets = tsk_malloc((max_alleles + 1) * sizeof(*allele_offsets));\n                if (allele_offsets == NULL) {\n                    ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n                    goto out;\n                }\n            }\n            group_alleles(&variant, A, allele_offsets);\n            remap_to_sample_sets(num_samples, samples, sample_set_index_map, A);\n            update_site_divergence(&variant, A, allele_offsets, num_sample_sets, D);\n            site_id++;\n        }\n    }\n    ret = 0;\nout:\n    tsk_variant_free(&variant);\n    tsk_safe_free(A);\n    tsk_safe_free(allele_offsets);\n    return ret;\n}\n\n/* Return the mapping from node IDs to the index of the sample set\n * they belong to, or -1 if none. 
Error if a node is in more than one\n * set.\n */\nstatic int\nget_sample_set_index_map(const tsk_treeseq_t *self, const tsk_size_t num_sample_sets,\n    const tsk_size_t *restrict sample_set_sizes, const tsk_id_t *restrict sample_sets,\n    tsk_size_t *ret_total_samples, tsk_id_t *restrict node_index_map)\n{\n    int ret = 0;\n    tsk_size_t i, j, k;\n    tsk_id_t u;\n    tsk_size_t total_samples = 0;\n    const tsk_size_t num_nodes = self->tables->nodes.num_rows;\n    const tsk_flags_t *restrict node_flags = self->tables->nodes.flags;\n\n    for (j = 0; j < num_nodes; j++) {\n        node_index_map[j] = TSK_NULL;\n    }\n    i = 0;\n    for (j = 0; j < num_sample_sets; j++) {\n        total_samples += sample_set_sizes[j];\n        for (k = 0; k < sample_set_sizes[j]; k++) {\n            u = sample_sets[i];\n            i++;\n            if (u < 0 || u >= (tsk_id_t) num_nodes) {\n                ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);\n                goto out;\n            }\n            /* Note: we require nodes to be samples because we have to think\n             * about how to normalise by the length of genome that the node\n             * is 'in' the tree for each window otherwise. 
*/\n            if (!(node_flags[u] & TSK_NODE_IS_SAMPLE)) {\n                ret = tsk_trace_error(TSK_ERR_BAD_SAMPLES);\n                goto out;\n            }\n            if (node_index_map[u] != TSK_NULL) {\n                ret = tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE);\n                goto out;\n            }\n            node_index_map[u] = (tsk_id_t) j;\n        }\n    }\n    *ret_total_samples = total_samples;\nout:\n    return ret;\n}\n\nstatic void\nfill_lower_triangle_count_normalise(const tsk_size_t num_windows, const tsk_size_t n,\n    const tsk_size_t *set_sizes, double *restrict result)\n{\n    tsk_size_t i, j, k;\n    double denom;\n    double *restrict D;\n\n    /* TODO there's probably a better striding pattern that could be used here */\n    for (i = 0; i < num_windows; i++) {\n        D = result + i * n * n;\n        for (j = 0; j < n; j++) {\n            denom = (double) set_sizes[j] * (double) (set_sizes[j] - 1);\n            if (denom != 0) {\n                D[j * n + j] /= denom;\n            }\n            for (k = j + 1; k < n; k++) {\n                denom = (double) set_sizes[j] * (double) set_sizes[k];\n                D[j * n + k] /= denom;\n                D[k * n + j] = D[j * n + k];\n            }\n        }\n    }\n}\n\nint\ntsk_treeseq_divergence_matrix(const tsk_treeseq_t *self, tsk_size_t num_sample_sets_in,\n    const tsk_size_t *sample_set_sizes_in, const tsk_id_t *sample_sets_in,\n    tsk_size_t num_windows, const double *windows, tsk_flags_t options, double *result)\n{\n    int ret = 0;\n    tsk_size_t N, total_samples;\n    const tsk_size_t *sample_set_sizes;\n    const tsk_id_t *sample_sets;\n    tsk_size_t *tmp_sample_set_sizes = NULL;\n    const double default_windows[] = { 0, self->tables->sequence_length };\n    const tsk_size_t num_nodes = self->tables->nodes.num_rows;\n    bool stat_site = !!(options & TSK_STAT_SITE);\n    bool stat_branch = !!(options & TSK_STAT_BRANCH);\n    bool stat_node = !!(options & 
TSK_STAT_NODE);\n    tsk_id_t *sample_set_index_map\n        = tsk_malloc(num_nodes * sizeof(*sample_set_index_map));\n    tsk_size_t j;\n\n    if (stat_node) {\n        ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_STAT_MODE);\n        goto out;\n    }\n    /* If no mode is specified, we default to site mode */\n    if (!(stat_site || stat_branch)) {\n        stat_site = true;\n    }\n    /* It's an error to specify more than one mode */\n    if (stat_site + stat_branch > 1) {\n        ret = tsk_trace_error(TSK_ERR_MULTIPLE_STAT_MODES);\n        goto out;\n    }\n\n    if (options & TSK_STAT_POLARISED) {\n        ret = tsk_trace_error(TSK_ERR_STAT_POLARISED_UNSUPPORTED);\n        goto out;\n    }\n\n    if (windows == NULL) {\n        num_windows = 1;\n        windows = default_windows;\n    } else {\n        ret = tsk_treeseq_check_windows(self, num_windows, windows, 0);\n        if (ret != 0) {\n            goto out;\n        }\n    }\n\n    /* If sample_sets is NULL, use self->samples and ignore input\n     * num_sample_sets */\n    sample_sets = sample_sets_in;\n    N = num_sample_sets_in;\n    if (sample_sets_in == NULL) {\n        sample_sets = self->samples;\n        if (sample_set_sizes_in == NULL) {\n            N = self->num_samples;\n        }\n    }\n    sample_set_sizes = sample_set_sizes_in;\n    /* If sample_set_sizes is NULL, assume it is N sets of size 1 */\n    if (sample_set_sizes_in == NULL) {\n        tmp_sample_set_sizes = tsk_malloc(N * sizeof(*tmp_sample_set_sizes));\n        if (tmp_sample_set_sizes == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n        for (j = 0; j < N; j++) {\n            tmp_sample_set_sizes[j] = 1;\n        }\n        sample_set_sizes = tmp_sample_set_sizes;\n    }\n\n    ret = get_sample_set_index_map(\n        self, N, sample_set_sizes, sample_sets, &total_samples, sample_set_index_map);\n    if (ret != 0) {\n        goto out;\n    }\n\n    tsk_memset(result, 0, num_windows * N * 
N * sizeof(*result));\n\n    if (stat_branch) {\n        ret = tsk_treeseq_divergence_matrix_branch(self, N, sample_set_sizes,\n            sample_sets, num_windows, windows, options, result);\n    } else {\n        tsk_bug_assert(stat_site);\n        ret = tsk_treeseq_divergence_matrix_site(self, N, sample_set_index_map,\n            total_samples, sample_sets, num_windows, windows, options, result);\n    }\n    if (ret != 0) {\n        goto out;\n    }\n    fill_lower_triangle_count_normalise(num_windows, N, sample_set_sizes, result);\n\n    if (options & TSK_STAT_SPAN_NORMALISE) {\n        span_normalise(num_windows, windows, N * N, result);\n    }\nout:\n    tsk_safe_free(sample_set_index_map);\n    tsk_safe_free(tmp_sample_set_sizes);\n    return ret;\n}\n\n/* ======================================================== *\n * Extend haplotypes\n * ======================================================== */\n\ntypedef struct _edge_list_t {\n    tsk_id_t edge;\n    // the `extended` flag records whether we have decided to extend\n    // this entry to the current tree\n    int extended;\n    struct _edge_list_t *next;\n} edge_list_t;\n\nstatic void\nedge_list_print(edge_list_t **head, tsk_edge_table_t *edges, FILE *out)\n{\n    int n = 0;\n    edge_list_t *px;\n    fprintf(out, \"Edge list:\\n\");\n    for (px = *head; px != NULL; px = px->next) {\n        fprintf(out, \"  %d: %d (%d); \", n, (int) px->edge, px->extended);\n        if (px->edge >= 0 && edges != NULL) {\n            fprintf(out, \"%d->%d on [%.1f, %.1f)\", (int) edges->child[px->edge],\n                (int) edges->parent[px->edge], edges->left[px->edge],\n                edges->right[px->edge]);\n        } else {\n            fprintf(out, \"(null)\");\n        }\n        fprintf(out, \"\\n\");\n        n += 1;\n    }\n    fprintf(out, \"length = %d\\n\", n);\n}\n\nstatic void\nedge_list_append_entry(\n    edge_list_t **head, edge_list_t **tail, edge_list_t *x, tsk_id_t edge, int extended)\n{\n    
x->edge = edge;\n    x->extended = extended;\n    x->next = NULL;\n\n    if (*tail == NULL) {\n        *head = x;\n    } else {\n        (*tail)->next = x;\n    }\n    *tail = x;\n}\n\nstatic void\nremove_unextended(edge_list_t **head, edge_list_t **tail)\n{\n    edge_list_t *px, *x;\n\n    px = *head;\n    while (px != NULL && px->extended == 0) {\n        px = px->next;\n    }\n    *head = px;\n    if (px != NULL) {\n        px->extended = 0;\n        x = px->next;\n        while (x != NULL) {\n            if (x->extended > 0) {\n                x->extended = 0;\n                px->next = x;\n                px = x;\n            }\n            x = x->next;\n        }\n        px->next = NULL;\n    }\n    *tail = px;\n}\n\nstatic void\nedge_list_set_extended(edge_list_t **head, tsk_id_t edge_id)\n{\n    // finds the entry with edge 'edge_id'\n    // and sets its 'extended' flag to 1\n    edge_list_t *px;\n    px = *head;\n    tsk_bug_assert(px != NULL);\n    while (px->edge != edge_id) {\n        px = px->next;\n        tsk_bug_assert(px != NULL);\n    }\n    tsk_bug_assert(px->edge == edge_id);\n    px->extended = 1;\n}\n\nstatic int\ntsk_treeseq_slide_mutation_nodes_up(\n    const tsk_treeseq_t *self, tsk_mutation_table_t *mutations)\n{\n    int ret = 0;\n    double t;\n    tsk_id_t c, p, next_mut;\n    const tsk_size_t num_nodes = self->tables->nodes.num_rows;\n    const double *sites_position = self->tables->sites.position;\n    const double *nodes_time = self->tables->nodes.time;\n    tsk_tree_t tree;\n\n    ret = tsk_tree_init(&tree, self, TSK_NO_SAMPLE_COUNTS);\n    if (ret != 0) {\n        goto out;\n    }\n\n    next_mut = 0;\n    for (ret = tsk_tree_first(&tree); ret == TSK_TREE_OK; ret = tsk_tree_next(&tree)) {\n        while (next_mut < (tsk_id_t) mutations->num_rows\n               && sites_position[mutations->site[next_mut]] < tree.interval.right) {\n            t = mutations->time[next_mut];\n            if (tsk_is_unknown_time(t)) {\n              
  ret = tsk_trace_error(TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME);\n                goto out;\n            }\n            c = mutations->node[next_mut];\n            tsk_bug_assert(c < (tsk_id_t) num_nodes);\n            p = tree.parent[c];\n            while (p != TSK_NULL && nodes_time[p] <= t) {\n                c = p;\n                p = tree.parent[c];\n            }\n            tsk_bug_assert(nodes_time[c] <= t);\n            mutations->node[next_mut] = c;\n            next_mut++;\n        }\n    }\n    if (ret != 0) {\n        goto out;\n    }\n\nout:\n    tsk_tree_free(&tree);\n\n    return ret;\n}\n\ntypedef struct {\n    const tsk_treeseq_t *ts;\n    tsk_edge_table_t *edges;\n    int direction;\n    tsk_id_t *last_degree, *next_degree;\n    tsk_id_t *last_nodes_edge, *next_nodes_edge;\n    tsk_id_t *parent_out, *parent_in;\n    bool *not_sample;\n    double *near_side, *far_side;\n    edge_list_t *edges_out_head, *edges_out_tail;\n    edge_list_t *edges_in_head, *edges_in_tail;\n    tsk_blkalloc_t edge_list_heap;\n} haplotype_extender_t;\n\nstatic int\nhaplotype_extender_init(haplotype_extender_t *self, const tsk_treeseq_t *ts,\n    int direction, tsk_edge_table_t *edges)\n{\n    int ret = 0;\n    tsk_id_t tj;\n    tsk_size_t num_nodes = tsk_treeseq_get_num_nodes(ts);\n\n    tsk_memset(self, 0, sizeof(haplotype_extender_t));\n\n    self->ts = ts;\n    self->edges = edges;\n    ret = tsk_edge_table_copy(&ts->tables->edges, self->edges, TSK_NO_INIT);\n    if (ret != 0) {\n        goto out;\n    }\n\n    self->direction = direction;\n    if (direction == TSK_DIR_FORWARD) {\n        self->near_side = self->edges->left;\n        self->far_side = self->edges->right;\n    } else {\n        self->near_side = self->edges->right;\n        self->far_side = self->edges->left;\n    }\n\n    self->edges_in_head = NULL;\n    self->edges_in_tail = NULL;\n    self->edges_out_head = NULL;\n    self->edges_out_tail = NULL;\n\n    ret = 
tsk_blkalloc_init(&self->edge_list_heap, 8192);\n    if (ret != 0) {\n        goto out;\n    }\n\n    self->last_degree = tsk_calloc(num_nodes, sizeof(*self->last_degree));\n    self->next_degree = tsk_calloc(num_nodes, sizeof(*self->next_degree));\n    self->last_nodes_edge = tsk_malloc(num_nodes * sizeof(*self->last_nodes_edge));\n    self->next_nodes_edge = tsk_malloc(num_nodes * sizeof(*self->next_nodes_edge));\n    self->parent_out = tsk_malloc(num_nodes * sizeof(*self->parent_out));\n    self->parent_in = tsk_malloc(num_nodes * sizeof(*self->parent_in));\n    self->not_sample = tsk_malloc(num_nodes * sizeof(*self->not_sample));\n\n    if (self->last_degree == NULL || self->next_degree == NULL\n        || self->last_nodes_edge == NULL || self->next_nodes_edge == NULL\n        || self->parent_out == NULL || self->parent_in == NULL\n        || self->not_sample == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    tsk_memset(self->last_nodes_edge, 0xff, num_nodes * sizeof(*self->last_nodes_edge));\n    tsk_memset(self->next_nodes_edge, 0xff, num_nodes * sizeof(*self->next_nodes_edge));\n    tsk_memset(self->parent_out, 0xff, num_nodes * sizeof(*self->parent_out));\n    tsk_memset(self->parent_in, 0xff, num_nodes * sizeof(*self->parent_in));\n\n    for (tj = 0; tj < (tsk_id_t) num_nodes; tj++) {\n        self->not_sample[tj] = ((ts->tables->nodes.flags[tj] & TSK_NODE_IS_SAMPLE) == 0);\n    }\n\nout:\n    return ret;\n}\n\nstatic void\nhaplotype_extender_print_state(haplotype_extender_t *self, FILE *out)\n{\n    fprintf(out, \"\\n======= haplotype extender ===========\\n\");\n    fprintf(out, \"parent in:\\n\");\n    for (int j = 0; j < (int) self->ts->tables->nodes.num_rows; j++) {\n        fprintf(out, \"   %d: %d\\n\", j, (int) self->parent_in[j]);\n    }\n    fprintf(out, \"parent out:\\n\");\n    for (int j = 0; j < (int) self->ts->tables->nodes.num_rows; j++) {\n        fprintf(out, \"   %d: %d\\n\", j, (int) 
self->parent_out[j]);\n    }\n    fprintf(out, \"last nodes edge:\\n\");\n    for (int j = 0; j < (int) self->ts->tables->nodes.num_rows; j++) {\n        tsk_id_t ej = self->last_nodes_edge[j];\n        fprintf(out, \"   %d: %d, \", j, (int) ej);\n        if (self->last_nodes_edge[j] != TSK_NULL) {\n            fprintf(out, \"(%d->%d, %.1f-%.1f)\", (int) self->edges->child[ej],\n                (int) self->edges->parent[ej], self->edges->left[ej],\n                self->edges->right[ej]);\n        } else {\n            fprintf(out, \"(null);\");\n        }\n        fprintf(out, \"\\n\");\n    }\n    fprintf(out, \"next nodes edge:\\n\");\n    for (int j = 0; j < (int) self->ts->tables->nodes.num_rows; j++) {\n        tsk_id_t ej = self->next_nodes_edge[j];\n        fprintf(out, \"   %d: %d, \", j, (int) ej);\n        if (self->next_nodes_edge[j] != TSK_NULL) {\n            fprintf(out, \"(%d->%d, %.1f-%.1f)\", (int) self->edges->child[ej],\n                (int) self->edges->parent[ej], self->edges->left[ej],\n                self->edges->right[ej]);\n        } else {\n            fprintf(out, \"(null);\");\n        }\n        fprintf(out, \"\\n\");\n    }\n    fprintf(out, \"edges out:\\n\");\n    edge_list_print(&self->edges_out_head, self->edges, out);\n    fprintf(out, \"edges in:\\n\");\n    edge_list_print(&self->edges_in_head, self->edges, out);\n}\n\nstatic int\nhaplotype_extender_free(haplotype_extender_t *self)\n{\n    tsk_blkalloc_free(&self->edge_list_heap);\n    tsk_safe_free(self->last_degree);\n    tsk_safe_free(self->next_degree);\n    tsk_safe_free(self->last_nodes_edge);\n    tsk_safe_free(self->next_nodes_edge);\n    tsk_safe_free(self->parent_out);\n    tsk_safe_free(self->parent_in);\n    tsk_safe_free(self->not_sample);\n    return 0;\n}\n\nstatic int\nhaplotype_extender_next_tree(haplotype_extender_t *self, tsk_tree_position_t *tree_pos)\n{\n    int ret = 0;\n    tsk_id_t tj, e;\n    edge_list_t *ex_out, *ex_in;\n    edge_list_t *new_ex;\n    
const tsk_id_t *edges_child = self->edges->child;\n    const tsk_id_t *edges_parent = self->edges->parent;\n\n    for (ex_out = self->edges_out_head; ex_out != NULL; ex_out = ex_out->next) {\n        e = ex_out->edge;\n        self->parent_out[edges_child[e]] = TSK_NULL;\n        // note we only adjust near_side of edges_in, not edges_out,\n        // so no need to check for zero-length edges\n        if (ex_out->extended > 1) {\n            // this is needed to catch newly-created edges\n            self->last_nodes_edge[edges_child[e]] = e;\n            self->last_degree[edges_child[e]] += 1;\n            self->last_degree[edges_parent[e]] += 1;\n        } else if (ex_out->extended == 0) {\n            self->last_nodes_edge[edges_child[e]] = TSK_NULL;\n            self->last_degree[edges_child[e]] -= 1;\n            self->last_degree[edges_parent[e]] -= 1;\n        }\n    }\n    remove_unextended(&self->edges_out_head, &self->edges_out_tail);\n    for (ex_in = self->edges_in_head; ex_in != NULL; ex_in = ex_in->next) {\n        e = ex_in->edge;\n        self->parent_in[edges_child[e]] = TSK_NULL;\n        if (ex_in->extended == 0 && self->near_side[e] != self->far_side[e]) {\n            self->last_nodes_edge[edges_child[e]] = e;\n            self->last_degree[edges_child[e]] += 1;\n            self->last_degree[edges_parent[e]] += 1;\n        }\n    }\n    remove_unextended(&self->edges_in_head, &self->edges_in_tail);\n\n    // done cleanup from last tree transition;\n    // now we set the state up for this tree transition\n    for (tj = tree_pos->out.start; tj != tree_pos->out.stop; tj += self->direction) {\n        e = tree_pos->out.order[tj];\n        if (self->near_side[e] != self->far_side[e]) {\n            new_ex = tsk_blkalloc_get(&self->edge_list_heap, sizeof(*new_ex));\n            if (new_ex == NULL) {\n                ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n                goto out;\n            }\n            edge_list_append_entry(\n              
  &self->edges_out_head, &self->edges_out_tail, new_ex, e, 0);\n        }\n    }\n    for (ex_out = self->edges_out_head; ex_out != NULL; ex_out = ex_out->next) {\n        e = ex_out->edge;\n        self->parent_out[edges_child[e]] = edges_parent[e];\n        self->next_nodes_edge[edges_child[e]] = TSK_NULL;\n        self->next_degree[edges_child[e]] -= 1;\n        self->next_degree[edges_parent[e]] -= 1;\n    }\n\n    for (tj = tree_pos->in.start; tj != tree_pos->in.stop; tj += self->direction) {\n        e = tree_pos->in.order[tj];\n        // append this incoming edge to the edges_in list\n        new_ex = tsk_blkalloc_get(&self->edge_list_heap, sizeof(*new_ex));\n        if (new_ex == NULL) {\n            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n            goto out;\n        }\n        edge_list_append_entry(&self->edges_in_head, &self->edges_in_tail, new_ex, e, 0);\n    }\n    for (ex_in = self->edges_in_head; ex_in != NULL; ex_in = ex_in->next) {\n        e = ex_in->edge;\n        self->parent_in[edges_child[e]] = edges_parent[e];\n        self->next_nodes_edge[edges_child[e]] = e;\n        self->next_degree[edges_child[e]] += 1;\n        self->next_degree[edges_parent[e]] += 1;\n    }\n\nout:\n    return ret;\n}\n\nstatic int\nhaplotype_extender_add_or_extend_edge(haplotype_extender_t *self, tsk_id_t new_parent,\n    tsk_id_t child, double left, double right)\n{\n    int ret = 0;\n    double there;\n    tsk_id_t old_edge, e_out, old_parent;\n    edge_list_t *ex_in;\n    edge_list_t *new_ex = NULL;\n    tsk_id_t e_in;\n\n    there = (self->direction == TSK_DIR_FORWARD) ? 
right : left;\n    old_edge = self->next_nodes_edge[child];\n    if (old_edge != TSK_NULL) {\n        old_parent = self->edges->parent[old_edge];\n    } else {\n        old_parent = TSK_NULL;\n    }\n    if (new_parent != old_parent) {\n        if (self->parent_out[child] == new_parent) {\n            // if our new edge is in edges_out, it should be extended\n            e_out = self->last_nodes_edge[child];\n            self->far_side[e_out] = there;\n            edge_list_set_extended(&self->edges_out_head, e_out);\n        } else {\n            e_out = tsk_edge_table_add_row(\n                self->edges, left, right, new_parent, child, NULL, 0);\n            if (e_out < 0) {\n                ret = (int) e_out;\n                goto out;\n            }\n            /* pointers to left/right might have changed! */\n            if (self->direction == TSK_DIR_FORWARD) {\n                self->near_side = self->edges->left;\n                self->far_side = self->edges->right;\n            } else {\n                self->near_side = self->edges->right;\n                self->far_side = self->edges->left;\n            }\n            new_ex = tsk_blkalloc_get(&self->edge_list_heap, sizeof(*new_ex));\n            if (new_ex == NULL) {\n                ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n                goto out;\n            }\n            edge_list_append_entry(\n                &self->edges_out_head, &self->edges_out_tail, new_ex, e_out, 2);\n        }\n        self->next_nodes_edge[child] = e_out;\n        self->next_degree[child] += 1;\n        self->next_degree[new_parent] += 1;\n        self->parent_out[child] = TSK_NULL;\n        if (old_edge != TSK_NULL) {\n            for (ex_in = self->edges_in_head; ex_in != NULL; ex_in = ex_in->next) {\n                e_in = ex_in->edge;\n                if (e_in == old_edge) {\n                    self->near_side[e_in] = there;\n                    if (self->far_side[e_in] != there) {\n                        
ex_in->extended = 1;\n                    }\n                    self->next_degree[child] -= 1;\n                    self->next_degree[self->parent_in[child]] -= 1;\n                    self->parent_in[child] = TSK_NULL;\n                }\n            }\n        }\n    }\nout:\n    return ret;\n}\n\nstatic float\nhaplotype_extender_mergeable(haplotype_extender_t *self, tsk_id_t c)\n{\n    // returns the number of new edges needed\n    // if the paths in parent_in and parent_out\n    // up through nodes that aren't in the other tree\n    // end at the same place and don't have conflicting times;\n    // otherwise, return infinity\n    tsk_id_t p_in, p_out, child;\n    float num_new_edges; // needs to be float so we can have infinity\n    int num_extended;\n    double t_in, t_out;\n    bool climb_in, climb_out;\n    const double *nodes_time = self->ts->tables->nodes.time;\n\n    p_out = self->parent_out[c];\n    p_in = self->parent_in[c];\n    t_out = (p_out == TSK_NULL) ? INFINITY : nodes_time[p_out];\n    t_in = (p_in == TSK_NULL) ? INFINITY : nodes_time[p_in];\n    child = c;\n    num_new_edges = 0;\n    num_extended = 0;\n    while (true) {\n        climb_in = (p_in != TSK_NULL && self->last_degree[p_in] == 0\n                    && self->not_sample[p_in] && t_in < t_out);\n        climb_out = (p_out != TSK_NULL && self->next_degree[p_out] == 0\n                     && self->not_sample[p_out] && t_out < t_in);\n        if (climb_in) {\n            if (self->parent_in[child] != p_in) {\n                num_new_edges += 1;\n            }\n            child = p_in;\n            p_in = self->parent_in[p_in];\n            t_in = (p_in == TSK_NULL) ? INFINITY : nodes_time[p_in];\n        } else if (climb_out) {\n            if (self->parent_out[child] != p_out) {\n                num_new_edges += 1;\n            }\n            child = p_out;\n            p_out = self->parent_out[p_out];\n            t_out = (p_out == TSK_NULL) ? 
INFINITY : nodes_time[p_out];\n            num_extended += 1;\n        } else {\n            break;\n        }\n    }\n    if ((num_extended == 0) || (p_in != p_out) || (p_in == TSK_NULL)) {\n        num_new_edges = INFINITY;\n    }\n    return num_new_edges;\n}\n\nstatic int\nhaplotype_extender_merge_paths(\n    haplotype_extender_t *self, tsk_id_t c, double left, double right)\n{\n    int ret = 0;\n    tsk_id_t p_in, p_out, child;\n    double t_in, t_out;\n    bool climb_in, climb_out;\n    const double *nodes_time = self->ts->tables->nodes.time;\n\n    p_out = self->parent_out[c];\n    p_in = self->parent_in[c];\n    t_out = nodes_time[p_out];\n    t_in = nodes_time[p_in];\n    child = c;\n    while (true) {\n        climb_in = (p_in != TSK_NULL && self->last_degree[p_in] == 0\n                    && self->not_sample[p_in] && t_in < t_out);\n        climb_out = (p_out != TSK_NULL && self->next_degree[p_out] == 0\n                     && self->not_sample[p_out] && t_out < t_in);\n        if (climb_in) {\n            ret = haplotype_extender_add_or_extend_edge(self, p_in, child, left, right);\n            if (ret != 0) {\n                goto out;\n            }\n            child = p_in;\n            p_in = self->parent_in[p_in];\n            t_in = (p_in == TSK_NULL) ? INFINITY : nodes_time[p_in];\n        } else if (climb_out) {\n            ret = haplotype_extender_add_or_extend_edge(self, p_out, child, left, right);\n            if (ret != 0) {\n                goto out;\n            }\n            child = p_out;\n            p_out = self->parent_out[p_out];\n            t_out = (p_out == TSK_NULL) ? 
INFINITY : nodes_time[p_out];\n        } else {\n            break;\n        }\n    }\n    tsk_bug_assert(p_out == p_in);\n    ret = haplotype_extender_add_or_extend_edge(self, p_out, child, left, right);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nstatic int\nhaplotype_extender_extend_paths(haplotype_extender_t *self)\n{\n    int ret = 0;\n    bool valid;\n    double left, right;\n    float ne, max_new_edges, next_max_new_edges;\n    tsk_tree_position_t tree_pos;\n    edge_list_t *ex_in;\n    tsk_id_t e_in, c, e;\n    tsk_size_t num_edges;\n    tsk_bool_t *keep = NULL;\n\n    tsk_memset(&tree_pos, 0, sizeof(tree_pos));\n    ret = tsk_tree_position_init(&tree_pos, self->ts, 0);\n    if (ret != 0) {\n        goto out;\n    }\n\n    if (self->direction == TSK_DIR_FORWARD) {\n        valid = tsk_tree_position_next(&tree_pos);\n    } else {\n        valid = tsk_tree_position_prev(&tree_pos);\n    }\n\n    while (valid) {\n        left = tree_pos.interval.left;\n        right = tree_pos.interval.right;\n        ret = haplotype_extender_next_tree(self, &tree_pos);\n        if (ret != 0) {\n            goto out;\n        }\n        max_new_edges = 0;\n        next_max_new_edges = INFINITY;\n        while (max_new_edges < INFINITY) {\n            for (ex_in = self->edges_in_head; ex_in != NULL; ex_in = ex_in->next) {\n                e_in = ex_in->edge;\n                c = self->edges->child[e_in];\n                if (self->last_degree[c] > 0) {\n                    ne = haplotype_extender_mergeable(self, c);\n                    if (ne <= max_new_edges) {\n                        ret = haplotype_extender_merge_paths(self, c, left, right);\n                        if (ret != 0) {\n                            goto out;\n                        }\n                    } else {\n                        next_max_new_edges = TSK_MIN(ne, next_max_new_edges);\n                    }\n                }\n            }\n            max_new_edges = 
next_max_new_edges;\n            next_max_new_edges = INFINITY;\n        }\n        if (self->direction == TSK_DIR_FORWARD) {\n            valid = tsk_tree_position_next(&tree_pos);\n        } else {\n            valid = tsk_tree_position_prev(&tree_pos);\n        }\n    }\n    /* Get rid of adjacent, identical edges */\n    /* note: we need to calloc this here instead of at the start\n     * because we don't know how big it will need to be until now */\n    num_edges = self->edges->num_rows;\n    keep = tsk_calloc(num_edges, sizeof(*keep));\n    if (keep == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    for (e = 0; e < (tsk_id_t) num_edges - 1; e++) {\n        if (self->edges->parent[e] == self->edges->parent[e + 1]\n            && self->edges->child[e] == self->edges->child[e + 1]\n            && self->edges->right[e] == self->edges->left[e + 1]) {\n            self->edges->right[e] = self->edges->right[e + 1];\n            self->edges->left[e + 1] = self->edges->right[e + 1];\n        }\n    }\n    for (e = 0; e < (tsk_id_t) num_edges; e++) {\n        keep[e] = self->edges->left[e] < self->edges->right[e];\n    }\n    ret = tsk_edge_table_keep_rows(self->edges, keep, 0, NULL);\nout:\n    tsk_tree_position_free(&tree_pos);\n    tsk_safe_free(keep);\n    return ret;\n}\n\nstatic int\nextend_haplotypes_iter(const tsk_treeseq_t *self, int direction, tsk_edge_table_t *edges,\n    tsk_flags_t options)\n{\n    int ret = 0;\n    haplotype_extender_t haplotype_extender;\n    tsk_memset(&haplotype_extender, 0, sizeof(haplotype_extender));\n    ret = haplotype_extender_init(&haplotype_extender, self, direction, edges);\n    if (ret != 0) {\n        goto out;\n    }\n\n    ret = haplotype_extender_extend_paths(&haplotype_extender);\n    if (ret != 0) {\n        goto out;\n    }\n\n    if (!!(options & TSK_DEBUG)) {\n        haplotype_extender_print_state(&haplotype_extender, tsk_get_debug_stream());\n    }\n\nout:\n    
haplotype_extender_free(&haplotype_extender);\n    return ret;\n}\n\nint TSK_WARN_UNUSED\ntsk_treeseq_extend_haplotypes(\n    const tsk_treeseq_t *self, int max_iter, tsk_flags_t options, tsk_treeseq_t *output)\n{\n    int ret = 0;\n    tsk_table_collection_t tables;\n    tsk_treeseq_t ts;\n    int iter, j;\n    tsk_size_t last_num_edges;\n    tsk_bookmark_t sort_start;\n    const int direction[] = { TSK_DIR_FORWARD, TSK_DIR_REVERSE };\n\n    tsk_memset(&tables, 0, sizeof(tables));\n    tsk_memset(&ts, 0, sizeof(ts));\n    tsk_memset(output, 0, sizeof(*output));\n\n    if (max_iter <= 0) {\n        ret = tsk_trace_error(TSK_ERR_EXTEND_EDGES_BAD_MAXITER);\n        goto out;\n    }\n    if (tsk_treeseq_get_num_migrations(self) != 0) {\n        ret = tsk_trace_error(TSK_ERR_MIGRATIONS_NOT_SUPPORTED);\n        goto out;\n    }\n\n    /* Note: there is a fair bit of copying of table data in this implementation\n     * currently, as we create a new tree sequence for each iteration, which\n     * takes a full copy of the input tables. We could streamline this by\n     * adding a flag to treeseq_init which says \"steal a reference to these\n     * tables and *don't* free them at the end\". 
Then, we would only need\n     * one copy of the full tables, and could pass in a standalone edge\n     * table to use for in-place updating.\n     */\n    ret = tsk_table_collection_copy(self->tables, &tables, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_mutation_table_clear(&tables.mutations);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_treeseq_init(&ts, &tables, 0);\n    if (ret != 0) {\n        goto out;\n    }\n\n    last_num_edges = tsk_treeseq_get_num_edges(&ts);\n    for (iter = 0; iter < max_iter; iter++) {\n        for (j = 0; j < 2; j++) {\n            ret = extend_haplotypes_iter(&ts, direction[j], &tables.edges, options);\n            if (ret != 0) {\n                goto out;\n            }\n            /* We're done with the current ts now */\n            tsk_treeseq_free(&ts);\n            /* no need to sort sites and mutations */\n            memset(&sort_start, 0, sizeof(sort_start));\n            sort_start.sites = tables.sites.num_rows;\n            sort_start.mutations = tables.mutations.num_rows;\n            ret = tsk_table_collection_sort(&tables, &sort_start, 0);\n            if (ret != 0) {\n                goto out;\n            }\n            ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n            if (ret != 0) {\n                goto out;\n            }\n        }\n        if (last_num_edges == tsk_treeseq_get_num_edges(&ts)) {\n            break;\n        }\n        last_num_edges = tsk_treeseq_get_num_edges(&ts);\n    }\n\n    /* Remap mutation nodes */\n    ret = tsk_mutation_table_copy(\n        &self->tables->mutations, &tables.mutations, TSK_NO_INIT);\n    if (ret != 0) {\n        goto out;\n    }\n    /* Note: to allow migrations we'd also have to do this same operation\n     * on the migration nodes; however it's a can of worms because the interval\n     * covering the migration might no longer make sense. 
*/\n    ret = tsk_treeseq_slide_mutation_nodes_up(&ts, &tables.mutations);\n    if (ret != 0) {\n        goto out;\n    }\n    tsk_treeseq_free(&ts);\n    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);\n    if (ret != 0) {\n        goto out;\n    }\n\n    /* Hand ownership of the tree sequence to the calling code */\n    tsk_memcpy(output, &ts, sizeof(ts));\n    tsk_memset(&ts, 0, sizeof(*output));\nout:\n    tsk_treeseq_free(&ts);\n    tsk_table_collection_free(&tables);\n    return ret;\n}\n\n/* ======================================================== *\n * Pair coalescence\n * ======================================================== */\n\nstatic int\ncheck_node_bin_map(\n    const tsk_size_t num_nodes, const tsk_size_t num_bins, const tsk_id_t *node_bin_map)\n{\n    int ret = 0;\n    tsk_id_t max_index, index;\n    tsk_size_t i;\n\n    max_index = TSK_NULL;\n    for (i = 0; i < num_nodes; i++) {\n        index = node_bin_map[i];\n        if (index < TSK_NULL) {\n            ret = tsk_trace_error(TSK_ERR_BAD_NODE_BIN_MAP);\n            goto out;\n        }\n        if (index > max_index) {\n            max_index = index;\n        }\n    }\n    if (num_bins < 1 || (tsk_id_t) num_bins < max_index + 1) {\n        ret = tsk_trace_error(TSK_ERR_BAD_NODE_BIN_MAP_DIM);\n        goto out;\n    }\nout:\n    return ret;\n}\n\nstatic inline void\nTRANSPOSE_2D(tsk_size_t rows, tsk_size_t cols, const double *source, double *dest)\n{\n    tsk_size_t i, j;\n    for (i = 0; i < rows; ++i) {\n        for (j = 0; j < cols; ++j) {\n            dest[j * rows + i] = source[i * cols + j];\n        }\n    }\n}\n\nstatic inline void\npair_coalescence_count(tsk_size_t num_set_indexes, const tsk_id_t *set_indexes,\n    tsk_size_t num_sample_sets, const double *parent_count, const double *child_count,\n    const double *parent_state, const double *inside, double *outside, double *result)\n{\n    tsk_size_t i;\n    tsk_id_t j, k;\n    for (i = 0; i < num_sample_sets; i++) 
{\n        outside[i] = parent_count[i] - child_count[i] - parent_state[i];\n    }\n    for (i = 0; i < num_set_indexes; i++) {\n        j = set_indexes[2 * i];\n        k = set_indexes[2 * i + 1];\n        result[i] = outside[j] * inside[k];\n        if (j != k) {\n            result[i] += outside[k] * inside[j];\n        }\n    }\n}\n\nint\ntsk_treeseq_pair_coalescence_stat(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_set_indexes, const tsk_id_t *set_indexes, tsk_size_t num_windows,\n    const double *windows, tsk_size_t num_bins, const tsk_id_t *node_bin_map,\n    pair_coalescence_stat_func_t *summary_func, tsk_size_t summary_func_dim,\n    void *summary_func_args, tsk_flags_t options, double *result)\n{\n    int ret = 0;\n    double left, right, remaining_span, missing_span, window_span, denominator, x, t;\n    tsk_id_t e, p, c, u, v, w, i, j;\n    tsk_size_t num_samples, num_edges;\n    tsk_tree_position_t tree_pos;\n    const tsk_table_collection_t *tables = self->tables;\n    const tsk_size_t num_nodes = tables->nodes.num_rows;\n    const double *restrict nodes_time = self->tables->nodes.time;\n    const double sequence_length = tables->sequence_length;\n    const tsk_size_t num_outputs = summary_func_dim;\n\n    /* buffers */\n    bool *visited = NULL;\n    tsk_id_t *nodes_sample_set = NULL;\n    tsk_id_t *nodes_parent = NULL;\n    double *coalescing_pairs = NULL;\n    double *coalescence_time = NULL;\n    double *nodes_sample = NULL;\n    double *sample_count = NULL;\n    double *bin_weight = NULL;\n    double *bin_values = NULL;\n    double *pair_count = NULL;\n    double *total_pair = NULL;\n    double *outside = NULL;\n\n    /* row pointers */\n    double *inside = NULL;\n    double *weight = NULL;\n    double *values = NULL;\n    double *output = NULL;\n    double *above = NULL;\n    double *below = NULL;\n    double *state = NULL;\n    double *pairs = 
NULL;\n    double *times = NULL;\n\n    tsk_memset(&tree_pos, 0, sizeof(tree_pos));\n\n    /* check inputs */\n    ret = tsk_treeseq_check_windows(self, num_windows, windows, TSK_REQUIRE_FULL_SPAN);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = check_set_indexes(num_sample_sets, 2 * num_set_indexes, set_indexes);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = tsk_treeseq_check_sample_sets(\n        self, num_sample_sets, sample_set_sizes, sample_sets);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = check_node_bin_map(num_nodes, num_bins, node_bin_map);\n    if (ret != 0) {\n        goto out;\n    }\n\n    /* map nodes to sample sets */\n    nodes_sample_set = tsk_malloc(num_nodes * sizeof(*nodes_sample_set));\n    if (nodes_sample_set == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    ret = get_sample_set_index_map(self, num_sample_sets, sample_set_sizes, sample_sets,\n        &num_samples, nodes_sample_set);\n    if (ret != 0) {\n        goto out;\n    }\n\n    visited = tsk_malloc(num_nodes * sizeof(*visited));\n    outside = tsk_malloc(num_sample_sets * sizeof(*outside));\n    nodes_parent = tsk_malloc(num_nodes * sizeof(*nodes_parent));\n    nodes_sample = tsk_calloc(num_nodes * num_sample_sets, sizeof(*nodes_sample));\n    sample_count = tsk_malloc(num_nodes * num_sample_sets * sizeof(*sample_count));\n    coalescing_pairs = tsk_calloc(num_bins * num_set_indexes, sizeof(*coalescing_pairs));\n    coalescence_time = tsk_calloc(num_bins * num_set_indexes, sizeof(*coalescence_time));\n    bin_weight = tsk_malloc(num_bins * num_set_indexes * sizeof(*bin_weight));\n    bin_values = tsk_malloc(num_bins * num_set_indexes * sizeof(*bin_values));\n    pair_count = tsk_malloc(num_set_indexes * sizeof(*pair_count));\n    total_pair = tsk_malloc(num_set_indexes * sizeof(*total_pair));\n    if (nodes_parent == NULL || nodes_sample == NULL || sample_count == NULL\n        || coalescing_pairs == NULL 
|| bin_weight == NULL || bin_values == NULL\n        || outside == NULL || pair_count == NULL || visited == NULL\n        || total_pair == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    for (i = 0; i < (tsk_id_t) num_set_indexes; i++) {\n        u = set_indexes[2 * i];\n        v = set_indexes[2 * i + 1];\n        total_pair[i] = (double) sample_set_sizes[u] * (double) sample_set_sizes[v];\n        if (u == v) {\n            total_pair[i] -= (double) sample_set_sizes[v];\n            total_pair[i] /= 2;\n        }\n    }\n\n    /* initialize internal state */\n    for (c = 0; c < (tsk_id_t) num_nodes; c++) {\n        i = nodes_sample_set[c];\n        if (i != TSK_NULL) {\n            state = GET_2D_ROW(nodes_sample, num_sample_sets, c);\n            state[i] = 1.0;\n        }\n        nodes_parent[c] = TSK_NULL;\n        visited[c] = false;\n    }\n    tsk_memcpy(\n        sample_count, nodes_sample, num_nodes * num_sample_sets * sizeof(*sample_count));\n\n    ret = tsk_tree_position_init(&tree_pos, self, 0);\n    if (ret != 0) {\n        goto out;\n    }\n\n    num_edges = 0;\n    missing_span = 0.0;\n    w = 0;\n    while (true) {\n        tsk_tree_position_next(&tree_pos);\n        if (tree_pos.index == TSK_NULL) {\n            break;\n        }\n\n        left = tree_pos.interval.left;\n        right = tree_pos.interval.right;\n        remaining_span = sequence_length - left;\n\n        for (u = tree_pos.out.start; u != tree_pos.out.stop; u++) {\n            e = tree_pos.out.order[u];\n            p = tables->edges.parent[e];\n            c = tables->edges.child[e];\n            nodes_parent[c] = TSK_NULL;\n            inside = GET_2D_ROW(sample_count, num_sample_sets, c);\n            while (p != TSK_NULL) { /* downdate statistic */\n                v = node_bin_map[p];\n                t = nodes_time[p];\n                if (v != TSK_NULL) {\n                    above = GET_2D_ROW(sample_count, num_sample_sets, 
p);\n                    below = GET_2D_ROW(sample_count, num_sample_sets, c);\n                    state = GET_2D_ROW(nodes_sample, num_sample_sets, p);\n                    pairs = GET_2D_ROW(coalescing_pairs, num_set_indexes, v);\n                    times = GET_2D_ROW(coalescence_time, num_set_indexes, v);\n                    pair_coalescence_count(num_set_indexes, set_indexes, num_sample_sets,\n                        above, below, state, inside, outside, pair_count);\n                    for (i = 0; i < (tsk_id_t) num_set_indexes; i++) {\n                        x = pair_count[i] * remaining_span;\n                        pairs[i] -= x;\n                        times[i] -= t * x;\n                    }\n                }\n                c = p;\n                p = nodes_parent[c];\n            }\n            p = tables->edges.parent[e];\n            while (p != TSK_NULL) { /* downdate state */\n                above = GET_2D_ROW(sample_count, num_sample_sets, p);\n                for (i = 0; i < (tsk_id_t) num_sample_sets; i++) {\n                    above[i] -= inside[i];\n                }\n                p = nodes_parent[p];\n            }\n            num_edges -= 1;\n        }\n\n        for (u = tree_pos.in.start; u != tree_pos.in.stop; u++) {\n            e = tree_pos.in.order[u];\n            p = tables->edges.parent[e];\n            c = tables->edges.child[e];\n            nodes_parent[c] = p;\n            inside = GET_2D_ROW(sample_count, num_sample_sets, c);\n            while (p != TSK_NULL) { /* update state */\n                above = GET_2D_ROW(sample_count, num_sample_sets, p);\n                for (i = 0; i < (tsk_id_t) num_sample_sets; i++) {\n                    above[i] += inside[i];\n                }\n                p = nodes_parent[p];\n            }\n            p = tables->edges.parent[e];\n            while (p != TSK_NULL) { /* update statistic */\n                v = node_bin_map[p];\n                t = nodes_time[p];\n         
       if (v != TSK_NULL) {\n                    above = GET_2D_ROW(sample_count, num_sample_sets, p);\n                    below = GET_2D_ROW(sample_count, num_sample_sets, c);\n                    state = GET_2D_ROW(nodes_sample, num_sample_sets, p);\n                    pairs = GET_2D_ROW(coalescing_pairs, num_set_indexes, v);\n                    times = GET_2D_ROW(coalescence_time, num_set_indexes, v);\n                    pair_coalescence_count(num_set_indexes, set_indexes, num_sample_sets,\n                        above, below, state, inside, outside, pair_count);\n                    for (i = 0; i < (tsk_id_t) num_set_indexes; i++) {\n                        x = pair_count[i] * remaining_span;\n                        pairs[i] += x;\n                        times[i] += t * x;\n                    }\n                }\n                c = p;\n                p = nodes_parent[c];\n            }\n            num_edges += 1;\n        }\n\n        if (num_edges == 0) {\n            missing_span += right - left;\n        }\n\n        /* flush windows */\n        while (w < (tsk_id_t) num_windows && windows[w + 1] <= right) {\n            TRANSPOSE_2D(num_bins, num_set_indexes, coalescing_pairs, bin_weight);\n            TRANSPOSE_2D(num_bins, num_set_indexes, coalescence_time, bin_values);\n            tsk_memset(coalescing_pairs, 0,\n                num_bins * num_set_indexes * sizeof(*coalescing_pairs));\n            tsk_memset(coalescence_time, 0,\n                num_bins * num_set_indexes * sizeof(*coalescence_time));\n            remaining_span = sequence_length - windows[w + 1];\n            for (j = 0; j < (tsk_id_t) num_samples; j++) { /* truncate at tree */\n                c = sample_sets[j];\n                p = nodes_parent[c];\n                while (!visited[c] && p != TSK_NULL) {\n                    v = node_bin_map[p];\n                    t = nodes_time[p];\n                    if (v != TSK_NULL) {\n                        above = 
GET_2D_ROW(sample_count, num_sample_sets, p);\n                        below = GET_2D_ROW(sample_count, num_sample_sets, c);\n                        state = GET_2D_ROW(nodes_sample, num_sample_sets, p);\n                        pairs = GET_2D_ROW(coalescing_pairs, num_set_indexes, v);\n                        times = GET_2D_ROW(coalescence_time, num_set_indexes, v);\n                        pair_coalescence_count(num_set_indexes, set_indexes,\n                            num_sample_sets, above, below, state, below, outside,\n                            pair_count);\n                        for (i = 0; i < (tsk_id_t) num_set_indexes; i++) {\n                            weight = GET_2D_ROW(bin_weight, num_bins, i);\n                            values = GET_2D_ROW(bin_values, num_bins, i);\n                            x = pair_count[i] * remaining_span / 2;\n                            pairs[i] += x;\n                            times[i] += t * x;\n                            weight[v] -= x;\n                            values[v] -= t * x;\n                        }\n                    }\n                    visited[c] = true;\n                    c = p;\n                    p = nodes_parent[c];\n                }\n            }\n            for (j = 0; j < (tsk_id_t) num_samples; j++) { /* reset tree */\n                c = sample_sets[j];\n                p = nodes_parent[c];\n                while (visited[c] && p != TSK_NULL) {\n                    visited[c] = false;\n                    c = p;\n                    p = nodes_parent[c];\n                }\n            }\n            for (i = 0; i < (tsk_id_t) num_set_indexes; i++) { /* normalise values */\n                weight = GET_2D_ROW(bin_weight, num_bins, i);\n                values = GET_2D_ROW(bin_values, num_bins, i);\n                for (v = 0; v < (tsk_id_t) num_bins; v++) {\n                    values[v] /= weight[v];\n                }\n            }\n            /* normalise weights */\n         
   if (options & (TSK_STAT_SPAN_NORMALISE | TSK_STAT_PAIR_NORMALISE)) {\n                window_span = windows[w + 1] - windows[w] - missing_span;\n                missing_span = 0.0;\n                if (num_edges == 0) {\n                    /* missing interval, so remove overcounted missing span */\n                    remaining_span = right - windows[w + 1];\n                    window_span += remaining_span;\n                    missing_span += remaining_span;\n                }\n                for (i = 0; i < (tsk_id_t) num_set_indexes; i++) {\n                    denominator = 1.0;\n                    if (options & TSK_STAT_SPAN_NORMALISE) {\n                        denominator *= window_span;\n                    }\n                    if (options & TSK_STAT_PAIR_NORMALISE) {\n                        denominator *= total_pair[i];\n                    }\n                    weight = GET_2D_ROW(bin_weight, num_bins, i);\n                    for (v = 0; v < (tsk_id_t) num_bins; v++) {\n                        weight[v] *= denominator == 0.0 ? 
0.0 : 1 / denominator;\n                    }\n                }\n            }\n            for (i = 0; i < (tsk_id_t) num_set_indexes; i++) { /* summarise bins */\n                weight = GET_2D_ROW(bin_weight, num_bins, i);\n                values = GET_2D_ROW(bin_values, num_bins, i);\n                output = GET_3D_ROW(\n                    result, num_set_indexes, num_outputs, (tsk_size_t) w, i);\n                ret = summary_func(\n                    num_bins, weight, values, num_outputs, output, summary_func_args);\n                if (ret != 0) {\n                    goto out;\n                }\n            };\n            w += 1;\n        }\n    }\nout:\n    tsk_tree_position_free(&tree_pos);\n    tsk_safe_free(nodes_sample_set);\n    tsk_safe_free(coalescing_pairs);\n    tsk_safe_free(coalescence_time);\n    tsk_safe_free(nodes_parent);\n    tsk_safe_free(nodes_sample);\n    tsk_safe_free(sample_count);\n    tsk_safe_free(bin_weight);\n    tsk_safe_free(bin_values);\n    tsk_safe_free(pair_count);\n    tsk_safe_free(total_pair);\n    tsk_safe_free(visited);\n    tsk_safe_free(outside);\n    return ret;\n}\n\nstatic int\npair_coalescence_weights(tsk_size_t TSK_UNUSED(input_dim), const double *weight,\n    const double *TSK_UNUSED(values), tsk_size_t output_dim, double *output,\n    void *TSK_UNUSED(params))\n{\n    int ret = 0;\n    tsk_memcpy(output, weight, output_dim * sizeof(*output));\n    return ret;\n}\n\nint\ntsk_treeseq_pair_coalescence_counts(const tsk_treeseq_t *self,\n    tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,\n    const tsk_id_t *sample_sets, tsk_size_t num_set_indexes, const tsk_id_t *set_indexes,\n    tsk_size_t num_windows, const double *windows, tsk_size_t num_bins,\n    const tsk_id_t *node_bin_map, tsk_flags_t options, double *result)\n{\n    return tsk_treeseq_pair_coalescence_stat(self, num_sample_sets, sample_set_sizes,\n        sample_sets, num_set_indexes, set_indexes, num_windows, windows, num_bins,\n 
       node_bin_map, pair_coalescence_weights, num_bins, NULL, options, result);\n}\n\nstatic int\npair_coalescence_quantiles(tsk_size_t input_dim, const double *weight,\n    const double *values, tsk_size_t output_dim, double *output, void *params)\n{\n    int ret = 0;\n    double coalesced, timepoint;\n    double *quantiles = (double *) params;\n    tsk_size_t i, j;\n    j = 0;\n    coalesced = 0.0;\n    timepoint = TSK_UNKNOWN_TIME;\n    for (i = 0; i < output_dim; i++) {\n        output[i] = NAN;\n    }\n    for (i = 0; i < input_dim; i++) {\n        if (weight[i] > 0) {\n            coalesced += weight[i];\n            timepoint = values[i];\n            while (j < output_dim && quantiles[j] <= coalesced) {\n                output[j] = timepoint;\n                j += 1;\n            }\n        }\n    }\n    if (quantiles[output_dim - 1] == 1.0) {\n        output[output_dim - 1] = timepoint;\n    }\n    return ret;\n}\n\nstatic int\ncheck_quantiles(const tsk_size_t num_quantiles, const double *quantiles)\n{\n    int ret = 0;\n    tsk_size_t i;\n    double last = -INFINITY;\n    for (i = 0; i < num_quantiles; i++) {\n        if (quantiles[i] <= last || quantiles[i] < 0.0 || quantiles[i] > 1.0) {\n            ret = tsk_trace_error(TSK_ERR_BAD_QUANTILES);\n            goto out;\n        }\n        last = quantiles[i];\n    }\nout:\n    return ret;\n}\n\nstatic int\ncheck_sorted_node_bin_map(\n    const tsk_treeseq_t *self, tsk_size_t num_bins, const tsk_id_t *node_bin_map)\n{\n    int ret = 0;\n    tsk_size_t num_nodes = self->tables->nodes.num_rows;\n    const double *nodes_time = self->tables->nodes.time;\n    double last;\n    tsk_id_t i, j;\n    double *min_time = tsk_malloc(num_bins * sizeof(*min_time));\n    double *max_time = tsk_malloc(num_bins * sizeof(*max_time));\n    if (min_time == NULL || max_time == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    for (j = 0; j < (tsk_id_t) num_bins; j++) {\n        
min_time[j] = TSK_UNKNOWN_TIME;\n        max_time[j] = TSK_UNKNOWN_TIME;\n    }\n    for (i = 0; i < (tsk_id_t) num_nodes; i++) {\n        j = node_bin_map[i];\n        if (j < 0 || j >= (tsk_id_t) num_bins) {\n            continue;\n        }\n        if (tsk_is_unknown_time(max_time[j]) || nodes_time[i] > max_time[j]) {\n            max_time[j] = nodes_time[i];\n        }\n        if (tsk_is_unknown_time(min_time[j]) || nodes_time[i] < min_time[j]) {\n            min_time[j] = nodes_time[i];\n        }\n    }\n    last = -INFINITY;\n    for (j = 0; j < (tsk_id_t) num_bins; j++) {\n        if (tsk_is_unknown_time(min_time[j])) {\n            continue;\n        }\n        if (min_time[j] < last) {\n            ret = tsk_trace_error(TSK_ERR_UNSORTED_TIMES);\n            goto out;\n        } else {\n            last = max_time[j];\n        }\n    }\nout:\n    tsk_safe_free(min_time);\n    tsk_safe_free(max_time);\n    return ret;\n}\n\nint\ntsk_treeseq_pair_coalescence_quantiles(const tsk_treeseq_t *self,\n    tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,\n    const tsk_id_t *sample_sets, tsk_size_t num_set_indexes, const tsk_id_t *set_indexes,\n    tsk_size_t num_windows, const double *windows, tsk_size_t num_bins,\n    const tsk_id_t *node_bin_map, tsk_size_t num_quantiles, double *quantiles,\n    tsk_flags_t options, double *result)\n{\n    int ret = 0;\n    void *params = (void *) quantiles;\n    ret = check_quantiles(num_quantiles, quantiles);\n    if (ret != 0) {\n        goto out;\n    }\n    ret = check_sorted_node_bin_map(self, num_bins, node_bin_map);\n    if (ret != 0) {\n        goto out;\n    }\n    options |= TSK_STAT_SPAN_NORMALISE | TSK_STAT_PAIR_NORMALISE;\n    ret = tsk_treeseq_pair_coalescence_stat(self, num_sample_sets, sample_set_sizes,\n        sample_sets, num_set_indexes, set_indexes, num_windows, windows, num_bins,\n        node_bin_map, pair_coalescence_quantiles, num_quantiles, params, options,\n        result);\n    if 
(ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nstatic int\npair_coalescence_rates(tsk_size_t input_dim, const double *weight, const double *values,\n    tsk_size_t output_dim, double *output, void *params)\n{\n    int ret = 0;\n    double coalesced, rate, waiting_time, a, b;\n    double *time_windows = (double *) params;\n    tsk_id_t i, j;\n    tsk_bug_assert(input_dim == output_dim);\n    for (j = (tsk_id_t) output_dim; j > 0; j--) { /* find last window with data */\n        if (weight[j - 1] == 0) {\n            output[j - 1] = NAN; /* TODO: should fill value be zero instead? */\n        } else {\n            break;\n        }\n    }\n    coalesced = 0.0;\n    for (i = 0; i < j; i++) {\n        a = time_windows[i];\n        b = time_windows[i + 1];\n        if (i + 1 == j) {\n            waiting_time = values[i] < a ? 0.0 : values[i] - a;\n            rate = 1 / waiting_time;\n        } else {\n            rate = log(1 - weight[i] / (1 - coalesced)) / (a - b);\n        }\n        // avoid tiny negative values from fp error\n        output[i] = rate > 0 ? 
rate : 0;\n        coalesced += weight[i];\n    }\n    return ret;\n}\n\nstatic int\ncheck_coalescence_rate_time_windows(const tsk_treeseq_t *self,\n    tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,\n    const tsk_id_t *sample_sets, tsk_size_t num_time_windows,\n    const tsk_id_t *node_time_window, const double *time_windows)\n{\n    int ret = 0;\n    double timepoint;\n    const double *nodes_time = self->tables->nodes.time;\n    tsk_size_t num_nodes = self->tables->nodes.num_rows;\n    tsk_id_t i, j, k;\n    tsk_id_t n;\n    if (num_time_windows == 0) {\n        ret = tsk_trace_error(TSK_ERR_BAD_TIME_WINDOWS_DIM);\n        goto out;\n    }\n    /* time windows are sorted */\n    timepoint = time_windows[0];\n    for (i = 0; i < (tsk_id_t) num_time_windows; i++) {\n        if (time_windows[i + 1] <= timepoint) {\n            ret = tsk_trace_error(TSK_ERR_BAD_TIME_WINDOWS);\n            goto out;\n        }\n        timepoint = time_windows[i + 1];\n    }\n    if (timepoint != INFINITY) {\n        ret = tsk_trace_error(TSK_ERR_BAD_TIME_WINDOWS_END);\n        goto out;\n    }\n    /* all sample times align with start of first time window */\n    k = 0;\n    for (i = 0; i < (tsk_id_t) num_sample_sets; i++) {\n        for (j = 0; j < (tsk_id_t) sample_set_sizes[i]; j++) {\n            n = sample_sets[k++];\n            if (nodes_time[n] != time_windows[0]) {\n                ret = tsk_trace_error(TSK_ERR_BAD_SAMPLE_PAIR_TIMES);\n                goto out;\n            }\n        }\n    }\n    /* nodes are correctly assigned to time windows */\n    for (i = 0; i < (tsk_id_t) num_nodes; i++) {\n        j = node_time_window[i];\n        if (j < 0) {\n            continue;\n        }\n        if (j >= (tsk_id_t) num_time_windows) {\n            ret = tsk_trace_error(TSK_ERR_BAD_NODE_BIN_MAP_DIM);\n            goto out;\n        }\n        if (nodes_time[i] < time_windows[j] || nodes_time[i] >= time_windows[j + 1]) {\n            ret = 
tsk_trace_error(TSK_ERR_BAD_NODE_TIME_WINDOW);\n            goto out;\n        }\n    }\nout:\n    return ret;\n}\n\nint\ntsk_treeseq_pair_coalescence_rates(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_set_indexes, const tsk_id_t *set_indexes, tsk_size_t num_windows,\n    const double *windows, tsk_size_t num_time_windows, const tsk_id_t *node_time_window,\n    double *time_windows, tsk_flags_t options, double *result)\n{\n    int ret = 0;\n    void *params = (void *) time_windows;\n    ret = check_coalescence_rate_time_windows(self, num_sample_sets, sample_set_sizes,\n        sample_sets, num_time_windows, node_time_window, time_windows);\n    if (ret != 0) {\n        goto out;\n    }\n    options |= TSK_STAT_SPAN_NORMALISE | TSK_STAT_PAIR_NORMALISE;\n    ret = tsk_treeseq_pair_coalescence_stat(self, num_sample_sets, sample_set_sizes,\n        sample_sets, num_set_indexes, set_indexes, num_windows, windows,\n        num_time_windows, node_time_window, pair_coalescence_rates, num_time_windows,\n        params, options, result);\n    if (ret != 0) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\n/* ======================================================== *\n * Relatedness matrix-vector product\n * ======================================================== */\n\ntypedef struct {\n    const tsk_treeseq_t *ts;\n    tsk_size_t num_weights;\n    const double *weights;\n    tsk_size_t num_windows;\n    const double *windows;\n    tsk_size_t num_focal_nodes;\n    const tsk_id_t *focal_nodes;\n    tsk_flags_t options;\n    double *result;\n    tsk_tree_position_t tree_pos;\n    double position;\n    tsk_size_t num_nodes;\n    tsk_id_t *parent;\n    double *x;\n    double *w;\n    double *v;\n} tsk_matvec_calculator_t;\n\nstatic void\ntsk_matvec_calculator_print_state(const tsk_matvec_calculator_t *self, FILE *out)\n{\n    tsk_id_t j;\n    tsk_size_t num_samples = 
tsk_treeseq_get_num_samples(self->ts);\n\n    fprintf(out, \"Matvec state:\\n\");\n    fprintf(out, \"options = %d\\n\", self->options);\n    fprintf(out, \"position = %f\\n\", self->position);\n    fprintf(out, \"focal nodes = %lld: [\", (long long) self->num_focal_nodes);\n    fprintf(out, \"tree_pos:\\n\");\n    tsk_tree_position_print_state(&self->tree_pos, out);\n    fprintf(out, \"samples = %lld: [\", (long long) num_samples);\n    fprintf(out, \"]\\n\");\n    fprintf(out, \"node\\tparent\\tx\\tv\\tw\");\n    fprintf(out, \"\\n\");\n\n    for (j = 0; j < (tsk_id_t) self->num_nodes; j++) {\n        fprintf(out, \"%lld\\t\", (long long) j);\n        fprintf(out, \"%lld\\t%g\\t%g\\t%g\\n\", (long long) self->parent[j], self->x[j],\n            self->v[j], self->w[j]);\n    }\n}\n\nstatic int\ntsk_matvec_calculator_init(tsk_matvec_calculator_t *self, const tsk_treeseq_t *ts,\n    tsk_size_t num_weights, const double *weights, tsk_size_t num_windows,\n    const double *windows, tsk_size_t num_focal_nodes, const tsk_id_t *focal_nodes,\n    tsk_flags_t options, double *result)\n{\n    int ret = 0;\n    tsk_size_t num_samples = tsk_treeseq_get_num_samples(ts);\n    const tsk_size_t num_nodes = ts->tables->nodes.num_rows;\n    const double *row;\n    double *new_row;\n    tsk_size_t k;\n    tsk_id_t index, u, j;\n    double *weight_means = tsk_malloc(num_weights * sizeof(*weight_means));\n    const tsk_size_t num_trees = ts->num_trees;\n    const double *restrict breakpoints = ts->breakpoints;\n\n    self->ts = ts;\n    self->num_weights = num_weights;\n    self->weights = weights;\n    self->num_windows = num_windows;\n    self->windows = windows;\n    self->num_focal_nodes = num_focal_nodes;\n    self->focal_nodes = focal_nodes;\n    self->options = options;\n    self->result = result;\n    self->num_nodes = num_nodes;\n    self->position = windows[0];\n\n    self->parent = tsk_malloc(num_nodes * sizeof(*self->parent));\n    self->x = tsk_calloc(num_nodes, 
sizeof(*self->x));\n    self->v = tsk_calloc(num_nodes * num_weights, sizeof(*self->v));\n    self->w = tsk_calloc(num_nodes * num_weights, sizeof(*self->w));\n\n    if (self->parent == NULL || self->x == NULL || self->w == NULL || self->v == NULL\n        || weight_means == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    tsk_memset(result, 0, num_windows * num_focal_nodes * num_weights * sizeof(*result));\n    tsk_memset(self->parent, TSK_NULL, num_nodes * sizeof(*self->parent));\n\n    for (j = 0; j < (tsk_id_t) num_focal_nodes; j++) {\n        if (focal_nodes[j] < 0 || (tsk_size_t) focal_nodes[j] >= num_nodes) {\n            ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);\n            goto out;\n        }\n    }\n\n    ret = tsk_tree_position_init(&self->tree_pos, ts, 0);\n    if (ret != 0) {\n        goto out;\n    }\n    /* seek to the first window */\n    index = (tsk_id_t) tsk_search_sorted(breakpoints, num_trees + 1, windows[0]);\n    if (breakpoints[index] > windows[0]) {\n        index--;\n    }\n    ret = tsk_tree_position_seek_forward(&self->tree_pos, index);\n    if (ret != 0) {\n        goto out;\n    }\n\n    for (k = 0; k < num_weights; k++) {\n        weight_means[k] = 0.0;\n    }\n    /* centre the input */\n    if (!(options & TSK_STAT_NONCENTRED)) {\n        for (j = 0; j < (tsk_id_t) num_samples; j++) {\n            row = GET_2D_ROW(weights, num_weights, j);\n            for (k = 0; k < num_weights; k++) {\n                weight_means[k] += row[k];\n            }\n        }\n        for (k = 0; k < num_weights; k++) {\n            weight_means[k] /= (double) num_samples;\n        }\n    }\n\n    /* set the initial state */\n    for (j = 0; j < (tsk_id_t) num_samples; j++) {\n        u = ts->samples[j];\n        row = GET_2D_ROW(weights, num_weights, j);\n        new_row = GET_2D_ROW(self->w, num_weights, u);\n        for (k = 0; k < num_weights; k++) {\n            new_row[k] = row[k] - 
weight_means[k];\n        }\n    }\nout:\n    tsk_safe_free(weight_means);\n    return ret;\n}\n\nstatic int\ntsk_matvec_calculator_free(tsk_matvec_calculator_t *self)\n{\n    tsk_safe_free(self->parent);\n    tsk_safe_free(self->x);\n    tsk_safe_free(self->w);\n    tsk_safe_free(self->v);\n    tsk_tree_position_free(&self->tree_pos);\n\n    /* Make this safe for multiple free calls */\n    memset(self, 0, sizeof(*self));\n    return 0;\n}\n\nstatic inline void\ntsk_matvec_calculator_add_z(tsk_id_t u, tsk_id_t p, const double position,\n    double *restrict x, const tsk_size_t num_weights, double *restrict w,\n    double *restrict v, const double *restrict nodes_time)\n{\n    double t, span;\n    tsk_size_t j;\n    double *restrict v_row, *restrict w_row;\n\n    if (p != TSK_NULL) {\n        t = nodes_time[p] - nodes_time[u];\n        span = position - x[u];\n        // do this: self->v[u] += t * span * self->w[u];\n        w_row = GET_2D_ROW(w, num_weights, u);\n        v_row = GET_2D_ROW(v, num_weights, u);\n        for (j = 0; j < num_weights; j++) {\n            v_row[j] += t * span * w_row[j];\n        }\n    }\n    x[u] = position;\n}\n\nstatic void\ntsk_matvec_calculator_adjust_path_up(\n    tsk_matvec_calculator_t *self, tsk_id_t p, tsk_id_t c, double sign)\n{\n    tsk_size_t j;\n    double *p_row, *c_row;\n    const tsk_id_t *restrict parent = self->parent;\n    const double position = self->position;\n    double *restrict x = self->x;\n    const tsk_size_t num_weights = self->num_weights;\n    double *restrict w = self->w;\n    double *restrict v = self->v;\n    const double *restrict nodes_time = self->ts->tables->nodes.time;\n\n    // sign = -1 for removing edges, +1 for adding\n    while (p != TSK_NULL) {\n        tsk_matvec_calculator_add_z(\n            p, parent[p], position, x, num_weights, w, v, nodes_time);\n        // do this: self->v[c] -= sign * self->v[p];\n        p_row = GET_2D_ROW(v, num_weights, p);\n        c_row = GET_2D_ROW(v, 
num_weights, c);\n        for (j = 0; j < num_weights; j++) {\n            c_row[j] -= sign * p_row[j];\n        }\n        // do this: self->w[p] += sign * self->w[c];\n        p_row = GET_2D_ROW(w, num_weights, p);\n        c_row = GET_2D_ROW(w, num_weights, c);\n        for (j = 0; j < num_weights; j++) {\n            p_row[j] += sign * c_row[j];\n        }\n        p = parent[p];\n    }\n}\n\nstatic void\ntsk_matvec_calculator_remove_edge(tsk_matvec_calculator_t *self, tsk_id_t p, tsk_id_t c)\n{\n    tsk_id_t *parent = self->parent;\n    const double position = self->position;\n    double *restrict x = self->x;\n    const tsk_size_t num_weights = self->num_weights;\n    double *restrict w = self->w;\n    double *restrict v = self->v;\n    const double *restrict nodes_time = self->ts->tables->nodes.time;\n\n    tsk_matvec_calculator_add_z(\n        c, parent[c], position, x, num_weights, w, v, nodes_time);\n    parent[c] = TSK_NULL;\n    tsk_matvec_calculator_adjust_path_up(self, p, c, -1);\n}\n\nstatic void\ntsk_matvec_calculator_insert_edge(tsk_matvec_calculator_t *self, tsk_id_t p, tsk_id_t c)\n{\n    tsk_id_t *parent = self->parent;\n\n    tsk_matvec_calculator_adjust_path_up(self, p, c, +1);\n    self->x[c] = self->position;\n    parent[c] = p;\n}\n\nstatic int\ntsk_matvec_calculator_write_output(tsk_matvec_calculator_t *self, double *restrict y)\n{\n    int ret = 0;\n    tsk_id_t u;\n    tsk_size_t j, k;\n    const tsk_size_t n = self->num_focal_nodes;\n    const tsk_size_t num_weights = self->num_weights;\n    const double position = self->position;\n    double *u_row, *out_row;\n    double *out_means = tsk_malloc(num_weights * sizeof(*out_means));\n    const tsk_id_t *restrict parent = self->parent;\n    const double *restrict nodes_time = self->ts->tables->nodes.time;\n    double *restrict x = self->x;\n    double *restrict w = self->w;\n    double *restrict v = self->v;\n    const tsk_id_t *restrict focal_nodes = self->focal_nodes;\n\n    if (out_means 
== NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n\n    for (j = 0; j < n; j++) {\n        out_row = GET_2D_ROW(y, num_weights, j);\n        u = focal_nodes[j];\n        while (u != TSK_NULL) {\n            if (x[u] != position) {\n                tsk_matvec_calculator_add_z(\n                    u, parent[u], position, x, num_weights, w, v, nodes_time);\n            }\n            u_row = GET_2D_ROW(v, num_weights, u);\n            for (k = 0; k < num_weights; k++) {\n                out_row[k] += u_row[k];\n            }\n            u = parent[u];\n        }\n    }\n\n    if (!(self->options & TSK_STAT_NONCENTRED)) {\n        for (k = 0; k < num_weights; k++) {\n            out_means[k] = 0.0;\n        }\n        for (j = 0; j < n; j++) {\n            out_row = GET_2D_ROW(y, num_weights, j);\n            for (k = 0; k < num_weights; k++) {\n                out_means[k] += out_row[k];\n            }\n        }\n        for (k = 0; k < num_weights; k++) {\n            out_means[k] /= (double) n;\n        }\n        for (j = 0; j < n; j++) {\n            out_row = GET_2D_ROW(y, num_weights, j);\n            for (k = 0; k < num_weights; k++) {\n                out_row[k] -= out_means[k];\n            }\n        }\n    }\n    /* zero out v */\n    tsk_memset(self->v, 0, self->num_nodes * num_weights * sizeof(*self->v));\nout:\n    tsk_safe_free(out_means);\n    return ret;\n}\n\nstatic int\ntsk_matvec_calculator_run(tsk_matvec_calculator_t *self)\n{\n    int ret = 0;\n    tsk_size_t j, k, m;\n    tsk_id_t e, p, c;\n    const tsk_size_t out_size = self->num_weights * self->num_focal_nodes;\n    const tsk_size_t num_edges = self->ts->tables->edges.num_rows;\n    const double *restrict edge_right = self->ts->tables->edges.right;\n    const double *restrict edge_left = self->ts->tables->edges.left;\n    const tsk_id_t *restrict edge_child = self->ts->tables->edges.child;\n    const tsk_id_t *restrict edge_parent = 
self->ts->tables->edges.parent;\n    const double *restrict windows = self->windows;\n    double *restrict out;\n    tsk_tree_position_t tree_pos = self->tree_pos;\n    const tsk_id_t *restrict in_order = tree_pos.in.order;\n    const tsk_id_t *restrict out_order = tree_pos.out.order;\n    bool valid;\n    double next_position;\n\n    m = 0;\n    self->position = windows[0];\n\n    for (j = (tsk_size_t) tree_pos.in.start; j != (tsk_size_t) tree_pos.in.stop; j++) {\n        e = in_order[j];\n        tsk_bug_assert(edge_left[e] <= self->position);\n        if (self->position < edge_right[e]) {\n            p = edge_parent[e];\n            c = edge_child[e];\n            tsk_matvec_calculator_insert_edge(self, p, c);\n        }\n    }\n\n    valid = tsk_tree_position_next(&tree_pos);\n    j = (tsk_size_t) tree_pos.in.start;\n    k = (tsk_size_t) tree_pos.out.start;\n    while (m < self->num_windows) {\n        if (valid && self->position == tree_pos.interval.left) {\n            for (k = (tsk_size_t) tree_pos.out.start;\n                k != (tsk_size_t) tree_pos.out.stop; k++) {\n                e = out_order[k];\n                p = edge_parent[e];\n                c = edge_child[e];\n                tsk_matvec_calculator_remove_edge(self, p, c);\n            }\n            for (j = (tsk_size_t) tree_pos.in.start; j != (tsk_size_t) tree_pos.in.stop;\n                j++) {\n                e = in_order[j];\n                p = edge_parent[e];\n                c = edge_child[e];\n                tsk_matvec_calculator_insert_edge(self, p, c);\n            }\n            valid = tsk_tree_position_next(&tree_pos);\n        }\n        next_position = windows[m + 1];\n        if (j < num_edges) {\n            next_position = TSK_MIN(next_position, edge_left[in_order[j]]);\n        }\n        if (k < num_edges) {\n            next_position = TSK_MIN(next_position, edge_right[out_order[k]]);\n        }\n        tsk_bug_assert(self->position < next_position);\n        
self->position = next_position;\n        if (self->position == windows[m + 1]) {\n            out = GET_2D_ROW(self->result, out_size, m);\n            tsk_matvec_calculator_write_output(self, out);\n            m += 1;\n        }\n        if (self->options & TSK_DEBUG) {\n            tsk_matvec_calculator_print_state(self, tsk_get_debug_stream());\n        }\n    }\n    if (!!(self->options & TSK_STAT_SPAN_NORMALISE)) {\n        span_normalise(self->num_windows, windows, out_size, self->result);\n    }\n\n    /* out: */\n    return ret;\n}\n\nint\ntsk_treeseq_genetic_relatedness_vector(const tsk_treeseq_t *self, tsk_size_t num_weights,\n    const double *weights, tsk_size_t num_windows, const double *windows,\n    tsk_size_t num_focal_nodes, const tsk_id_t *focal_nodes, double *result,\n    tsk_flags_t options)\n{\n    int ret = 0;\n    bool stat_site = !!(options & TSK_STAT_SITE);\n    bool stat_node = !!(options & TSK_STAT_NODE);\n    tsk_matvec_calculator_t calc;\n\n    memset(&calc, 0, sizeof(calc));\n\n    if (stat_node || stat_site) {\n        ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_STAT_MODE);\n        goto out;\n    }\n    ret = tsk_treeseq_check_windows(self, num_windows, windows, 0);\n    if (ret != 0) {\n        goto out;\n    }\n\n    ret = tsk_matvec_calculator_init(&calc, self, num_weights, weights, num_windows,\n        windows, num_focal_nodes, focal_nodes, options, result);\n    if (ret != 0) {\n        goto out;\n    }\n    if (options & TSK_DEBUG) {\n        tsk_matvec_calculator_print_state(&calc, tsk_get_debug_stream());\n    }\n    ret = tsk_matvec_calculator_run(&calc);\nout:\n    tsk_matvec_calculator_free(&calc);\n    return ret;\n}\n"
  },
  {
    "path": "c/tskit/trees.h",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2024 Tskit Developers\n * Copyright (c) 2015-2018 University of Oxford\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n/**\n * @file trees.h\n * @brief Tskit core tree sequence operations.\n */\n#ifndef TSK_TREES_H\n#define TSK_TREES_H\n\n#ifdef __cplusplus\nextern \"C\" {\n#endif\n\n#include <tskit/tables.h>\n\n// clang-format off\n\n/*\n * These are both undocumented options for tsk_tree_init\n */\n#define TSK_SAMPLE_LISTS            (1 << 1)\n#define TSK_NO_SAMPLE_COUNTS        (1 << 2)\n\n#define TSK_STAT_SITE               (1 << 0)\n#define TSK_STAT_BRANCH             (1 << 1)\n#define TSK_STAT_NODE               (1 << 2)\n\n/* Leave room for other stat types */\n#define TSK_STAT_POLARISED               (1 << 10)\n#define TSK_STAT_SPAN_NORMALISE          (1 << 11)\n#define TSK_STAT_ALLOW_TIME_UNCALIBRATED (1 << 12)\n#define TSK_STAT_PAIR_NORMALISE          (1 << 
13)\n#define TSK_STAT_NONCENTRED              (1 << 14)\n\n/* Options for map_mutations */\n#define TSK_MM_FIXED_ANCESTRAL_STATE (1 << 0)\n\n#define TSK_DIR_FORWARD 1\n#define TSK_DIR_REVERSE -1\n\n/**\n@defgroup API_FLAGS_TS_INIT_GROUP :c:func:`tsk_treeseq_init` specific flags.\n@{\n*/\n/**\nIf specified edge indexes will be built and stored in the table collection\nwhen the tree sequence is initialised. Indexes are required for a valid\ntree sequence, and are not built by default for performance reasons.\n*/\n#define TSK_TS_INIT_BUILD_INDEXES (1 << 0)\n/**\nIf specified, mutation parents in the table collection will be overwritten\nwith those computed from the topology when the tree sequence is initialised.\n*/\n#define TSK_TS_INIT_COMPUTE_MUTATION_PARENTS (1 << 1)\n/** @} */\n\n// clang-format on\n\n/**\n@brief The tree sequence object.\n*/\ntypedef struct {\n    tsk_size_t num_trees;\n    tsk_size_t num_samples;\n    tsk_id_t *samples;\n    /* Does this tree sequence have time_units == \"uncalibrated\" */\n    bool time_uncalibrated;\n    /* Are all genome coordinates discrete? */\n    bool discrete_genome;\n    /* Are all time values discrete? */\n    bool discrete_time;\n    /* Min and max time in node table and mutation table */\n    double min_time;\n    double max_time;\n    /* Breakpoints along the sequence, including 0 and L. 
*/\n    double *breakpoints;\n    /* If a node is a sample, map to its index in the samples list */\n    tsk_id_t *sample_index_map;\n    /* Map individuals to the list of nodes that reference them */\n    tsk_id_t *individual_nodes_mem;\n    tsk_id_t **individual_nodes;\n    tsk_size_t *individual_nodes_length;\n    /* For each tree, a list of sites on that tree */\n    tsk_site_t *tree_sites_mem;\n    tsk_site_t **tree_sites;\n    tsk_size_t *tree_sites_length;\n    /* For each site, a list of mutations at that site */\n    tsk_mutation_t *site_mutations_mem;\n    tsk_mutation_t **site_mutations;\n    tsk_size_t *site_mutations_length;\n    /** @brief  The table collection underlying this tree sequence, This table\n     *  collection must be treated as read-only, and any changes to it will\n     *  lead to undefined behaviour. */\n    tsk_table_collection_t *tables;\n} tsk_treeseq_t;\n\ntypedef struct {\n    tsk_id_t index;\n    struct {\n        double left;\n        double right;\n    } interval;\n    struct {\n        tsk_id_t start;\n        tsk_id_t stop;\n        const tsk_id_t *order;\n    } in;\n    struct {\n        tsk_id_t start;\n        tsk_id_t stop;\n        const tsk_id_t *order;\n    } out;\n    tsk_id_t left_current_index;\n    tsk_id_t right_current_index;\n    int direction;\n    const tsk_treeseq_t *tree_sequence;\n} tsk_tree_position_t;\n\n/**\n@brief A single tree in a tree sequence.\n\n@rst\nA ``tsk_tree_t`` object has two basic functions:\n\n1. Represent the state of a single tree in a tree sequence;\n2. Provide methods to transform this state into different trees in the sequence.\n\nThe state of a single tree in the tree sequence is represented using the\nquintuply linked encoding: please see the\n:ref:`data model <sec_data_model_tree_structure>` section for details on\nhow this works. 
The left-to-right ordering of nodes in this encoding\nis arbitrary, and may change depending on the order in which trees are\naccessed within the sequence. Please see the\n:ref:`sec_c_api_examples_tree_traversals` examples for recommended\nusage.\n\nOn initialisation, a tree is in the :ref:`null state<sec_c_api_trees_null>` and\nwe must call one of the :ref:`seeking<sec_c_api_trees_seeking>` methods to make\nthe state of the tree object correspond to a particular tree in the sequence.\nPlease see the :ref:`sec_c_api_examples_tree_iteration` examples for\nrecommended usage.\n\n@endrst\n */\ntypedef struct {\n    /**\n     * @brief The parent tree sequence.\n     */\n    const tsk_treeseq_t *tree_sequence;\n    /**\n     @brief The ID of the \"virtual root\" whose children are the roots of the\n     tree.\n     */\n    tsk_id_t virtual_root;\n    /**\n     @brief The parent of node u is parent[u]. Equal to ``TSK_NULL`` if node u is\n     a root or is not a node in the current tree.\n     */\n    tsk_id_t *parent;\n    /**\n     @brief The leftmost child of node u is left_child[u]. Equal to ``TSK_NULL``\n     if node u is a leaf or is not a node in the current tree.\n     */\n    tsk_id_t *left_child;\n    /**\n     @brief The rightmost child of node u is right_child[u]. Equal to ``TSK_NULL``\n     if node u is a leaf or is not a node in the current tree.\n     */\n    tsk_id_t *right_child;\n    /**\n     @brief The sibling to the left of node u is left_sib[u]. Equal to\n     ``TSK_NULL`` if node u has no siblings to its left.\n     */\n    tsk_id_t *left_sib;\n    /**\n     @brief The sibling to the right of node u is right_sib[u]. 
Equal to\n     ``TSK_NULL`` if node u has no siblings to its right.\n     */\n    tsk_id_t *right_sib;\n    /**\n     @brief The number of children of node u is num_children[u].\n     */\n    tsk_id_t *num_children;\n    /**\n     @brief Array of edge ids where ``edge[u]`` is the edge that encodes the\n     relationship between the child node ``u`` and its parent. Equal to\n     ``TSK_NULL`` if node ``u`` is a root, virtual root or is not a node in the\n     current tree.\n     */\n    tsk_id_t *edge;\n    /**\n     @brief The total number of edges defining the topology of this tree.\n     This is equal to the number of tree sequence edges that intersect with\n     the tree's genomic interval.\n     */\n    tsk_size_t num_edges;\n    /**\n     @brief Left and right coordinates of the genomic interval that this\n     tree covers. The left coordinate is inclusive and the right coordinate\n     exclusive.\n\n    @rst\n\n    Example:\n\n    .. code-block:: c\n\n        tsk_tree_t tree;\n        int ret;\n        // initialise etc\n        ret = tsk_tree_first(&tree);\n        // Check for error\n        assert(ret == TSK_TREE_OK);\n        printf(\"Coordinates covered by first tree are left=%f, right=%f\\n\",\n            tree.interval.left, tree.interval.right);\n\n    @endrst\n\n     */\n    struct {\n        double left;\n        double right;\n    } interval;\n    /**\n     @brief The index of this tree in the tree sequence.\n\n     @rst\n     This attribute provides the zero-based index of the tree represented by the\n     current state of the struct within the parent tree sequence. 
For example,\n     immediately after we call ``tsk_tree_first(&tree)``, ``tree.index`` will\n     be zero, and after we call ``tsk_tree_last(&tree)``, ``tree.index`` will\n     be the number of trees - 1 (see :c:func:`tsk_treeseq_get_num_trees`).\n     When the tree is in the null state (immediately after initialisation,\n     or after, e.g., calling :c:func:`tsk_tree_prev` on the first tree)\n     the value of the ``index`` is -1.\n     @endrst\n     */\n    tsk_id_t index;\n    /* Attributes below are private and should not be used in client code. */\n    tsk_size_t num_nodes;\n    tsk_flags_t options;\n    tsk_size_t root_threshold;\n    const tsk_id_t *samples;\n    /*\n    These are involved in the optional sample tracking; num_samples counts\n    all samples below a given node, and num_tracked_samples counts those\n    from a specific subset. By default sample counts are tracked and roots\n    maintained. If ``TSK_NO_SAMPLE_COUNTS`` is specified, then neither sample\n    counts nor roots are available.\n    */\n    tsk_size_t *num_samples;\n    tsk_size_t *num_tracked_samples;\n    /* These are for the optional sample list tracking. */\n    tsk_id_t *left_sample;\n    tsk_id_t *right_sample;\n    tsk_id_t *next_sample;\n    /* The sites on this tree */\n    const tsk_site_t *sites;\n    tsk_size_t sites_length;\n    /* Counters needed for next() and prev() transformations. */\n    int direction;\n    tsk_id_t left_index;\n    tsk_id_t right_index;\n    tsk_tree_position_t tree_pos;\n} tsk_tree_t;\n\n/****************************************************************************/\n/* Tree sequence.*/\n/****************************************************************************/\n\n/**\n@defgroup TREESEQ_API_GROUP Tree sequence API\n@{\n*/\n\n/**\n@brief Initialises the tree sequence based on the specified table collection.\n\n@rst\nThis method will copy the supplied table collection unless :c:macro:`TSK_TAKE_OWNERSHIP`\nis specified. 
The table collection will be checked for integrity and index maps built.\n\nThis must be called before any operations are performed on the tree sequence.\nSee the :ref:`sec_c_api_overview_structure` for details on how objects\nare initialised and freed.\n\nIf specified, TSK_TAKE_OWNERSHIP takes immediate ownership of the tables, regardless\nof error conditions.\n\n**Options**\n\n- :c:macro:`TSK_TS_INIT_BUILD_INDEXES`\n- :c:macro:`TSK_TAKE_OWNERSHIP` (applies to the table collection).\n@endrst\n\n@param self A pointer to an uninitialised tsk_treeseq_t object.\n@param tables A pointer to a tsk_table_collection_t object.\n@param options Allocation time options. See above for details.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_treeseq_init(\n    tsk_treeseq_t *self, tsk_table_collection_t *tables, tsk_flags_t options);\n\n/**\n@brief Load a tree sequence from a file path.\n\n@rst\nLoads the data from the specified file into this tree sequence.\nThe tree sequence is also initialised.\nThe resources allocated must be freed using\n:c:func:`tsk_treeseq_free` even in error conditions.\n\nWorks similarly to :c:func:`tsk_table_collection_load`; please see\nthat function's documentation for details and options.\n\n**Examples**\n\n.. code-block:: c\n\n    int ret;\n    tsk_treeseq_t ts;\n    ret = tsk_treeseq_load(&ts, \"data.trees\", 0);\n    if (ret != 0) {\n        fprintf(stderr, \"Load error:%s\\n\", tsk_strerror(ret));\n        exit(EXIT_FAILURE);\n    }\n\n@endrst\n\n@param self A pointer to an uninitialised tsk_treeseq_t object.\n@param filename A NULL terminated string containing the filename.\n@param options Bitwise options. See above for details.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_treeseq_load(tsk_treeseq_t *self, const char *filename, tsk_flags_t options);\n\n/**\n@brief Load a tree sequence from a stream.\n\n@rst\nLoads a tree sequence from the specified file stream. 
The tree sequence\nis also initialised. The resources allocated must be freed using\n:c:func:`tsk_treeseq_free` even in error conditions.\n\nWorks similarly to :c:func:`tsk_table_collection_loadf` please\nsee that function's documentation for details and options.\n\n@endrst\n\n@param self A pointer to an uninitialised tsk_treeseq_t object.\n@param file A FILE stream opened in an appropriate mode for reading (e.g.\n    \"r\", \"r+\" or \"w+\") positioned at the beginning of a tree sequence\n    definition.\n@param options Bitwise options. See above for details.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_treeseq_loadf(tsk_treeseq_t *self, FILE *file, tsk_flags_t options);\n\n/**\n@brief Write a tree sequence to file.\n\n@rst\nWrites the data from this tree sequence to the specified file.\n\nIf an error occurs the file path is deleted, ensuring that only complete\nand well formed files will be written.\n@endrst\n\n@param self A pointer to an initialised tsk_treeseq_t object.\n@param filename A NULL terminated string containing the filename.\n@param options Bitwise options. Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_treeseq_dump(\n    const tsk_treeseq_t *self, const char *filename, tsk_flags_t options);\n\n/**\n@brief Write a tree sequence to a stream.\n\n@rst\nWrites the data from this tree sequence to the specified FILE stream.\nSemantics are identical to :c:func:`tsk_treeseq_dump`.\n\nPlease see the :ref:`sec_c_api_examples_file_streaming` section for an example\nof how to sequentially dump and load tree sequences from a stream.\n@endrst\n\n@param self A pointer to an initialised tsk_treeseq_t object.\n@param file A FILE stream opened in an appropriate mode for writing (e.g.\n    \"w\", \"a\", \"r+\" or \"w+\").\n@param options Bitwise options. 
Currently unused; should be\n    set to zero to ensure compatibility with later versions of tskit.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_treeseq_dumpf(const tsk_treeseq_t *self, FILE *file, tsk_flags_t options);\n\n/**\n@brief Copies the state of the table collection underlying this tree sequence\ninto the specified destination table collection.\n\n@rst\nBy default the method initialises the specified destination table collection. If the\ndestination is already initialised, the :c:macro:`TSK_NO_INIT` option should\nbe supplied to avoid leaking memory.\n@endrst\n\n@param self A pointer to a tsk_treeseq_t object.\n@param tables A pointer to a tsk_table_collection_t object. If the TSK_NO_INIT\noption is specified, this must be an initialised table collection. If not, it must be an\nuninitialised table collection.\n@param options Bitwise option flags.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_treeseq_copy_tables(\n    const tsk_treeseq_t *self, tsk_table_collection_t *tables, tsk_flags_t options);\n\n/**\n@brief Free the internal memory for the specified tree sequence.\n\n@param self A pointer to an initialised tsk_treeseq_t object.\n@return Always returns 0.\n*/\nint tsk_treeseq_free(tsk_treeseq_t *self);\n\n/**\n@brief Print out the state of this tree sequence to the specified stream.\n\nThis method is intended for debugging purposes and should not be used\nin production code. 
The format of the output should **not** be depended\non and may change arbitrarily between versions.\n\n@param self A pointer to a tsk_treeseq_t object.\n@param out The stream to write the summary to.\n*/\nvoid tsk_treeseq_print_state(const tsk_treeseq_t *self, FILE *out);\n\n/**\n@brief Get the number of nodes\n\n@rst\nReturns the number of nodes in this tree sequence.\n@endrst\n\n@param self A pointer to a tsk_treeseq_t object.\n@return Returns the number of nodes.\n*/\ntsk_size_t tsk_treeseq_get_num_nodes(const tsk_treeseq_t *self);\n\n/**\n@brief Get the number of edges\n\n@rst\nReturns the number of edges in this tree sequence.\n@endrst\n\n@param self A pointer to a tsk_treeseq_t object.\n@return Returns the number of edges.\n*/\n\ntsk_size_t tsk_treeseq_get_num_edges(const tsk_treeseq_t *self);\n\n/**\n@brief Get the number of migrations\n\n@rst\nReturns the number of migrations in this tree sequence.\n@endrst\n\n@param self A pointer to a tsk_treeseq_t object.\n@return Returns the number of migrations.\n*/\ntsk_size_t tsk_treeseq_get_num_migrations(const tsk_treeseq_t *self);\n\n/**\n@brief Get the number of sites\n\n@rst\nReturns the number of sites in this tree sequence.\n@endrst\n\n@param self A pointer to a tsk_treeseq_t object.\n@return Returns the number of sites.\n*/\ntsk_size_t tsk_treeseq_get_num_sites(const tsk_treeseq_t *self);\n\n/**\n@brief Get the number of mutations\n\n@rst\nReturns the number of mutations in this tree sequence.\n@endrst\n\n@param self A pointer to a tsk_treeseq_t object.\n@return Returns the number of mutations.\n*/\ntsk_size_t tsk_treeseq_get_num_mutations(const tsk_treeseq_t *self);\n\n/**\n@brief Get the number of provenances\n\n@rst\nReturns the number of provenances in this tree sequence.\n@endrst\n\n@param self A pointer to a tsk_treeseq_t object.\n@return Returns the number of provenances.\n*/\ntsk_size_t tsk_treeseq_get_num_provenances(const tsk_treeseq_t *self);\n\n/**\n@brief Get the number of 
populations\n\n@rst\nReturns the number of populations in this tree sequence.\n@endrst\n\n@param self A pointer to a tsk_treeseq_t object.\n@return Returns the number of populations.\n*/\ntsk_size_t tsk_treeseq_get_num_populations(const tsk_treeseq_t *self);\n\n/**\n@brief Get the number of individuals\n\n@rst\nReturns the number of individuals in this tree sequence.\n@endrst\n\n@param self A pointer to a tsk_treeseq_t object.\n@return Returns the number of individuals.\n*/\ntsk_size_t tsk_treeseq_get_num_individuals(const tsk_treeseq_t *self);\n\n/**\n@brief Return the number of trees in this tree sequence.\n\n@rst\nThis is a constant time operation.\n@endrst\n\n@param self A pointer to a tsk_treeseq_t object.\n@return The number of trees in the tree sequence.\n*/\ntsk_size_t tsk_treeseq_get_num_trees(const tsk_treeseq_t *self);\n\n/**\n@brief Get the number of samples\n\n@rst\nReturns the number of nodes marked as samples in this tree sequence.\n@endrst\n\n@param self A pointer to a tsk_treeseq_t object.\n@return Returns the number of samples.\n*/\ntsk_size_t tsk_treeseq_get_num_samples(const tsk_treeseq_t *self);\n\n/**\n@brief Get the top-level tree sequence metadata.\n\n@rst\nReturns a pointer to the metadata string, which is owned by the tree sequence and\nnot null-terminated.\n@endrst\n\n@param self A pointer to a tsk_treeseq_t object.\n@return Returns a pointer to the metadata.\n*/\nconst char *tsk_treeseq_get_metadata(const tsk_treeseq_t *self);\n\n/**\n@brief Get the length of top-level tree sequence metadata\n\n@rst\nReturns the length of the metadata string.\n@endrst\n\n@param self A pointer to a tsk_treeseq_t object.\n@return Returns the length of the metadata.\n*/\ntsk_size_t tsk_treeseq_get_metadata_length(const tsk_treeseq_t *self);\n\n/**\n@brief Get the top-level tree sequence metadata schema.\n\n@rst\nReturns a pointer to the metadata schema string, which is owned by the tree sequence and\nnot null-terminated.\n@endrst\n\n@param self A pointer to 
a tsk_treeseq_t object.\n@return Returns a pointer to the metadata schema.\n*/\nconst char *tsk_treeseq_get_metadata_schema(const tsk_treeseq_t *self);\n\n/**\n@brief Get the length of the top-level tree sequence metadata schema.\n\n@rst\nReturns the length of the metadata schema string.\n@endrst\n\n@param self A pointer to a tsk_treeseq_t object.\n@return Returns the length of the metadata schema.\n*/\ntsk_size_t tsk_treeseq_get_metadata_schema_length(const tsk_treeseq_t *self);\n\n/**\n@brief Get the time units string\n\n@rst\nReturns a pointer to the time units string, which is owned by the tree sequence and\nnot null-terminated.\n@endrst\n\n@param self A pointer to a tsk_treeseq_t object.\n@return Returns a pointer to the time units.\n*/\nconst char *tsk_treeseq_get_time_units(const tsk_treeseq_t *self);\n\n/**\n@brief Get the length of time units string\n@rst\nReturns the length of the time units string.\n@endrst\n\n@param self A pointer to a tsk_treeseq_t object.\n@return Returns the length of the time units.\n*/\ntsk_size_t tsk_treeseq_get_time_units_length(const tsk_treeseq_t *self);\n\n/**\n@brief Get the file uuid\n\n@rst\nReturns a pointer to the null-terminated file uuid string, which is owned by the tree\nsequence.\n@endrst\n\n@param self A pointer to a tsk_treeseq_t object.\n@return Returns a pointer to the null-terminated file uuid.\n*/\nconst char *tsk_treeseq_get_file_uuid(const tsk_treeseq_t *self);\n\n/**\n@brief Get the sequence length\n\n@rst\nReturns the sequence length of this tree sequence\n@endrst\n\n@param self A pointer to a tsk_treeseq_t object.\n@return Returns the sequence length.\n*/\ndouble tsk_treeseq_get_sequence_length(const tsk_treeseq_t *self);\n\n/**\n@brief Get the breakpoints\n\n@rst\nReturns an array of breakpoint locations, the array is owned by the tree sequence.\n@endrst\n\n@param self A pointer to a tsk_treeseq_t object.\n@return Returns the pointer to the breakpoint array.\n*/\nconst double 
*tsk_treeseq_get_breakpoints(const tsk_treeseq_t *self);\n\n/**\n@brief Get the samples\n\n@rst\nReturns an array of ids of sample nodes in this tree sequence.\nI.e. nodes that have the :c:macro:`TSK_NODE_IS_SAMPLE` flag set.\nThe array is owned by the tree sequence and should not be modified or free'd.\n@endrst\n\n@param self A pointer to a tsk_treeseq_t object.\n@return Returns the pointer to the sample node id array.\n*/\nconst tsk_id_t *tsk_treeseq_get_samples(const tsk_treeseq_t *self);\n\n/**\n@brief Get the map of node id to sample index\n\n@rst\nReturns the location of each node in the list of samples or\n:c:macro:`TSK_NULL` for nodes that are not samples.\n@endrst\n\n@param self A pointer to a tsk_treeseq_t object.\n@return Returns the pointer to the array of sample indexes.\n*/\nconst tsk_id_t *tsk_treeseq_get_sample_index_map(const tsk_treeseq_t *self);\n\n/**\n@brief Check if a node is a sample\n\n@rst\nReturns the sample status of a given node id.\n@endrst\n@param self A pointer to a tsk_treeseq_t object.\n@param u The id of the node to be checked.\n@return Returns true if the node is a sample.\n*/\nbool tsk_treeseq_is_sample(const tsk_treeseq_t *self, tsk_id_t u);\n\n/**\n@brief Get the discrete genome status\n\n@rst\nIf all the genomic locations in the tree sequence are discrete integer values\nthen this flag will be true.\n@endrst\n@param self A pointer to a tsk_treeseq_t object.\n@return Returns true if all genomic locations are discrete.\n*/\nbool tsk_treeseq_get_discrete_genome(const tsk_treeseq_t *self);\n\n/**\n@brief Get the discrete time status\n\n@rst\nIf all times in the tree sequence are discrete integer values\nthen this flag will be true\n@endrst\n@param self A pointer to a tsk_treeseq_t object.\n@return Returns true if all times are discrete.\n*/\nbool tsk_treeseq_get_discrete_time(const tsk_treeseq_t *self);\n\n/**\n@brief Get the min time in node table and mutation table\n\n@rst\nThe times stored in both the node and mutation tables 
are considered.\n@endrst\n\n@param self A pointer to a tsk_treeseq_t object.\n@return Returns the min time of all nodes and mutations.\n*/\ndouble tsk_treeseq_get_min_time(const tsk_treeseq_t *self);\n\n/**\n@brief Get the max time in node table and mutation table\n\n@rst\nThe times stored in both the node and mutation tables are considered.\n@endrst\n\n@param self A pointer to a tsk_treeseq_t object.\n@return Returns the max time of all nodes and mutations.\n*/\ndouble tsk_treeseq_get_max_time(const tsk_treeseq_t *self);\n\n/**\n@brief Get a node by its index\n\n@rst\nCopies a node from this tree sequence to the specified destination.\n@endrst\n@param self A pointer to a tsk_treeseq_t object.\n@param index The node index to copy\n@param node A pointer to a tsk_node_t object.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_treeseq_get_node(const tsk_treeseq_t *self, tsk_id_t index, tsk_node_t *node);\n\n/**\n@brief Get an edge by its index\n\n@rst\nCopies an edge from this tree sequence to the specified destination.\n@endrst\n@param self A pointer to a tsk_treeseq_t object.\n@param index The edge index to copy\n@param edge A pointer to a tsk_edge_t object.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_treeseq_get_edge(const tsk_treeseq_t *self, tsk_id_t index, tsk_edge_t *edge);\n\n/**\n@brief Get a migration by its index\n\n@rst\nCopies a migration from this tree sequence to the specified destination.\n@endrst\n@param self A pointer to a tsk_treeseq_t object.\n@param index The migration index to copy\n@param migration A pointer to a tsk_migration_t object.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_treeseq_get_migration(\n    const tsk_treeseq_t *self, tsk_id_t index, tsk_migration_t *migration);\n\n/**\n@brief Get a site by its index\n\n@rst\nCopies a site from this tree sequence to the specified destination.\n@endrst\n@param self A pointer to a tsk_treeseq_t object.\n@param index The 
site index to copy\n@param site A pointer to a tsk_site_t object.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_treeseq_get_site(const tsk_treeseq_t *self, tsk_id_t index, tsk_site_t *site);\n\n/**\n@brief Get a mutation by its index\n\n@rst\nCopies a mutation from this tree sequence to the specified destination.\n@endrst\n@param self A pointer to a tsk_treeseq_t object.\n@param index The mutation index to copy\n@param mutation A pointer to a tsk_mutation_t object.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_treeseq_get_mutation(\n    const tsk_treeseq_t *self, tsk_id_t index, tsk_mutation_t *mutation);\n\n/**\n@brief Get a provenance by its index\n\n@rst\nCopies a provenance from this tree sequence to the specified destination.\n@endrst\n@param self A pointer to a tsk_treeseq_t object.\n@param index The provenance index to copy\n@param provenance A pointer to a tsk_provenance_t object.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_treeseq_get_provenance(\n    const tsk_treeseq_t *self, tsk_id_t index, tsk_provenance_t *provenance);\n\n/**\n@brief Get a population by its index\n\n@rst\nCopies a population from this tree sequence to the specified destination.\n@endrst\n@param self A pointer to a tsk_treeseq_t object.\n@param index The population index to copy\n@param population A pointer to a tsk_population_t object.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_treeseq_get_population(\n    const tsk_treeseq_t *self, tsk_id_t index, tsk_population_t *population);\n\n/**\n@brief Get an individual by its index\n\n@rst\nCopies an individual from this tree sequence to the specified destination.\n@endrst\n@param self A pointer to a tsk_treeseq_t object.\n@param index The individual index to copy\n@param individual A pointer to a tsk_individual_t object.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_treeseq_get_individual(\n    const 
tsk_treeseq_t *self, tsk_id_t index, tsk_individual_t *individual);\n\n/**\n@brief Create a simplified instance of this tree sequence\n\n@rst\nCopies this tree sequence to the specified destination and performs simplification.\nThe destination tree sequence should be uninitialised.\nSimplification transforms the tables to remove redundancy and canonicalise\ntree sequence data. See the :ref:`simplification <sec_simplification>` tutorial for\nmore details.\n\nFor full details and flags see :c:func:`tsk_table_collection_simplify` which performs\nthe same operation in place.\n\n@endrst\n@param self A pointer to an initialised tsk_treeseq_t object.\n@param samples Either NULL or an array of num_samples distinct and valid node IDs.\n    If non-null the nodes in this array will be marked as samples in the output.\n    If NULL, the num_samples parameter is ignored and the samples in the output\n    will be the same as the samples in the input. This is equivalent to populating\n    the samples array with all of the sample nodes in the input in increasing\n    order of ID.\n@param num_samples The number of node IDs in the input samples array. 
Ignored\n    if the samples array is NULL.\n@param options Simplify options; see above for the available bitwise flags.\n    For the default behaviour, a value of 0 should be provided.\n@param output A pointer to an uninitialised tsk_treeseq_t object.\n@param node_map If not NULL, this array will be filled to define the mapping\n    between node IDs in the table collection before and after simplification.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_treeseq_simplify(const tsk_treeseq_t *self, const tsk_id_t *samples,\n    tsk_size_t num_samples, tsk_flags_t options, tsk_treeseq_t *output,\n    tsk_id_t *node_map);\n\n/**\n@brief Extends haplotypes\n\nReturns a new tree sequence in which the span covered by ancestral nodes\nis \"extended\" to regions of the genome according to the following rule:\nIf an ancestral segment corresponding to node `n` has ancestor `p` and\ndescendant `c` on some portion of the genome, and on an adjacent segment of\ngenome `p` is still an ancestor of `c`, then `n` is inserted into the\npath from `p` to `c`. For instance, if `p` is the parent of `n` and `n`\nis the parent of `c`, then the span of the edges from `p` to `n` and\n`n` to `c` are extended, and the span of the edge from `p` to `c` is\nreduced. However, any edges whose child node is a sample are not\nmodified.  See Fritze et al. (2025):\nhttps://doi.org/10.1093/genetics/iyaf198 for more details.\n\nThe method works by iterating over the genome to look for edges that can\nbe extended in this way; the maximum number of such iterations is\ncontrolled by ``max_iter``.\n\nThe `node` of certain mutations may also be remapped; to do this\nunambiguously we need to know mutation times. 
If mutation times are unknown,\nuse `tsk_table_collection_compute_mutation_times` first.\n\nThe method will not affect any tables except the edge table, or the node\ncolumn in the mutation table.\n\n@rst\n\n**Options**: None currently defined.\n@endrst\n\n@param self A pointer to a tsk_treeseq_t object.\n@param max_iter The maximum number of iterations over the tree sequence.\n@param options Bitwise option flags. (UNUSED)\n@param output A pointer to an uninitialised tsk_treeseq_t object.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_treeseq_extend_haplotypes(\n    const tsk_treeseq_t *self, int max_iter, tsk_flags_t options, tsk_treeseq_t *output);\n\n/** @} */\n\nint tsk_treeseq_split_edges(const tsk_treeseq_t *self, double time, tsk_flags_t flags,\n    tsk_id_t population, const char *metadata, tsk_size_t metadata_length,\n    tsk_flags_t options, tsk_treeseq_t *output);\n\nbool tsk_treeseq_has_reference_sequence(const tsk_treeseq_t *self);\n\n/**\n@brief Decode full-length alignments for specified nodes over an interval.\n\n@rst\nFills a caller-provided buffer with per-node sequence alignments for the interval\n``[left, right)``. Each row is exactly ``L = right - left`` bytes with no trailing\nterminator, and rows are tightly packed in row-major order in the output buffer.\n\nThe output at non-site positions comes from the provided ``ref_seq`` slice\n(``ref_seq[left:right]``); per-site alleles are overlaid onto this for each node.\n\nIf the :c:macro:`TSK_ISOLATED_NOT_MISSING` option is\nnot set, nodes that are isolated (no parent and no children) within a tree\ninterval in ``[left, right)`` are rendered as the ``missing_data_character`` for\nthat interval. 
At site positions, decoded genotypes override any previous value;\nif a genotype is missing (``TSK_MISSING_DATA``), the ``missing_data_character`` is\noverlaid onto the reference base.\n\nRequirements and validation:\n\n- The tree sequence must have a discrete genome.\n- ``left`` and ``right`` must be integers with ``0 <= left < right <= sequence_length``.\n- ``ref_seq`` must be non-NULL and ``ref_seq_length == sequence_length``.\n- Each allele at a site must be exactly one byte; alleles equal to\n  ``missing_data_character`` are not permitted.\n\n@endrst\n\n@param self A pointer to a :c:type:`tsk_treeseq_t` object.\n@param ref_seq Pointer to a reference sequence buffer of length ``ref_seq_length``.\n@param ref_seq_length The total length of ``ref_seq``; must equal the tree sequence\nlength.\n@param nodes Array of node IDs to decode (may include non-samples).\n@param num_nodes The number of nodes in ``nodes`` and rows in the output.\n@param left The inclusive-left genomic coordinate of the output interval.\n@param right The exclusive-right genomic coordinate of the output interval.\n@param missing_data_character The byte to use for missing data.\n@param alignments_out Output buffer of size at least ``num_nodes * (right - left)``.\n@param options Bitwise option flags; supports :c:macro:`TSK_ISOLATED_NOT_MISSING`.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_treeseq_decode_alignments(const tsk_treeseq_t *self, const char *ref_seq,\n    tsk_size_t ref_seq_length, const tsk_id_t *nodes, tsk_size_t num_nodes, double left,\n    double right, char missing_data_character, char *alignments_out,\n    tsk_flags_t options);\n\nint tsk_treeseq_get_individuals_population(const tsk_treeseq_t *self, tsk_id_t *output);\nint tsk_treeseq_get_individuals_time(const tsk_treeseq_t *self, double *output);\n\nint tsk_treeseq_kc_distance(const tsk_treeseq_t *self, const tsk_treeseq_t *other,\n    double lambda_, double *result);\n\nint 
tsk_treeseq_genealogical_nearest_neighbours(const tsk_treeseq_t *self,\n    const tsk_id_t *focal, tsk_size_t num_focal, const tsk_id_t *const *reference_sets,\n    const tsk_size_t *reference_set_size, tsk_size_t num_reference_sets,\n    tsk_flags_t options, double *ret_array);\nint tsk_treeseq_mean_descendants(const tsk_treeseq_t *self,\n    const tsk_id_t *const *reference_sets, const tsk_size_t *reference_set_size,\n    tsk_size_t num_reference_sets, tsk_flags_t options, double *ret_array);\n\ntypedef int general_stat_func_t(tsk_size_t state_dim, const double *state,\n    tsk_size_t result_dim, double *result, void *params);\n\nint tsk_treeseq_general_stat(const tsk_treeseq_t *self, tsk_size_t K, const double *W,\n    tsk_size_t M, general_stat_func_t *f, void *f_params, tsk_size_t num_windows,\n    const double *windows, tsk_flags_t options, double *result);\n\ntypedef int norm_func_t(tsk_size_t result_dim, const double *hap_weights, tsk_size_t n_a,\n    tsk_size_t n_b, double *result, void *params);\n\nint tsk_treeseq_two_locus_count_stat(const tsk_treeseq_t *self,\n    tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,\n    const tsk_id_t *sample_sets, tsk_size_t result_dim, const tsk_id_t *set_indexes,\n    general_stat_func_t *f, norm_func_t *norm_f, tsk_size_t out_rows,\n    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t out_cols,\n    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,\n    double *result);\n\n/* One way weighted stats */\n\ntypedef int one_way_weighted_method(const tsk_treeseq_t *self, tsk_size_t num_weights,\n    const double *weights, tsk_size_t num_windows, const double *windows,\n    tsk_flags_t options, double *result);\n\nint tsk_treeseq_trait_covariance(const tsk_treeseq_t *self, tsk_size_t num_weights,\n    const double *weights, tsk_size_t num_windows, const double *windows,\n    tsk_flags_t options, double *result);\nint tsk_treeseq_trait_correlation(const tsk_treeseq_t 
*self, tsk_size_t num_weights,\n    const double *weights, tsk_size_t num_windows, const double *windows,\n    tsk_flags_t options, double *result);\n\n/* One way weighted stats with covariates */\n\ntypedef int one_way_covariates_method(const tsk_treeseq_t *self, tsk_size_t num_weights,\n    const double *weights, tsk_size_t num_covariates, const double *covariates,\n    tsk_size_t num_windows, const double *windows, tsk_flags_t options, double *result);\n\nint tsk_treeseq_trait_linear_model(const tsk_treeseq_t *self, tsk_size_t num_weights,\n    const double *weights, tsk_size_t num_covariates, const double *covariates,\n    tsk_size_t num_windows, const double *windows, tsk_flags_t options, double *result);\n\n/* Two way weighted stats with covariates */\n\ntypedef int two_way_weighted_method(const tsk_treeseq_t *self, tsk_size_t num_weights,\n    const double *weights, tsk_size_t num_index_tuples, const tsk_id_t *index_tuples,\n    tsk_size_t num_windows, const double *windows, double *result, tsk_flags_t options);\n\nint tsk_treeseq_genetic_relatedness_weighted(const tsk_treeseq_t *self,\n    tsk_size_t num_weights, const double *weights, tsk_size_t num_index_tuples,\n    const tsk_id_t *index_tuples, tsk_size_t num_windows, const double *windows,\n    double *result, tsk_flags_t options);\n\n/* One way weighted stats with vector output */\n\ntypedef int weighted_vector_method(const tsk_treeseq_t *self, tsk_size_t num_weights,\n    const double *weights, tsk_size_t num_windows, const double *windows,\n    tsk_size_t num_focal_nodes, const tsk_id_t *focal_nodes, double *result,\n    tsk_flags_t options);\n\nint tsk_treeseq_genetic_relatedness_vector(const tsk_treeseq_t *self,\n    tsk_size_t num_weights, const double *weights, tsk_size_t num_windows,\n    const double *windows, tsk_size_t num_focal_nodes, const tsk_id_t *focal_nodes,\n    double *result, tsk_flags_t options);\n\n/* One way sample set stats */\n\ntypedef int one_way_sample_stat_method(const 
tsk_treeseq_t *self,\n    tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,\n    const tsk_id_t *sample_sets, tsk_size_t num_windows, const double *windows,\n    tsk_flags_t options, double *result);\n\nint tsk_treeseq_diversity(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_windows, const double *windows, tsk_flags_t options, double *result);\nint tsk_treeseq_segregating_sites(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_windows, const double *windows, tsk_flags_t options, double *result);\nint tsk_treeseq_Y1(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_windows, const double *windows, tsk_flags_t options, double *result);\nint tsk_treeseq_allele_frequency_spectrum(const tsk_treeseq_t *self,\n    tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,\n    const tsk_id_t *sample_sets, tsk_size_t num_windows, const double *windows,\n    tsk_size_t num_time_windows, const double *time_windows, tsk_flags_t options,\n    double *result);\n\ntypedef int general_sample_stat_method(const tsk_treeseq_t *self,\n    tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,\n    const tsk_id_t *sample_sets, tsk_size_t num_indexes, const tsk_id_t *indexes,\n    tsk_size_t num_windows, const double *windows, tsk_flags_t options, double *result);\n\ntypedef int two_locus_count_stat_method(const tsk_treeseq_t *self,\n    tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,\n    const tsk_id_t *sample_sets, tsk_size_t num_rows, const tsk_id_t *row_sites,\n    const double *row_positions, tsk_size_t num_cols, const tsk_id_t *col_sites,\n    const double *col_positions, tsk_flags_t options, double *result);\n\nint tsk_treeseq_D(const tsk_treeseq_t 
*self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,\n    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,\n    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,\n    double *result);\nint tsk_treeseq_D2(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,\n    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,\n    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,\n    double *result);\nint tsk_treeseq_r2(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,\n    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,\n    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,\n    double *result);\nint tsk_treeseq_D_prime(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,\n    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,\n    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,\n    double *result);\nint tsk_treeseq_r(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,\n    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,\n    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,\n    double *result);\nint tsk_treeseq_Dz(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,\n    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,\n    const tsk_id_t 
*col_sites, const double *col_positions, tsk_flags_t options,\n    double *result);\nint tsk_treeseq_pi2(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,\n    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,\n    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,\n    double *result);\nint tsk_treeseq_D2_unbiased(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,\n    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,\n    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,\n    double *result);\nint tsk_treeseq_Dz_unbiased(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,\n    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,\n    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,\n    double *result);\nint tsk_treeseq_pi2_unbiased(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,\n    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,\n    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,\n    double *result);\n\ntypedef int k_way_two_locus_count_stat_method(const tsk_treeseq_t *self,\n    tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,\n    const tsk_id_t *sample_sets, tsk_size_t num_index_tuples,\n    const tsk_id_t *index_tuples, tsk_size_t num_rows, const tsk_id_t *row_sites,\n    const double *row_positions, tsk_size_t num_cols, const tsk_id_t *col_sites,\n    const double *col_positions, tsk_flags_t options, double *result);\n\n/* Two way sample set stats 
*/\n\nint tsk_treeseq_divergence(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,\n    const double *windows, tsk_flags_t options, double *result);\nint tsk_treeseq_Y2(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,\n    const double *windows, tsk_flags_t options, double *result);\nint tsk_treeseq_f2(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,\n    const double *windows, tsk_flags_t options, double *result);\nint tsk_treeseq_genetic_relatedness(const tsk_treeseq_t *self,\n    tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,\n    const tsk_id_t *sample_sets, tsk_size_t num_index_tuples,\n    const tsk_id_t *index_tuples, tsk_size_t num_windows, const double *windows,\n    tsk_flags_t options, double *result);\nint tsk_treeseq_D2_ij(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_rows,\n    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,\n    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,\n    double *result);\nint tsk_treeseq_D2_ij_unbiased(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_rows,\n    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,\n    const tsk_id_t 
*col_sites, const double *col_positions, tsk_flags_t options,\n    double *result);\nint tsk_treeseq_r2_ij(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_rows,\n    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,\n    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,\n    double *result);\n\n/* Three way sample set stats */\nint tsk_treeseq_Y3(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,\n    const double *windows, tsk_flags_t options, double *result);\nint tsk_treeseq_f3(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,\n    const double *windows, tsk_flags_t options, double *result);\n\n/* Four way sample set stats */\nint tsk_treeseq_f4(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,\n    const double *windows, tsk_flags_t options, double *result);\n\nint tsk_treeseq_divergence_matrix(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,\n    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,\n    tsk_size_t num_windows, const double *windows, tsk_flags_t options, double *result);\n\n/* Coalescence rates */\ntypedef int pair_coalescence_stat_func_t(tsk_size_t input_dim, const double *atoms,\n    const double *weights, tsk_size_t result_dim, double *result, void *params);\nint tsk_treeseq_pair_coalescence_stat(const tsk_treeseq_t *self,\n    tsk_size_t 
num_sample_sets, const tsk_size_t *sample_set_sizes,\n    const tsk_id_t *sample_sets, tsk_size_t num_set_indexes, const tsk_id_t *set_indexes,\n    tsk_size_t num_windows, const double *windows, tsk_size_t num_bins,\n    const tsk_id_t *node_bin_map, pair_coalescence_stat_func_t *summary_func,\n    tsk_size_t summary_func_dim, void *summary_func_args, tsk_flags_t options,\n    double *result);\nint tsk_treeseq_pair_coalescence_counts(const tsk_treeseq_t *self,\n    tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,\n    const tsk_id_t *sample_sets, tsk_size_t num_set_indexes, const tsk_id_t *set_indexes,\n    tsk_size_t num_windows, const double *windows, tsk_size_t num_bins,\n    const tsk_id_t *node_bin_map, tsk_flags_t options, double *result);\nint tsk_treeseq_pair_coalescence_quantiles(const tsk_treeseq_t *self,\n    tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,\n    const tsk_id_t *sample_sets, tsk_size_t num_set_indexes, const tsk_id_t *set_indexes,\n    tsk_size_t num_windows, const double *windows, tsk_size_t num_bins,\n    const tsk_id_t *node_bin_map, tsk_size_t num_quantiles, double *quantiles,\n    tsk_flags_t options, double *result);\nint tsk_treeseq_pair_coalescence_rates(const tsk_treeseq_t *self,\n    tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,\n    const tsk_id_t *sample_sets, tsk_size_t num_set_indexes, const tsk_id_t *set_indexes,\n    tsk_size_t num_windows, const double *windows, tsk_size_t num_time_windows,\n    const tsk_id_t *node_time_window, double *time_windows, tsk_flags_t options,\n    double *result);\n\n/****************************************************************************/\n/* Tree */\n/****************************************************************************/\n\n/**\n@defgroup TREE_API_LIFECYCLE_GROUP Tree lifecycle\n@{\n*/\n\n/**\n@brief Initialises the tree by allocating internal memory and associating\n    with the specified tree sequence.\n\n@rst\nThis must be 
called before any operations are performed on the tree.\n\nThe specified tree sequence object must be initialised, and must be\nvalid for the full lifetime of this tree.\n\nSee the :ref:`sec_c_api_overview_structure` for details on how objects\nare initialised and freed.\n\nThe ``options`` parameter is provided to support future expansions\nof the API. A number of undocumented internal features are controlled\nvia this parameter, and it **must** be set to 0 to ensure that operations\nwork as expected and for compatibility with future versions of tskit.\n@endrst\n\n@param self A pointer to an uninitialised tsk_tree_t object.\n@param tree_sequence A pointer to an initialised tsk_treeseq_t object.\n@param options Allocation time options. Must be 0, or behaviour is undefined.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_tree_init(\n    tsk_tree_t *self, const tsk_treeseq_t *tree_sequence, tsk_flags_t options);\n\n/**\n@brief Free the internal memory for the specified tree.\n\n@param self A pointer to an initialised tsk_tree_t object.\n@return Always returns 0.\n*/\nint tsk_tree_free(tsk_tree_t *self);\n\n/**\n@brief Copies the state of this tree into the specified destination.\n\n@rst\nBy default (``options`` = 0) the method initialises the specified destination\ntree by calling :c:func:`tsk_tree_init`. If the destination is already\ninitialised, the :c:macro:`TSK_NO_INIT` option should be supplied to avoid\nleaking memory. 
If :c:macro:`TSK_NO_INIT` is supplied and the tree sequence associated\nwith the ``dest`` tree is not equal to the tree sequence associated\nwith ``self``, an error is raised.\n\nThe destination tree will keep a reference to the tree sequence object\nassociated with the source tree, and this tree sequence must be\nvalid for the full lifetime of the destination tree.\n\n**Options**\n\n- :c:macro:`TSK_NO_INIT`\n\nIf :c:macro:`TSK_NO_INIT` is not specified, options for :c:func:`tsk_tree_init`\ncan be provided and will be passed on.\n\n@endrst\n\n@param self A pointer to an initialised tsk_tree_t object.\n@param dest A pointer to a tsk_tree_t object. If the TSK_NO_INIT option\n    is specified, this must be an initialised tree. If not, it must\n    be an uninitialised tree.\n@param options Copy and allocation time options. See the notes above for details.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_tree_copy(const tsk_tree_t *self, tsk_tree_t *dest, tsk_flags_t options);\n\n/** @} */\n\n/**\n@defgroup TREE_API_SEEKING_GROUP Seeking along the sequence\n@{\n*/\n\n/** @brief Option to seek by skipping to the target tree, adding and removing as few\n   edges as possible. 
If not specified, a linear time algorithm is used instead.\n\n    @ingroup TREE_API_SEEKING_GROUP\n*/\n#define TSK_SEEK_SKIP (1 << 0)\n\n/**\n@brief Seek to the first tree in the sequence.\n\n@rst\nSet the state of this tree to reflect the first tree in parent\ntree sequence.\n@endrst\n\n@param self A pointer to an initialised tsk_tree_t object.\n@return Return TSK_TREE_OK on success; or a negative value if an error occurs.\n*/\nint tsk_tree_first(tsk_tree_t *self);\n\n/**\n@brief Seek to the last tree in the sequence.\n\n@rst\nSet the state of this tree to reflect the last tree in parent\ntree sequence.\n@endrst\n\n@param self A pointer to an initialised tsk_tree_t object.\n@return Return TSK_TREE_OK on success; or a negative value if an error occurs.\n*/\nint tsk_tree_last(tsk_tree_t *self);\n\n/**\n@brief Seek to the next tree in the sequence.\n\n@rst\nSet the state of this tree to reflect the next tree in parent\ntree sequence. If the index of the current tree is ``j``,\nthen after this operation the index will be ``j + 1``.\n\nCalling :c:func:`tsk_tree_next` on a tree in the\n:ref:`null state<sec_c_api_trees_null>` is equivalent to calling\n:c:func:`tsk_tree_first`.\n\nCalling :c:func:`tsk_tree_next` on the last tree in the\nsequence will transform it into the\n:ref:`null state<sec_c_api_trees_null>` (equivalent to\ncalling :c:func:`tsk_tree_clear`).\n\nPlease see the :ref:`sec_c_api_examples_tree_iteration` examples for\nrecommended usage.\n@endrst\n\n@param self A pointer to an initialised tsk_tree_t object.\n@return Return TSK_TREE_OK on successfully transforming to a\nnon-null tree; 0 on successfully transforming into the null\ntree; or a negative value if an error occurs.\n*/\nint tsk_tree_next(tsk_tree_t *self);\n\n/**\n@brief Seek to the previous tree in the sequence.\n\n@rst\nSet the state of this tree to reflect the previous tree in parent\ntree sequence. 
If the index of the current tree is ``j``,\nthen after this operation the index will be ``j - 1``.\n\nCalling :c:func:`tsk_tree_prev` on a tree in the\n:ref:`null state<sec_c_api_trees_null>` is equivalent to calling\n:c:func:`tsk_tree_last`.\n\nCalling :c:func:`tsk_tree_prev` on the first tree in the\nsequence will transform it into the\n:ref:`null state<sec_c_api_trees_null>` (equivalent to\ncalling :c:func:`tsk_tree_clear`).\n\nPlease see the :ref:`sec_c_api_examples_tree_iteration` examples for\nrecommended usage.\n@endrst\n\n@param self A pointer to an initialised tsk_tree_t object.\n@return Return TSK_TREE_OK on successfully transforming to a\nnon-null tree; 0 on successfully transforming into the null\ntree; or a negative value if an error occurs.\n*/\nint tsk_tree_prev(tsk_tree_t *self);\n\n/**\n@brief Set the tree into the null state.\n\n@rst\nTransform this tree into the :ref:`null state<sec_c_api_trees_null>`.\n@endrst\n\n@param self A pointer to an initialised tsk_tree_t object.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_tree_clear(tsk_tree_t *self);\n\n/**\n@brief Seek to a particular position on the genome.\n\n@rst\nSet the state of this tree to reflect the tree in parent\ntree sequence covering the specified ``position``. That is, on success\nwe will have ``tree.interval.left <= position`` and\nwe will have ``position < tree.interval.right``.\n\nSeeking to a position currently covered by the tree is\na constant time operation.\n\nSeeking to a position from a non-null tree uses a linear time\nalgorithm by default, unless the option :c:macro:`TSK_SEEK_SKIP`\nis specified. In this case, a faster algorithm is employed which skips\nto the target tree by removing and adding the minimal number of edges\npossible. However, this approach does not guarantee that edges are\ninserted and removed in time-sorted order.\n\n.. 
warning:: Using the :c:macro:`TSK_SEEK_SKIP` option\n    may lead to edges not being inserted or removed in time-sorted order.\n\n@endrst\n\n@param self A pointer to an initialised tsk_tree_t object.\n@param position The position in genome coordinates\n@param options Seek options. See the notes above for details.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_tree_seek(tsk_tree_t *self, double position, tsk_flags_t options);\n\n/**\n@brief Seek to a specific tree in a tree sequence.\n\n@rst\nSet the state of this tree to reflect the tree in parent\ntree sequence whose index is ``0 <= tree < num_trees``.\n@endrst\n\n@param self A pointer to an initialised tsk_tree_t object.\n@param tree The target tree index.\n@param options Seek options. Currently unused. Set to 0 for compatibility\n    with future versions of tskit.\n@return Return 0 on success or a negative value on failure.\n*/\nint tsk_tree_seek_index(tsk_tree_t *self, tsk_id_t tree, tsk_flags_t options);\n\n/** @} */\n\n/**\n@defgroup TREE_API_TREE_QUERY_GROUP Tree Queries\n@{\n*/\n\n/**\n@brief Returns the number of roots in this tree.\n\n@rst\nSee the :ref:`sec_data_model_tree_roots` section for more information\non how the roots of a tree are defined.\n@endrst\n\n@param self A pointer to an initialised tsk_tree_t object.\n@return Returns the number of roots in this tree.\n*/\ntsk_size_t tsk_tree_get_num_roots(const tsk_tree_t *self);\n\n/**\n@brief Returns the leftmost root in this tree.\n\n@rst\nSee the :ref:`sec_data_model_tree_roots` section for more information\non how the roots of a tree are defined.\n\nThis function is equivalent to ``tree.left_child[tree.virtual_root]``.\n@endrst\n\n@param self A pointer to an initialised tsk_tree_t object.\n@return Returns the leftmost root in the tree.\n*/\ntsk_id_t tsk_tree_get_left_root(const tsk_tree_t *self);\n\n/**\n@brief Returns the rightmost root in this tree.\n\n@rst\nSee the :ref:`sec_data_model_tree_roots` section for more 
information\non how the roots of a tree are defined.\n\nThis function is equivalent to ``tree.right_child[tree.virtual_root]``.\n@endrst\n\n@param self A pointer to an initialised tsk_tree_t object.\n@return Returns the rightmost root in the tree.\n*/\ntsk_id_t tsk_tree_get_right_root(const tsk_tree_t *self);\n\n/**\n@brief Get the list of sites for this tree.\n\n@rst\nGets the list of :c:data:`tsk_site_t` objects in the parent tree sequence\nfor which the position lies within this tree's genomic interval.\n\nThe memory pointed to by the ``sites`` parameter is managed by the\n``tsk_tree_t`` object and must not be altered or freed by client code.\n\n.. code-block:: c\n\n    static void\n    print_sites(const tsk_tree_t *tree)\n    {\n        int ret;\n        tsk_size_t j, num_sites;\n        const tsk_site_t *sites;\n\n        ret = tsk_tree_get_sites(tree, &sites, &num_sites);\n        check_tsk_error(ret);\n        for (j = 0; j < num_sites; j++) {\n            printf(\"position = %f\\n\", sites[j].position);\n        }\n    }\n\nThis is a constant time operation.\n\n@endrst\n\n@param self A pointer to a tsk_tree_t object.\n@param sites The destination pointer for the list of sites.\n@param sites_length A pointer to a tsk_size_t value in which the number\n    of sites is stored.\n@return 0 on success or a negative value on failure.\n*/\nint tsk_tree_get_sites(\n    const tsk_tree_t *self, const tsk_site_t **sites, tsk_size_t *sites_length);\n\n/**\n@brief Return an upper bound on the number of nodes reachable\n    from the roots of this tree.\n\n@rst\nThis function provides an upper bound on the number of nodes that\ncan be reached in tree traversals, and is intended to be used\nfor memory allocation purposes. 
If ``num_nodes`` is the number\nof nodes visited in a tree traversal from the\n:ref:`virtual root<sec_data_model_tree_roots>`\n(e.g., ``tsk_tree_preorder_from(tree, tree->virtual_root, nodes,\n&num_nodes)``), the bound ``N`` returned here is guaranteed to\nbe greater than or equal to ``num_nodes``.\n\n.. warning:: The precise value returned is not defined and should\n    not be depended on, as it may change from version-to-version.\n\n@endrst\n\n@param self A pointer to a tsk_tree_t object.\n@return An upper bound on the number of nodes reachable from the roots\n    of this tree, or zero if this tree has not been initialised.\n*/\ntsk_size_t tsk_tree_get_size_bound(const tsk_tree_t *self);\n\n/**\n@brief Print out the state of this tree to the specified stream.\n\nThis method is intended for debugging purposes and should not be used\nin production code. The format of the output should **not** be depended\non and may change arbitrarily between versions.\n\n@param self A pointer to a tsk_tree_t object.\n@param out The stream to write the summary to.\n*/\nvoid tsk_tree_print_state(const tsk_tree_t *self, FILE *out);\n\n/** @} */\n\n/**\n@defgroup TREE_API_NODE_QUERY_GROUP Node Queries\n@{\n*/\n\n/**\n@brief Returns the parent of the specified node.\n\n@rst\nEquivalent to ``tree.parent[u]`` with bounds checking for the node u.\nPerformance sensitive code which can guarantee that the node u is\nvalid should use the direct array access in preference to this method.\n@endrst\n\n@param self A pointer to a tsk_tree_t object.\n@param u The tree node.\n@param parent A tsk_id_t pointer to store the returned parent node.\n@return 0 on success or a negative value on failure.\n*/\nint tsk_tree_get_parent(const tsk_tree_t *self, tsk_id_t u, tsk_id_t *parent);\n\n/**\n@brief Returns the time of the specified node.\n\n@rst\nEquivalent to ``tables->nodes.time[u]`` with bounds checking for the node u.\nPerformance sensitive code which can guarantee that the node u is\nvalid should use 
the direct array access in preference to this method,\nfor example:\n\n.. code-block:: c\n\n    static void\n    print_times(const tsk_tree_t *tree)\n    {\n        int ret;\n        tsk_size_t num_nodes, j;\n        const double *node_time = tree->tree_sequence->tables->nodes.time;\n        tsk_id_t *nodes = malloc(tsk_tree_get_size_bound(tree) * sizeof(*nodes));\n\n        if (nodes == NULL) {\n            errx(EXIT_FAILURE, \"Out of memory\");\n        }\n        ret = tsk_tree_preorder(tree, nodes, &num_nodes);\n        check_tsk_error(ret);\n        for (j = 0; j < num_nodes; j++) {\n            printf(\"time = %f\\n\", node_time[nodes[j]]);\n        }\n        free(nodes);\n    }\n\n@endrst\n\n@param self A pointer to a tsk_tree_t object.\n@param u The tree node.\n@param ret_time A double pointer to store the returned node time.\n@return 0 on success or a negative value on failure.\n*/\nint tsk_tree_get_time(const tsk_tree_t *self, tsk_id_t u, double *ret_time);\n\n/**\n@brief Return number of nodes on the path from the specified node to root.\n\n@rst\nReturn the number of nodes on the path from u to root, not including u.\nThe depth of a root is therefore zero.\n\nAs a special case, the depth of the\n:ref:`virtual root <sec_data_model_tree_roots>` is defined as -1.\n@endrst\n\n@param self A pointer to a tsk_tree_t object.\n@param u The tree node.\n@param ret_depth An int pointer to store the returned node depth.\n@return 0 on success or a negative value on failure.\n*/\nint tsk_tree_get_depth(const tsk_tree_t *self, tsk_id_t u, int *ret_depth);\n\n/**\n@brief Return the length of the branch ancestral to the specified node.\n\n@rst\nReturn the length of the branch ancestral to the specified node.\nBranch length is defined as difference between the time\nof a node and its parent. 
The branch length of a root is zero.\n@endrst\n\n@param self A pointer to a tsk_tree_t object.\n@param u The tree node.\n@param ret_branch_length A double pointer to store the returned branch length.\n@return 0 on success or a negative value on failure.\n*/\nint tsk_tree_get_branch_length(\n    const tsk_tree_t *self, tsk_id_t u, double *ret_branch_length);\n\n/**\n@brief Computes the sum of the lengths of all branches reachable from\n    the specified node, or from all roots if ``u=TSK_NULL``.\n\n@rst\nReturn the total branch length in a particular subtree or of the\nentire tree. If the specified node is :c:macro:`TSK_NULL` (or the\n:ref:`virtual root<sec_data_model_tree_roots>`)\nthe sum of the lengths of all branches reachable from roots\nis returned. Branch length is defined as difference between the time\nof a node and its parent. The branch length of a root is zero.\n\nNote that if the specified node is internal its branch length is\n*not* included, so that, e.g., the total branch length of a\nleaf node is zero.\n@endrst\n\n@param self A pointer to a tsk_tree_t object.\n@param u The root of the subtree of interest, or ``TSK_NULL`` to return the\n    total branch length of the tree.\n@param ret_tbl A double pointer to store the returned total branch length.\n@return 0 on success or a negative value on failure.\n*/\nint tsk_tree_get_total_branch_length(\n    const tsk_tree_t *self, tsk_id_t u, double *ret_tbl);\n\n/**\n@brief Counts the number of samples in the subtree rooted at a node.\n\n@rst\nReturns the number of samples descending from a particular node,\nincluding the node itself.\n\nThis is a constant time operation.\n@endrst\n\n@param self A pointer to a tsk_tree_t object.\n@param u The tree node.\n@param ret_num_samples A tsk_size_t pointer to store the returned\n    number of samples.\n@return 0 on success or a negative value on failure.\n*/\nint tsk_tree_get_num_samples(\n    const tsk_tree_t *self, tsk_id_t u, tsk_size_t 
*ret_num_samples);\n\n/**\n@brief Compute the most recent common ancestor of two nodes.\n\n@rst\nIf two nodes do not share a common ancestor in the current tree, the MRCA\nnode is :c:macro:`TSK_NULL`.\n@endrst\n\n@param self A pointer to a tsk_tree_t object.\n@param u A tree node.\n@param v A tree node.\n@param mrca A tsk_id_t pointer to store the returned most recent common ancestor node.\n@return 0 on success or a negative value on failure.\n*/\nint tsk_tree_get_mrca(const tsk_tree_t *self, tsk_id_t u, tsk_id_t v, tsk_id_t *mrca);\n\n/**\n@brief Returns true if u is a descendant of v.\n\n@rst\nReturns true if u and v are both valid nodes in the tree sequence\nand v lies on the path from u to root, and false otherwise.\n\nAny node is a descendant of itself.\n@endrst\n\n@param self A pointer to a tsk_tree_t object.\n@param u The descendant node.\n@param v The ancestral node.\n@return true if u is a descendant of v, and false otherwise.\n*/\nbool tsk_tree_is_descendant(const tsk_tree_t *self, tsk_id_t u, tsk_id_t v);\n\n/** @} */\n\n/**\n@defgroup TREE_API_TRAVERSAL_GROUP Traversal orders.\n@{\n*/\n\n/**\n@brief Fill an array with the nodes of this tree in preorder.\n\n@rst\nPopulate an array with the nodes in this tree in preorder. The array\nmust be pre-allocated and be sufficiently large to hold the array\nof nodes visited. The recommended approach is to use the\n:c:func:`tsk_tree_get_size_bound` function, as in the following example:\n\n.. 
code-block:: c\n\n    static void\n    print_preorder(tsk_tree_t *tree)\n    {\n        int ret;\n        tsk_size_t num_nodes, j;\n        tsk_id_t *nodes = malloc(tsk_tree_get_size_bound(tree) * sizeof(*nodes));\n\n        if (nodes == NULL) {\n            errx(EXIT_FAILURE, \"Out of memory\");\n        }\n        ret = tsk_tree_preorder(tree, nodes, &num_nodes);\n        check_tsk_error(ret);\n        for (j = 0; j < num_nodes; j++) {\n            printf(\"Visit preorder %lld\\n\", (long long) nodes[j]);\n        }\n        free(nodes);\n    }\n\n.. seealso::\n    See the :ref:`sec_c_api_examples_tree_traversals` section for\n    more examples.\n\n@endrst\n\n@param self A pointer to a tsk_tree_t object.\n@param nodes The tsk_id_t array to store nodes in. See notes above for\n    details.\n@param num_nodes A pointer to a tsk_size_t value where we store the number\n    of nodes in the traversal.\n@return 0 on success or a negative value on failure.\n*/\nint tsk_tree_preorder(const tsk_tree_t *self, tsk_id_t *nodes, tsk_size_t *num_nodes);\n\n/**\n@brief Fill an array with the nodes of this tree starting from a particular node.\n\n@rst\nAs for :c:func:`tsk_tree_preorder` but starting the traversal at a particular node\n(which will be the first node in the traversal list). The\n:ref:`virtual root<sec_data_model_tree_roots>` is a valid input for this function\nand will be treated like any other tree node. 
The value ``-1`` is a special case,\nin which we visit all nodes reachable from the roots, and equivalent to\ncalling :c:func:`tsk_tree_preorder`.\n\nSee :c:func:`tsk_tree_preorder` for details of the requirements for the ``nodes``\narray.\n@endrst\n\n@param self A pointer to a tsk_tree_t object.\n@param root The root of the subtree to traverse, or -1 to visit all nodes.\n@param nodes The tsk_id_t array to store nodes in.\n@param num_nodes A pointer to a tsk_size_t value where we store the number\n    of nodes in the traversal.\n@return 0 on success or a negative value on failure.\n*/\nint tsk_tree_preorder_from(\n    const tsk_tree_t *self, tsk_id_t root, tsk_id_t *nodes, tsk_size_t *num_nodes);\n\n/**\n@brief Fill an array with the nodes of this tree in postorder.\n\n@rst\nPopulate an array with the nodes in this tree in postorder. The array\nmust be pre-allocated and be sufficiently large to hold the array\nof nodes visited. The recommended approach is to use the\n:c:func:`tsk_tree_get_size_bound` function, as in the following example:\n\n.. code-block:: c\n\n    static void\n    print_postorder(tsk_tree_t *tree)\n    {\n        int ret;\n        tsk_size_t num_nodes, j;\n        tsk_id_t *nodes = malloc(tsk_tree_get_size_bound(tree) * sizeof(*nodes));\n\n        if (nodes == NULL) {\n            errx(EXIT_FAILURE, \"Out of memory\");\n        }\n        ret = tsk_tree_postorder(tree, nodes, &num_nodes);\n        check_tsk_error(ret);\n        for (j = 0; j < num_nodes; j++) {\n            printf(\"Visit postorder %lld\\n\", (long long) nodes[j]);\n        }\n        free(nodes);\n    }\n\n.. seealso::\n    See the :ref:`sec_c_api_examples_tree_traversals` section for\n    more examples.\n\n@endrst\n\n@param self A pointer to a tsk_tree_t object.\n@param nodes The tsk_id_t array to store nodes in. 
See notes above for\n    details.\n@param num_nodes A pointer to a tsk_size_t value where we store the number\n    of nodes in the traversal.\n@return 0 on success or a negative value on failure.\n*/\nint tsk_tree_postorder(const tsk_tree_t *self, tsk_id_t *nodes, tsk_size_t *num_nodes);\n\n/**\n@brief Fill an array with the nodes of this tree starting from a particular node.\n\n@rst\nAs for :c:func:`tsk_tree_postorder` but starting the traversal at a particular node\n(which will be the last node in the traversal list). The\n:ref:`virtual root<sec_data_model_tree_roots>` is a valid input for this function\nand will be treated like any other tree node. The value ``-1`` is a special case,\nin which we visit all nodes reachable from the roots, and equivalent to\ncalling :c:func:`tsk_tree_postorder`.\n\nSee :c:func:`tsk_tree_postorder` for details of the requirements for the ``nodes``\narray.\n@endrst\n\n@param self A pointer to a tsk_tree_t object.\n@param root The root of the subtree to traverse, or -1 to visit all nodes.\n@param nodes The tsk_id_t array to store nodes in. 
See\n    :c:func:`tsk_tree_postorder` for more details.\n@param num_nodes A pointer to a tsk_size_t value where we store the number\n    of nodes in the traversal.\n@return 0 on success or a negative value on failure.\n*/\nint tsk_tree_postorder_from(\n    const tsk_tree_t *self, tsk_id_t root, tsk_id_t *nodes, tsk_size_t *num_nodes);\n\n/** @} */\n\n/* Undocumented for now */\n\nint tsk_tree_preorder_samples_from(\n    const tsk_tree_t *self, tsk_id_t root, tsk_id_t *nodes, tsk_size_t *num_nodes);\n\nint tsk_tree_set_root_threshold(tsk_tree_t *self, tsk_size_t root_threshold);\ntsk_size_t tsk_tree_get_root_threshold(const tsk_tree_t *self);\n\nbool tsk_tree_has_sample_counts(const tsk_tree_t *self);\nbool tsk_tree_has_sample_lists(const tsk_tree_t *self);\n\nint tsk_tree_get_num_tracked_samples(\n    const tsk_tree_t *self, tsk_id_t u, tsk_size_t *num_tracked_samples);\nint tsk_tree_set_tracked_samples(\n    tsk_tree_t *self, tsk_size_t num_tracked_samples, const tsk_id_t *tracked_samples);\nint tsk_tree_track_descendant_samples(tsk_tree_t *self, tsk_id_t node);\n\ntypedef struct {\n    tsk_id_t node;\n    tsk_id_t parent;\n    int32_t state;\n} tsk_state_transition_t;\n\nint tsk_tree_map_mutations(tsk_tree_t *self, int32_t *genotypes, double *cost_matrix,\n    tsk_flags_t options, int32_t *ancestral_state, tsk_size_t *num_transitions,\n    tsk_state_transition_t **transitions);\n\nint tsk_tree_kc_distance(\n    const tsk_tree_t *self, const tsk_tree_t *other, double lambda, double *result);\n\n/* Don't document these balance metrics for now so it doesn't get in the way of\n * C API 1.0, but should be straightforward to document based on Python docs. 
*/\nint tsk_tree_sackin_index(const tsk_tree_t *self, tsk_size_t *result);\nint tsk_tree_colless_index(const tsk_tree_t *self, tsk_size_t *result);\nint tsk_tree_b1_index(const tsk_tree_t *self, double *result);\n/* NOTE: if we document this as part of the C API we'll have to be more careful\n * about the error behaviour on bad log bases. At the moment we're just returning\n * the resulting value which can be nan, inf etc, but some surprising results\n * happen like a base 0 seems to return a finite value. */\nint tsk_tree_b2_index(const tsk_tree_t *self, double base, double *result);\n\nint tsk_tree_num_lineages(const tsk_tree_t *self, double t, tsk_size_t *result);\n\n/* Things to consider removing: */\n\n/* This is redundant, really */\nbool tsk_tree_is_sample(const tsk_tree_t *self, tsk_id_t u);\n\n/* Not terribly useful, since the definition is\n * return (self->tree_sequence == other->tree_sequence) && (self->index == other->index)\n * Remove?\n */\nbool tsk_tree_equals(const tsk_tree_t *self, const tsk_tree_t *other);\n\nint tsk_tree_position_init(\n    tsk_tree_position_t *self, const tsk_treeseq_t *tree_sequence, tsk_flags_t options);\nint tsk_tree_position_free(tsk_tree_position_t *self);\nint tsk_tree_position_print_state(const tsk_tree_position_t *self, FILE *out);\nbool tsk_tree_position_next(tsk_tree_position_t *self);\nbool tsk_tree_position_prev(tsk_tree_position_t *self);\nint tsk_tree_position_seek_forward(tsk_tree_position_t *self, tsk_id_t index);\nint tsk_tree_position_seek_backward(tsk_tree_position_t *self, tsk_id_t index);\n\n#ifdef __cplusplus\n}\n#endif\n#endif\n"
  },
  {
    "path": "c/tskit.h",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2024 Tskit Developers\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n/**\n * @file tskit.h\n * @brief Tskit API.\n */\n#ifndef __TSKIT_H__\n#define __TSKIT_H__\n\n#include <tskit/core.h>\n#include <tskit/trees.h>\n#include <tskit/genotypes.h>\n#include <tskit/convert.h>\n#include <tskit/stats.h>\n#include <tskit/haplotype_matching.h>\n\n#endif\n"
  },
  {
    "path": "codecov.yml",
    "content": "ignore:\n  - \"c/tests/\"\n  - \"c/subprojects/**/*\"\n\ncodecov:\n  require_ci_to_pass: false\n\ncomment:\n  layout: \"header, diff, flags, components\"  # show component info in the PR comment\n\n\ncomponent_management:\n  individual_components:\n    - component_id: python_code\n      name: Python API\n      paths:\n        - python/tskit/*.py\n\n    - component_id: python_c_code\n      name: Python C interface\n      paths:\n        - python/_tskitmodule.c\n        - python/lwt_interface/tskit_lwt_interface.h\n\n    - component_id: c_code\n      name: C library\n      paths:\n        - c/tskit\n"
  },
  {
    "path": "docs/.gitignore",
    "content": "_build\ndoxygen/xml\n"
  },
  {
    "path": "docs/Makefile",
    "content": "BUILDDIR    = _build\nDOXYGEN_XML = doxygen/xml\n\nall: ${DOXYGEN_XML}\n\t./build.sh\n\n${DOXYGEN_XML}: ../c/tskit/*.h\n\tcd doxygen && doxygen\n\nclean:\n\trm -fR $(BUILDDIR) $(DOXYGEN_XML)\n"
  },
  {
    "path": "docs/_config.yml",
    "content": "# Book settings\n# Learn more at https://jupyterbook.org/customize/config.html\n\ntitle: Tskit manual\nauthor: Tskit Developers\ncopyright: \"2022\"\nonly_build_toc_files: true\nlogo: logo.svg\nfavicon: favicon.ico\n\nexecute:\n  execute_notebooks: cache\n  timeout: 120\n\nlaunch_buttons:\n  binderhub_url: \"\"\n\nrepository:\n  url: https://github.com/tskit-dev/tskit\n  branch: main\n  path_to_book: docs\n\nhtml:\n  use_issues_button: true\n  use_repository_button: true\n  use_edit_page_button: true\n\nsphinx:\n    extra_extensions:\n    - sphinx_copybutton\n    - breathe\n    - sphinx.ext.autodoc\n    - sphinx_autodoc_typehints\n    - sphinx.ext.autosummary\n    - sphinx.ext.todo\n    - sphinx.ext.viewcode\n    - sphinx.ext.intersphinx\n    - sphinx_issues\n    - sphinxarg.ext\n    - IPython.sphinxext.ipython_console_highlighting\n    #- sphinxcontrib.prettyspecialmethods\n\n    config:\n      html_theme: sphinx_book_theme\n      html_theme_options:\n        pygments_dark_style: monokai\n        navigation_with_keys: false\n        logo:\n          text: \"Version __PKG_VERSION__\"\n        repository_url: https://github.com/tskit-dev/tskit\n        repository_branch: main\n        path_to_docs: docs\n        use_repository_button: true\n        use_edit_page_button: true\n        use_issues_button: true\n      pygments_style: monokai\n      myst_enable_extensions:\n      - colon_fence\n      - deflist\n      - dollarmath\n      - substitution\n      issues_github_path: tskit-dev/tskit\n      todo_include_todos: true\n      intersphinx_mapping:\n        python: [\"https://docs.python.org/3/\", null]\n        tutorials: [\"https://tskit.dev/tutorials/\", null]\n        stdpopsim: [\"https://stdpopsim.readthedocs.io/en/stable\", null]\n        msprime: [\"https://tskit.dev/msprime/docs/stable/\", null]\n        numpy: [\"https://numpy.org/doc/stable/\", null]\n\n      breathe_projects: {\"tskit\": \"doxygen/xml\"}\n      breathe_default_project: 
\"tskit\"\n      breathe_domain_by_extension: {\"h\": \"c\"}\n      breathe_show_define_initializer: True\n\n      # Note we have to use the regex version here because of\n      # https://github.com/sphinx-doc/sphinx/issues/9748\n      nitpick_ignore_regex: [\n          [\"c:identifier\", \"uint8_t\"],\n          [\"c:identifier\", \"int32_t\"],\n          [\"c:identifier\", \"uint32_t\"],\n          [\"c:identifier\", \"uint64_t\"],\n          [\"c:identifier\", \"FILE\"],\n          [\"c:identifier\", \"bool\"],\n          # This is for the anonymous interval struct embedded in the tsk_tree_t.\n          [\"c:identifier\", \"tsk_tree_t.@4\"],\n          [\"c:type\", \"int32_t\"],\n          [\"c:type\", \"uint32_t\"],\n          [\"c:type\", \"uint64_t\"],\n          [\"c:type\", \"bool\"],\n          # TODO these have been triaged here to make the docs compile, but we should\n          # sort them out properly. https://github.com/tskit-dev/tskit/issues/336\n          [\"py:class\", \"array_like\"],\n          [\"py:class\", \"row-like\"],\n          [\"py:class\", \"array-like\"],\n          [\"py:class\", \"dtype=np.uint32\"],\n          [\"py:class\", \"dtype=np.uint32.\"],\n          [\"py:class\", \"dtype=np.int32\"],\n          [\"py:class\", \"dtype=np.int8\"],\n          [\"py:class\", \"dtype=np.float64\"],\n          [\"py:class\", \"dtype=np.int64\"],\n      ]\n\n      # Added to allow \"bool\" be used as a :ctype: - this list has to be\n      # manually specifed in order to remove \"bool\" from it.\n      c_extra_keywords: [\n        \"alignas\",\n        \"alignof\",\n        \"complex\",\n        \"imaginary\",\n        \"noreturn\",\n        \"static_assert\",\n        \"thread_local\"\n      ]\n\n      autodoc_member_order: bysource\n\n      # Without this option, autodoc tries to put links for all return types\n      # in terms of the fully-qualified classnames which we don't want, and also\n      # leads to broken links and nitpick failures. 
So, until we tackle\n      # typehints fully, this is the simplest approach.\n      autodoc_typehints: none\n\n"
  },
  {
    "path": "docs/_static/README",
    "content": "Placeholder file to keep git happy.\n"
  },
  {
    "path": "docs/_static/bespoke.css",
    "content": "/* When a code cell outputs tskit tables in plain text, widen the tab size so column\n   contents line up. Invoke this by adding :tags:[\"output-wide-tabs\"] to the cell */\n.tag_output-wide-tabs .cell_output pre {tab-size: 16}\n"
  },
  {
    "path": "docs/_toc.yml",
    "content": "format: jb-book\nroot: introduction\nparts:\n- caption: Getting started\n  chapters:\n  - file: installation\n  - file: quickstart\n- caption: Concepts\n  chapters:\n  - file: glossary\n  - file: data-model\n  - file: metadata\n  - file: provenance\n- caption: Analysis\n  chapters:\n  - file: stats\n  - file: topological-analysis\n  - file: ibd\n  - file: export\n- caption: Interfaces\n  chapters:\n  - file: python-api\n  - file: numba\n  - file: c-api\n  - file: cli\n  - file: file-formats\n- caption: For developers\n  chapters:\n  - file: development\n  - file: changelogs\n- caption: Miscellaneous\n  chapters:\n  - file: citation\n\n"
  },
  {
    "path": "docs/build.sh",
    "content": "#/bin/bash\n\n# Jupyter-build doesn't have an option to automatically show the \n# saved reports, which makes it difficult to debug the reasons for \n# build failures in CI. This is a simple wrapper to handle that.\n\nREPORTDIR=_build/html/reports\n\nuv run --project=../python --group docs jupyter-book build . -vnW --keep-going\nRETVAL=$?\nif [ $RETVAL -ne 0 ]; then\n    if [ -e $REPORTDIR ]; then\n      echo \"Error occured; showing saved reports\"\n      cat $REPORTDIR/*\n    fi\nelse\n    # Clear out any old reports\n    rm -f $REPORTDIR/*\nfi\nexit $RETVAL\n"
  },
  {
    "path": "docs/c-api.rst",
    "content": ".. _sec_c_api:\n\n=====\nC API\n=====\n\nThis is the documentation for the ``tskit`` C API, a low-level library\nfor manipulating and processing :ref:`tree sequence data <sec_data_model>`.\nThe library is written using the C99 standard and is fully thread safe.\nTskit uses `kastore <https://kastore.readthedocs.io/>`_ to define a\nsimple storage format for the tree sequence data.\n\nTo see the API in action, please see :ref:`sec_c_api_examples` section.\n\n********\nOverview\n********\n\n--------------------\nDo I need the C API?\n--------------------\n\nThe ``tskit`` C API is generally useful in the following situations:\n\n- You want to use the ``tskit`` API in a larger C/C++ application (e.g.,\n  in order to output data in the ``.trees`` format);\n- You need to perform lots of tree traversals/loops etc. to analyse some\n  data that is in tree sequence form.\n\nFor high level operations that are not performance sensitive, the :ref:`sec_python_api`\nis generally more useful. Python is *much* more convenient that C,\nand since the ``tskit`` Python module is essentially a wrapper for this\nC library, there's often no real performance penalty for using it.\n\n-------------------------------\nDifferences with the Python API\n-------------------------------\n\nMuch of the explanatory material (for example tutorials) about the Python API applies to\nthe C-equivalent methods as the Python API wraps this API.\n\nThe main area of difference is, unlike the Python API, the C API doesn't do any\ndecoding, encoding or schema validation of :ref:`sec_metadata` fields,\ninstead only handling the byte string representation of the metadata. 
Metadata is therefore\nnever used directly by any tskit C API method, just stored.\n\n----------------------\nAPI stability contract\n----------------------\n\nSince the C API 1.0 release we pledge to make **no** breaking changes\nto the documented API in subsequent releases in the 1.0 series.\nWhat this means is that any code that compiles under the 1.0 release\nshould also compile without changes in subsequent 1.x releases. We\nwill not change the semantics of documented functions,  unless it is to\nfix clearly buggy behaviour. We will not change the values of macro\nconstants.\n\nUndocumented functions do not have this guarantee, and may be changed\narbitrarily between releases.\n\n.. note::\n    We do not currently make any guarantees about\n    `ABI <https://en.wikipedia.org/wiki/Application_binary_interface>`__\n    stability, since the primary use-case is for tskit to be embedded\n    within another application rather than used as a shared library. If you\n    do intend to use tskit as a shared library and ABI stability is\n    therefore important to you, please let us know and we can plan\n    accordingly.\n\n.. _sec_c_api_overview_structure:\n\n-------------\nAPI structure\n-------------\n\nTskit uses a set of conventions to provide a pseudo object-oriented API. Each\n'object' is represented by a C struct and has a set of 'methods'. This is\nmost easily explained by an example:\n\n.. literalinclude:: ../c/examples/api_structure.c\n    :language: c\n\nIn this program we create a :c:type:`tsk_edge_table_t` instance, add five rows\nusing :c:func:`tsk_edge_table_add_row`, print out its contents using the\n:c:func:`tsk_edge_table_print_state` debugging method, and finally free\nthe memory used by the edge table object. We define this edge table\n'class' by using some simple naming conventions which are adhered\nto throughout ``tskit``. 
This is simply a naming convention that helps to\nkeep code written in plain C logically structured; there are no extra C++ style features.\nWe use object oriented terminology freely throughout this documentation\nwith this understanding.\n\nIn this convention, a class is defined by a struct ``tsk_class_name_t`` (e.g.\n``tsk_edge_table_t``) and its methods all have the form ``tsk_class_name_method_name``\nwhose first argument is always a pointer to an instance of the class (e.g.,\n``tsk_edge_table_add_row`` above).\nEach class has an initialise and free method, called ``tsk_class_name_init``\nand ``tsk_class_name_free``, respectively. The init method must\nbe called to ensure that the object is correctly initialised (except\nfor functions such as for :c:func:`tsk_table_collection_load`\nand :c:func:`tsk_table_collection_copy` which automatically initialise\nthe object by default for convenience). The free\nmethod must always be called to avoid leaking memory, even in the\ncase of an error occurring during initialisation. If ``tsk_class_name_init`` has\nbeen called successfully, we say the object has been \"initialised\"; if not,\nit is \"uninitialised\". After ``tsk_class_name_free`` has been called,\nthe object is again uninitialised.\n\nIt is important to note that the init methods only allocate *internal* memory;\nthe memory for the instance itself must be allocated either on the\nheap or the stack:\n\n.. code-block:: c\n\n    // Instance allocated on the stack\n    tsk_node_table_t nodes;\n    tsk_node_table_init(&nodes, 0);\n    tsk_node_table_free(&nodes);\n\n    // Instance allocated on the heap\n    tsk_edge_table_t *edges = malloc(sizeof(tsk_edge_table_t));\n    tsk_edge_table_init(edges, 0);\n    tsk_edge_table_free(edges);\n    free(edges);\n\n\n.. 
_sec_c_api_error_handling:\n\n--------------\nError handling\n--------------\n\nC does not have a mechanism for propagating exceptions, and great care\nmust be taken to ensure that errors are correctly and safely handled.\nThe convention adopted in ``tskit`` is that\nevery function (except for trivial accessor methods) returns\nan integer. If this return value is negative an error has occurred which\nmust be handled. A description of the error that occurred can be obtained\nusing the :c:func:`tsk_strerror` function. The following example illustrates\nthe key conventions around error handling in ``tskit``:\n\n.. literalinclude:: ../c/examples/error_handling.c\n    :language: c\n\nIn this example we load a tree sequence from file and print out a summary\nof the number of nodes and edges it contains. After calling\n:c:func:`tsk_treeseq_load` we check the return value ``ret`` to see\nif an error occurred. If an error has occurred we exit with an error\nmessage produced by :c:func:`tsk_strerror`. Note that in this example we call\n:c:func:`tsk_treeseq_free` whether or not an error occurs: in general,\nonce a function that initialises an object (e.g., ``X_init``, ``X_copy``\nor ``X_load``) is called, then ``X_free`` must\nbe called to ensure that memory is not leaked.\n\nMost functions in ``tskit`` return an error status; we recommend that **every**\nreturn value is checked.\n\n.. _sec_c_api_memory_allocation_strategy:\n\n--------------------------\nMemory allocation strategy\n--------------------------\n\nTo reduce the frequency of memory allocations tskit pre-allocates space for\nadditional table rows in each table, along with space for the contents of\nragged columns. The default behaviour is to start with space for 1,024 rows\nin each table and 65,536 bytes in each ragged column. The table then grows\nas needed by doubling, until a maximum pre-allocation of 2,097,152 rows for\na table or 104,857,600 bytes for a ragged column. 
This behaviour can be\ndisabled and a fixed increment used, on a per-table and per-ragged-column\nbasis using the ``tsk_X_table_set_max_rows_increment`` and\n``tsk_provenance_table_set_max_X_length_increment`` methods where ``X`` is\nthe name of the table or column.\n\n---------------------------\nUsing tskit in your project\n---------------------------\n\nTskit is built as a standard C library and so there are many different ways\nin which it can be included in downstream projects. It is possible to\ninstall ``tskit`` onto a system (i.e., installing a shared library and\nheader files to standard locations on Unix) and link against it,\nbut there are many different ways in which this can go wrong. In the\ninterest of simplicity and improving the end-user experience we recommend\nembedding ``tskit`` directly into your applications.\n\nThere are many different build systems and approaches to compiling\ncode, and so it's not possible to give definitive documentation on\nhow ``tskit`` should be included in downstream projects. Please\nsee the `build examples <https://github.com/tskit-dev/tskit-build-examples>`_\nrepo for some examples of how to incorporate ``tskit`` into\ndifferent project structures and build systems.\n\nTskit uses the `meson <https://mesonbuild.com>`_ build system internally,\nand supports being used as a `meson subproject <https://mesonbuild.com/Subprojects.html>`_.\nWe show an `example <https://github.com/tskit-dev/tskit-build-examples/tree/main/meson>`_\nin which this is combined with the tskit distribution tarball to neatly\nabstract many details of cross-platform C development.\n\nSome users may choose to check the source for ``tskit`` directly into their source\ncontrol repositories. 
If you wish to do this, the code is in the ``c`` subdirectory of the\n`tskit <https://github.com/tskit-dev/tskit/tree/main/c>`_ repo.\nThe following header files should be placed in the search path:\n``subprojects/kastore/kastore.h``, ``tskit.h``, and ``tskit/*.h``.\nThe C files ``subprojects/kastore/kastore.c`` and ``tskit/*.c`` should be compiled.\nFor those who wish to minimise the size of their compiled binaries,\n``tskit`` is quite modular, and C files can be omitted if not needed.\nFor example, if you are just using the :ref:`sec_c_api_tables_api` then\nonly the files ``tskit/core.[c,h]`` and ``tskit/tables.[c,h]`` are\nneeded.\n\nHowever you include ``tskit`` in your project, however, please\nensure that it is a **released version**. Released versions are\ntagged on GitHub using the convention ``C_{VERSION}``. The code\ncan either be downloaded from GitHub on the `releases page\n<https://github.com/tskit-dev/tskit/releases>`_ where each release has a distribution\ntarball for example\nhttps://github.com/tskit-dev/tskit/releases/download/C_1.0.0/tskit-1.0.0.tar.xz\nAlternatively the code can be checked out\nusing git. For example, to check out the ``C_1.0.0`` release::\n\n    $ git clone https://github.com/tskit-dev/tskit.git\n    $ cd tskit\n    $ git checkout C_1.0.0\n\n\n\n***********\nBasic Types\n***********\n\n.. doxygentypedef:: tsk_id_t\n.. doxygentypedef:: tsk_size_t\n.. doxygentypedef:: tsk_flags_t\n.. doxygentypedef:: tsk_bool_t\n\n**************\nCommon options\n**************\n\n.. doxygengroup:: GENERIC_FUNCTION_OPTIONS\n   :content-only:\n\n**********\nTables API\n**********\n\nThe tables API section of ``tskit`` is defined in the ``tskit/tables.h`` header.\n\n-----------------\nTable collections\n-----------------\n\n.. doxygenstruct:: tsk_table_collection_t\n    :members:\n\n.. doxygenstruct:: tsk_bookmark_t\n    :members:\n\n.. doxygengroup:: TABLE_COLLECTION_API_GROUP\n    :content-only:\n\n-----------\nIndividuals\n-----------\n\n.. 
doxygenstruct:: tsk_individual_t\n    :members:\n\n.. doxygenstruct:: tsk_individual_table_t\n    :members:\n\n.. doxygengroup:: INDIVIDUAL_TABLE_API_GROUP\n   :content-only:\n\n-----\nNodes\n-----\n\n.. doxygenstruct:: tsk_node_t\n    :members:\n\n.. doxygenstruct:: tsk_node_table_t\n    :members:\n\n.. doxygengroup:: NODE_TABLE_API_GROUP\n   :content-only:\n\n-----\nEdges\n-----\n\n.. doxygenstruct:: tsk_edge_t\n    :members:\n\n.. doxygenstruct:: tsk_edge_table_t\n    :members:\n\n.. doxygengroup:: EDGE_TABLE_API_GROUP\n    :content-only:\n\n----------\nMigrations\n----------\n\n.. doxygenstruct:: tsk_migration_t\n    :members:\n\n.. doxygenstruct:: tsk_migration_table_t\n    :members:\n\n.. doxygengroup:: MIGRATION_TABLE_API_GROUP\n    :content-only:\n\n-----\nSites\n-----\n\n.. doxygenstruct:: tsk_site_t\n    :members:\n\n.. doxygenstruct:: tsk_site_table_t\n    :members:\n\n.. doxygengroup:: SITE_TABLE_API_GROUP\n    :content-only:\n\n---------\nMutations\n---------\n\n.. doxygenstruct:: tsk_mutation_t\n    :members:\n\n.. doxygenstruct:: tsk_mutation_table_t\n    :members:\n\n.. doxygengroup:: MUTATION_TABLE_API_GROUP\n    :content-only:\n\n-----------\nPopulations\n-----------\n\n.. doxygenstruct:: tsk_population_t\n    :members:\n\n.. doxygenstruct:: tsk_population_table_t\n    :members:\n\n.. doxygengroup:: POPULATION_TABLE_API_GROUP\n    :content-only:\n\n-----------\nProvenances\n-----------\n\n.. doxygenstruct:: tsk_provenance_t\n    :members:\n\n.. doxygenstruct:: tsk_provenance_table_t\n    :members:\n\n.. doxygengroup:: PROVENANCE_TABLE_API_GROUP\n    :content-only:\n\n\n.. _sec_c_api_table_indexes:\n\n-------------\nTable indexes\n-------------\n\nAlong with the tree sequence :ref:`ordering requirements\n<sec_valid_tree_sequence_requirements>`, the :ref:`sec_table_indexes`\nallow us to take a table collection and efficiently operate\non the trees defined within it. 
This section defines the rules\nfor safely operating on table indexes and their life-cycle.\n\nThe edge index used for tree generation consists of two arrays,\neach holding ``N`` edge IDs (where ``N`` is the size of the edge\ntable). When the index is computed using\n:c:func:`tsk_table_collection_build_index`, we store the current size\nof the edge table along with the two arrays of edge IDs. The\nfunction :c:func:`tsk_table_collection_has_index` then returns true\niff (a) both of these arrays are not NULL and (b) the stored\nnumber of edges is the same as the current size of the edge table.\n\nUpdating the edge table does not automatically invalidate the indexes.\nThus, if we call :c:func:`tsk_edge_table_clear` on an edge table\nwhich has an index, this index will still exist. However, it will\nnot be considered a valid index by\n:c:func:`tsk_table_collection_has_index` because of the size mismatch.\nSimilarly for functions that increase the size of the table.\nNote that it is possible then to have\n:c:func:`tsk_table_collection_has_index` return true, but the index\nis not actually valid, if, for example, the user has manipulated the\nnode and edge tables to describe a different topology, which happens\nto have the same number of edges. The behaviour of methods that\nuse the indexes will be undefined in this case.\n\nThus, if you are manipulating an existing table collection that may\nbe indexed, it is always recommended to call\n:c:func:`tsk_table_collection_drop_index` first.\n\n.. _sec_c_api_tree_sequences:\n\n**************\nTree sequences\n**************\n\n.. doxygenstruct:: tsk_treeseq_t\n    :members:\n\n.. doxygengroup:: TREESEQ_API_GROUP\n    :content-only:\n\n.. _sec_c_api_trees:\n\n*****\nTrees\n*****\n\n.. doxygenstruct:: tsk_tree_t\n    :members:\n\n---------\nLifecycle\n---------\n\n.. doxygengroup:: TREE_API_LIFECYCLE_GROUP\n    :content-only:\n\n.. 
_sec_c_api_trees_null:\n\n----------\nNull state\n----------\n\nTrees are initially in a \"null state\" where each sample is a\nroot and there are no branches. The ``index`` of a tree in the\nnull state is ``-1``.\n\nWe must call one of the\n:ref:`seeking<sec_c_api_trees_seeking>` methods\nto make the state of the tree object correspond to a particular tree\nin the sequence.\n\n.. _sec_c_api_trees_seeking:\n\n-------\nSeeking\n-------\n\nWhen we are examining many trees along a tree sequence,\nwe usually allocate a single :c:struct:`tsk_tree_t` object\nand update its state. This allows us to efficiently transform\nthe state of a tree into nearby trees, using the underlying succinct tree\nsequence data structure.\n\nThe simplest example is to visit trees left-to-right along the genome:\n\n.. code-block:: c\n    :linenos:\n\n    int\n    visit_trees(const tsk_treeseq_t *ts)\n    {\n        tsk_tree_t tree;\n        int ret;\n\n        ret = tsk_tree_init(&tree, ts, 0);\n        if (ret != 0) {\n            goto out;\n        }\n        for (ret = tsk_tree_first(&tree); ret == TSK_TREE_OK; ret = tsk_tree_next(&tree)) {\n            printf(\"\\ttree %lld covers interval left=%f right=%f\\n\",\n                (long long) tree.index, tree.interval.left, tree.interval.right);\n        }\n        if (ret != 0) {\n            goto out;\n        }\n        // Do other things in the function...\n    out:\n        tsk_tree_free(&tree);\n        return ret;\n    }\n\n\nIn this example we first initialise a :c:struct:`tsk_tree_t` object,\nassociating it with the input tree sequence. We then iterate over the\ntrees along the sequence using a ``for`` loop, with the ``ret`` variable\ncontrolling iteration. 
The usage of ``ret`` here follows a slightly\ndifferent pattern to other functions in the tskit C API\n(see the :ref:`sec_c_api_error_handling` section).\nThe interaction between error handling and states\nof the ``tree`` object here is somewhat subtle, and is worth explaining\nin detail.\n\nAfter successful initialisation (after line 10), the tree is in the\n:ref:`null state<sec_c_api_trees_null>` where all samples are roots.\nThe ``for`` loop begins by calling :c:func:`tsk_tree_first` which\ntransforms the state of the tree into the first (leftmost) tree in\nthe sequence. If this operation is successful, :c:func:`tsk_tree_first`\nreturns :c:data:`TSK_TREE_OK`. We then check the value of ``ret``\nin the loop condition to see if it is equal\nto :c:data:`TSK_TREE_OK` and execute the loop body for the\nfirst tree in the sequence.\n\nOn completing the loop body for the first tree in the sequence,\nwe then execute the ``for`` loop increment operation, which\ncalls :c:func:`tsk_tree_next` and assigns the returned value to\n``ret``. This function efficiently transforms the current state\nof ``tree`` so that it represents the next tree along the genome,\nand returns :c:data:`TSK_TREE_OK` if the operation succeeds.\nWhen :c:func:`tsk_tree_next` is called on the last tree in the\nsequence, the state of ``tree`` is set back to the\n:ref:`null state<sec_c_api_trees_null>` and the return value is 0.\n\nThus, the loop on lines 11-14 can exit in two ways:\n\n1. Either we successfully iterate over all trees in the sequence and\n   ``ret`` has the value ``0`` at line 15; or\n2. An error occurs during :c:func:`tsk_tree_first` or\n   :c:func:`tsk_tree_next`, and ret contains a negative value.\n\n.. warning::\n    It is **vital** that you check the value of ``ret`` immediately\n    after the loop exits like we do here at line 15, or errors can be silently\n    lost. (Although it's redundant here, as we don't do anything else in the\n    function.)\n\n.. 
seealso::\n    See the :ref:`examples<sec_c_api_examples_tree_iteration>` section for\n    more examples of sequential seeking, including\n    an example of using\n    :c:func:`tsk_tree_last` and :c:func:`tsk_tree_prev`\n    to iterate from right-to-left.\n\n.. note::\n    Seeking functions\n    :c:func:`tsk_tree_first`,\n    :c:func:`tsk_tree_last`,\n    :c:func:`tsk_tree_next`,\n    :c:func:`tsk_tree_prev`,\n    and :c:func:`tsk_tree_seek`\n    can be called in any order and from any non-error state.\n\n.. doxygengroup:: TREE_API_SEEKING_GROUP\n    :content-only:\n\n------------\nTree queries\n------------\n\n.. doxygengroup:: TREE_API_TREE_QUERY_GROUP\n    :content-only:\n\n------------\nNode queries\n------------\n\n.. doxygengroup:: TREE_API_NODE_QUERY_GROUP\n    :content-only:\n\n----------------\nTraversal orders\n----------------\n\n.. doxygengroup:: TREE_API_TRAVERSAL_GROUP\n    :content-only:\n\n\n.. _sec_c_api_low_level_sorting:\n\n*****************\nLow-level sorting\n*****************\n\nIn some highly performance sensitive cases it can be useful to\nhave more control over the process of sorting tables. This low-level\nAPI allows a user to provide their own edge sorting function.\nThis can be useful, for example, to use parallel sorting algorithms,\nor to take advantage of the more efficient sorting procedures\navailable in C++. It is the user's responsibility to ensure that the\nedge sorting requirements are fulfilled by this function.\n\n.. todo::\n    Create an idiomatic C++11 example where we load a table collection\n    file from argv[1], and sort the edges  using std::sort, based\n    on the example in tests/test_minimal_cpp.cpp. We can include\n    this in the examples below, and link to it here.\n\n.. doxygenstruct:: _tsk_table_sorter_t\n    :members:\n\n.. 
doxygengroup:: TABLE_SORTER_API_GROUP\n    :content-only:\n\n******************\nDecoding genotypes\n******************\n\nObtaining genotypes for samples at specific sites is achieved via\n:c:struct:`tsk_variant_t` and its methods.\n\n.. doxygenstruct:: tsk_variant_t\n    :members:\n\n.. doxygengroup:: VARIANT_API_GROUP\n    :content-only:\n\n\n***********************\nMiscellaneous functions\n***********************\n\n.. doxygenfunction:: tsk_strerror\n\n.. doxygenfunction:: tsk_is_unknown_time\n\n*************************\nFunction Specific Options\n*************************\n\n-------------\nLoad and init\n-------------\n.. doxygengroup:: API_FLAGS_LOAD_INIT_GROUP\n    :content-only:\n\n--------------------------\n:c:func:`tsk_treeseq_init`\n--------------------------\n.. doxygengroup:: API_FLAGS_TS_INIT_GROUP\n    :content-only:\n\n-----------------------------------------------------------------------\n:c:func:`tsk_treeseq_simplify`, :c:func:`tsk_table_collection_simplify`\n-----------------------------------------------------------------------\n.. doxygengroup:: API_FLAGS_SIMPLIFY_GROUP\n    :content-only:\n\n----------------------------------------------\n:c:func:`tsk_table_collection_check_integrity`\n----------------------------------------------\n.. doxygengroup:: API_FLAGS_CHECK_INTEGRITY_GROUP\n    :content-only:\n\n------------------------------------\n:c:func:`tsk_table_collection_clear`\n------------------------------------\n.. doxygengroup:: API_FLAGS_CLEAR_GROUP\n    :content-only:\n\n-----------------------------------\n:c:func:`tsk_table_collection_copy`\n-----------------------------------\n.. doxygengroup:: API_FLAGS_COPY_GROUP\n    :content-only:\n\n----------------------\nAll equality functions\n----------------------\n.. doxygengroup:: API_FLAGS_CMP_GROUP\n    :content-only:\n\n-------------------------------------\n:c:func:`tsk_table_collection_subset`\n-------------------------------------\n.. 
doxygengroup:: API_FLAGS_SUBSET_GROUP\n    :content-only:\n\n------------------------------------\n:c:func:`tsk_table_collection_union`\n------------------------------------\n.. doxygengroup:: API_FLAGS_UNION_GROUP\n    :content-only:\n\n\n*********\nConstants\n*********\n\n-----------\nAPI Version\n-----------\n\n.. doxygengroup:: API_VERSION_GROUP\n    :content-only:\n\n.. _sec_c_api_error_codes:\n\n----------------\nCommon constants\n----------------\n\n.. doxygengroup:: GENERIC_CONSTANTS\n   :content-only:\n\n.. _sec_c_api_tables_api:\n\n--------------\nGeneric Errors\n--------------\n\n.. doxygengroup:: GENERAL_ERROR_GROUP\n        :content-only:\n\n------------------\nFile format errors\n------------------\n\n.. doxygengroup:: FILE_FORMAT_ERROR_GROUP\n        :content-only:\n\n--------------------\nOut-of-bounds errors\n--------------------\n\n.. doxygengroup:: OOB_ERROR_GROUP\n        :content-only:\n\n-----------\nEdge errors\n-----------\n\n.. doxygengroup:: EDGE_ERROR_GROUP\n        :content-only:\n\n\n-----------\nSite errors\n-----------\n\n.. doxygengroup:: SITE_ERROR_GROUP\n        :content-only:\n\n\n---------------\nMutation errors\n---------------\n\n.. doxygengroup:: MUTATION_ERROR_GROUP\n        :content-only:\n\n\n----------------\nMigration errors\n----------------\n\n.. doxygengroup:: MIGRATION_ERROR_GROUP\n        :content-only:\n\n-------------\nSample errors\n-------------\n\n.. doxygengroup:: SAMPLE_ERROR_GROUP\n        :content-only:\n\n------------\nTable errors\n------------\n\n.. doxygengroup:: TABLE_ERROR_GROUP\n        :content-only:\n\n------------------------\nGenotype decoding errors\n------------------------\n\n.. doxygengroup:: GENOTYPE_ERROR_GROUP\n        :content-only:\n\n------------\nUnion errors\n------------\n\n.. doxygengroup:: UNION_ERROR_GROUP\n        :content-only:\n\n---------------\nSimplify errors\n---------------\n\n.. 
doxygengroup:: SIMPLIFY_ERROR_GROUP\n        :content-only:\n\n-----------------\nIndividual errors\n-----------------\n\n.. doxygengroup:: INDIVIDUAL_ERROR_GROUP\n        :content-only:\n\n-------------------\nExtend edges errors\n-------------------\n\n.. doxygengroup:: EXTEND_EDGES_ERROR_GROUP\n        :content-only:\n\n\n.. _sec_c_api_examples:\n\n********\nExamples\n********\n\n------------------------\nBasic forwards simulator\n------------------------\n\nThis is an example of using the tables API to define a simple\nhaploid Wright-Fisher simulator. Because this simple example\nrepeatedly sorts the edge data, it is quite inefficient and\nshould not be used as the basis of a large-scale simulator.\n\n.. note::\n\n   This example uses the C function ``rand`` and constant\n   ``RAND_MAX`` for random number generation.  These methods\n   are used for example purposes only and a high-quality\n   random number library should be preferred for code\n   used for research.  Examples include, but are not\n   limited to:\n\n   1. The `GNU Scientific Library <https://www.gnu.org/software/gsl>`_,\n      which is licensed under the GNU General Public License, version\n      3 (`GPL3+ <https://www.gnu.org/licenses/gpl-3.0.en.html>`_).\n   2. For C++ projects using C++11 or later,\n      the built-in `random <https://en.cppreference.com/w/cpp/numeric/random>`_\n      number library.\n   3. The `numpy C API <https://numpy.org/devdocs/reference/random/c-api.html>`_\n      may be useful for those writing Python extension modules in C/C++.\n\n.. todo::\n    Give a pointer to an example that caches and flushes edge data efficiently.\n    Probably using the C++ API?\n\n.. literalinclude:: ../c/examples/haploid_wright_fisher.c\n    :language: c\n\n.. _sec_c_api_examples_tree_iteration:\n\n--------------\nTree iteration\n--------------\n\n.. literalinclude:: ../c/examples/tree_iteration.c\n    :language: c\n\n\n.. 
_sec_c_api_examples_tree_traversals:\n\n---------------\nTree traversals\n---------------\n\nIn this example we load a tree sequence file, and then traverse the first\ntree in four different ways:\n\n1. We first traverse the tree in preorder and postorder using the\n   :c:func:`tsk_tree_preorder` and\n   :c:func:`tsk_tree_postorder` functions to fill an array of\n   nodes in the appropriate orders. This is the recommended approach\n   and will be convenient and efficient for most purposes.\n\n2. As an example of how we might build our own traversal algorithms, we\n   then traverse the tree in preorder using recursion. This is a very\n   common way of navigating around trees and can be convenient for\n   some applications. For example, here we compute the depth of each node\n   (i.e., its distance from the root) and use this when printing out the\n   nodes as we visit them.\n\n3. Then we traverse the tree in preorder using an iterative approach. This\n   is a little more efficient than using recursion, and is sometimes\n   more convenient than structuring the calculation recursively.\n\n4. In the final example we iterate upwards from the samples rather than\n   downwards from the root.\n\n.. literalinclude:: ../c/examples/tree_traversal.c\n    :language: c\n\n.. _sec_c_api_examples_file_streaming:\n\n--------------\nFile streaming\n--------------\n\nIt is often useful to read tree sequence files from a stream rather than\nfrom a fixed filename. This example shows how to do this using the\n:c:func:`tsk_table_collection_loadf` and\n:c:func:`tsk_table_collection_dumpf` functions. Here, we sequentially\nload table collections from the ``stdin`` stream and write them\nback out to ``stdout`` with their mutations removed.\n\n.. 
literalinclude:: ../c/examples/streaming.c\n    :language: c\n\nNote that we use the value :c:macro:`TSK_ERR_EOF` to detect when the stream\nends, as we don't know how many tree sequences to expect on the input.\nIn this case, :c:macro:`TSK_ERR_EOF` is not considered an error and we exit\nnormally.\n\nRunning this program on some tree sequence files we might get::\n\n    $ cat tmp1.trees tmp2.trees | ./build/streaming > no_mutations.trees\n    Tree sequence 0 had 38 mutations\n    Tree sequence 1 had 132 mutations\n\nThen, running this program again on the output of the previous command,\nwe see that we now have two tree sequences with their mutations removed\nstored in the file ``no_mutations.trees``::\n\n    $ ./build/streaming < no_mutations.trees > /dev/null\n    Tree sequence 0 had 0 mutations\n    Tree sequence 1 had 0 mutations\n\n------------------------------------\nParallel, multichromosome simulation\n------------------------------------\n\nA substantial bottleneck in forwards simulations using tree sequences\nis *simplification*. 
This is therefore a natural target for parallelization.\nThe potential for breaking up a chromosome into discrete chunks that\nare separately parallelized is limited, however, since any edge\nthat extends across the boundary between two chunks is split;\nthus creating more work.\nHowever, distinct chromosomes provide a natural target:\nthe edge tables describing inheritance for each chromosome can be\nindependently simplified, as long as we account for the fact that they all refer to\nthe same set of nodes.\nThis simulation keeps each chromosome in a separate tree sequence,\nbut they essentially share a common node table;\nthe :c:macro:`TSK_SIMPLIFY_NO_FILTER_NODES` flag is used so that\neach call to :c:func:`tsk_table_collection_simplify` does not\nchange the common node table.\nAfterwards, we iterate through the edge tables to determine which\nnodes need to be retained, and use\n:c:func:`tsk_node_table_keep_rows` to remove unused nodes.\n\n\n.. literalinclude:: ../c/examples/multichrom_wright_fisher.c\n    :language: c\n\n----------------------------\nReading and writing metadata\n----------------------------\n\nThe C API does not provide any functionality for manipulating\nthe contents of metadata. For JSON metadata it is easy to\nparse metadata using an external JSON library, and for\nstruct-encoded metadata the values can be directly unpacked.\nExamples of both can be found in \n`the SLiM code <https://messerlab.github.com/slim/>`_.\n\nThe :ref:`\"json+struct\" <sec_metadata_codecs_jsonstruct>`\nmetadata codec is a little less straightforward to use,\nso we provide here an example of how to write to it\nand read from it in C. See :ref:`sec_metadata_codecs_jsonstruct` \nfor details of how the metadata is encoded.\n(In Python, tskit automatically decodes both JSON and binary\nmetadata and provides it as Python-data-typed metadata,\njust as for other codecs.)\n\nThe structure of this example is as follows:\n\n1. 
Values specific to the metadata's header (e.g., the magic bytes ``JBLB``).\n2. Functions that encode/decode ``uint64_t``, used to store the lengths\n    of the two components in the header.\n3. A method to \"read\" the metadata: really, to get pointers to the\n    json and struct components.\n4. A method to \"write\" the metadata, again just given pointers to\n    and lengths of the two components.\n5. The program itself just round-trips a very simple chunk of metadata,\n    consisting of the JSON \"``{\"a\": 1}``\" and some binary ``uint8_t`` bytes (\"``1234``\").\n\n.. literalinclude:: ../c/examples/json_struct_metadata.c\n    :language: c\n\nMuch of the complexity of the code is careful error checking of the lengths.\n\nHere ``json_struct_codec_get_components`` takes a pointer to binary metadata\nand returns pointers to *within that memory*.\nA different approach might have copied the two portions of the metadata\ninto two buffers (to then be decoded, for instance).\nHowever, that would double the memory footprint,\nand since this codec is intended for large metadata,\nwe did not use that approach in this example.\n\nAlong the same lines, it is worth noting that this example does make a copy of\nthe JSON and binary data when writing, in ``json_struct_codec_create_buffer()``,\nwhich doubles the memory footprint at that point, and adds the\noverhead of copying the data.  A more efficient approach would be to calculate\nthe buffer length needed for the codec’s data, allocate the buffer with that\nlength, and then generate the necessary JSON and binary metadata directly into\nthat buffer.  This would require the metadata-generating code to be more\nclosely entwined with the code for handling the json+struct codec header and\npadding bytes, and so we have chosen not to adopt that approach here, for\npedagogical purposes; but if your use of this codec will involve large\nmetadata, such an approach is recommended.\n\n"
  },
  {
    "path": "docs/changelogs.rst",
    "content": ".. note: this is left in rst format to avoid Duplicate ID issues\n\n.. _sec_changelogs:\n\n==========\nChangelogs\n==========\n\n******\nPython\n******\n\n.. include:: ../python/CHANGELOG.rst\n\n*****\nC API\n*****\n\n.. include:: ../c/CHANGELOG.rst\n"
  },
  {
    "path": "docs/citation.md",
    "content": "(sec_citation)=\n\n# Citing tskit\n\nIf you use `tskit` in your work, we recommend citing the [2024 ARG Genetics paper](<https://doi.org/10.1093/genetics/iyae100>) and the [2016 msprime PLOS Computational Biology paper](<http://dx.doi.org/10.1371/journal.pcbi.1004842>):\n> Yan Wong, Anastasia Ignatieva, Jere Koskela, Gregor Gorjanc, Anthony W \n> Wohns, Jerome Kelleher, *A general and efficient representation of ancestral \n> recombination graphs*, Genetics, Volume 228, Issue 1, September 2024, iyae100, \n> https://doi.org/10.1093/genetics/iyae100\n\n> Jerome Kelleher, Alison M Etheridge and Gilean McVean (2016),\n> *Efficient Coalescent Simulation and Genealogical Analysis for Large Sample Sizes*,\n> PLOS Comput Biol 12(5): e1004842. doi: 10.1371/journal.pcbi.1004842\n\nIf you use summary statistics, please cite the\n[2020 Genetics paper](https://doi.org/10.1534/genetics.120.303253):\n\n> Peter Ralph, Kevin Thornton, Jerome Kelleher, *Efficiently Summarizing \n> Relationships in Large Samples: A General Duality Between Statistics of \n> Genealogies and Genomes*, Genetics, Volume 215, Issue 3, 1 July 2020, \n> Pages 779–797, https://doi.org/10.1534/genetics.120.303253\n\n\nBibtex records:\n\n```bibtex\n@article{Wong2024ARGs,\n  author    = {Wong, Yan and Ignatieva, Anastasia and Koskela, Jere and Gorjanc, Gregor and \n               Wohns, Anthony W and Kelleher, Jerome},\n  title     = {A general and efficient representation of ancestral recombination graphs},\n  journal   = {Genetics},\n  volume    = {228},\n  number    = {1},\n  pages     = {iyae100},\n  year      = {2024},\n  doi       = {10.1093/genetics/iyae100}\n}\n\n@article{Kelleher2016msprime,\n  author    = {Kelleher, Jerome and Etheridge, Alison M and McVean, Gilean},\n  title     = {Efficient coalescent simulation and genealogical analysis for large sample sizes},\n  journal   = {PLoS Computational Biology},\n  volume    = {12},\n  number    = {5},\n  pages     = {e1004842},\n  year     
 = {2016},\n  publisher = {Public Library of Science}\n}\n\n@article{Ralph2020Stats,\n  author    = {Ralph, Peter and Thornton, Kevin and Kelleher, Jerome},\n  title     = {Efficiently Summarizing Relationships in Large Samples: A General Duality Between Statistics of Genealogies and Genomes},\n  journal   = {Genetics},\n  volume    = {215},\n  number    = {3},\n  pages     = {779--797},\n  year      = {2020},\n  doi       = {10.1534/genetics.120.303253}\n}\n```"
  },
  {
    "path": "docs/cli.md",
    "content": "---\njupytext:\n  text_representation:\n    extension: .md\n    format_name: myst\n    format_version: 0.12\n    jupytext_version: 1.9.1\nkernelspec:\n  display_name: Python 3\n  language: python\n  name: python3\n---\n\n```{currentmodule} tskit\n```\n\n(sec_cli)=\n\n# Command line interface\n\n```{eval-rst}\n.. argparse::\n    :module: tskit.cli\n    :func: get_tskit_parser\n    :prog: python3 -m tskit\n```"
  },
  {
    "path": "docs/data-model.md",
    "content": "---\njupytext:\n  text_representation:\n    extension: .md\n    format_name: myst\n    format_version: 0.12\n    jupytext_version: 1.9.1\nkernelspec:\n  display_name: Python 3\n  language: python\n  name: python3\n---\n\n:::{currentmodule} tskit\n:::\n\n\n(sec_data_model)=\n\n# Data model\n\nThe `tskit` library deals with sets of sampled genome sequences through storage\nand analysis of their shared genetic ancestry. This genealogical ancestry (sometimes\nknown as an Ancestral Recombination Graph) is stored concisely in `tskit` in the \n\"succinct tree sequence\" format, which comprises a collection of easy-to-understand\ntables. This page documents the structure of the tables and encoding of table data,\nas well as the encoding of the correlated genetic trees that can be extracted from\na `tskit` tree sequence.\n\nWe begin by defining the the structure of the tables in the {ref}`sec_table_definitions`\nsection. The {ref}`sec_data_model_data_encoding` section then describe how data is\nstored in those tables (also see the {ref}`sec_file_formats`\nchapter). The {ref}`sec_data_model_tree_structure` section then\ndescribes the encoding of the trees that are generated from the {class}`NodeTable`\nand {class}`EdgeTable`. 
Finally, we describe how genotype data arises from tree\nstructure, especially how we can incorporate the idea of missing data.\n\n(sec_table_definitions)=\n\n## Table definitions\n\n(sec_table_types_definitions)=\n\n### Table types\n\nA tree sequence can be stored in a collection of eight tables:\n{ref}`Node <sec_node_table_definition>`,\n{ref}`Edge <sec_edge_table_definition>`,\n{ref}`Individual <sec_individual_table_definition>`,\n{ref}`Site <sec_site_table_definition>`,\n{ref}`Mutation <sec_mutation_table_definition>`,\n{ref}`Migration <sec_migration_table_definition>`,\n{ref}`Population <sec_population_table_definition>`, and\n{ref}`Provenance <sec_provenance_table_definition>`.\nThe Node and Edge tables store the genealogical\nrelationships that define the trees, and the Individual table\ndescribes how multiple genomes are grouped within individuals;\nthe Site and Mutation tables describe where mutations fall\non the trees; the Migration table describes how lineages move across space;\nand the Provenance table contains information on where the data came from.\nOnly Node and Edge tables are necessary to encode the genealogical trees;\nSites and Mutations are optional but necessary to encode polymorphism\n(sequence) data; the remainder are optional.\nIn the following sections we define these components of a tree sequence in\nmore detail.\n\n(sec_node_table_definition)=\n\n#### Node Table\n\nA **node** defines a monoploid set of chromosomes (a \"genome\") of a specific\nindividual that was born at some time in the past: the set of\nchromosomes inherited from a particular one of the individual's parents.\n(See {ref}`sec_nodes_or_individuals` for more discussion.)\nEvery vertex in the marginal trees of a tree sequence corresponds\nto exactly one node, and a node may be present in many trees. 
The\nnode table contains five columns, of which `flags` and `time` are\nmandatory:\n\n\n| Column        |  Type       |  Description                           |\n| :------------ | ----------- | -------------------------------------: |\n| flags         |  uint32     |  Bitwise flags.                        |\n| time          |  double     |  Birth time of node.                   |\n| population    |  int32      |  Birth population of node.             |\n| individual    |  int32      |  The individual the node belongs to.   |\n| metadata      |  binary     |  Node {ref}`sec_metadata_definition`.  |\n\nThe `time` column records the birth time of the individual in question,\nand is a floating point value. Similarly,\nthe `population` column records the ID of the population where this\nindividual was born. If not provided, `population` defaults to the\nnull ID (-1). Otherwise, the population ID must refer to a row in the\n{ref}`sec_population_table_definition`.\nThe `individual` column records the ID of the\n{ref}`Individual <sec_individual_table_definition>`\nindividual that this node belongs to. If specified, the ID must refer\nto a valid individual. If not provided, `individual`\ndefaults to the null ID (-1).\n\nThe `flags` column stores information about a particular node, and\nis composed of 32 bitwise boolean values. Currently, the only flag defined\nis `NODE_IS_SAMPLE = 1`, which defines the *sample* status of nodes. Marking\na particular node as a \"sample\" means, for example, that the mutational state\nof the node will be included in the genotypes produced by\n{meth}`TreeSequence.variants`.\n\nBits 0-15 (inclusive) of the `flags` column are reserved for internal use by\n`tskit` and should not be used by applications for anything other\nthan the purposes documented here. 
Bits 16-31 (inclusive) are free for applications\nto use for any purpose and will not be altered or interpreted by\n`tskit`.\n\nSee the {ref}`sec_node_requirements` section for details on the properties\nrequired for a valid set of nodes.\n\nFor convenience, the {ref}`text format <sec_text_file_format>` for nodes\ndecomposes the `flags` value into its separate values. Thus, in the\ntext format we have a column for `is_sample`, which corresponds to the\n`flags` column in the underlying table. As more flags values are\ndefined, these will be added to the text file format.\n\nThe `metadata` column provides a location for client code to store\ninformation about each node. See the {ref}`sec_metadata_definition` section for\nmore details on how metadata columns should be used.\n\n:::{note}\nThe distinction between `flags` and `metadata` is that flags\nholds information about a node that the library understands, whereas\nmetadata holds information about a node that the library *does not*\nunderstand. Metadata is for storing auxiliary information that is\nnot necessary for the core tree sequence algorithms.\n:::\n\n\n(sec_individual_table_definition)=\n\n#### Individual Table\n\nAn **individual** defines how nodes (which can be seen\nas representing single chromosomes) group together in a polyploid individual.\nThe individual table contains four columns, of which only `flags` is mandatory.\n\n| Column        | Type       | Description                                |\n| :------------ | ---------- | -----------------------------------------: |\n| flags         | uint32     | Bitwise flags.                             |\n| location      | double     | Location in arbitrary dimensions.          |\n| parents       | int32      | Ids of parent individuals.                 |\n| metadata      | binary     | Individual {ref}`sec_metadata_definition`. 
|\n\nSee the {ref}`sec_individual_requirements` section for details on the properties\nrequired for a valid set of individuals.\n\nThe `flags` column stores information about a particular individual, and\nis composed of 32 bitwise boolean values. Currently, no flags are\ndefined.\n\nBits 0-15 (inclusive) of the `flags` column are reserved for internal use by\n`tskit` and should not be used by applications for anything other\nthan the purposes documented here. Bits 16-31 (inclusive) are free for applications\nto use for any purpose and will not be altered or interpreted by\n`tskit`.\n\nThe `location` column stores the location of an individual in arbitrary\ndimensions. This column is {ref}`ragged <sec_encoding_ragged_columns>`, and\nso different individuals can have locations with different dimensions (i.e.,\none individual may have location `[]` and another `[0, 1, 0]`). This could\ntherefore be used to store other quantities (e.g., phenotype).\n\nThe `parents` column stores the ids of other individuals that are the parents of\nan individual. This can be used to store pedigree information for individuals.\nThis column is {ref}`ragged <sec_encoding_ragged_columns>` such that an\nindividual can have any number of parents.\n\nThe `metadata` column provides a location for client code to store\ninformation about each individual. See the {ref}`sec_metadata_definition` section for\nmore details on how metadata columns should be used.\n\n:::{note}\nThe distinction between `flags` and `metadata` is that flags\nholds information about an individual that the library understands, whereas\nmetadata holds information about an individual that the library *does not*\nunderstand. Metadata is for storing auxiliary information that is\nnot necessary for the core tree sequence algorithms.\n:::\n\n\n(sec_edge_table_definition)=\n\n#### Edge Table\n\nAn **edge** defines a parent-child relationship between a pair of nodes\nover a specific sequence interval. 
The edge table contains five columns,\nall of which are mandatory except `metadata`:\n\n| Column        | Type       | Description                                |\n| :------------ | ---------- | -----------------------------------------: |\n| left          | double     | Left coordinate of the edge (inclusive).   |\n| right         | double     | Right coordinate of the edge (exclusive).  |\n| parent        | int32      | Parent node ID.                            |\n| child         | int32      | Child node ID.                             |\n| metadata      | binary     | Edge {ref}`sec_metadata_definition`.       |\n\nEach row in an edge table describes a half-open genomic interval `[left, right)`\nover which the `child` inherited from the given `parent`.\nThe `left` and `right` columns are defined using double precision\nfloating point values. The `parent` and `child`\ncolumns specify integer IDs in the associated {ref}`sec_node_table_definition`.\n\nThe `metadata` column provides a location for client code to store\ninformation about each edge. See the {ref}`sec_metadata_definition` section for\nmore details on how metadata columns should be used.\n\nSee the {ref}`sec_edge_requirements` section for details on the properties\nrequired for a valid set of edges.\n\n\n(sec_site_table_definition)=\n\n#### Site Table\n\nA **site** defines a particular location along the genome in which\nwe are interested in observing the allelic state. The site table\ncontains three columns, of which `position` and `ancestral_state`\nare mandatory.\n\n| Column          | Type       | Description                                |\n| :-------------- | ---------- | -----------------------------------------: |\n| position        | double     | Position of site in genome coordinates.    |\n| ancestral_state | text       | The state at the root of the tree.         |\n| metadata        | binary     | Site {ref}`sec_metadata_definition`.       
|\n\nThe `position` column is a floating point value defining the location\nof the site in question along the genome.\n\nThe `ancestral_state` column specifies the allelic state at the root\nof the tree, thus defining the state that nodes inherit if no mutations\nintervene. The column stores text character data of arbitrary length.\n\nThe `metadata` column provides a location for client code to store\ninformation about each site. See the {ref}`sec_metadata_definition` section for\nmore details on how metadata columns should be used.\n\nSee the {ref}`sec_site_requirements` section for details on the properties\nrequired for a valid set of sites.\n\n\n(sec_mutation_table_definition)=\n\n#### Mutation Table\n\nA **mutation** defines a change of allelic state on a tree at a particular site.\nThe mutation table contains six columns, of which `site`, `node` and\n`derived_state` are mandatory.\n\n| Column          | Type       | Description                                    |\n| :-------------- | ---------- | ---------------------------------------------: |\n| site            | int32      | The ID of the site the mutation occurs at.     |\n| node            | int32      | The node this mutation occurs at.              |\n| parent          | int32      | The ID of the parent mutation.                 |\n| time            | double     | Time at which the mutation occurred.           |\n| derived_state   | char       | The allelic state resulting from the mutation. |\n| metadata        | binary     | Mutation {ref}`sec_metadata_definition`.       
|\n\nThe `site` column is an integer value defining the ID of the\n{ref}`site <sec_site_table_definition>` at which this mutation occurred.\n\nThe `node` column is an integer value defining the ID of the\nfirst {ref}`node <sec_node_table_definition>` in the tree below this mutation.\n\nThe  `time` column is a double precision floating point value recording how long ago\nthe mutation happened.\n\nThe `derived_state` column specifies the allelic state resulting from the mutation,\nthus defining the state that the `node` and any descendant nodes in the\nsubtree inherit unless further mutations occur. The column stores text\ncharacter data of arbitrary length.\n\nThe `parent` column is an integer value defining the ID of the mutation whose\nallelic state this mutation replaced. If there is no mutation at the\nsite in question on the path back to root, then this field is set to the\nnull ID (-1). (The `parent` column is only required in situations\nwhere there are multiple mutations at a given site. For\n\"infinite sites\" mutations, it can be ignored.)\n\nThe `metadata` column provides a location for client code to store\ninformation about each mutation. See the {ref}`sec_metadata_definition` section for\nmore details on how metadata columns should be used.\n\nSee the {ref}`sec_mutation_requirements` section for details on the properties\nrequired for a valid set of mutations.\n\n\n(sec_migration_table_definition)=\n\n#### Migration Table\n\n:::{note}\nEncoding migration in the migrations table is a legacy approach\nassociated with older versions of `msprime`; recording movement between\npopulations in the migration table is entirely optional, even when related\nnodes are assigned to different populations.\n:::\n\n:::{warning}\nThe migration table may be entirely removed from the `tskit` data model\nin the future. Meanwhile, a number of `tskit` functions, such as\n{meth}`~TreeSequence.simplify()` will raise an error if data exists in\nthe migrations table. 
\n:::\n\n:::{seealso}\nThe {ref}`msprime:sec_ancestry_record_migrations`\nsections and the associated discussion of\n{ref}`msprime:sec_demography_migration` in the `msprime` documentation.\n:::\n\nIn simulations, trees can be thought of as spread across space, and it is\nhelpful for inferring demographic history to record this history.\nMigrations are performed by individual ancestors, but might not be tagged by an\nindividual whose genome is tracked as a `node` (as in a discrete-deme model they are\nunlikely to be both a migrant and a most recent common ancestor).  So,\n`tskit` can record separately when a segment of ancestry has moved between\npopulations. This table is not required, even if different nodes come from\ndifferent populations.\n\n\n| Column     | Type     | Description                                            |\n| :--------- | -------- | -----------------------------------------------------: |\n| left       | double   | Left coordinate of the migrating segment (inclusive).  |\n| right      | double   | Right coordinate of the migrating segment (exclusive). |\n| node       | int32    | Node ID.                                               |\n| source     | int32    | Source population ID.                                  |\n| dest       | int32    | Destination population ID.                             |\n| time       | double   | Time of migration event.                               |\n| metadata   | binary   | Migration {ref}`sec_metadata_definition`.              |\n\n\nThe `left` and `right` columns are floating point values defining the\nhalf-open segment of genome affected (these need not exactly correspond to\nbreakpoints between edges). The `source` and `dest` columns record the IDs of\nthe respective populations (note that by `msprime` convention, \"source\" and\n\"destination\" are defined in reverse time, see\n{ref}`msprime:sec_demography_direction_of_time`.). 
The `time` column\nholds floating point values recording the time of the event, with migrations\nassumed to occur instantaneously. The `node` column records the ID of the child\nnode of the migrating segment; in consequence the population ID of the `node` will\nmatch the `source` ID (unless sequential migrations affect the same `node`, in which\ncase it will match the `source` value of the youngest of those migrations).\n\nThe `metadata` column provides a location for client code to store\ninformation about each migration. See the {ref}`sec_metadata_definition` section for\nmore details on how metadata columns should be used.\n\nSee the {ref}`sec_migration_requirements` section for details on the properties\nrequired for a valid set of migrations.\n\n\n(sec_population_table_definition)=\n\n#### Population Table\n\nA **population** defines a grouping of individuals that a node can\nbe said to belong to.\n\nThe population table contains one column, `metadata`.\n\n| Column     | Type     | Description                                |\n| :--------- | -------- | -----------------------------------------: |\n| metadata   | binary   | Population {ref}`sec_metadata_definition`. |\n\n\nThe `metadata` column provides a location for client code to store\ninformation about each population. See the {ref}`sec_metadata_definition` section for\nmore details on how metadata columns should be used.\n\nSee the {ref}`sec_population_requirements` section for details on the properties\nrequired for a valid set of populations.\n\n\n(sec_provenance_table_definition)=\n\n#### Provenance Table\n\n| Column    | Type  | Description                                                             |\n| :-------- | ----- | ----------------------------------------------------------------------: |\n| timestamp | char  | Timestamp in [ISO-8601](https://en.wikipedia.org/wiki/ISO_8601) format. |\n| record    | char  | Provenance record as JSON.                                              
|\n\n\n(sec_metadata_definition)=\n\n### Metadata\n\nEach table (excluding provenance) has a metadata column for storing and passing along\ninformation that tskit does not use or interpret. See {ref}`sec_metadata` for details.\nThe metadata columns are {ref}`binary columns <sec_tables_api_binary_columns>`.\n\nWhen using the {ref}`sec_text_file_format`, metadata values are written as opaque\ntext. By default, {meth}`TreeSequence.dump_text` will base64-encode metadata values\nthat are stored as raw bytes (when ``base64_metadata=True``) so that binary data can\nbe safely printed and exchanged; in this case {func}`tskit.load_text` will base64-decode\nthe corresponding text fields back to bytes. When metadata has already been decoded\nto a structured Python object (for example via a metadata schema), the textual\nrepresentation written by {meth}`TreeSequence.dump_text` is the ``repr`` of that\nobject, and {func}`tskit.load_text` does not attempt to reconstruct the original\nstructured value from this representation. For reliable metadata round-tripping,\nprefer the native binary tree sequence file format over the text formats.\n\nThe tree sequence itself also has metadata stored as a byte array.\n\n\n(sec_valid_tree_sequence_requirements)=\n\n### Valid tree sequence requirements\n\nArbitrary data can be stored in tables using the classes in the\n{ref}`sec_tables_api`. The {meth}`TableCollection.tree_sequence` method\ncan be used to turn such a {class}`TableCollection` into an immutable\n{class}`TreeSequence` object, but this requires the tables to\nfulfil a specific set of requirements. In this\nsection we list these requirements, and explain their rationale.\nViolations of most of these requirements are detected when the\nuser attempts to load a tree sequence via {func}`tskit.load` or\n{meth}`TableCollection.tree_sequence`, raising an informative\nerror message. 
Some more complex requirements may not be detectable at load-time,\nand errors may not occur until certain operations are attempted.\nThese are documented below.\n\nAt the tree-sequence level, we require that the coordinate space has a finite,\nstrictly positive length; that is, the `sequence_length` attribute must be a\nfinite value greater than zero.\n\nThe Python API also provides tools that can transform a collection of\ntables into a valid collection of tables, so long as they are logically\nconsistent, see {ref}`sec_tables_api_creating_valid_tree_sequence`.\n\n\n(sec_individual_requirements)=\n\n#### Individual requirements\n\nIndividuals are a basic type in a tree sequence and are not defined with\nrespect to any other tables. Individuals can have a reference to their parent\nindividuals; if present, these references must be valid or null (-1). An\nindividual cannot list itself as its own parent.\n\nA valid tree sequence does not require individuals to be sorted in any\nparticular order, and sorting a set of tables using {meth}`TableCollection.sort`\nhas no effect on individuals. 
However, individuals can be optionally sorted\nusing {meth}`TableCollection.sort_individuals`.\n\n(sec_node_requirements)=\n\n#### Node requirements\n\nGiven a valid set of individuals and populations, the requirements for\neach node are:\n\n- `time` must be a finite (non-NaN, non-infinite) value;\n- `population` must either be null (-1) or refer to a valid population ID;\n- `individual` must either be null (-1) or refer to a valid individual ID.\n\nAn ID refers to a zero-indexed row number in the relevant table,\nand so is \"valid\" if it is between 0 and one less than the number of rows in the relevant table.\n\nThere are no requirements regarding the ordering of nodes with respect to time.\n\nSorting a set of tables using {meth}`TableCollection.sort`\nhas no effect on nodes.\n\n\n(sec_edge_requirements)=\n\n#### Edge requirements\n\nGiven a valid set of nodes and a sequence length {math}`L`, the simple\nrequirements for each edge are:\n\n- We must have finite coordinates with {math}`0 \\leq` `left` {math}`<` `right` {math}`\\leq L`;\n- `parent` and `child` must be valid node IDs;\n- `time[parent]` > `time[child]`;\n- edges must be unique (i.e., no duplicate edges are allowed).\n\nThe first requirement simply ensures that the interval makes sense. 
The\nthird requirement ensures that we cannot have loops, since time is\nalways increasing as we ascend the tree.\n\nTo ensure a valid tree sequence there is one further requirement:\n\n- The set of intervals on which each node is a child must be disjoint.\n\nThis guarantees that we cannot have contradictory edges (i.e.,\nwhere a node `a` is a child of both `b` and `c`), and ensures that\nat each point on the sequence we have a well-formed forest of trees.\n\nIn the interest of algorithmic efficiency, edges must have the following\nsortedness properties:\n\n- All edges for a given parent must be contiguous;\n- Edges must be listed in nondecreasing order of `parent` time;\n- Within the edges for a given `parent`, edges must be sorted\n  first by `child` ID and then by `left` coordinate.\n\nViolations of these requirements are detected at load time.\nThe {meth}`TableCollection.sort` method will ensure that these sortedness\nproperties are fulfilled.\n\n\n(sec_site_requirements)=\n\n#### Site requirements\n\nGiven a valid set of nodes and a sequence length {math}`L`, the simple\nrequirements for a valid set of sites are:\n\n- We must have a finite coordinate with {math}`0 \\leq` `position` {math}`< L`;\n- `position` values must be unique.\n\nFor simplicity and algorithmic efficiency, sites must also:\n\n- Be sorted in increasing order of `position`.\n\nViolations of these requirements are detected at load time.\nThe {meth}`TableCollection.sort` method ensures that sites are sorted\naccording to these criteria.\n\n\n(sec_mutation_requirements)=\n\n#### Mutation requirements\n\nGiven a valid set of nodes, edges and sites, the\nrequirements for a valid set of mutations are:\n\n- `site` must refer to a valid site ID;\n- `node` must refer to a valid node ID;\n- `time` must either be `UNKNOWN_TIME` (a NAN value which indicates\n  the time is unknown) or be a finite value which is greater or equal to the\n  mutation `node`'s `time`, less than the `node` above the 
mutation's\n  `time` and equal to or less than the `time` of the `parent` mutation\n  if this mutation has one. If one mutation on a site has `UNKNOWN_TIME` then\n  all mutations at that site must, as a mixture of known and unknown is not valid.\n- `parent` must either be the null ID (-1), if the mutation has no parent, or a\n  valid mutation ID within the current table. \n\nFurthermore,\n\n- The `parent` value must be consistent with the topology of the tree at the site\n  of the mutation, such that a path from the child mutation to the parent mutation\n  exists without passing through any other mutations at the same site.\n\nFor simplicity and algorithmic efficiency, mutations must also:\n\n- be sorted by site ID;\n- when there are multiple mutations per site, mutations should be ordered by\n  decreasing time, if known, and parent mutations must occur\n  **before** their children (i.e. if a mutation with ID {math}`x` has\n  `parent` with ID {math}`y`, then we must have {math}`y < x`).\n\nViolations of these sorting requirements are detected at load time.\nThe {meth}`TableCollection.sort` method ensures that mutations are sorted\naccording to site ID, but does not at present enforce that mutations occur\nafter their parent mutations.\n\nSilent mutations (i.e., mutations for which the ancestral and derived\nstates are the same) are allowed.\nFor example, if we have a site with ancestral state\nof \"A\" and a single mutation with derived state \"A\", then this\nmutation does not result in any change of state.\n(This addition was made in release C_0.99.11.)\n\n:::{note}\nAs `tskit.UNKNOWN_TIME` is implemented as a `NaN` value, tests for equality\nwill always fail. 
Use `tskit.is_unknown_time` to detect unknown values.\n:::\n\n\n(sec_migration_requirements)=\n\n#### Migration requirements\n\nGiven a valid set of nodes and edges, the requirements for a valid set of\nmigrations are:\n\n- `left` and `right` must be finite values that lie within the tree sequence\n  coordinate space (i.e., from 0 to `sequence_length`), with {math}`0 \\leq`\n  `left` {math}`<` `right` {math}`\\leq L`;\n- `node` must be a valid node ID;\n- if population references are checked, `source` and `dest` must be valid\n  population IDs;\n- `time` must be a finite value.\n\nTo enable efficient processing, migrations must also be sorted by\nnondecreasing `time` value.\n\nConceptually, a migration records that a segment of ancestry for the given\n`node` moves between populations along the tree. In typical demographic\nmodels we expect:\n\n- `time` to lie strictly between the time of the migrating `node` and the time\n  of any ancestral node from which that node inherits on the segment\n  `[left, right)`;\n- the `population` of any such ancestor to match the `source` population,\n  until another `migration` intervenes.\n\nThese conceptual relationships are not currently validated. It is\nthe responsibility of code that creates migrations to satisfy them where\nrequired.\n\nNote in particular that there is no requirement that adjacent migration records\nshould be \"squashed\". That is, we can have two records `m1` and `m2`\nsuch that `m1.right` = `m2.left` and with the `node`, `source`,\n`dest` and `time` fields equal. 
This is because such records will usually\nrepresent two independent ancestral segments migrating at the same time, and\nas such squashing them into a single record would result in a loss of information.\n\n\n(sec_population_requirements)=\n\n#### Population requirements\n\nThere are no requirements on a population table.\n\n\n(sec_provenance_requirements)=\n\n#### Provenance requirements\n\nThe `timestamp` column of a provenance table should be in\n[ISO-8601](https://en.wikipedia.org/wiki/ISO_8601) format.\n\nThe `record` column stores a JSON document describing how and where the tree sequence\nwas produced. For tree sequences generated by tskit and related tools, this JSON is\nexpected to conform to the {ref}`provenance schema <sec_provenance_schema>` described\nin {ref}`sec_provenance`. \n\n\n(sec_table_indexes)=\n\n### Table indexes\n\nTo efficiently iterate over the trees in a tree sequence, `tskit` uses\nindexes built on the edges. To create a tree sequence from a table collection\nthe tables must be indexed; the {meth}`TableCollection.build_index` method\ncan be used to create an index on a table collection if necessary.\n\n:::{todo}\nAdd more details on what the indexes actually are.\n:::\n\n\n(sec_data_model_saving)=\n\n### Saving to file\n\nWhen serializing (e.g. storing a {class}`TreeSequence` to disk using\n{meth}`dump<TreeSequence.dump>`), the underlying tables are stored along with the\nindexes, top-level metadata, attributes such as the sequence length and time units, and\nthe {ref}`sec_data_model_reference_sequence` if it exists. {func}`Loading <load>` such a\nfile returns an immutable tree sequence object, with pre-calculated indexes immediately\navailable. 
See the {ref}`sec_tree_sequence_file_format` section for more details.\n\nAlthough data in a raw {class}`TableCollection` need not conform to the\n{ref}`sec_valid_tree_sequence_requirements`, it too can be\n{meth}`dumped <TableCollection.dump>` to a file (with indexes stored if they exist).\n\n\n(sec_data_model_data_encoding)=\n\n## Data encoding\n\nIn this section we describe the high-level details of how data is encoded in\ntables. Tables store data in a **columnar** manner. In memory, each\ntable is organised as a number of blocks of contiguous storage, one for\neach column. There are many advantages to this approach, but the key\nproperty for us is that it allows for very efficient transfer of data\nin and out of tables. Rather than inserting data into tables row-by-row\n(which can be done in Python\n{ref}`using the add_row methods<sec_tables_api_accessing_table_data>`), it is much\nmore efficient to add many rows at the same time by providing pointers to blocks of\ncontiguous memory. By taking this approach, we can work with tables containing\ngigabytes of data very efficiently.\n\nFor instance, in the {ref}`sec_python_api` we can use the\n[numpy Array API](https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.html)\nto allow us to define and work with numeric arrays of the required types.\nNode IDs, for example, are defined using 32 bit integers. Thus, the\n`parent` column of an {ref}`sec_edge_table_definition` with `n` rows\nis a block of `4n` bytes.\n\nThis approach is very straightforward for columns in which each row contains\na fixed number of values. However, dealing with columns containing a\n**variable** number of values is more problematic.\n\n\n(sec_encoding_ragged_columns)=\n\n### Encoding ragged columns\n\nA **ragged** column is a column in which the rows are not of a fixed length.\nFor example, {ref}`sec_metadata_definition` columns contain binary data of arbitrary\nlength. 
To encode such columns in the tables API, we store **two** columns:\none contains the flattened array of data and another stores the **offsets**\nof each row into this flattened array. Consider an example:\n\n```{code-cell} ipython3\nimport tskit\n\ns = tskit.SiteTable()\ns.add_row(0, \"A\")\ns.add_row(0, \"\")\ns.add_row(0, \"TTT\")\ns.add_row(0, \"G\")\ns\n```\n\nIn this example we create a {ref}`sec_site_table_definition` with four rows,\nand then display this table. We can see that the second row has the\nempty string as its `ancestral_state`, and the third row's\n`ancestral_state` is `TTT`. Now let's print out the columns:\n\n```{code-cell} ipython3\nprint(\"Ancestral state (numerical): \", s.ancestral_state)\nprint(\"Ancestral state (as bytes): \", s.ancestral_state.tobytes())\nprint(\"Ancestral state offsets: \", s.ancestral_state_offset)\n```\n\nWhen we print out the table's `ancestral_state`\ncolumn, we see that it's a numpy array of length 5: this is the\nflattened array of [ASCII encoded](https://en.wikipedia.org/wiki/ASCII)\nvalues for these rows. When we decode these bytes using the\nnumpy {meth}`tobytes<numpy:numpy.ndarray.tobytes>` method, we get the string 'ATTTG'.\nThis flattened array can now be transferred efficiently in memory like any other column.\nWe then use the `ancestral_state_offset` column to allow us to find the individual rows.\nFor a row `j`:\n\n    ancestral_state[ancestral_state_offset[j]: ancestral_state_offset[j + 1]]\n\ngives us the array of bytes for the ancestral state in that row. For example, here is\nrow 2:\n\n```{code-cell} ipython3\ns.ancestral_state[s.ancestral_state_offset[2]: s.ancestral_state_offset[3]].tobytes()\n```\n\nFor a table with `n` rows, any offset column must have `n + 1`\nvalues, the first of which is always `0`. 
The values in this column must be\nnondecreasing, and cannot exceed the length of the ragged column in question.\n\n\n(sec_data_model_reference_sequence)=\n\n## Reference sequence\n\nAlong with the topology and site information stored in the tskit tree\nsequence, we can also optionally store an associated reference sequence.\nReference sequences are flexible, and can consist simply of some\nmetadata recording which assembly build a tree sequence uses, or\nstoring the entire sequence itself.\n\n:::{warning}\nReference sequence support in tskit is preliminary. Reference sequence\ndata can be stored and accessed via the C API. Support in the Python\nAPI is limited to usage in {meth}`.TreeSequence.alignments` and\nrelated methods, where it provides the default values for nucleotide\npositions between {ref}`sites<sec_data_model_definitions_site>`.\n:::\n\n(sec_data_model_tree_structure)=\n\n## Tree structure\n\n(sec_data_model_quintuply_linked_trees)=\n\n### Quintuply linked trees\n\nTree structure in `tskit` is encoded internally as a \"quintuply\nlinked tree\", a generalisation of the triply linked tree encoding\nused by Knuth and others. Nodes are represented by their integer\nIDs, and their relationships to other nodes are recorded in the\n`parent`, `left_child`, `right_child`, `left_sib` and\n`right_sib` arrays. 
For example, consider the following tree\nand its associated arrays:\n\n```{code-cell} ipython3\n:tags: [\"hide-input\"]\nimport io\n\nimport tskit\nfrom IPython.display import SVG\n\nnodes = \"\"\"\\\nid      is_sample   time\n0       1           0\n1       1           0\n2       1           0\n3       1           0\n4       1           0\n5       0           1\n6       0           2\n7       0           3\n\"\"\"\nedges = \"\"\"\\\nleft    right   parent  child\n0       60      5       4,3\n0       40      6       2\n0       60      6       1,0\n20      40      6       5\n0       20      7       5\n40      60      7       5\n0       60      7       6\n40      60      7       2\n\"\"\"\nts = tskit.load_text(\n    nodes=io.StringIO(nodes), edges=io.StringIO(edges), strict=False\n)\n\nSVG(ts.first().draw_svg(time_scale=\"rank\"))\n```\n\n```{code-cell} ipython3\n:tags: [\"hide-input\"]\nfrom IPython.display import HTML\n\ndef html_quintuple_table(ts, show_virtual_root=False, show_convenience_arrays=False):\n    tree = ts.first()\n    columns = [\"node\", \"parent\", \"left_child\", \"right_child\", \"left_sib\", \"right_sib\"]\n    convenience_arrays = [\"num_children\", \"edge\"]\n    if show_convenience_arrays:\n        columns += convenience_arrays\n    data = {k:[] for k in columns}\n    for u in sorted(tree.nodes(tree.virtual_root if show_virtual_root else None)):\n        for colname in columns:\n            data[colname].append(u if colname == \"node\" else getattr(tree, colname)(u))\n    html = \"<tr>\"\n    for colname in columns:\n        html += f\"<th>{colname}</th>\"\n    html += \"</tr>\"\n    for u in range(len(data[\"node\"])):\n        html += \"<tr>\" if u < ts.num_nodes else \"<tr style='font-style: italic; color:red'>\"\n        for colname in columns:\n            html += f\"<td>{data[colname][u]}</td>\"\n        html += \"</tr>\"\n    return \"<table>\" + html + \"</table>\"\n\nHTML(html_quintuple_table(ts))\n```\n\nEach node in the tree 
corresponds to a row in this table, and\nthe columns are the individual arrays recording the quintuply linked\nstructure. Thus, we can see that the parent of nodes `0`, `1`, and `2`\nis `6`. Similarly, the left child of `6` is `0` and the\nright child of `6` is `2`. The `left_sib` and `right_sib` arrays\nthen record each node's sibling on its left or right, respectively;\nhence the right sib of `0` is `1`, and the right sib of `1` is `2`.\nThus, sibling information allows us to efficiently support trees\nwith arbitrary numbers of children. In each of the five pointer arrays,\nthe null node (-1) is used to indicate the end of a path; thus,\nfor example, the parent of `7` and left sib of `0` are null.\n\nPlease see this {ref}`example <sec_c_api_examples_tree_traversals>` for\ndetails of how to use the quintuply linked structure in the C API.\n\n:::{note}\nFor many applications we do not need the quintuply linked trees,\nand (for example) the `left_sib` and `right_child` arrays can be\nignored. The reason for using a quintuply instead of triply linked\nencoding is that it is not possible to efficiently update the trees\nas we move along the sequence without the quintuply linked structure.\n:::\n\n:::{warning}\nThe left-to-right ordering of nodes is determined by the order\nin which edges are inserted into the tree during iteration along the sequence.\nThus, if we arrive at the same tree by iterating from different directions,\nthe left-to-right ordering of nodes may be different! The specific\nordering of the children of a node should therefore not be depended on.\n:::\n\n### Convenience arrays\n \nSimilar to the five arrays representing the {ref}`quintuply linked tree<sec_data_model_quintuply_linked_trees>`,\nconvenience arrays track information on each node in the tree. These arrays are not essential to \nrepresent the trees within a tree sequence. However, they can be useful for\nspecific algorithms (e.g. when computing tree (im)balance metrics). 
Two \nconvenience arrays have been implemented so far: \n{attr}`Tree.num_children_array` and {attr}`Tree.edge_array`.\n\nHere is the table above with the convenience arrays also shown:\n\n```{code-cell} ipython3\n:tags: [\"hide-input\"]\n\nHTML(html_quintuple_table(ts, show_convenience_arrays=True))\n```\n\n(sec_data_model_tree_roots)=\n\n### Roots\n\nIn the `tskit` {class}`trees <Tree>` we have shown so far, all the sample nodes have\nbeen connected to each other. This means each tree has only a single {attr}`~Tree.root`\n(i.e. the oldest node found when tracing a path backwards in time from any sample).\nHowever, a tree can contain {ref}`sec_data_model_tree_isolated_sample_nodes`\nor unconnected topologies, and can therefore have *multiple* {attr}`~Tree.roots`.\nHere's an example, created by deleting the edge joining `6` and `7` in the tree sequence\nused above:\n\n```{code-cell} ipython3\n:tags: [\"hide-input\"]\ntables = ts.dump_tables()\ntables.edges.truncate(ts.num_edges - 1)\nts_multiroot = tables.tree_sequence()\nSVG(ts_multiroot.first().draw_svg(time_scale=\"rank\"))\n```\n\nIn `tskit` terminology, this should *not* be thought\nof as two separate trees, but as a single multi-root \"tree\", comprising\ntwo unlinked topologies. This fits with the definition of a tree\nin a tree sequence: a tree describes the ancestry of the same\nfixed set of sample nodes at a single position in the genome. In the\npicture above, *both* the left and right hand topologies are required\nto describe the genealogy of samples 0..4 at this position.\n\nHere's what the entire tree sequence now looks like:\n\n```{code-cell} ipython3\n:tags: [\"hide-input\"]\nSVG(ts_multiroot.draw_svg(time_scale=\"rank\"))\n```\n\nFrom the terminology above, it can be seen that this tree sequence consists of only\nthree trees (not five). The first tree, which applies from position 0 to 20, is the one\nused in our example. 
As we saw, removing the edge connecting node 6 to node 7 has\ncreated a tree with 2 roots (and thus 2 unconnected topologies in a single tree).\nIn contrast, the second tree, from position 20 to 40, has a single root. Finally the\nthird tree, from position 40 to 60, again has two roots.\n\n(sec_data_model_tree_root_threshold)=\n\n#### The root threshold\n\nThe roots of a tree are defined by reference to the\n{ref}`sample nodes<sec_data_model_definitions_sample>`. By default, roots are the unique\nendpoints of the paths traced upwards from the sample nodes; equivalently, each root\ncounts one or more samples among its descendants (or is itself a sample node). This is\nthe case when the {attr}`~Tree.root_threshold` property of a tree is left at its default\nvalue of `1`. If, however, the `root_threshold` is (say) `2`, then a node is\nconsidered a root only if it counts at least two samples among its descendants. Setting\nan alternative `root_threshold` value can be used to avoid visiting\n{ref}`sec_data_model_tree_isolated_sample_nodes`, for example when dealing with trees\ncontaining {ref}`sec_data_model_missing_data`.\n\n(sec_data_model_tree_virtual_root)=\n\n#### The virtual root\n\nTo access all the {attr}`~Tree.roots` in a tree, tskit uses a special additional node\ncalled the **virtual root**. This is primarily a bookkeeping device, and\ncan normally be ignored: it is not plotted in any visualizations and\ndoes not exist as an independent node in the node table.\nHowever, the virtual root can be useful in certain algorithms because its\nchildren are defined as all the \"real\" roots in a tree. Hence by\ndescending downwards from the virtual root, it is possible\nto access the entire genealogy at a given site, even in a multi-root\ntree. In the quintuply linked tree encoding, the virtual root appears as an\nextra element at the end of each of the tree arrays. 
Here's the same table\nas before but with the virtual root also shown, using red italics to\nemphasise that it is not a \"real\" node:\n\n```{code-cell} ipython3\n:tags: [\"hide-input\"]\nHTML(html_quintuple_table(ts_multiroot, show_virtual_root=True))\n```\n\nYou can see that the virtual root (node 8) has 6 as its left child and 7\nas its right child. Importantly, though, this is an asymmetric\nrelationship: the parent of the \"real\" roots 6 and 7 is null\n(-1) and *not* the virtual root. Hence when we ascend up the tree from the\nsample nodes to their parents, we stop at the \"real\" roots, and never\nencounter the virtual root.\n\nBecause the virtual root can be useful in some algorithms, it can\noptionally be returned in traversal orders (see {meth}`.Tree.nodes`).\nThe following properties apply:\n\n- All trees in a tree sequence share the same virtual root.\n- The virtual root's ID is always equal to the number of nodes in the tree sequence\n  (i.e. the length of the node table). However, there is **no corresponding row**\n  in the node table, and any attempts to access information about the\n  virtual root via either the tree sequence or tables APIs will fail with\n  an out-of-bounds error.\n- The parent and siblings of the virtual root are null.\n- The time of the virtual root is defined as positive infinity (if\n  accessed via {meth}`.Tree.time`). This is useful in defining the\n  time-based node traversal orderings.\n- The virtual root is the parent of no other node---roots do **not**\n  have parent pointers to the virtual root.\n\n\n(sec_data_model_tree_isolated_nodes)=\n\n### Isolated nodes\n\nIn a tree, it is possible for a node to have no children and no parent. Such a node is\nsaid to be *isolated*, meaning that we don't know anything about its relationships\nover a specific genomic interval. 
This is commonly true for ancestral genomes, which\noften have large regions that have\nnot been inherited by any of the {ref}`sample nodes<sec_data_model_definitions_sample>`\nin the tree sequence, and therefore regions about which we know nothing. This is true,\nfor example, of node 7 in the middle tree of our previous example, which is why it is\nnot plotted on that tree:\n\n```{code-cell} ipython3\ndisplay(SVG(ts_multiroot.draw_svg(time_scale=\"rank\")))\nfor tree in ts_multiroot.trees():\n    print(\n        \"Node 7\",\n        \"is\" if tree.is_isolated(7) else \"is not\",\n        \"isolated from position\",\n        tree.interval.left,\n        \"to\",\n        tree.interval.right,\n    )\n```\n\n\n(sec_data_model_tree_isolated_sample_nodes)=\n\n#### Isolated sample nodes\n\nIt is also possible for a {ref}`sample node<sec_data_model_definitions_sample>`\nto be isolated. As long as the {ref}`root threshold<sec_data_model_tree_root_threshold>`\nis set to its default value, an isolated *sample* node will count as a root, and\ntherefore be considered as being present on the tree (meaning it will be\nreturned by the {meth}`Tree.nodes`\nand {meth}`Tree.samples` methods). When displaying a tree, isolated samples are shown\nunconnected to other nodes. To illustrate, we can remove the edge from node 2 to node 7:\n\n```{code-cell} ipython3\n:tags: [\"hide-input\"]\ntables = ts_multiroot.dump_tables()\ntables.edges.set_columns(\n    **tables.edges[(tables.edges.parent != 7) | (tables.edges.child != 2)].asdict())\nts_isolated = tables.tree_sequence()\nSVG(ts_isolated.draw_svg(time_scale=\"rank\"))\n```\n\nThe rightmost tree now contains an isolated sample node (node 2), which counts as\none of the {ref}`sec_data_model_tree_roots` of the tree. 
This tree therefore has three\nroots, one of which is node 2:\n\n```{code-cell} ipython3\nrightmost_tree = ts_isolated.at_index(-1)\nprint(rightmost_tree.num_roots, \"roots in the rightmost tree, with IDs\", rightmost_tree.roots)\nprint(\n    \"IDs of isolated samples in this tree:\",\n    [u for u in rightmost_tree.samples() if rightmost_tree.is_isolated(u)],\n)\n```\n\nIn `tskit`, isolated sample nodes are closely associated with the encoding of\n{ref}`sec_data_model_missing_data`.\n\n\n(sec_data_model_tree_dead_leaves_and_branches)=\n\n### Dead leaves and branches\n\nIn a `tskit` tree, a *leaf node* is defined as a node without any children. The\nimplications of this turn out to be slightly unintuitive, and so are worth briefly\ndocumenting here. Firstly, the same node can be a leaf in one tree, and not a leaf\nin the next tree along the tree sequence. Secondly all isolated nodes must be leaves\n(as by definition they have no children). Thirdly sample nodes need not be leaves\n(they could be \"internal samples\"); likewise leaf nodes need not be samples.\n\nNode 7 in the example above provides a good case study. Note that it is a root node with\nat least one child (i.e. not a leaf) in trees 0 and 2; in contrast in tree 1 it is\nisolated. Strictly, because it is isolated in tree 1, it is also a leaf node there,\nalthough it is not attached to a root, not a sample, and is therefore not plotted. In\nthis case, in that tree we can think of node 7 as a \"dead leaf\" (and we don't normally\nplot dead leaves). In fact, in a large tree sequence of many trees, most ancestral nodes\nwill be isolated in any given tree, and therefore most nodes in such a tree will be of\nthis sort. However, these dead leaves are excluded from most calculations on trees,\nbecause algorithms usually traverse the tree by starting at a root and working down,\nor by starting at a sample and working up. 
Hence when we refer to the leaves of a tree,\nit is usually shorthand for the leaves **on** the tree (that is, attached via branches,\nto one of the tree roots). Dead leaves are excluded from this definition.\n\nNote that it is also possible to have trees in which there are \"dead branches\": that is\nsections of topology which are not accessible from a root, and whose tips are all\ndead leaves. Although valid, this is a relatively unusual state of affairs, and such\nbranches are not plotted by the standard {ref}`sec_tskit_viz` methods. The\n{meth}`Tree.nodes` method will not, by default, traverse through dead branches, although\nit can be made to do so by specifying the ID of a dead node as the root for traversal.\n\n\n(sec_data_model_genetic_data)=\n\n## Encoding genetic variation\n\nGenetic variation is incorporated into a tree sequence by placing\n{ref}`mutations<sec_mutation_table_definition>` at\n{ref}`sites<sec_site_table_definition>` along the genome.\nThe genotypes of the different samples at each site\ncan be found by using the tree to calculate which mutations are inherited by\nthe different samples. This is the fundamental basis of how tree sequences\nefficiently encode DNA sequences, and is explained in depth elsewhere\n(e.g. 
{ref}`in the tutorials<sec_what_is_dna_data>`).\n\nBelow, we discuss some implications of this encoding in more detail, in particular\nthe way in which it can be used to model missing data.\n\n(sec_data_model_missing_data)=\n\n### Missing data\n\nIf, at a particular genomic position, a node is\n{ref}`isolated<sec_data_model_tree_isolated_nodes>` *and* additionally has\nno mutations directly above it, its genotype at that position is considered to be\nunknown (however, if there is a mutation above an isolated node, it\ncan be thought of as saying directly what the genotype is, and so renders the\ngenotype at that position not missing).\n\nBy way of illustration, we'll use the {meth}`~TableCollection.delete_intervals` method\nto remove all knowledge of the ancestry in the\nmiddle portion of the previous example (say from position 15 to 45) sprinkle\non some mutations, and make sure there are sites at every position:\n\n```{code-cell} ipython3\n:tags: [\"hide-input\"]\n\nimport numpy as np\nimport msprime\n\ntables = msprime.sim_mutations(ts_isolated, rate=0.1, random_seed=123).dump_tables()\ntables.delete_intervals([[15, 45]], simplify=False)\nmissing_sites = np.setdiff1d(np.arange(tables.sequence_length), tables.sites.position)\nfor pos in missing_sites:\n    tables.sites.add_row(position=pos, ancestral_state=\"A\")  # Add sites at every pos\ntables.sort()\nmissing_ts = tables.tree_sequence()\nSVG(missing_ts.draw_svg())\n```\n\n\nThe middle section of the genome now has no ancestry at all, and therefore for any site\nthat is in this region, the genotypic state that it is assigned is a special value\n`tskit.MISSING_DATA`, or `-1`. The {meth}`~TreeSequence.haplotypes()` method, which\noutputs the actual allelic state for each sample, defaults to outputting an `N` at\nthese sites. Therefore where any sample node is isolated, the haplotype will show\nan `N`, indicating the DNA sequence is unknown. 
This will be so not only in the\nmiddle of all of the sample genomes, but also at the right hand end of the genome of\nsample 2, as it is an isolated sample node in the rightmost tree:\n\n```{code-cell} ipython3\nfor i, h in enumerate(missing_ts.haplotypes()):\n    print(f\"Sample {i}: {h}\")\n```\n\nSee the {meth}`TreeSequence.variants` method and {class}`Variant` class for\nmore information on how missing data is represented in variant data.\n\n\n(sec_gotchas)=\n\n## Possibly surprising consequences of the data model\n\nThis is a section of miscellaneous issues that might trip even an experienced user up,\nalso known as \"gotchas\".\nThe current examples are quite uncommon, so can be ignored for most purposes,\nbut the list may be expanded in the future.\n\n### Unrelated material\n\nUsually, all parts of a tree sequence are ancestral to at least one sample,\nsince that's essentially the definition of a sample: the genomes that\nwe're describing the ancestry of.\nHowever, in some cases there will be portions of the tree sequence from which\nno samples inherit - notably, the result of a forwards simulation that has\nnot been simplified.\nIn fact, if the simulation has not coalesced,\none can have entire portions of some marginal tree that are\nunrelated to any of the samples\n(for instance, an individual in the initial generation of the simulation\nthat had no offspring).\nThis can lead to a gotcha:\nthe *roots* of a tree are defined to be only those roots *reachable from the samples*\n(and, furthermore, reachable from at least `root_threshold` samples;\nsee {meth}`TreeSequence.trees`).\nSo, our unlucky ancestor would not appear in the list of `roots`, even though\nif we drew all the relationships provided by the tree sequence,\nthey'd definitely be a root.\nFurthermore, only nodes *reachable from a root* are included in the\n{meth}`Tree.nodes`. 
So, if you iterate over all the nodes in each marginal tree,\nyou won't see those parts of the tree sequence that are unrelated to the samples.\nIf you need to get those, too, you could either\nwork with the {meth}`TreeSequence.edge_diffs` directly,\nor iterate over all nodes (instead of over {meth}`Tree.nodes`).\n"
  },
  {
    "path": "docs/development.md",
    "content": "---\njupytext:\n  text_representation:\n    extension: .md\n    format_name: myst\n    format_version: 0.12\n    jupytext_version: 1.9.1\nkernelspec:\n  display_name: Python 3\n  language: python\n  name: python3\n---\n\n```{currentmodule} tskit\n```\n\n(sec_development)=\n\n\n# Development\n\nIf you would like to add some features to `tskit`, this\ndocumentation should help you get set up and contributing.\nPlease help us to improve the documentation by either\nopening an [issue](http://github.com/tskit-dev/tskit/issues) or\n[pull request](http://github.com/tskit-dev/tskit/pulls) if you\nsee any problems.\n\nThe tskit-dev team strives to create a welcoming and open environment for\ncontributors; please see our\n[code of conduct](https://github.com/tskit-dev/.github/blob/main/CODE_OF_CONDUCT.md) for\ndetails. We wish our code and documentation to be\n[inclusive](https://chromium.googlesource.com/chromium/src/+/master/styleguide/inclusive_code.md)\nand in particular to be gender and racially neutral.\n\n(sec_development_repo_admin)=\n\n\n## Repo administration\n\ntskit is one of several packages in the tskit-dev ecosystem. Shared conventions\nfor CI workflows, dependency management, repository layout, and releases are\ndocumented in the\n[repo administration guide](https://github.com/tskit-dev/.github/blob/main/repo_administration.md)\nin the `tskit-dev/.github` repository. Maintainers should read that document\nbefore making changes to CI configuration, dependency groups, or the release process.\n\n\n(sec_development_structure)=\n\n\n## Project structure\n\nTskit is a multi-language project, which is reflected in the directory\nstructure:\n\n- The `python` directory contains the Python library and command line interface,\n  which is what most contributors are likely to be interested in. Please\n  see the {ref}`sec_development_python` section for details. 
The\n  low-level {ref}`sec_development_python_c` is also defined here.\n\n- The `c` directory contains the high-performance C library code. Please\n  see the {ref}`sec_development_c` for details on how to contribute.\n\n- The `docs` directory contains the source for this documentation,\n  which covers both the Python and C APIs. Please see the {ref}`sec_development_documentation`\n  for details.\n\nThe remaining files in the root directory of the project are for\ncontrolling {ref}`sec_development_continuous_integration` providers\nand other administrative purposes.\n\nPlease see the {ref}`sec_development_best_practices` section for\nan overview of how to contribute a new feature to `tskit`.\n\n(sec_development_getting_started)=\n\n\n## Getting started\n\n(sec_development_getting_started_requirements)=\n\n\n### Requirements\n\nTo develop the Python code you will need a working C compiler and\nsome build utilities. Additionally, the doxygen package\nis required for building the C API documentation.\nOn Debian/Ubuntu we can install these with:\n\n```bash\nsudo apt install build-essential doxygen\n```\n\nOn macOS, either `brew install doxygen` or\n`sudo port install doxygen` should get doxygen.\nYou'll also need the \"essential build\" tools:\na compiler (`gcc`) and a few other things (e.g., `make`).\n\nAll Python development is managed using [uv](https://docs.astral.sh/uv/),\nwhich takes the place of virtual/conda environments.\nIt is not strictly necessary to use uv in order to make small changes, but\nif you don't use it, you'll need to figure out how to install python\ndependencies on your own,\nand the development workflows of all tskit-dev packages are organised around\nusing uv, and therefore we strongly recommend using it. Uv is straightforward\nto install, and not invasive (existing Python installations can be completely\nisolated if you don't use features like ``uv tool`` etc which update your\n``$HOME/.local/bin``). 
Uv manages an isolated local environment per project\nand allows us to deterministically pin package versions and easily switch\nbetween Python versions, so that CI environments can be replicated exactly\nlocally.\n\nThe packages needed for development are specified as dependency groups\nin ``python/pyproject.toml`` and managed with [uv](https://docs.astral.sh/uv/).\nInstall all development dependencies by running:\n\n```bash\ncd python\nuv sync\n```\n\nSince `uv` operates from the `python/` subdirectory,\n**all `uv` commands below must be run from within that subdirectory**;\notherwise errors like \"No such file or directory\" will occur.\nThe lock file lives at `python/uv.lock` and must be kept up to date. Run\n`uv lock` after any change to the dependencies in `python/pyproject.toml`.\n\nA few extra dependencies are required if you wish to work on the\n{ref}`C library <sec_development_c_requirements>`.\n\n(sec_development_getting_started_environment)=\n\n\n### Environment\n\nTo get a local git development environment, please follow these steps:\n\n- Make a fork of the tskit repo on [GitHub](http://github.com/tskit-dev/tskit)\n- Clone your fork into a local directory:\n  ```bash\n  git clone git@github.com:YOUR_GITHUB_USERNAME/tskit.git\n  ```\n- Install the {ref}`sec_development_workflow_prek` pre-commit hook\n  (again from the ``python/`` subdirectory):\n  ```bash\n  uv run prek install\n  ```\n\nSee the {ref}`sec_development_workflow_git` section for detailed information\non the recommended way to use git and GitHub.\n\n(sec_development_workflow)=\n\n\n## Workflow\n\n(sec_development_workflow_git)=\n\n\n### Git workflow\n\nIf you would like to make an addition/fix to tskit, then follow the steps below\nto get things set up.\nIf you would just like to review someone else's proposed changes\n(either to the code or to the docs), then\nskip to {ref}`sec_development_workflow_anothers_commit`.\n\n0.  
Open an [issue](http://github.com/tskit-dev/tskit/issues) with your proposed\n    functionality/fix. If adding or changing the public API close thought should be given to\n    names and signatures of proposed functions. If consensus is reached that your\n    proposed addition should be added to the codebase, proceed!\n\n1. Make your own [fork](https://help.github.com/articles/fork-a-repo/)\n   of the `tskit` repository on GitHub, and\n   [clone](https://help.github.com/articles/cloning-a-repository/)\n   a local copy as detailed in {ref}`sec_development_getting_started_environment`.\n\n2. Make sure that your local repository has been configured with an\n   [upstream remote](\n   https://help.github.com/articles/configuring-a-remote-for-a-fork/):\n   ```bash\n   git remote add upstream https://github.com/tskit-dev/tskit.git\n   ```\n\n3. Create a \"topic branch\" to work on. One reliable way to do it\n   is to follow this recipe:\n   ```bash\n   git fetch upstream\n   git checkout -b topic_branch_name upstream/main\n   ```\n\n4. Write your code following the outline in {ref}`sec_development_best_practices`.\n   As you work on your topic branch you can add commits to it. Once you're\n   ready to share this, you can then open a\n   [pull request (PR)](https://help.github.com/articles/about-pull-requests/). This can be done at any\n   time! You don't have to have code that is completely functional and tested to get\n   feedback. Use the drop-down button to create a \"draft PR\" to indicate that it's not\n   done, and explain in the comments what feedback you need and/or what you think needs\n   to be done.\n\n5. As you code it is best to\n   [rebase](https://stdpopsim.readthedocs.io/en/latest/development.html#rebasing) your\n   work onto the `main` branch periodically (e.g. 
once a week) to keep up with changes.\n   If you merge `main` via `git pull upstream main`\n   it will create a much more complex rebase when your code is finally ready to be\n   incorporated into the main branch, so should be avoided.\n\n6. Once you're done coding add content to the tutorial and other documentation pages if\n   appropriate.\n\n7. Update the change logs at `python/CHANGELOG.rst` and `c/CHANGELOG.rst`, taking care\n   to document any breaking changes separately in a \"breaking changes\" section.\n\n8. Push your changes to your topic branch and either open the PR or, if you\n   already opened a draft PR change it to a non-draft PR by clicking \"Ready to\n   Review\".\n\n9. The tskit community will review the code, asking you to make changes where appropriate.\n   This usually takes at least two rounds of review.\n\n10. Once the review process is complete, squash the commits to the minimal set of changes -\n    usually one or two commits. Please follow\n    [this guide](https://stdpopsim.readthedocs.io/en/stable/development.html#rebasing) for\n    step-by-step instructions on rebasing and squashing commits.\n\n11. Your PR will be merged, time to celebrate! 🎉🍾\n\n\n(sec_development_workflow_anothers_commit)=\n\n\n### Checking out someone else's pull request\n\nSometimes you want to just check out someone else's pull request,\nfor the purpose of trying it out and giving them feedback.\nTo do this, you first need your own local version of the git repository,\nso you should first do steps 1 and 2 above.\n(Strictly speaking, you don't need a fork on github\nif you don't plan to edit, but it won't hurt.)\nContinuing from there, let's say you want to check out the current\nstate of the code on [pull request #854](https://github.com/tskit-dev/tskit/pull/854).\n(So, below you should replace `854` with the number of the pull request\nthat you actually want to investigate.)\nThen, continuing from above:\n\n3. 
Fetch the pull request, and store it as a local branch.\n   For instance, to name the local branch `my_pr_copy`:\n   ```bash\n   git fetch upstream pull/854/head:my_pr_copy\n   ```\n   You should probably call the branch something more descriptive,\n   though. (Also note that you might need to put `origin` instead\n   of `upstream` for the remote repository name: see `git remote -v`\n   for a list of possible remotes.)\n\n4. Check out the pull request's local branch:\n   ```bash\n   git checkout my_pr_copy\n   ```\n\nNow, your repository will be in exactly the same state as\nthat of the person who's submitted the pull request.\nGreat! Now you can test things out.\n\nTo view the documentation,\n`cd docs && make`, which should build the documentation,\nand then navigate your web browser to the `docs/_build/html/`\nsubdirectory.\n\nTo test out changes to the *code*, you can change to the `python/` subdirectory,\nand run `make` to compile the C code.\nIf you then execute python commands from this subdirectory (and only this one!),\nit will use the modified version of the package.\n(For instance, you might want to open an interactive python shell by running\n`uv run python` in the `python/` subdirectory,\nor running `uv run pytest` from this subdirectory.)\n\nAfter you're done, you should do:\n\n```bash\ngit checkout main\n```\n\nto get your repository back to the \"main\" branch of development.\nIf the pull request is changed and you want to do the same thing again,\nthen to avoid conflicts with any changes you might have made,\nfirst *delete* your local copy (by doing `git branch -d my_pr_copy`)\nand repeat the steps again.\n\n\n(sec_development_workflow_prek)=\n\n\n### Lint checks (prek)\n\nOn each commit a [prek](https://prek.j178.dev) hook will run checks for\ncode style (see the {ref}`sec_development_python_style` section for details)\nand other common problems.\n\nTo run checks manually without committing, from the `python/` subdirectory:\n\n```bash\nuv run prek 
--all-files\n```\n\nIf local results differ from CI, run `uv run prek cache clean` to clear the cache.\nTo bypass the checks temporarily use `git commit --no-verify`.\n\n(sec_development_documentation)=\n\n\n## Documentation\n\nThe documentation for tskit is written using\n[Sphinx](http://www.sphinx-doc.org/en/stable/) and contained in the `docs`\ndirectory. Narrative pages are written in\n[MyST Markdown](https://jupyterbook.org/content/myst.html) and built with\n[JupyterBook](https://jupyterbook.org/), which executes embedded Python code\ncells and inserts their output before deployment. API docstrings are written in\n[reStructuredText](http://docutils.sourceforge.net/rst.html). For the C code,\na combination of [Doxygen](http://www.doxygen.nl/) and\n[breathe](https://breathe.readthedocs.io/en/latest/) generates API documentation.\nThe docs are deployed automatically to the [tskit.dev website](https://tskit.dev/).\n\nPlease help us to improve the documentation! You can check on the list of\n[documentation issues](https://github.com/tskit-dev/tskit/issues?q=is%3Aissue+is%3Aopen+label%3Adocumentation)\non GitHub, and help us fix any, or add issues for anything that's wrong or missing.\n\n\n### Small edits\n\nIf you see a typo or some other small problem that you'd like to fix,\nthis is most easily done through the GitHub UI.\n\nMouse over the GitHub icon at the top right of the page and\nclick on the \"Suggest edit\" button. 
This will bring you to a web\neditor on GitHub for the source file in question, allowing you to\nquickly fix the typo and submit a pull request with the changes.\nFix the typo, click the \"Commit changes\", add a commit message like\n\"Fixed typo\" and click on the green \"Propose file change\" button.\nThen follow the dialogues until you've created a new pull request\nwith your changes, so that we can incorporate them.\n\nIf the change you'd like to make is in the API documentation\nfor a particular function, then you'll need to find where this\nfunction is defined first. The simplest way to do this is\nto click the green \"[source]\" link next to the function. This\nwill show you a HTML rendered version of the function, and the\nrest of the file that it is in. You can then navigate to this\nfile on GitHub, and edit it using the same approach as above.\n\n\n### Significant edits\n\nWhen making changes more substantial than typo fixes it's best\nto check out a local copy.\nFollow the steps in the {ref}`sec_development_workflow_git` to\nget a fork of tskit, a local clone and newly checked out\nfeature branch. Then follow the steps in the\n{ref}`sec_development_getting_started` section to get a\nworking development environment.\n\nOnce you are ready to make edits to the documentation,\n`cd` into the `docs` directory and run `make`.\nThis should build the HTML\ndocumentation in `docs/_build/html/`, which you can then view in\nyour browser. As you make changes, run `make` regularly and\nview the final result to see if it matches your expectations.\n\nOnce you are happy with the changes, commit your updates and\nopen a pull request on GitHub.\n\n\n(sec_development_documentation_markup)=\n\n### Markup languages\n\nBecause of the mixture of API documentation and notebook content, documentation\nis written using **two different markup languages**:\n\n- **MyST Markdown** for all narrative pages, thematic sections, and code\n  examples. 
This is a superset of [CommonMark](https://commonmark.org) that\n  enables executable Jupyter content and Sphinx cross-referencing.\n- **reStructuredText (rST)** for API docstrings embedded in the source code.\n  These are processed by Sphinx and appear in the API reference pages.\n\nSome useful links for MyST:\n\n- The [MyST cheat sheet](https://jupyterbook.org/reference/cheatsheet.html)\n- The \"Write Book Content\" section of the [Jupyter Book](https://jupyterbook.org/) docs\n- The [MyST Syntax Guide](https://myst-parser.readthedocs.io/en/latest/using/syntax.html)\n- The [Sphinx domains reference](https://www.sphinx-doc.org/en/master/usage/restructuredtext/domains.html)\n  for marking up Python and C API elements\n- The [types of source files](https://jupyterbook.org/file-types/index.html)\n  in the Jupyter Book docs (useful for understanding the MyST/rST mix)\n\nSome directives are only available in rST and must be wrapped in an\n``eval-rst`` block within a Markdown file:\n\n````md\n```{eval-rst}\n.. autoclass:: tskit.TreeSequence\n```\n````\n\n(sec_development_documentation_api)=\n\n### API Reference\n\nAPI reference documentation comes from\n[docstrings](https://www.python.org/dev/peps/pep-0257/) in the source code,\nwritten in rST. Docstrings should be **concise** and **precise**. Examples\nshould not be embedded directly in docstrings; instead, each significant\nparameter should link to the relevant section in the narrative documentation.\n\n(sec_development_documentation_examples)=\n\n### Examples\n\nNarrative sections should provide context and worked examples using inline\nJupyter code cells. 
These behave exactly like cells in a Jupyter notebook —\nthe whole page is executed as one notebook during the build.\n\nCode cells are written like this:\n\n````md\n```{code-cell}\nimport tskit\n# example code here\n```\n````\n\n:::{warning}\nFor a page to be executed as a notebook you **must** have the correct\n[YAML frontmatter](https://jupyterbook.org/reference/cheatsheet.html#executable-code)\nat the top of the file.\n:::\n\n(sec_development_documentation_cross_referencing)=\n\n### Cross referencing\n\nUse the ``{ref}`` role to link to labelled sections within the docs:\n\n````md\nSee the {ref}`sec_development_documentation_cross_referencing` section for details.\n````\n\nSections should be labelled hierarchically immediately above the heading:\n\n````md\n(sec_development_documentation_cross_referencing)=\n### Cross referencing\n````\n\nThe label is used as link text automatically, but can be overridden:\n\n````md\nSee {ref}`this section <sec_development_documentation_cross_referencing>` for more.\n````\n\nTo refer to API elements, use the appropriate inline role:\n\n````md\nThe {class}`.TreeSequence` class, the {meth}`.TreeSequence.trees` method,\nand the {func}`.load` function.\n````\n\nFrom an rST docstring, use the colon-prefixed equivalents:\n\n````rst\nSee :ref:`sec_development_documentation_cross_referencing` for details.\nThe :meth:`.TreeSequence.trees` method returns an iterator.\n````\n\nSome errors may occur because of out-of-date cached results,\nwhich can be cleared by running `make clean`.\n\n(sec_development_python)=\n\n\n## Python library\n\nThe Python library is defined in the `python` directory. 
We assume throughout\nthis section that you have `cd`'d into this directory.\nThe low-level C extension is built automatically as part of `uv sync`.\nPlease see the {ref}`sec_development_python_troubleshooting` section for help\nif you encounter problems with compiling or running the tests.\n\n\n### Getting started\n\nAfter you have installed the basic {ref}`sec_development_getting_started_requirements`\nand created a {ref}`development environment <sec_development_getting_started_environment>`,\nrun `uv sync` in the `python` directory. This will install all dependencies and build\nthe low-level {ref}`sec_development_python_c` module automatically.\n\nTo make sure that your development environment is working, run some\n{ref}`tests <sec_development_python_tests>`.\n\n\n### Layout\n\nCode for the `tskit` module is in the `tskit` directory. The code is split\ninto a number of modules that are roughly split by function; for example,\ncode for visualisation is kept in `tskit/drawing.py`.\n\nTest code is contained in the `tests` directory. Tests are also roughly split\nby function, so that tests for the `drawing` module are in the\n`tests/test_drawing.py` file. This is not a one-to-one mapping, though.\n\nDevelopment dependencies are specified in the `pyproject.toml` file\nand can be installed using `uv sync`.\n\n(sec_development_python_style)=\n\n\n### Code style\n\nPython code in tskit is formatted and linted using\n[ruff](https://docs.astral.sh/ruff/). These checks run automatically as part of\nthe {ref}`prek checks <sec_development_workflow_prek>` on each commit.\n\nRuff is quite opinionated and it gains more opinions on each version.\nWe therefore pin ruff to an exact version and maintain a list of \"ignore\"\nclasses in pyproject.toml. 
The version of ruff should be updated periodically\nwith fixes applied or the ignore list extended as necessary.\n\n\n(sec_development_python_tests)=\n\n\n### Tests\n\nThe tests are defined in the `tests` directory, and run using\n[pytest](https://docs.pytest.org/en/stable/) from the `python` directory.\nIf you want to run the tests in a particular module (say, `test_tables.py`), use:\n\n```bash\nuv run pytest tests/test_tables.py\n```\n\nTo run all the tests in a particular class in this module (say, `TestNodeTable`)\nuse:\n\n```bash\nuv run pytest tests/test_tables.py::TestNodeTable\n```\n\nTo run a specific test case in this class (say, `test_copy`) use:\n\n```bash\nuv run pytest tests/test_tables.py::TestNodeTable::test_copy\n```\n\nIn general, you can copy-paste the string describing a failed test from the\noutput of pytest to re-run just that test (including specific parametrized\narguments present as `[args]`).\n\nYou can also run tests with a keyword expression search. For example this will\nrun all tests that have `TestNodeTable` but not `copy` in their name:\n\n```bash\nuv run pytest -k \"TestNodeTable and not copy\"\n```\n\nWhen developing your own tests, it is much quicker to run the specific tests\nthat you are developing rather than rerunning large sections of the test\nsuite each time.\n\nTo run all of the tests, we can use:\n\n```bash\nuv run pytest\n```\n\nBy default the tests are run on 4 cores; if you have more you can specify:\n\n```bash\nuv run pytest -n8\n```\n\nA few of the tests take most of the time; we can skip the slow tests to get the test run\nunder 20 seconds on a modern workstation:\n\n```bash\nuv run pytest --skip-slow\n```\n\nIf you have an agent running the tests in a sandboxed environment, you may need to\nskip tests that require network access or FIFOs:\n\n```bash\nuv run pytest --skip-network\n```\n\nIf you have a lot of failing tests it can be useful to have a shorter summary\nof the failing lines:\n\n```bash\nuv run 
pytest --tb=line\n```\n\nIf you need to see the output of tests (e.g. `print` statements) then you need to use\nthese flags to run a single thread and capture output:\n\n```bash\nuv run pytest -n0 -vs\n```\n\nAll new code must have high test coverage, which will be checked as part of the\n{ref}`sec_development_continuous_integration`\ntests by [CodeCov](https://codecov.io/gh/tskit-dev/tskit/).\nAll tests must pass for a PR to be accepted.\n\n\n### Packaging\n\nThe `tskit` Python module follows the current\n[best-practices](http://packaging.python.org) advocated by the\n[Python Packaging Authority](http://pypa.io/en/latest/). The primary means of\ndistribution is through [PyPI](http://pypi.python.org/pypi/tskit), which provides the\ncanonical source for each release.\n\nA package for [conda](http://conda.io/docs/) is also available on\n[conda-forge](https://github.com/conda-forge/tskit-feedstock).\n\n\n### Interfacing with low-level module\n\nMuch of the high-level Python code only exists to provide a simpler interface to\nthe low-level {ref}`_tskit <sec_development_python_c>` module.\nAs such, many objects (e.g. 
{class}`.Tree`)\nare really just a shallow layer on top of the corresponding low-level object.\nThe usual convention here is to keep a reference to the low-level object via\na private instance variable such as `self._ll_tree`.\n\n\n### Command line interface\n\nThe command line interface for `tskit` is defined in the `tskit/cli.py` file.\nThe entry point `tskit_main` is declared under `[project.scripts]` in\n`python/pyproject.toml`, which makes `tskit` available as a command after\ninstallation.\n\nThe CLI can also be run using `uv run python -m tskit` during development.\n\n(sec_development_installing)=\n\n### Installing development versions\n\nWe **strongly** recommend that you do not install development versions of\n`tskit` and instead use versions released to PyPI and conda-forge.\nHowever, if you really need to be on the bleeding edge, you can use\nthe following command to install:\n\n```bash\npython3 -m pip install git+https://github.com/tskit-dev/tskit.git#subdirectory=python\n```\n\n(Because the Python package is not defined in the project root directory, using pip to\ninstall directly from GitHub requires you to specify `subdirectory=python`.)\n\n\n(sec_development_python_troubleshooting)=\n\n### Troubleshooting\n\n- If `make` is giving you strange errors, or if tests are failing for\n  strange reasons, try running `make clean` in the project root\n  and then rebuilding.\n- Beware of multiple versions of the python library installed by different\n  programs (e.g., pip versus installing locally from source)! In python,\n  `tskit.__file__` will tell you the location of the package that is being\n  used.\n- Installation of development versions is not supported on Windows. Windows\n  users should try using a Linux environment by using\n  [WSL](https://learn.microsoft.com/windows/wsl/), for example.\n\n\n(sec_development_c)=\n\n## C Library\n\nThe Python module uses the high-performance tskit {ref}`sec_c_api`\nbehind the scenes. 
All C code and associated development infrastructure\nis held in the `c` directory.\n\n\n(sec_development_c_requirements)=\n\n### Requirements\n\nWe use the\n[meson](https://mesonbuild.com) build system in conjunction with\n[ninja-build](https://ninja-build.org) to compile the C code.\nUnit tests use the [CUnit](http://cunit.sourceforge.net) library\nand we use [clang-format](https://clang.llvm.org/docs/ClangFormat.html)\nto automatically format code.\nOn Debian/Ubuntu, install the system dependencies with:\n\n```bash\nsudo apt install libcunit1-dev ninja-build\n```\n\nOn macOS, you can run `brew install cunit ninja`\nor `sudo port install cunit ninja`.\n\nYou can install meson using uv:\n\n```bash\nuv tool install meson\n```\n\nAn exact version of clang-format is required because formatting rules\nchange from version to version. This is why we pin to an exact version\nof clang-format in pyproject.toml, which gets used by prek linting.\nIf you wish to run clang-format yourself (e.g., within your editor)\na straightforward way to do this is to use ``uv tool install clang-format==[version]``,\nwhich will install to your PATH.\nHowever, you will need to manually keep track of what version is installed\n(``uv tool list`` is useful for this).\n\n\n(sec_development_c_code_style)=\n\n### Code style\n\nC code is formatted using\n[clang-format](https://clang.llvm.org/docs/ClangFormat.html)\nwith a custom configuration. This is checked as part of the\n{ref}`prek checks <sec_development_workflow_prek>`. To manually format all files run:\n\n```bash\nuv run prek --all-files\n```\n\nIf you are doing this in the ``c`` directory, use\n``uv run --project=../python prek --all-files``.\n\n\nIf you are getting obscure errors from prek, sometimes this is caused by\nprek searching for configuration within subdirectories. 
To avoid this, tell\nprek where to find its config explicitly:\n\n```bash\nuv run prek --all-files -c prek.toml\n```\n\n\n### Building\n\nWe use [meson](https://mesonbuild.com) and [ninja-build](https://ninja-build.org) to\ncompile the C code. Meson keeps all compiled binaries in a build directory (this has many advantages\nsuch as allowing multiple builds with different options to coexist). The build configuration\nis defined in `meson.build`. To set up the initial build\ndirectory, run\n\n```bash\ncd c\nmeson setup build\n```\n\nTo setup a debug build add `--buildtype=debug` to the above command.\n(Re-running the command with this argument will have the desired effect.)\nThis will set the `TSK_TRACE_ERRORS`\nflag, which will print error messages to `stderr` when errors occur which is useful for debugging.\n\nTo compile the code run\n\n```bash\nninja -C build\n```\n\nAll the tests and other artefacts are in the build directory. Individual test\nsuites can be run, via (e.g.) `./build/test_trees`. To run all of the tests,\nrun\n\n```bash\nninja -C build test\n```\n\nFor vim users, the [mesonic](https://www.vim.org/scripts/script.php?script_id=5378) plugin\nsimplifies this process and allows code to be compiled seamlessly within the\neditor.\n\n### Compile flags\n\nIf the flag `TSK_TRACE_ERRORS` is defined (by e.g. `-DTSK_TRACE_ERRORS` to gcc),\nthen error messages will be printed to `stderr` when errors occur. This also allows\nbreakpoints to be set in the `_tsk_trace_error` function to break on all errors.\n\n### Unit Tests\n\nThe C-library has an extensive suite of unit tests written using\n[CUnit](http://cunit.sourceforge.net). These tests aim to establish that the\nlow-level APIs work correctly over a variety of inputs, and particularly, that\nthe tests don't result in leaked memory or illegal memory accesses. 
All tests\nare run under valgrind to make sure of this as part of the\n{ref}`sec_development_continuous_integration`.\n\nTests are defined in the `tests/*.c` files. These are roughly split by\nthe source files, so that the tests for functionality in the `tskit/tables.c` file\nwill be tested in `tests/test_tables.c`.\nTo run all the tests\nin the `test_tables` suite, run (e.g.) `./build/test_tables`.\nTo just run a specific test on its own, provide\nthis test name as a command line argument, e.g.:\n\n```bash\n./build/test_tables test_node_table\n```\n\nAfter making sure tests pass, you should next run the tests through valgrind,\nto check for memory leaks, for instance:\n\n```bash\nvalgrind ./build/test_tables test_node_table\n```\n\nWhile 100% test coverage is not feasible for C code, we aim to cover all code\nthat can be reached. (Some classes of error such as malloc failures\nand IO errors are difficult to simulate in C.) Code coverage statistics are\nautomatically tracked using [CodeCov](https://codecov.io/gh/tskit-dev/tskit/).\n\n\n### Viewing coverage reports\n\nTo generate and view coverage reports for the C tests locally:\n\nCompile with coverage enabled:\n   ```bash\n   cd c\n   meson setup build -D b_coverage=true\n   ninja -C build\n   ```\n\nRun the tests:\n   ```bash\n   ninja -C build test\n   ```\n\nGenerate coverage data:\n   ```bash\n   cd build\n   find ../tskit/*.c -type f -printf \"%f\\n\" | xargs -i gcov -pb libtskit.a.p/tskit_{}.gcno ../tskit/{}\n   ```\n\nThe generated `.gcov` files can then be viewed directly with `cat filename.c.gcov`.\nLines prefixed with `#####` were never executed, lines with numbers show execution counts, and lines with `-` are non-executable code.\n\n`lcov` can be used to create browsable HTML coverage reports:\n  ```bash\n  sudo apt-get install lcov  # if needed\n  lcov --capture --directory build --output-file coverage.info\n  genhtml coverage.info --output-directory coverage_html\n  firefox coverage_html/index.html\n 
 ```\n\n### Coding conventions\n\nThe code is written using the [C99](https://en.wikipedia.org/wiki/C99) standard. All\nvariable declarations should be done at the start of a function, and functions\nkept short and simple where at all possible.\n\nNo global or module level variables are used for production code.\n\nFunction parameters should be marked as ``const`` where possible.\nParameters that are used as return variables should come last.\nThe common ``options`` parameter should be the last non-output\nparameter.\n\nPlease see the {ref}`sec_c_api_overview_structure` section for more information\nabout how the API is structured.\n\n### Error handling\n\nA critical element of producing reliable C programs is consistent error handling\nand checking of return values. All return values **must** be checked! In tskit,\nall functions (except the most trivial accessors) return an integer to indicate\nsuccess or failure. Any negative value is an error, and must be handled accordingly.\nThe following pattern is canonical:\n\n```C\n   ret = tsk_tree_do_something(self, argument);\n    if (ret != 0) {\n        goto out;\n    }\n    // rest of function\nout:\n    return ret;\n```\n\nHere we test the return value of `tsk_tree_do_something` and if it is non-zero,\nabort the function and return this same value from the current function. This\nis a bit like throwing an exception in higher-level languages, but discipline\nis required to ensure that the error codes are propagated back to the original\ncaller correctly.\n\nParticular care must be taken in functions that allocate memory, because\nwe must ensure that this memory is freed in all possible success and\nfailure scenarios. 
The following pattern is used throughout for this purpose:\n\n```C\n    double *x = NULL;\n\n    x = malloc(n * sizeof(double));\n    if (x == NULL) {\n        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);\n        goto out;\n    }\n    // rest of function\nout:\n    tsk_safe_free(x);\n    return ret;\n```\n\nIt is vital here that `x` is initialised to `NULL` so that we are guaranteed\ncorrect behaviour in all cases. For this reason, the convention is to declare all\npointer variables on a single line and to initialise them to `NULL` as part\nof the declaration.\n\nError codes are defined in `core.h`, and these can be translated into a\nmessage using `tsk_strerror(err)`.\n\nWhen setting error codes in the C code, please use the `tsk_trace_error` function.\nIf `TSK_TRACE_ERRORS` is defined, this will print a message to stderr with the\ndetails of the error.\n\n\n#### Using assertions\n\nThere are two different ways to express assertions in tskit code.\nThe first is using the custom `tsk_bug_assert` macro, which is used to\nmake inexpensive checks at key points during execution. These assertions\nare always run, regardless of the compiler settings, and should not\ncontribute significantly to the overall runtime.\n\nMore expensive assertions, used, for example, to check pre and post conditions\non performance critical loops should be expressed using the standard\n`assert` macro from `assert.h`. 
These assertions will be checked\nduring the execution of C unit tests, but will not be enabled when\ncompiled into the Python C module.\n\n\n### Type conventions\n\n- `tsk_id_t` is an ID for any entity in a table.\n- `tsk_size_t` refers to any size or count values in tskit.\n- `size_t` is a standard C type and refers to the size of a memory block.\n  This should only be used when computing memory block sizes for functions\n  like `malloc` or passing the size of a memory buffer as a parameter.\n- Error indicators (the return type of most functions) are `int`.\n- `uint32_t` etc should be avoided (any that exist are a leftover from older\n  code that didn't use `tsk_size_t` etc.)\n- `int64_t` and `uint64_t` are sometimes useful when working with\n  bitstrings (e.g. to implement a set).\n\n(sec_development_python_c)=\n\n\n## Python C Interface\n\n\n### Overview\n\nThe Python C interface is defined in the `python` directory\nand written using the [Python C API](https://docs.python.org/3.6/c-api/).\nThe source code for this interface is in the `_tskitmodule.c` file.\nWhen compiled, this produces the `_tskit` module,\nwhich is imported by the high-level Python code. The low-level Python module is\nnot intended to be used directly by users and may change arbitrarily over time.\n\nThe usual pattern in the low-level Python API is to define a Python class\nwhich corresponds to a given \"class\" in the C API. For example, we define\na `TreeSequence` class, which is essentially a thin wrapper around the\n`tsk_treeseq_t` type from the C library.\n\nThe `_tskitmodule.c` file follows the standard conventions given in the\n[Python documentation](https://docs.python.org/3.6/extending/index.html).\n\n\n### Compiling and debugging\n\nThe `setup.py` file describes the requirements for the low-level `_tskit`\nmodule and how it is built from source. 
The module is built automatically by\n`uv sync`, but if you modify the C extension code you will need to rebuild it.\nThe simplest way to do this is to run `make` in the `python` directory:\n\n```bash\nmake\n```\n\nIf `make` is not available, you can run the same command manually:\n\n```bash\nuv run python setup.py build_ext --inplace\n```\n\nIt is sometimes useful to specify compiler flags when building the low\nlevel module. For example, to make a debug build you can use:\n\n```bash\nCFLAGS='-Wall -O0 -g' make\n```\n\nIf you need to track down a segfault etc, running some code through gdb can\nbe very useful. For example, to run a particular test case, we can do:\n\n\n```bash\nuv run gdb python\n(gdb) run  -m pytest -vs tests/test_tables.py::TestNodeTable::test_copy\nStarting program: /usr/bin/python3 run  -m pytest tests/test_tables.py::TestNodeTable::test_copy\n[Thread debugging using libthread_db enabled]\nUsing host libthread_db library \"/lib/x86_64-linux-gnu/libthread_db.so.1\".\n[New Thread 0x7ffff1e48700 (LWP 1503)]\n[New Thread 0x7fffef647700 (LWP 1504)]\n[New Thread 0x7fffeee46700 (LWP 1505)]\n[Thread 0x7fffeee46700 (LWP 1505) exited]\n[Thread 0x7fffef647700 (LWP 1504) exited]\n[Thread 0x7ffff1e48700 (LWP 1503) exited]\ncollected 1 item\n\ntests/test_tables.py::TestNodeTable::test_copy PASSED\n\n[Inferior 1 (process 1499) exited normally]\n(gdb)\n```\n\nTracing problems in C code is many times more difficult when the Python C API\nis involved because of the complexity of Python's memory management. It is\nnearly always best to start by making sure that the tskit C API part of your\naddition is thoroughly tested with valgrind before resorting to the debugger.\n\n\n### Testing for memory leaks\n\nThe Python C API can be subtle, and it is easy to get the reference counting wrong.\nThe `stress_lowlevel.py` script makes it easier to track down memory leaks\nwhen they do occur. 
The script runs the unit tests in a loop, and outputs\nmemory usage statistics.\n\n\n(sec_development_continuous_integration)=\n\n\n## Continuous Integration tests\n\nContinuous integration is handled by [GitHub Actions](https://help.github.com/en/actions).\ntskit uses shared workflows defined in the\n[tskit-dev/.github](https://github.com/tskit-dev/.github) repository:\n\n- **lint** — runs ruff and clang (using prek) against all files\n- **python-tests** — runs the pytest suite with coverage on Linux, macOS and Windows\n- **python-c-tests** — builds the C extension with coverage and runs low-level tests\n- **c-tests** — runs C unit tests under gcc, clang, and valgrind\n- **docs** — builds the documentation and deploys it on merge to `main`\n- **python-packaging** — validates the sdist and wheel\n\n[CodeCov](https://codecov.io/gh) tracks test coverage for Python and C.\n\n\n(sec_development_best_practices)=\n\n\n## Best Practices for Development\n\nThe following is a rough guide of best practices for contributing a function to the\ntskit codebase.\n\nNote that this guide covers the most complex case of adding a new function to both\nthe C and Python APIs.\n\n0.  Draft a docstring for your function, that describes exactly what the function\n    takes as arguments and what it returns under what conditions. Update this\n    docstring as you go along and make modifications.\n1.  Write your function in Python: in `python/tests/` find the test module that\n    pertains to the functionality you wish to add. For instance, the kc_distance\n    metric was added to\n    [test_topology.py](https://github.com/tskit-dev/tskit/blob/main/python/tests/test_topology.py).\n    Add a python version of your function here.\n2.  Create a new class in this module to write unit tests for your function: in addition\n    to making sure that your function is correct, make sure it fails on inappropriate inputs.\n    This can often require judgement. 
For instance, {meth}`Tree.kc_distance` fails on a tree\n    with multiple roots, but allows users to input parameter values that are nonsensical,\n    as long as they don't break functionality. See the\n    [TestKCMetric](https://github.com/tskit-dev/tskit/blob/4e707ea04adca256036669cd852656a08ec45590/python/tests/test_topology.py#L293) for example.\n3.  Write your function in C: check out the {ref}`sec_c_api` for guidance. There\n    are also many examples in the\n    [c directory](https://github.com/tskit-dev/tskit/tree/main/c/tskit).\n    Your function will probably go in\n    [trees.c](https://github.com/tskit-dev/tskit/blob/main/c/tskit/trees.c).\n4.  Write a few tests for your function in C: again, write your tests in\n    [tskit/c/tests/test_trees.c](https://github.com/tskit-dev/tskit/blob/main/c/tests/test_trees.c).\n    The key here is code coverage, you don't need to worry as much about covering every\n    corner case, as we will proceed to link this function to the Python tests you\n    wrote earlier.\n5.  Create a low-level definition of your function using Python's C API: this will\n    go in [_tskitmodule.c](https://github.com/tskit-dev/tskit/blob/main/python/_tskitmodule.c).\n6.  Test your low-level implementation in\n    [tskit/python/tests/test_python_c.py](https://github.com/tskit-dev/tskit/blob/main/python/tests/test_python_c.py):\n    again, these tests don't need to be as comprehensive as your first python tests,\n    instead, they should focus on the interface, e.g., does the function behave\n    correctly on malformed inputs?\n7.  Link your C function to the Python API: write a function in tskit's Python API,\n    for example the kc_distance function lives in\n    [tskit/python/tskit/trees.py](https://github.com/tskit-dev/tskit/blob/main/python/tskit/trees.py).\n8.  Modify your Python tests to test the new C-linked function: if you followed\n    the example of other tests, you might need to only add a single line of code\n    here. 
In this case, the tests are well factored so that we can easily compare\n    the results from both the Python and C versions.\n9.  Finalize your docstring and insert it into the Python API: for instance, the kc_distance\n    docstring is in\n    [tskit/python/tskit/trees.py](https://github.com/tskit-dev/tskit/blob/main/python/tskit/trees.py).\n    Ensure that your docstring renders correctly by building the documentation\n    (see {ref}`sec_development_documentation`).\n\n\n## Troubleshooting\n\n### prek is blocking me!\n\nThe prek hook is designed to make things easier, not harder. If the checks are\nblocking you, feel free to skip them with `--no-verify` and sort it out before\nthe PR is merged. There’s no shame in a broken build.\n\n```bash\n> git commit -a -m 'my changes' --no-verify\n```\n\n### prek reports unexpected failures\n\nIf prek reports failures on files you didn’t edit, try clearing the cache:\n\n```bash\n> uv run prek cache clean\n```\n\nIf that doesn’t help, you can reinstall the hook:\n\n```bash\n> uv run prek uninstall\n> uv run prek install\n```\n\n\n## Benchmarking\n\nTskit has a simple benchmarking tool to help keep track of performance.\n\n### Running benchmarks\n\nThe benchmark suite can be run with:\n\n```bash\n> cd python/benchmark\n> python run.py\n```\n\nA subset of benchmarks can be run by specifying a string. For example, the following command runs all the benchmarks whose names contain \"genotype\", e.g. \"genotype_matrix\".\n\n```bash\n> python run.py -k genotype\n```\n\nIf desired, the results of the benchmarks can be printed to STDOUT.\n\n```bash\n> python run.py -k genotype -p\n```\n\nResults are written to `bench-results.json` in the same folder. Note that if any version of `tskit`\nis installed then that will be used for the benchmarking. To use the local development version of\ntskit ensure you have run `pip uninstall tskit` before running the benchmarking. 
The version used is\nshown in the header of the report.\n\n### Adding a new benchmark\n\nThe benchmarks are specified by the `config.yaml` file in `python/benchmark`. To add a new benchmark\nadd an entry to the `benchmarks` dictionary. For example:\n\n```yaml\n  - code: do_my_thing({option_name})\n    setup: |\n      import a_module\n    name: my_benchmark #optional, the code is used by default\n    parameters:\n      option_name:\n        - \"reticulate_splines\"\n        - \"foobar\"\n```\n\nStrings are interpreted as Python f-strings, so you can use the `parameters` dictionary to provide\nvalues that will be interpolated into both the `setup` and `code` strings.\n\nThe suite can be run for all released versions with the `run-for-all-releases.py` script.\n\n## Releasing a new version\n\nSee the [repo administration guide](https://github.com/tskit-dev/.github/blob/main/repo_administration.md)\nfor the release process. Tskit has both a C API release and a Python package release,\neach covered in the tskit/kastore section of that document.\n\nIt is worth running the benchmarks (see above) before a Python release to check\nfor any unexpected major regressions. For a major release the website\n(github repo tskit-dev/tskit-site) should be updated with a notebook of new\nfeatures and the `bench-results.html` updated.\n"
  },
  {
    "path": "docs/doxygen/Doxyfile",
    "content": "# Doxyfile 1.9.1\n\n# This file describes the settings to be used by the documentation system\n# doxygen (www.doxygen.org) for a project.\n#\n# All text after a double hash (##) is considered a comment and is placed in\n# front of the TAG it is preceding.\n#\n# All text after a single hash (#) is considered a comment and will be ignored.\n# The format is:\n# TAG = value [value, ...]\n# For lists, items can also be appended using:\n# TAG += value [value, ...]\n# Values that contain spaces should be placed between quotes (\\\" \\\").\n\n#---------------------------------------------------------------------------\n# Project related configuration options\n#---------------------------------------------------------------------------\n\n# This tag specifies the encoding used for all characters in the configuration\n# file that follow. The default is UTF-8 which is also the encoding used for all\n# text before the first occurrence of this tag. Doxygen uses libiconv (or the\n# iconv built into libc) for the transcoding. See\n# https://www.gnu.org/software/libiconv/ for the list of possible encodings.\n# The default value is: UTF-8.\n\nDOXYFILE_ENCODING      = UTF-8\n\n# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by\n# double-quotes, unless you are using Doxywizard) that should identify the\n# project for which the documentation is generated. This name is used in the\n# title of most generated pages and in a few other places.\n# The default value is: My Project.\n\nPROJECT_NAME           = tskit\n\n# The PROJECT_NUMBER tag can be used to enter a project or revision number. This\n# could be handy for archiving the generated documentation or if some version\n# control system is used.\n\nPROJECT_NUMBER         =\n\n# Using the PROJECT_BRIEF tag one can provide an optional one line description\n# for a project that appears at the top of each page and should give viewer a\n# quick idea about the purpose of the project. 
Keep the description short.\n\nPROJECT_BRIEF          = \"The tree sequence toolkit\"\n\n# With the PROJECT_LOGO tag one can specify a logo or an icon that is included\n# in the documentation. The maximum height of the logo should not exceed 55\n# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy\n# the logo to the output directory.\n\nPROJECT_LOGO           =\n\n# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path\n# into which the generated documentation will be written. If a relative path is\n# entered, it will be relative to the location where doxygen was started. If\n# left blank the current directory will be used.\n\nOUTPUT_DIRECTORY       =\n\n# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub-\n# directories (in 2 levels) under the output directory of each output format and\n# will distribute the generated files over these directories. Enabling this\n# option can be useful when feeding doxygen a huge amount of source files, where\n# putting all generated files in the same directory would otherwise causes\n# performance problems for the file system.\n# The default value is: NO.\n\nCREATE_SUBDIRS         = NO\n\n# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII\n# characters to appear in the names of generated files. If set to NO, non-ASCII\n# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode\n# U+3044.\n# The default value is: NO.\n\nALLOW_UNICODE_NAMES    = NO\n\n# The OUTPUT_LANGUAGE tag is used to specify the language in which all\n# documentation generated by doxygen is written. 
Doxygen will use this\n# information to generate all constant output in the proper language.\n# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese,\n# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States),\n# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian,\n# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages),\n# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian,\n# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian,\n# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish,\n# Ukrainian and Vietnamese.\n# The default value is: English.\n\nOUTPUT_LANGUAGE        = English\n\n# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all\n# documentation generated by doxygen is written. Doxygen will use this\n# information to generate all generated output in the proper direction.\n# Possible values are: None, LTR, RTL and Context.\n# The default value is: None.\n\nOUTPUT_TEXT_DIRECTION  = None\n\n# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member\n# descriptions after the members that are listed in the file and class\n# documentation (similar to Javadoc). Set to NO to disable this.\n# The default value is: YES.\n\nBRIEF_MEMBER_DESC      = YES\n\n# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief\n# description of a member or function before the detailed description\n#\n# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the\n# brief descriptions will be completely suppressed.\n# The default value is: YES.\n\nREPEAT_BRIEF           = YES\n\n# This tag implements a quasi-intelligent brief description abbreviator that is\n# used to form the text in various listings. 
Each string in this list, if found\n# as the leading text of the brief description, will be stripped from the text\n# and the result, after processing the whole list, is used as the annotated\n# text. Otherwise, the brief description is used as-is. If left blank, the\n# following values are used ($name is automatically replaced with the name of\n# the entity):The $name class, The $name widget, The $name file, is, provides,\n# specifies, contains, represents, a, an and the.\n\nABBREVIATE_BRIEF       = \"The $name class\" \\\n                         \"The $name widget\" \\\n                         \"The $name file\" \\\n                         is \\\n                         provides \\\n                         specifies \\\n                         contains \\\n                         represents \\\n                         a \\\n                         an \\\n                         the\n\n# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then\n# doxygen will generate a detailed section even if there is only a brief\n# description.\n# The default value is: NO.\n\nALWAYS_DETAILED_SEC    = NO\n\n# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all\n# inherited members of a class in the documentation of that class as if those\n# members were ordinary class members. Constructors, destructors and assignment\n# operators of the base classes will not be shown.\n# The default value is: NO.\n\nINLINE_INHERITED_MEMB  = NO\n\n# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path\n# before files name in the file list and in the header files. If set to NO the\n# shortest path that makes the file name unique will be used\n# The default value is: YES.\n\nFULL_PATH_NAMES        = YES\n\n# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path.\n# Stripping is only done if one of the specified strings matches the left-hand\n# part of the path. 
The tag can be used to show relative paths in the file list.\n# If left blank the directory from which doxygen is run is used as the path to\n# strip.\n#\n# Note that you can specify absolute paths here, but also relative paths, which\n# will be relative from the directory where doxygen is started.\n# This tag requires that the tag FULL_PATH_NAMES is set to YES.\n\nSTRIP_FROM_PATH        =\n\n# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the\n# path mentioned in the documentation of a class, which tells the reader which\n# header file to include in order to use a class. If left blank only the name of\n# the header file containing the class definition is used. Otherwise one should\n# specify the list of include paths that are normally passed to the compiler\n# using the -I flag.\n\nSTRIP_FROM_INC_PATH    =\n\n# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but\n# less readable) file names. This can be useful is your file systems doesn't\n# support long names like on DOS, Mac, or CD-ROM.\n# The default value is: NO.\n\nSHORT_NAMES            = NO\n\n# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the\n# first line (until the first dot) of a Javadoc-style comment as the brief\n# description. If set to NO, the Javadoc-style will behave just like regular Qt-\n# style comments (thus requiring an explicit @brief command for a brief\n# description.)\n# The default value is: NO.\n\nJAVADOC_AUTOBRIEF      = NO\n\n# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line\n# such as\n# /***************\n# as being the beginning of a Javadoc-style comment \"banner\". 
If set to NO, the\n# Javadoc-style will behave just like regular comments and it will not be\n# interpreted by doxygen.\n# The default value is: NO.\n\nJAVADOC_BANNER         = NO\n\n# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first\n# line (until the first dot) of a Qt-style comment as the brief description. If\n# set to NO, the Qt-style will behave just like regular Qt-style comments (thus\n# requiring an explicit \\brief command for a brief description.)\n# The default value is: NO.\n\nQT_AUTOBRIEF           = NO\n\n# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a\n# multi-line C++ special comment block (i.e. a block of //! or /// comments) as\n# a brief description. This used to be the default behavior. The new default is\n# to treat a multi-line C++ comment block as a detailed description. Set this\n# tag to YES if you prefer the old behavior instead.\n#\n# Note that setting this tag to YES also means that rational rose comments are\n# not recognized any more.\n# The default value is: NO.\n\nMULTILINE_CPP_IS_BRIEF = NO\n\n# By default Python docstrings are displayed as preformatted text and doxygen's\n# special commands cannot be used. By setting PYTHON_DOCSTRING to NO the\n# doxygen's special commands can be used and the contents of the docstring\n# documentation blocks is shown as doxygen documentation.\n# The default value is: YES.\n\nPYTHON_DOCSTRING       = YES\n\n# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the\n# documentation from any documented member that it re-implements.\n# The default value is: YES.\n\nINHERIT_DOCS           = YES\n\n# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new\n# page for each member. 
If set to NO, the documentation of a member will be part\n# of the file/class/namespace that contains it.\n# The default value is: NO.\n\nSEPARATE_MEMBER_PAGES  = NO\n\n# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen\n# uses this value to replace tabs by spaces in code fragments.\n# Minimum value: 1, maximum value: 16, default value: 4.\n\nTAB_SIZE               = 4\n\n# This tag can be used to specify a number of aliases that act as commands in\n# the documentation. An alias has the form:\n# name=value\n# For example adding\n# \"sideeffect=@par Side Effects:\\n\"\n# will allow you to put the command \\sideeffect (or @sideeffect) in the\n# documentation, which will result in a user-defined paragraph with heading\n# \"Side Effects:\". You can put \\n's in the value part of an alias to insert\n# newlines (in the resulting output). You can put ^^ in the value part of an\n# alias to insert a newline as if a physical newline was in the original file.\n# When you need a literal { or } or , in the value part of an alias you have to\n# escape them by means of a backslash (\\), this can lead to conflicts with the\n# commands \\{ and \\} for these it is advised to use the version @{ and @} or use\n# a double escape (\\\\{ and \\\\})\n\nALIASES                = \"rst=\\verbatim embed:rst\" \\\n                         endrst=\\endverbatim\n\n# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources\n# only. Doxygen will then generate output that is more tailored for C. For\n# instance, some of the names that are used will be different. The list of all\n# members will be omitted, etc.\n# The default value is: NO.\n\nOPTIMIZE_OUTPUT_FOR_C  = YES\n\n# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or\n# Python sources only. Doxygen will then generate output that is more tailored\n# for that language. 
For instance, namespaces will be presented as packages,\n# qualified scopes will look different, etc.\n# The default value is: NO.\n\nOPTIMIZE_OUTPUT_JAVA   = NO\n\n# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran\n# sources. Doxygen will then generate output that is tailored for Fortran.\n# The default value is: NO.\n\nOPTIMIZE_FOR_FORTRAN   = NO\n\n# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL\n# sources. Doxygen will then generate output that is tailored for VHDL.\n# The default value is: NO.\n\nOPTIMIZE_OUTPUT_VHDL   = NO\n\n# Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice\n# sources only. Doxygen will then generate output that is more tailored for that\n# language. For instance, namespaces will be presented as modules, types will be\n# separated into more groups, etc.\n# The default value is: NO.\n\nOPTIMIZE_OUTPUT_SLICE  = NO\n\n# Doxygen selects the parser to use depending on the extension of the files it\n# parses. With this tag you can assign which parser to use for a given\n# extension. Doxygen has a built-in mapping, but you can override or extend it\n# using this tag. The format is ext=language, where ext is a file extension, and\n# language is one of the parsers supported by doxygen: IDL, Java, JavaScript,\n# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, VHDL,\n# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran:\n# FortranFree, unknown formatted Fortran: Fortran. In the later case the parser\n# tries to guess whether the code is fixed or free formatted code, this is the\n# default for Fortran type files). 
For instance to make doxygen treat .inc files\n# as Fortran files (default is PHP), and .f files as C (default is Fortran),\n# use: inc=Fortran f=C.\n#\n# Note: For files without extension you can use no_extension as a placeholder.\n#\n# Note that for custom extensions you also need to set FILE_PATTERNS otherwise\n# the files are not read by doxygen. When specifying no_extension you should add\n# * to the FILE_PATTERNS.\n#\n# Note see also the list of default file extension mappings.\n\nEXTENSION_MAPPING      =\n\n# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments\n# according to the Markdown format, which allows for more readable\n# documentation. See https://daringfireball.net/projects/markdown/ for details.\n# The output of markdown processing is further processed by doxygen, so you can\n# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in\n# case of backward compatibilities issues.\n# The default value is: YES.\n\nMARKDOWN_SUPPORT       = YES\n\n# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up\n# to that level are automatically included in the table of contents, even if\n# they do not have an id attribute.\n# Note: This feature currently applies only to Markdown headings.\n# Minimum value: 0, maximum value: 99, default value: 5.\n# This tag requires that the tag MARKDOWN_SUPPORT is set to YES.\n\nTOC_INCLUDE_HEADINGS   = 0\n\n# When enabled doxygen tries to link words that correspond to documented\n# classes, or namespaces to their corresponding documentation. Such a link can\n# be prevented in individual cases by putting a % sign in front of the word or\n# globally by setting AUTOLINK_SUPPORT to NO.\n# The default value is: YES.\n\nAUTOLINK_SUPPORT       = YES\n\n# If you use STL classes (i.e. std::string, std::vector, etc.) 
but do not want\n# to include (a tag file for) the STL sources as input, then you should set this\n# tag to YES in order to let doxygen match functions declarations and\n# definitions whose arguments contain STL classes (e.g. func(std::string);\n# versus func(std::string) {}). This also make the inheritance and collaboration\n# diagrams that involve STL classes more complete and accurate.\n# The default value is: NO.\n\nBUILTIN_STL_SUPPORT    = NO\n\n# If you use Microsoft's C++/CLI language, you should set this option to YES to\n# enable parsing support.\n# The default value is: NO.\n\nCPP_CLI_SUPPORT        = NO\n\n# Set the SIP_SUPPORT tag to YES if your project consists of sip (see:\n# https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen\n# will parse them like normal C++ but will assume all classes use public instead\n# of private inheritance when no explicit protection keyword is present.\n# The default value is: NO.\n\nSIP_SUPPORT            = NO\n\n# For Microsoft's IDL there are propget and propput attributes to indicate\n# getter and setter methods for a property. Setting this option to YES will make\n# doxygen to replace the get and set methods by a property in the documentation.\n# This will only work if the methods are indeed getting or setting a simple\n# type. If this is not the case, or you want to show the methods anyway, you\n# should set this option to NO.\n# The default value is: YES.\n\nIDL_PROPERTY_SUPPORT   = YES\n\n# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC\n# tag is set to YES then doxygen will reuse the documentation of the first\n# member in the group (if any) for the other members of the group. By default\n# all members of a group must be documented explicitly.\n# The default value is: NO.\n\nDISTRIBUTE_GROUP_DOC   = NO\n\n# If one adds a struct or class to a group and this option is enabled, then also\n# any nested class or struct is added to the same group. 
By default this option\n# is disabled and one has to add nested compounds explicitly via \\ingroup.\n# The default value is: NO.\n\nGROUP_NESTED_COMPOUNDS = NO\n\n# Set the SUBGROUPING tag to YES to allow class member groups of the same type\n# (for instance a group of public functions) to be put as a subgroup of that\n# type (e.g. under the Public Functions section). Set it to NO to prevent\n# subgrouping. Alternatively, this can be done per class using the\n# \\nosubgrouping command.\n# The default value is: YES.\n\nSUBGROUPING            = YES\n\n# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions\n# are shown inside the group in which they are included (e.g. using \\ingroup)\n# instead of on a separate page (for HTML and Man pages) or section (for LaTeX\n# and RTF).\n#\n# Note that this feature does not work in combination with\n# SEPARATE_MEMBER_PAGES.\n# The default value is: NO.\n\nINLINE_GROUPED_CLASSES = NO\n\n# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions\n# with only public data fields or simple typedef fields will be shown inline in\n# the documentation of the scope in which they are defined (i.e. file,\n# namespace, or group documentation), provided this scope is documented. If set\n# to NO, structs, classes, and unions are shown on a separate page (for HTML and\n# Man pages) or section (for LaTeX and RTF).\n# The default value is: NO.\n\nINLINE_SIMPLE_STRUCTS  = NO\n\n# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or\n# enum is documented as struct, union, or enum with the name of the typedef. So\n# typedef struct TypeS {} TypeT, will appear in the documentation as a struct\n# with name TypeT. When disabled the typedef will appear as a member of a file,\n# namespace, or class. And the struct will be named TypeS. 
This can typically be\n# useful for C code in case the coding convention dictates that all compound\n# types are typedef'ed and only the typedef is referenced, never the tag name.\n# The default value is: NO.\n\nTYPEDEF_HIDES_STRUCT   = NO\n\n# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This\n# cache is used to resolve symbols given their name and scope. Since this can be\n# an expensive process and often the same symbol appears multiple times in the\n# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small\n# doxygen will become slower. If the cache is too large, memory is wasted. The\n# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range\n# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536\n# symbols. At the end of a run doxygen will report the cache usage and suggest\n# the optimal cache size from a speed point of view.\n# Minimum value: 0, maximum value: 9, default value: 0.\n\nLOOKUP_CACHE_SIZE      = 0\n\n# The NUM_PROC_THREADS specifies the number of threads doxygen is allowed to use\n# during processing. When set to 0 doxygen will base this on the number of\n# cores available in the system. You can set it explicitly to a value larger\n# than 0 to get more control over the balance between CPU load and processing\n# speed. At this moment only the input processing can be done using multiple\n# threads. Since this is still an experimental feature the default is set to 1,\n# which effectively disables parallel processing. Please report any issues you\n# encounter. 
Generating dot graphs in parallel is controlled by the\n# DOT_NUM_THREADS setting.\n# Minimum value: 0, maximum value: 32, default value: 1.\n\nNUM_PROC_THREADS       = 1\n\n#---------------------------------------------------------------------------\n# Build related configuration options\n#---------------------------------------------------------------------------\n\n# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in\n# documentation are documented, even if no documentation was available. Private\n# class members and static file members will be hidden unless the\n# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES.\n# Note: This will also disable the warnings about undocumented members that are\n# normally produced when WARNINGS is set to YES.\n# The default value is: NO.\n\nEXTRACT_ALL            = NO\n\n# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will\n# be included in the documentation.\n# The default value is: NO.\n\nEXTRACT_PRIVATE        = NO\n\n# If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual\n# methods of a class will be included in the documentation.\n# The default value is: NO.\n\nEXTRACT_PRIV_VIRTUAL   = NO\n\n# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal\n# scope will be included in the documentation.\n# The default value is: NO.\n\nEXTRACT_PACKAGE        = NO\n\n# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be\n# included in the documentation.\n# The default value is: NO.\n\nEXTRACT_STATIC         = NO\n\n# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined\n# locally in source files will be included in the documentation. If set to NO,\n# only classes defined in header files are included. Does not have any effect\n# for Java sources.\n# The default value is: YES.\n\nEXTRACT_LOCAL_CLASSES  = YES\n\n# This flag is only useful for Objective-C code. 
If set to YES, local methods,\n# which are defined in the implementation section but not in the interface are\n# included in the documentation. If set to NO, only methods in the interface are\n# included.\n# The default value is: NO.\n\nEXTRACT_LOCAL_METHODS  = NO\n\n# If this flag is set to YES, the members of anonymous namespaces will be\n# extracted and appear in the documentation as a namespace called\n# 'anonymous_namespace{file}', where file will be replaced with the base name of\n# the file that contains the anonymous namespace. By default anonymous namespace\n# are hidden.\n# The default value is: NO.\n\nEXTRACT_ANON_NSPACES   = NO\n\n# If this flag is set to YES, the name of an unnamed parameter in a declaration\n# will be determined by the corresponding definition. By default unnamed\n# parameters remain unnamed in the output.\n# The default value is: YES.\n\nRESOLVE_UNNAMED_PARAMS = YES\n\n# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all\n# undocumented members inside documented classes or files. If set to NO these\n# members will be included in the various overviews, but no documentation\n# section is generated. This option has no effect if EXTRACT_ALL is enabled.\n# The default value is: NO.\n\nHIDE_UNDOC_MEMBERS     = NO\n\n# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all\n# undocumented classes that are normally visible in the class hierarchy. If set\n# to NO, these classes will be included in the various overviews. This option\n# has no effect if EXTRACT_ALL is enabled.\n# The default value is: NO.\n\nHIDE_UNDOC_CLASSES     = NO\n\n# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend\n# declarations. If set to NO, these declarations will be included in the\n# documentation.\n# The default value is: NO.\n\nHIDE_FRIEND_COMPOUNDS  = NO\n\n# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any\n# documentation blocks found inside the body of a function. 
If set to NO, these\n# blocks will be appended to the function's detailed documentation block.\n# The default value is: NO.\n\nHIDE_IN_BODY_DOCS      = NO\n\n# The INTERNAL_DOCS tag determines if documentation that is typed after a\n# \\internal command is included. If the tag is set to NO then the documentation\n# will be excluded. Set it to YES to include the internal documentation.\n# The default value is: NO.\n\nINTERNAL_DOCS          = NO\n\n# With the correct setting of option CASE_SENSE_NAMES doxygen will better be\n# able to match the capabilities of the underlying filesystem. In case the\n# filesystem is case sensitive (i.e. it supports files in the same directory\n# whose names only differ in casing), the option must be set to YES to properly\n# deal with such files in case they appear in the input. For filesystems that\n# are not case sensitive the option should be set to NO to properly deal with\n# output files written for symbols that only differ in casing, such as for two\n# classes, one named CLASS and the other named Class, and to also support\n# references to files without having to specify the exact matching casing. On\n# Windows (including Cygwin) and MacOS, users should typically set this option\n# to NO, whereas on Linux or other Unix flavors it should typically be set to\n# YES.\n# The default value is: system dependent.\n\nCASE_SENSE_NAMES       = NO\n\n# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with\n# their full class and namespace scopes in the documentation. If set to YES, the\n# scope will be hidden.\n# The default value is: NO.\n\nHIDE_SCOPE_NAMES       = YES\n\n# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will\n# append additional text to a page's title, such as Class Reference. 
If set to\n# YES the compound reference will be hidden.\n# The default value is: NO.\n\nHIDE_COMPOUND_REFERENCE= NO\n\n# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of\n# the files that are included by a file in the documentation of that file.\n# The default value is: YES.\n\nSHOW_INCLUDE_FILES     = YES\n\n# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each\n# grouped member an include statement to the documentation, telling the reader\n# which file to include in order to use the member.\n# The default value is: NO.\n\nSHOW_GROUPED_MEMB_INC  = NO\n\n# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include\n# files with double quotes in the documentation rather than with sharp brackets.\n# The default value is: NO.\n\nFORCE_LOCAL_INCLUDES   = NO\n\n# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the\n# documentation for inline members.\n# The default value is: YES.\n\nINLINE_INFO            = YES\n\n# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the\n# (detailed) documentation of file and class members alphabetically by member\n# name. If set to NO, the members will appear in declaration order.\n# The default value is: YES.\n\nSORT_MEMBER_DOCS       = YES\n\n# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief\n# descriptions of file, namespace and class members alphabetically by member\n# name. If set to NO, the members will appear in declaration order. Note that\n# this will also influence the order of the classes in the class list.\n# The default value is: NO.\n\nSORT_BRIEF_DOCS        = NO\n\n# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the\n# (brief and detailed) documentation of class members so that constructors and\n# destructors are listed first. 
If set to NO the constructors will appear in the\n# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS.\n# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief\n# member documentation.\n# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting\n# detailed member documentation.\n# The default value is: NO.\n\nSORT_MEMBERS_CTORS_1ST = NO\n\n# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy\n# of group names into alphabetical order. If set to NO the group names will\n# appear in their defined order.\n# The default value is: NO.\n\nSORT_GROUP_NAMES       = NO\n\n# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by\n# fully-qualified names, including namespaces. If set to NO, the class list will\n# be sorted only by class name, not including the namespace part.\n# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.\n# Note: This option applies only to the class list, not to the alphabetical\n# list.\n# The default value is: NO.\n\nSORT_BY_SCOPE_NAME     = NO\n\n# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper\n# type resolution of all parameters of a function it will reject a match between\n# the prototype and the implementation of a member function even if there is\n# only one candidate or it is obvious which candidate to choose by doing a\n# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still\n# accept a match between prototype and implementation in such cases.\n# The default value is: NO.\n\nSTRICT_PROTO_MATCHING  = NO\n\n# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo\n# list. This list is created by putting \\todo commands in the documentation.\n# The default value is: YES.\n\nGENERATE_TODOLIST      = YES\n\n# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test\n# list. 
This list is created by putting \\test commands in the documentation.\n# The default value is: YES.\n\nGENERATE_TESTLIST      = YES\n\n# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug\n# list. This list is created by putting \\bug commands in the documentation.\n# The default value is: YES.\n\nGENERATE_BUGLIST       = YES\n\n# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO)\n# the deprecated list. This list is created by putting \\deprecated commands in\n# the documentation.\n# The default value is: YES.\n\nGENERATE_DEPRECATEDLIST= YES\n\n# The ENABLED_SECTIONS tag can be used to enable conditional documentation\n# sections, marked by \\if <section_label> ... \\endif and \\cond <section_label>\n# ... \\endcond blocks.\n\nENABLED_SECTIONS       =\n\n# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the\n# initial value of a variable or macro / define can have for it to appear in the\n# documentation. If the initializer consists of more lines than specified here\n# it will be hidden. Use a value of 0 to hide initializers completely. The\n# appearance of the value of individual variables and macros / defines can be\n# controlled using \\showinitializer or \\hideinitializer command in the\n# documentation regardless of this setting.\n# Minimum value: 0, maximum value: 10000, default value: 30.\n\nMAX_INITIALIZER_LINES  = 30\n\n# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at\n# the bottom of the documentation of classes and structs. If set to YES, the\n# list will mention the files that were used to generate the documentation.\n# The default value is: YES.\n\nSHOW_USED_FILES        = YES\n\n# Set the SHOW_FILES tag to NO to disable the generation of the Files page. 
This\n# will remove the Files entry from the Quick Index and from the Folder Tree View\n# (if specified).\n# The default value is: YES.\n\nSHOW_FILES             = YES\n\n# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces\n# page. This will remove the Namespaces entry from the Quick Index and from the\n# Folder Tree View (if specified).\n# The default value is: YES.\n\nSHOW_NAMESPACES        = YES\n\n# The FILE_VERSION_FILTER tag can be used to specify a program or script that\n# doxygen should invoke to get the current version for each file (typically from\n# the version control system). Doxygen will invoke the program by executing (via\n# popen()) the command command input-file, where command is the value of the\n# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided\n# by doxygen. Whatever the program writes to standard output is used as the file\n# version. For an example see the documentation.\n\nFILE_VERSION_FILTER    =\n\n# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed\n# by doxygen. The layout file controls the global structure of the generated\n# output files in an output format independent way. To create the layout file\n# that represents doxygen's defaults, run doxygen with the -l option. You can\n# optionally specify a file name after the option, if omitted DoxygenLayout.xml\n# will be used as the name of the layout file.\n#\n# Note that if you run doxygen from a directory containing a file called\n# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE\n# tag is left empty.\n\nLAYOUT_FILE            =\n\n# The CITE_BIB_FILES tag can be used to specify one or more bib files containing\n# the reference definitions. This must be a list of .bib files. The .bib\n# extension is automatically appended if omitted. This requires the bibtex tool\n# to be installed. 
See also https://en.wikipedia.org/wiki/BibTeX for more info.\n# For LaTeX the style of the bibliography can be controlled using\n# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the\n# search path. See also \\cite for info how to create references.\n\nCITE_BIB_FILES         =\n\n#---------------------------------------------------------------------------\n# Configuration options related to warning and progress messages\n#---------------------------------------------------------------------------\n\n# The QUIET tag can be used to turn on/off the messages that are generated to\n# standard output by doxygen. If QUIET is set to YES this implies that the\n# messages are off.\n# The default value is: NO.\n\nQUIET                  = NO\n\n# The WARNINGS tag can be used to turn on/off the warning messages that are\n# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES\n# this implies that the warnings are on.\n#\n# Tip: Turn warnings on while writing the documentation.\n# The default value is: YES.\n\nWARNINGS               = YES\n\n# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate\n# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag\n# will automatically be disabled.\n# The default value is: YES.\n\nWARN_IF_UNDOCUMENTED   = NO\n\n# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for\n# potential errors in the documentation, such as not documenting some parameters\n# in a documented function, or documenting parameters that don't exist or using\n# markup commands wrongly.\n# The default value is: YES.\n\nWARN_IF_DOC_ERROR      = YES\n\n# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that\n# are documented, but have no documentation for their parameters or return\n# value. If set to NO, doxygen will only warn about wrong or incomplete\n# parameter documentation, but not about the absence of documentation. 
If\n# EXTRACT_ALL is set to YES then this flag will automatically be disabled.\n# The default value is: NO.\n\nWARN_NO_PARAMDOC       = YES\n\n# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when\n# a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS\n# then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but\n# at the end of the doxygen process doxygen will return with a non-zero status.\n# Possible values are: NO, YES and FAIL_ON_WARNINGS.\n# The default value is: NO.\n\nWARN_AS_ERROR          = YES\n\n# The WARN_FORMAT tag determines the format of the warning messages that doxygen\n# can produce. The string should contain the $file, $line, and $text tags, which\n# will be replaced by the file and line number from which the warning originated\n# and the warning text. Optionally the format may contain $version, which will\n# be replaced by the version of the file (if it could be obtained via\n# FILE_VERSION_FILTER)\n# The default value is: $file:$line: $text.\n\nWARN_FORMAT            = \"$file:$line: $text\"\n\n# The WARN_LOGFILE tag can be used to specify a file to which warning and error\n# messages should be written. If left blank the output is written to standard\n# error (stderr).\n\nWARN_LOGFILE           =\n\n#---------------------------------------------------------------------------\n# Configuration options related to the input files\n#---------------------------------------------------------------------------\n\n# The INPUT tag is used to specify the files and/or directories that contain\n# documented source files. You may enter file names like myfile.cpp or\n# directories like /usr/src/myproject. Separate the files or directories with\n# spaces. 
See also FILE_PATTERNS and EXTENSION_MAPPING\n# Note: If this tag is empty the current directory is searched.\n\nINPUT                  = ../../c/tskit\n\n# This tag can be used to specify the character encoding of the source files\n# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses\n# libiconv (or the iconv built into libc) for the transcoding. See the libiconv\n# documentation (see:\n# https://www.gnu.org/software/libiconv/) for the list of possible encodings.\n# The default value is: UTF-8.\n\nINPUT_ENCODING         = UTF-8\n\n# If the value of the INPUT tag contains directories, you can use the\n# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and\n# *.h) to filter out the source-files in the directories.\n#\n# Note that for custom extensions or not directly supported extensions you also\n# need to set EXTENSION_MAPPING for the extension otherwise the files are not\n# read by doxygen.\n#\n# Note the list of default checked file patterns might differ from the list of\n# default file extension mappings.\n#\n# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp,\n# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h,\n# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc,\n# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment),\n# *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, *.vhdl,\n# *.ucf, *.qsf and *.ice.\n\nFILE_PATTERNS          = *.h\n\n# The RECURSIVE tag can be used to specify whether or not subdirectories should\n# be searched for input files as well.\n# The default value is: NO.\n\nRECURSIVE              = NO\n\n# The EXCLUDE tag can be used to specify files and/or directories that should be\n# excluded from the INPUT source files. 
This way you can easily exclude a\n# subdirectory from a directory tree whose root is specified with the INPUT tag.\n#\n# Note that relative paths are relative to the directory from which doxygen is\n# run.\n\nEXCLUDE                =\n\n# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or\n# directories that are symbolic links (a Unix file system feature) are excluded\n# from the input.\n# The default value is: NO.\n\nEXCLUDE_SYMLINKS       = NO\n\n# If the value of the INPUT tag contains directories, you can use the\n# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude\n# certain files from those directories.\n#\n# Note that the wildcards are matched against the file with absolute path, so to\n# exclude all test directories for example use the pattern */test/*\n\nEXCLUDE_PATTERNS       =\n\n# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names\n# (namespaces, classes, functions, etc.) that should be excluded from the\n# output. The symbol name can be a fully qualified name, a word, or if the\n# wildcard * is used, a substring. Examples: ANamespace, AClass,\n# AClass::ANamespace, ANamespace::*Test\n#\n# Note that the wildcards are matched against the file with absolute path, so to\n# exclude all test directories use the pattern */test/*\n\nEXCLUDE_SYMBOLS        =\n\n# The EXAMPLE_PATH tag can be used to specify one or more files or directories\n# that contain example code fragments that are included (see the \\include\n# command).\n\nEXAMPLE_PATH           =\n\n# If the value of the EXAMPLE_PATH tag contains directories, you can use the\n# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and\n# *.h) to filter out the source-files in the directories. 
If left blank all\n# files are included.\n\nEXAMPLE_PATTERNS       = *\n\n# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be\n# searched for input files to be used with the \\include or \\dontinclude commands\n# irrespective of the value of the RECURSIVE tag.\n# The default value is: NO.\n\nEXAMPLE_RECURSIVE      = NO\n\n# The IMAGE_PATH tag can be used to specify one or more files or directories\n# that contain images that are to be included in the documentation (see the\n# \\image command).\n\nIMAGE_PATH             =\n\n# The INPUT_FILTER tag can be used to specify a program that doxygen should\n# invoke to filter for each input file. Doxygen will invoke the filter program\n# by executing (via popen()) the command:\n#\n# <filter> <input-file>\n#\n# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the\n# name of an input file. Doxygen will then use the output that the filter\n# program writes to standard output. If FILTER_PATTERNS is specified, this tag\n# will be ignored.\n#\n# Note that the filter must not add or remove lines; it is applied before the\n# code is scanned, but not when the output code is generated. If lines are added\n# or removed, the anchors will not be placed correctly.\n#\n# Note that for custom extensions or not directly supported extensions you also\n# need to set EXTENSION_MAPPING for the extension otherwise the files are not\n# properly processed by doxygen.\n\nINPUT_FILTER           =\n\n# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern\n# basis. Doxygen will compare the file name with each pattern and apply the\n# filter if there is a match. The filters are a list of the form: pattern=filter\n# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how\n# filters are used. 
If the FILTER_PATTERNS tag is empty or if none of the\n# patterns match the file name, INPUT_FILTER is applied.\n#\n# Note that for custom extensions or not directly supported extensions you also\n# need to set EXTENSION_MAPPING for the extension otherwise the files are not\n# properly processed by doxygen.\n\nFILTER_PATTERNS        =\n\n# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using\n# INPUT_FILTER) will also be used to filter the input files that are used for\n# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES).\n# The default value is: NO.\n\nFILTER_SOURCE_FILES    = NO\n\n# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file\n# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and\n# it is also possible to disable source filtering for a specific pattern using\n# *.ext= (so without naming a filter).\n# This tag requires that the tag FILTER_SOURCE_FILES is set to YES.\n\nFILTER_SOURCE_PATTERNS =\n\n# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that\n# is part of the input, its contents will be placed on the main page\n# (index.html). This can be useful if you have a project on for instance GitHub\n# and want to reuse the introduction page also for the doxygen output.\n\nUSE_MDFILE_AS_MAINPAGE =\n\n#---------------------------------------------------------------------------\n# Configuration options related to source browsing\n#---------------------------------------------------------------------------\n\n# If the SOURCE_BROWSER tag is set to YES then a list of source files will be\n# generated. 
Documented entities will be cross-referenced with these sources.\n#\n# Note: To get rid of all source code in the generated output, make sure that\n# also VERBATIM_HEADERS is set to NO.\n# The default value is: NO.\n\nSOURCE_BROWSER         = NO\n\n# Setting the INLINE_SOURCES tag to YES will include the body of functions,\n# classes and enums directly into the documentation.\n# The default value is: NO.\n\nINLINE_SOURCES         = NO\n\n# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any\n# special comment blocks from generated source code fragments. Normal C, C++ and\n# Fortran comments will always remain visible.\n# The default value is: YES.\n\nSTRIP_CODE_COMMENTS    = YES\n\n# If the REFERENCED_BY_RELATION tag is set to YES then for each documented\n# entity all documented functions referencing it will be listed.\n# The default value is: NO.\n\nREFERENCED_BY_RELATION = NO\n\n# If the REFERENCES_RELATION tag is set to YES then for each documented function\n# all documented entities called/used by that function will be listed.\n# The default value is: NO.\n\nREFERENCES_RELATION    = NO\n\n# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set\n# to YES then the hyperlinks from functions in REFERENCES_RELATION and\n# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will\n# link to the documentation.\n# The default value is: YES.\n\nREFERENCES_LINK_SOURCE = YES\n\n# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the\n# source code will show a tooltip with additional information such as prototype,\n# brief description and links to the definition and documentation. 
Since this\n# will make the HTML file larger and loading of large files a bit slower, you\n# can opt to disable this feature.\n# The default value is: YES.\n# This tag requires that the tag SOURCE_BROWSER is set to YES.\n\nSOURCE_TOOLTIPS        = YES\n\n# If the USE_HTAGS tag is set to YES then the references to source code will\n# point to the HTML generated by the htags(1) tool instead of doxygen built-in\n# source browser. The htags tool is part of GNU's global source tagging system\n# (see https://www.gnu.org/software/global/global.html). You will need version\n# 4.8.6 or higher.\n#\n# To use it do the following:\n# - Install the latest version of global\n# - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file\n# - Make sure the INPUT points to the root of the source tree\n# - Run doxygen as normal\n#\n# Doxygen will invoke htags (and that will in turn invoke gtags), so these\n# tools must be available from the command line (i.e. in the search path).\n#\n# The result: instead of the source browser generated by doxygen, the links to\n# source code will now point to the output of htags.\n# The default value is: NO.\n# This tag requires that the tag SOURCE_BROWSER is set to YES.\n\nUSE_HTAGS              = NO\n\n# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a\n# verbatim copy of the header file for each class for which an include is\n# specified. Set to NO to disable this.\n# See also: Section \\class.\n# The default value is: YES.\n\nVERBATIM_HEADERS       = YES\n\n# If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the\n# clang parser (see:\n# http://clang.llvm.org/) for more accurate parsing at the cost of reduced\n# performance. 
This can be particularly helpful with template rich C++ code for\n# which doxygen's built-in parser lacks the necessary type information.\n# Note: The availability of this option depends on whether or not doxygen was\n# generated with the -Duse_libclang=ON option for CMake.\n# The default value is: NO.\n\nCLANG_ASSISTED_PARSING = NO\n\n# If clang assisted parsing is enabled and the CLANG_ADD_INC_PATHS tag is set to\n# YES then doxygen will add the directory of each input to the include path.\n# The default value is: YES.\n\nCLANG_ADD_INC_PATHS    = YES\n\n# If clang assisted parsing is enabled you can provide the compiler with command\n# line options that you would normally use when invoking the compiler. Note that\n# the include paths will already be set by doxygen for the files and directories\n# specified with INPUT and INCLUDE_PATH.\n# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES.\n\nCLANG_OPTIONS          =\n\n# If clang assisted parsing is enabled you can provide the clang parser with the\n# path to the directory containing a file called compile_commands.json. This\n# file is the compilation database (see:\n# http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) containing the\n# options used when the source files were built. This is equivalent to\n# specifying the -p option to a clang tool, such as clang-check. These options\n# will then be passed to the parser. Any options specified with CLANG_OPTIONS\n# will be added as well.\n# Note: The availability of this option depends on whether or not doxygen was\n# generated with the -Duse_libclang=ON option for CMake.\n\nCLANG_DATABASE_PATH    =\n\n#---------------------------------------------------------------------------\n# Configuration options related to the alphabetical class index\n#---------------------------------------------------------------------------\n\n# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all\n# compounds will be generated. 
Enable this if the project contains a lot of\n# classes, structs, unions or interfaces.\n# The default value is: YES.\n\nALPHABETICAL_INDEX     = YES\n\n# In case all classes in a project start with a common prefix, all classes will\n# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag\n# can be used to specify a prefix (or a list of prefixes) that should be ignored\n# while generating the index headers.\n# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.\n\nIGNORE_PREFIX          =\n\n#---------------------------------------------------------------------------\n# Configuration options related to the HTML output\n#---------------------------------------------------------------------------\n\n# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output\n# The default value is: YES.\n\nGENERATE_HTML          = NO\n\n# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a\n# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of\n# it.\n# The default directory is: html.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_OUTPUT            = html\n\n# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each\n# generated HTML page (for example: .htm, .php, .asp).\n# The default value is: .html.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_FILE_EXTENSION    = .html\n\n# The HTML_HEADER tag can be used to specify a user-defined HTML header file for\n# each generated HTML page. If the tag is left blank doxygen will generate a\n# standard header.\n#\n# To get valid HTML the header file that includes any scripts and style sheets\n# that doxygen needs, which is dependent on the configuration options used (e.g.\n# the setting GENERATE_TREEVIEW). 
It is highly recommended to start with a\n# default header using\n# doxygen -w html new_header.html new_footer.html new_stylesheet.css\n# YourConfigFile\n# and then modify the file new_header.html. See also section \"Doxygen usage\"\n# for information on how to generate the default header that doxygen normally\n# uses.\n# Note: The header is subject to change so you typically have to regenerate the\n# default header when upgrading to a newer version of doxygen. For a description\n# of the possible markers and block names see the documentation.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_HEADER            =\n\n# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each\n# generated HTML page. If the tag is left blank doxygen will generate a standard\n# footer. See HTML_HEADER for more information on how to generate a default\n# footer and what special commands can be used inside the footer. See also\n# section \"Doxygen usage\" for information on how to generate the default footer\n# that doxygen normally uses.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_FOOTER            =\n\n# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style\n# sheet that is used by each HTML page. It can be used to fine-tune the look of\n# the HTML output. If left blank doxygen will generate a default style sheet.\n# See also section \"Doxygen usage\" for information on how to generate the style\n# sheet that doxygen normally uses.\n# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as\n# it is more robust and this tag (HTML_STYLESHEET) will in the future become\n# obsolete.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_STYLESHEET        =\n\n# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined\n# cascading style sheets that are included after the standard style sheets\n# created by doxygen. 
Using this option one can overrule certain style aspects.\n# This is preferred over using HTML_STYLESHEET since it does not replace the\n# standard style sheet and is therefore more robust against future updates.\n# Doxygen will copy the style sheet files to the output directory.\n# Note: The order of the extra style sheet files is of importance (e.g. the last\n# style sheet in the list overrules the setting of the previous ones in the\n# list). For an example see the documentation.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_EXTRA_STYLESHEET  =\n\n# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or\n# other source files which should be copied to the HTML output directory. Note\n# that these files will be copied to the base HTML output directory. Use the\n# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these\n# files. In the HTML_STYLESHEET file, use the file name only. Also note that the\n# files will be copied as-is; there are no commands or markers available.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_EXTRA_FILES       =\n\n# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen\n# will adjust the colors in the style sheet and background images according to\n# this color. Hue is specified as an angle on a colorwheel, see\n# https://en.wikipedia.org/wiki/Hue for more information. For instance the value\n# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300\n# purple, and 360 is red again.\n# Minimum value: 0, maximum value: 359, default value: 220.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_COLORSTYLE_HUE    = 220\n\n# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors\n# in the HTML output. For a value of 0 the output will use grayscales only. 
A\n# value of 255 will produce the most vivid colors.\n# Minimum value: 0, maximum value: 255, default value: 100.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_COLORSTYLE_SAT    = 100\n\n# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the\n# luminance component of the colors in the HTML output. Values below 100\n# gradually make the output lighter, whereas values above 100 make the output\n# darker. The value divided by 100 is the actual gamma applied, so 80 represents\n# a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not\n# change the gamma.\n# Minimum value: 40, maximum value: 240, default value: 80.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_COLORSTYLE_GAMMA  = 80\n\n# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML\n# page will contain the date and time when the page was generated. Setting this\n# to YES can help to show when doxygen was last run and thus if the\n# documentation is up to date.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_TIMESTAMP         = NO\n\n# If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML\n# documentation will contain a main index with vertical navigation menus that\n# are dynamically created via JavaScript. If disabled, the navigation index will\n# consist of multiple levels of tabs that are statically embedded in every HTML\n# page. 
Disable this option to support browsers that do not have JavaScript,\n# like the Qt help browser.\n# The default value is: YES.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_DYNAMIC_MENUS     = YES\n\n# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML\n# documentation will contain sections that can be hidden and shown after the\n# page has loaded.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_DYNAMIC_SECTIONS  = NO\n\n# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries\n# shown in the various tree structured indices initially; the user can expand\n# and collapse entries dynamically later on. Doxygen will expand the tree to\n# such a level that at most the specified number of entries are visible (unless\n# a fully collapsed tree already exceeds this amount). So setting the number of\n# entries 1 will produce a full collapsed tree by default. 0 is a special value\n# representing an infinite number of entries and will result in a full expanded\n# tree by default.\n# Minimum value: 0, maximum value: 9999, default value: 100.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_INDEX_NUM_ENTRIES = 100\n\n# If the GENERATE_DOCSET tag is set to YES, additional index files will be\n# generated that can be used as input for Apple's Xcode 3 integrated development\n# environment (see:\n# https://developer.apple.com/xcode/), introduced with OSX 10.5 (Leopard). To\n# create a documentation set, doxygen will generate a Makefile in the HTML\n# output directory. Running make will produce the docset in that directory and\n# running make install will install the docset in\n# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at\n# startup. 
See https://developer.apple.com/library/archive/featuredarticles/Doxy\n# genXcode/_index.html for more information.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nGENERATE_DOCSET        = NO\n\n# This tag determines the name of the docset feed. A documentation feed provides\n# an umbrella under which multiple documentation sets from a single provider\n# (such as a company or product suite) can be grouped.\n# The default value is: Doxygen generated docs.\n# This tag requires that the tag GENERATE_DOCSET is set to YES.\n\nDOCSET_FEEDNAME        = \"Doxygen generated docs\"\n\n# This tag specifies a string that should uniquely identify the documentation\n# set bundle. This should be a reverse domain-name style string, e.g.\n# com.mycompany.MyDocSet. Doxygen will append .docset to the name.\n# The default value is: org.doxygen.Project.\n# This tag requires that the tag GENERATE_DOCSET is set to YES.\n\nDOCSET_BUNDLE_ID       = org.doxygen.Project\n\n# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify\n# the documentation publisher. This should be a reverse domain-name style\n# string, e.g. com.mycompany.MyDocSet.documentation.\n# The default value is: org.doxygen.Publisher.\n# This tag requires that the tag GENERATE_DOCSET is set to YES.\n\nDOCSET_PUBLISHER_ID    = org.doxygen.Publisher\n\n# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.\n# The default value is: Publisher.\n# This tag requires that the tag GENERATE_DOCSET is set to YES.\n\nDOCSET_PUBLISHER_NAME  = Publisher\n\n# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three\n# additional HTML index files: index.hhp, index.hhc, and index.hhk. 
The\n# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop\n# (see:\n# https://www.microsoft.com/en-us/download/details.aspx?id=21138) on Windows.\n#\n# The HTML Help Workshop contains a compiler that can convert all HTML output\n# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML\n# files are now used as the Windows 98 help format, and will replace the old\n# Windows help format (.hlp) on all Windows platforms in the future. Compressed\n# HTML files also contain an index, a table of contents, and you can search for\n# words in the documentation. The HTML workshop also contains a viewer for\n# compressed HTML files.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nGENERATE_HTMLHELP      = NO\n\n# The CHM_FILE tag can be used to specify the file name of the resulting .chm\n# file. You can add a path in front of the file if the result should not be\n# written to the html output directory.\n# This tag requires that the tag GENERATE_HTMLHELP is set to YES.\n\nCHM_FILE               =\n\n# The HHC_LOCATION tag can be used to specify the location (absolute path\n# including file name) of the HTML help compiler (hhc.exe). 
If non-empty,\n# doxygen will try to run the HTML help compiler on the generated index.hhp.\n# The file has to be specified with full path.\n# This tag requires that the tag GENERATE_HTMLHELP is set to YES.\n\nHHC_LOCATION           =\n\n# The GENERATE_CHI flag controls if a separate .chi index file is generated\n# (YES) or that it should be included in the main .chm file (NO).\n# The default value is: NO.\n# This tag requires that the tag GENERATE_HTMLHELP is set to YES.\n\nGENERATE_CHI           = NO\n\n# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc)\n# and project file content.\n# This tag requires that the tag GENERATE_HTMLHELP is set to YES.\n\nCHM_INDEX_ENCODING     =\n\n# The BINARY_TOC flag controls whether a binary table of contents is generated\n# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it\n# enables the Previous and Next buttons.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_HTMLHELP is set to YES.\n\nBINARY_TOC             = NO\n\n# The TOC_EXPAND flag can be set to YES to add extra items for group members to\n# the table of contents of the HTML help documentation and to the tree view.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_HTMLHELP is set to YES.\n\nTOC_EXPAND             = NO\n\n# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and\n# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that\n# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help\n# (.qch) of the generated HTML documentation.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nGENERATE_QHP           = NO\n\n# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify\n# the file name of the resulting .qch file. 
The path specified is relative to\n# the HTML output folder.\n# This tag requires that the tag GENERATE_QHP is set to YES.\n\nQCH_FILE               =\n\n# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help\n# Project output. For more information please see Qt Help Project / Namespace\n# (see:\n# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).\n# The default value is: org.doxygen.Project.\n# This tag requires that the tag GENERATE_QHP is set to YES.\n\nQHP_NAMESPACE          = org.doxygen.Project\n\n# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt\n# Help Project output. For more information please see Qt Help Project / Virtual\n# Folders (see:\n# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders).\n# The default value is: doc.\n# This tag requires that the tag GENERATE_QHP is set to YES.\n\nQHP_VIRTUAL_FOLDER     = doc\n\n# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom\n# filter to add. For more information please see Qt Help Project / Custom\n# Filters (see:\n# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).\n# This tag requires that the tag GENERATE_QHP is set to YES.\n\nQHP_CUST_FILTER_NAME   =\n\n# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the\n# custom filter to add. For more information please see Qt Help Project / Custom\n# Filters (see:\n# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).\n# This tag requires that the tag GENERATE_QHP is set to YES.\n\nQHP_CUST_FILTER_ATTRS  =\n\n# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this\n# project's filter section matches. 
Qt Help Project / Filter Attributes (see:\n# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes).\n# This tag requires that the tag GENERATE_QHP is set to YES.\n\nQHP_SECT_FILTER_ATTRS  =\n\n# The QHG_LOCATION tag can be used to specify the location (absolute path\n# including file name) of Qt's qhelpgenerator. If non-empty doxygen will try to\n# run qhelpgenerator on the generated .qhp file.\n# This tag requires that the tag GENERATE_QHP is set to YES.\n\nQHG_LOCATION           =\n\n# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be\n# generated, together with the HTML files, they form an Eclipse help plugin. To\n# install this plugin and make it available under the help contents menu in\n# Eclipse, the contents of the directory containing the HTML and XML files needs\n# to be copied into the plugins directory of eclipse. The name of the directory\n# within the plugins directory should be the same as the ECLIPSE_DOC_ID value.\n# After copying Eclipse needs to be restarted before the help appears.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nGENERATE_ECLIPSEHELP   = NO\n\n# A unique identifier for the Eclipse help plugin. When installing the plugin\n# the directory name containing the HTML and XML files should also have this\n# name. Each documentation set should have its own identifier.\n# The default value is: org.doxygen.Project.\n# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES.\n\nECLIPSE_DOC_ID         = org.doxygen.Project\n\n# If you want full control over the layout of the generated HTML pages it might\n# be necessary to disable the index and replace it with your own. The\n# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top\n# of each HTML page. A value of NO enables the index and the value YES disables\n# it. 
Since the tabs in the index contain the same information as the navigation\n# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nDISABLE_INDEX          = NO\n\n# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index\n# structure should be generated to display hierarchical information. If the tag\n# value is set to YES, a side panel will be generated containing a tree-like\n# index structure (just like the one that is generated for HTML Help). For this\n# to work a browser that supports JavaScript, DHTML, CSS and frames is required\n# (i.e. any modern browser). Windows users are probably better off using the\n# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can\n# further fine-tune the look of the index. As an example, the default style\n# sheet generated by doxygen has an example that shows how to put an image at\n# the root of the tree instead of the PROJECT_NAME. 
Since the tree basically has\n# the same information as the tab index, you could consider setting\n# DISABLE_INDEX to YES when enabling this option.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nGENERATE_TREEVIEW      = NO\n\n# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that\n# doxygen will group on one line in the generated HTML documentation.\n#\n# Note that a value of 0 will completely suppress the enum values from appearing\n# in the overview section.\n# Minimum value: 0, maximum value: 20, default value: 4.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nENUM_VALUES_PER_LINE   = 4\n\n# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used\n# to set the initial width (in pixels) of the frame in which the tree is shown.\n# Minimum value: 0, maximum value: 1500, default value: 250.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nTREEVIEW_WIDTH         = 250\n\n# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to\n# external symbols imported via tag files in a separate window.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nEXT_LINKS_IN_WINDOW    = NO\n\n# If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg\n# tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see\n# https://inkscape.org) to generate formulas as SVG images instead of PNGs for\n# the HTML output. These images will generally look nicer at scaled resolutions.\n# Possible values are: png (the default) and svg (looks nicer but requires the\n# pdf2svg or inkscape tool).\n# The default value is: png.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_FORMULA_FORMAT    = png\n\n# Use this tag to change the font size of LaTeX formulas included as images in\n# the HTML documentation. 
When you change the font size after a successful\n# doxygen run you need to manually remove any form_*.png images from the HTML\n# output directory to force them to be regenerated.\n# Minimum value: 8, maximum value: 50, default value: 10.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nFORMULA_FONTSIZE       = 10\n\n# Use the FORMULA_TRANSPARENT tag to determine whether or not the images\n# generated for formulas are transparent PNGs. Transparent PNGs are not\n# supported properly for IE 6.0, but are supported on all modern browsers.\n#\n# Note that when changing this option you need to delete any form_*.png files in\n# the HTML output directory before the changes have effect.\n# The default value is: YES.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nFORMULA_TRANSPARENT    = YES\n\n# The FORMULA_MACROFILE can contain LaTeX \\newcommand and \\renewcommand commands\n# to create new LaTeX commands to be used in formulas as building blocks. See\n# the section \"Including formulas\" for details.\n\nFORMULA_MACROFILE      =\n\n# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see\n# https://www.mathjax.org) which uses client side JavaScript for the rendering\n# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX\n# installed or if you want formulas to look prettier in the HTML output. When\n# enabled you may also need to install MathJax separately and configure the path\n# to it using the MATHJAX_RELPATH option.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nUSE_MATHJAX            = NO\n\n# When MathJax is enabled you can set the default output format to be used for\n# the MathJax output. See the MathJax site (see:\n# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details.\n# Possible values are: HTML-CSS (which is slower, but has the best\n# compatibility), NativeMML (i.e. 
MathML) and SVG.\n# The default value is: HTML-CSS.\n# This tag requires that the tag USE_MATHJAX is set to YES.\n\nMATHJAX_FORMAT         = HTML-CSS\n\n# When MathJax is enabled you need to specify the location relative to the HTML\n# output directory using the MATHJAX_RELPATH option. The destination directory\n# should contain the MathJax.js script. For instance, if the mathjax directory\n# is located at the same level as the HTML output directory, then\n# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax\n# Content Delivery Network so you can quickly see the result without installing\n# MathJax. However, it is strongly recommended to install a local copy of\n# MathJax from https://www.mathjax.org before deployment.\n# The default value is: https://cdn.jsdelivr.net/npm/mathjax@2.\n# This tag requires that the tag USE_MATHJAX is set to YES.\n\nMATHJAX_RELPATH        = http://cdn.mathjax.org/mathjax/latest\n\n# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax\n# extension names that should be enabled during MathJax rendering. For example\n# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols\n# This tag requires that the tag USE_MATHJAX is set to YES.\n\nMATHJAX_EXTENSIONS     =\n\n# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces\n# of code that will be used on startup of the MathJax code. See the MathJax site\n# (see:\n# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. For an\n# example see the documentation.\n# This tag requires that the tag USE_MATHJAX is set to YES.\n\nMATHJAX_CODEFILE       =\n\n# When the SEARCHENGINE tag is enabled doxygen will generate a search box for\n# the HTML output. The underlying search engine uses javascript and DHTML and\n# should work on any modern browser. 
Note that when using HTML help\n# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET)\n# there is already a search function so this one should typically be disabled.\n# For large projects the javascript based search engine can be slow, then\n# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to\n# search using the keyboard; to jump to the search box use <access key> + S\n# (what the <access key> is depends on the OS and browser, but it is typically\n# <CTRL>, <ALT>/<option>, or both). Inside the search box use the <cursor down\n# key> to jump into the search results window, the results can be navigated\n# using the <cursor keys>. Press <Enter> to select an item or <escape> to cancel\n# the search. The filter options can be selected when the cursor is inside the\n# search box by pressing <Shift>+<cursor down>. Also here use the <cursor keys>\n# to select a filter and <Enter> or <escape> to activate or cancel the filter\n# option.\n# The default value is: YES.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nSEARCHENGINE           = NO\n\n# When the SERVER_BASED_SEARCH tag is enabled the search engine will be\n# implemented using a web server instead of a web client using JavaScript. There\n# are two flavors of web server based searching depending on the EXTERNAL_SEARCH\n# setting. When disabled, doxygen will generate a PHP script for searching and\n# an index file used by the script. When EXTERNAL_SEARCH is enabled the indexing\n# and searching needs to be provided by external tools. See the section\n# \"External Indexing and Searching\" for details.\n# The default value is: NO.\n# This tag requires that the tag SEARCHENGINE is set to YES.\n\nSERVER_BASED_SEARCH    = NO\n\n# When EXTERNAL_SEARCH tag is enabled doxygen will no longer generate the PHP\n# script for searching. Instead the search results are written to an XML file\n# which needs to be processed by an external indexer. 
Doxygen will invoke an\n# external search engine pointed to by the SEARCHENGINE_URL option to obtain the\n# search results.\n#\n# Doxygen ships with an example indexer (doxyindexer) and search engine\n# (doxysearch.cgi) which are based on the open source search engine library\n# Xapian (see:\n# https://xapian.org/).\n#\n# See the section \"External Indexing and Searching\" for details.\n# The default value is: NO.\n# This tag requires that the tag SEARCHENGINE is set to YES.\n\nEXTERNAL_SEARCH        = NO\n\n# The SEARCHENGINE_URL should point to a search engine hosted by a web server\n# which will return the search results when EXTERNAL_SEARCH is enabled.\n#\n# Doxygen ships with an example indexer (doxyindexer) and search engine\n# (doxysearch.cgi) which are based on the open source search engine library\n# Xapian (see:\n# https://xapian.org/). See the section \"External Indexing and Searching\" for\n# details.\n# This tag requires that the tag SEARCHENGINE is set to YES.\n\nSEARCHENGINE_URL       =\n\n# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed\n# search data is written to a file for indexing by an external tool. With the\n# SEARCHDATA_FILE tag the name of this file can be specified.\n# The default file is: searchdata.xml.\n# This tag requires that the tag SEARCHENGINE is set to YES.\n\nSEARCHDATA_FILE        = searchdata.xml\n\n# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the\n# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is\n# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple\n# projects and redirect the results back to the right project.\n# This tag requires that the tag SEARCHENGINE is set to YES.\n\nEXTERNAL_SEARCH_ID     =\n\n# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen\n# projects other than the one defined by this configuration file, but that are\n# all added to the same external search index. 
Each project needs to have a\n# unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id\n# to a relative location where the documentation can be found. The format is:\n# EXTRA_SEARCH_MAPPINGS = tagname1=loc1 tagname2=loc2 ...\n# This tag requires that the tag SEARCHENGINE is set to YES.\n\nEXTRA_SEARCH_MAPPINGS  =\n\n#---------------------------------------------------------------------------\n# Configuration options related to the LaTeX output\n#---------------------------------------------------------------------------\n\n# If the GENERATE_LATEX tag is set to YES, doxygen will generate LaTeX output.\n# The default value is: YES.\n\nGENERATE_LATEX         = NO\n\n# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a\n# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of\n# it.\n# The default directory is: latex.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nLATEX_OUTPUT           = latex\n\n# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be\n# invoked.\n#\n# Note that when not enabling USE_PDFLATEX the default is latex when enabling\n# USE_PDFLATEX the default is pdflatex and when in the latter case latex is\n# chosen this is overwritten by pdflatex. 
For specific output languages the\n# default can have been set differently, this depends on the implementation of\n# the output language.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nLATEX_CMD_NAME         = latex\n\n# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate\n# index for LaTeX.\n# Note: This tag is used in the Makefile / make.bat.\n# See also: LATEX_MAKEINDEX_CMD for the part in the generated output file\n# (.tex).\n# The default file is: makeindex.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nMAKEINDEX_CMD_NAME     = makeindex\n\n# The LATEX_MAKEINDEX_CMD tag can be used to specify the command name to\n# generate index for LaTeX. In case there is no backslash (\\) as first character\n# it will be automatically added in the LaTeX code.\n# Note: This tag is used in the generated output file (.tex).\n# See also: MAKEINDEX_CMD_NAME for the part in the Makefile / make.bat.\n# The default value is: makeindex.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nLATEX_MAKEINDEX_CMD    = makeindex\n\n# If the COMPACT_LATEX tag is set to YES, doxygen generates more compact LaTeX\n# documents. This may be useful for small projects and may help to save some\n# trees in general.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nCOMPACT_LATEX          = NO\n\n# The PAPER_TYPE tag can be used to set the paper type that is used by the\n# printer.\n# Possible values are: a4 (210 x 297 mm), letter (8.5 x 11 inches), legal (8.5 x\n# 14 inches) and executive (7.25 x 10.5 inches).\n# The default value is: a4.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nPAPER_TYPE             = a4\n\n# The EXTRA_PACKAGES tag can be used to specify one or more LaTeX package names\n# that should be included in the LaTeX output. 
The package can be specified just\n# by its name or with the correct syntax as to be used with the LaTeX\n# \\usepackage command. To get the times font for instance you can specify :\n# EXTRA_PACKAGES=times or EXTRA_PACKAGES={times}\n# To use the option intlimits with the amsmath package you can specify:\n# EXTRA_PACKAGES=[intlimits]{amsmath}\n# If left blank no extra packages will be included.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nEXTRA_PACKAGES         =\n\n# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the\n# generated LaTeX document. The header should contain everything until the first\n# chapter. If it is left blank doxygen will generate a standard header. See\n# section \"Doxygen usage\" for information on how to let doxygen write the\n# default header to a separate file.\n#\n# Note: Only use a user-defined header if you know what you are doing! The\n# following commands have a special meaning inside the header: $title,\n# $datetime, $date, $doxygenversion, $projectname, $projectnumber,\n# $projectbrief, $projectlogo. Doxygen will replace $title with the empty\n# string, for the replacement values of the other commands the user is referred\n# to HTML_HEADER.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nLATEX_HEADER           =\n\n# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the\n# generated LaTeX document. The footer should contain everything after the last\n# chapter. If it is left blank doxygen will generate a standard footer. 
See\n# LATEX_HEADER for more information on how to generate a default footer and what\n# special commands can be used inside the footer.\n#\n# Note: Only use a user-defined footer if you know what you are doing!\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nLATEX_FOOTER           =\n\n# The LATEX_EXTRA_STYLESHEET tag can be used to specify additional user-defined\n# LaTeX style sheets that are included after the standard style sheets created\n# by doxygen. Using this option one can overrule certain style aspects. Doxygen\n# will copy the style sheet files to the output directory.\n# Note: The order of the extra style sheet files is of importance (e.g. the last\n# style sheet in the list overrules the setting of the previous ones in the\n# list).\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nLATEX_EXTRA_STYLESHEET =\n\n# The LATEX_EXTRA_FILES tag can be used to specify one or more extra images or\n# other source files which should be copied to the LATEX_OUTPUT output\n# directory. Note that the files will be copied as-is; there are no commands or\n# markers available.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nLATEX_EXTRA_FILES      =\n\n# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated is\n# prepared for conversion to PDF (using ps2pdf or pdflatex). The PDF file will\n# contain links (just like the HTML output) instead of page references. This\n# makes the output suitable for online browsing using a PDF viewer.\n# The default value is: YES.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nPDF_HYPERLINKS         = YES\n\n# If the USE_PDFLATEX tag is set to YES, doxygen will use the engine as\n# specified with LATEX_CMD_NAME to generate the PDF file directly from the LaTeX\n# files. 
Set this option to YES, to get a higher quality PDF documentation.\n#\n# See also section LATEX_CMD_NAME for selecting the engine.\n# The default value is: YES.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nUSE_PDFLATEX           = YES\n\n# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode\n# command to the generated LaTeX files. This will instruct LaTeX to keep running\n# if errors occur, instead of asking the user for help. This option is also used\n# when generating formulas in HTML.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nLATEX_BATCHMODE        = NO\n\n# If the LATEX_HIDE_INDICES tag is set to YES then doxygen will not include the\n# index chapters (such as File Index, Compound Index, etc.) in the output.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nLATEX_HIDE_INDICES     = NO\n\n# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source\n# code with syntax highlighting in the LaTeX output.\n#\n# Note that which sources are shown also depends on other settings such as\n# SOURCE_BROWSER.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nLATEX_SOURCE_CODE      = NO\n\n# The LATEX_BIB_STYLE tag can be used to specify the style to use for the\n# bibliography, e.g. plainnat, or ieeetr. See\n# https://en.wikipedia.org/wiki/BibTeX and \\cite for more info.\n# The default value is: plain.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nLATEX_BIB_STYLE        = plain\n\n# If the LATEX_TIMESTAMP tag is set to YES then the footer of each generated\n# page will contain the date and time when the page was generated. 
Setting this\n# to NO can help when comparing the output of multiple runs.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nLATEX_TIMESTAMP        = NO\n\n# The LATEX_EMOJI_DIRECTORY tag is used to specify the (relative or absolute)\n# path from which the emoji images will be read. If a relative path is entered,\n# it will be relative to the LATEX_OUTPUT directory. If left blank the\n# LATEX_OUTPUT directory will be used.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nLATEX_EMOJI_DIRECTORY  =\n\n#---------------------------------------------------------------------------\n# Configuration options related to the RTF output\n#---------------------------------------------------------------------------\n\n# If the GENERATE_RTF tag is set to YES, doxygen will generate RTF output. The\n# RTF output is optimized for Word 97 and may not look too pretty with other RTF\n# readers/editors.\n# The default value is: NO.\n\nGENERATE_RTF           = NO\n\n# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. If a\n# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of\n# it.\n# The default directory is: rtf.\n# This tag requires that the tag GENERATE_RTF is set to YES.\n\nRTF_OUTPUT             = rtf\n\n# If the COMPACT_RTF tag is set to YES, doxygen generates more compact RTF\n# documents. This may be useful for small projects and may help to save some\n# trees in general.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_RTF is set to YES.\n\nCOMPACT_RTF            = NO\n\n# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated will\n# contain hyperlink fields. The RTF file will contain links (just like the HTML\n# output) instead of page references. 
This makes the output suitable for online\n# browsing using Word or some other Word compatible readers that support those\n# fields.\n#\n# Note: WordPad (write) and others do not support links.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_RTF is set to YES.\n\nRTF_HYPERLINKS         = NO\n\n# Load stylesheet definitions from file. Syntax is similar to doxygen's\n# configuration file, i.e. a series of assignments. You only have to provide\n# replacements, missing definitions are set to their default value.\n#\n# See also section \"Doxygen usage\" for information on how to generate the\n# default style sheet that doxygen normally uses.\n# This tag requires that the tag GENERATE_RTF is set to YES.\n\nRTF_STYLESHEET_FILE    =\n\n# Set optional variables used in the generation of an RTF document. Syntax is\n# similar to doxygen's configuration file. A template extensions file can be\n# generated using doxygen -e rtf extensionFile.\n# This tag requires that the tag GENERATE_RTF is set to YES.\n\nRTF_EXTENSIONS_FILE    =\n\n# If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code\n# with syntax highlighting in the RTF output.\n#\n# Note that which sources are shown also depends on other settings such as\n# SOURCE_BROWSER.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_RTF is set to YES.\n\nRTF_SOURCE_CODE        = NO\n\n#---------------------------------------------------------------------------\n# Configuration options related to the man page output\n#---------------------------------------------------------------------------\n\n# If the GENERATE_MAN tag is set to YES, doxygen will generate man pages for\n# classes and files.\n# The default value is: NO.\n\nGENERATE_MAN           = NO\n\n# The MAN_OUTPUT tag is used to specify where the man pages will be put. If a\n# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of\n# it. 
A directory man3 will be created inside the directory specified by\n# MAN_OUTPUT.\n# The default directory is: man.\n# This tag requires that the tag GENERATE_MAN is set to YES.\n\nMAN_OUTPUT             = man\n\n# The MAN_EXTENSION tag determines the extension that is added to the generated\n# man pages. In case the manual section does not start with a number, the number\n# 3 is prepended. The dot (.) at the beginning of the MAN_EXTENSION tag is\n# optional.\n# The default value is: .3.\n# This tag requires that the tag GENERATE_MAN is set to YES.\n\nMAN_EXTENSION          = .3\n\n# The MAN_SUBDIR tag determines the name of the directory created within\n# MAN_OUTPUT in which the man pages are placed. It defaults to man followed by\n# MAN_EXTENSION with the initial . removed.\n# This tag requires that the tag GENERATE_MAN is set to YES.\n\nMAN_SUBDIR             =\n\n# If the MAN_LINKS tag is set to YES and doxygen generates man output, then it\n# will generate one additional man file for each entity documented in the real\n# man page(s). These additional files only source the real man page, but without\n# them the man command would be unable to find the correct page.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_MAN is set to YES.\n\nMAN_LINKS              = NO\n\n#---------------------------------------------------------------------------\n# Configuration options related to the XML output\n#---------------------------------------------------------------------------\n\n# If the GENERATE_XML tag is set to YES, doxygen will generate an XML file that\n# captures the structure of the code including all documentation.\n# The default value is: NO.\n\nGENERATE_XML           = YES\n\n# The XML_OUTPUT tag is used to specify where the XML pages will be put. 
If a\n# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of\n# it.\n# The default directory is: xml.\n# This tag requires that the tag GENERATE_XML is set to YES.\n\nXML_OUTPUT             = xml\n\n# If the XML_PROGRAMLISTING tag is set to YES, doxygen will dump the program\n# listings (including syntax highlighting and cross-referencing information) to\n# the XML output. Note that enabling this will significantly increase the size\n# of the XML output.\n# The default value is: YES.\n# This tag requires that the tag GENERATE_XML is set to YES.\n\nXML_PROGRAMLISTING     = YES\n\n# If the XML_NS_MEMB_FILE_SCOPE tag is set to YES, doxygen will include\n# namespace members in file scope as well, matching the HTML output.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_XML is set to YES.\n\nXML_NS_MEMB_FILE_SCOPE = NO\n\n#---------------------------------------------------------------------------\n# Configuration options related to the DOCBOOK output\n#---------------------------------------------------------------------------\n\n# If the GENERATE_DOCBOOK tag is set to YES, doxygen will generate Docbook files\n# that can be used to generate PDF.\n# The default value is: NO.\n\nGENERATE_DOCBOOK       = NO\n\n# The DOCBOOK_OUTPUT tag is used to specify where the Docbook pages will be put.\n# If a relative path is entered the value of OUTPUT_DIRECTORY will be put in\n# front of it.\n# The default directory is: docbook.\n# This tag requires that the tag GENERATE_DOCBOOK is set to YES.\n\nDOCBOOK_OUTPUT         = docbook\n\n# If the DOCBOOK_PROGRAMLISTING tag is set to YES, doxygen will include the\n# program listings (including syntax highlighting and cross-referencing\n# information) to the DOCBOOK output. 
Note that enabling this will significantly\n# increase the size of the DOCBOOK output.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_DOCBOOK is set to YES.\n\nDOCBOOK_PROGRAMLISTING = NO\n\n#---------------------------------------------------------------------------\n# Configuration options for the AutoGen Definitions output\n#---------------------------------------------------------------------------\n\n# If the GENERATE_AUTOGEN_DEF tag is set to YES, doxygen will generate an\n# AutoGen Definitions (see http://autogen.sourceforge.net/) file that captures\n# the structure of the code including all documentation. Note that this feature\n# is still experimental and incomplete at the moment.\n# The default value is: NO.\n\nGENERATE_AUTOGEN_DEF   = NO\n\n#---------------------------------------------------------------------------\n# Configuration options related to the Perl module output\n#---------------------------------------------------------------------------\n\n# If the GENERATE_PERLMOD tag is set to YES, doxygen will generate a Perl module\n# file that captures the structure of the code including all documentation.\n#\n# Note that this feature is still experimental and incomplete at the moment.\n# The default value is: NO.\n\nGENERATE_PERLMOD       = NO\n\n# If the PERLMOD_LATEX tag is set to YES, doxygen will generate the necessary\n# Makefile rules, Perl scripts and LaTeX code to be able to generate PDF and DVI\n# output from the Perl module output.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_PERLMOD is set to YES.\n\nPERLMOD_LATEX          = NO\n\n# If the PERLMOD_PRETTY tag is set to YES, the Perl module output will be nicely\n# formatted so it can be parsed by a human reader. This is useful if you want to\n# understand what is going on. 
On the other hand, if this tag is set to NO, the\n# size of the Perl module output will be much smaller and Perl will parse it\n# just the same.\n# The default value is: YES.\n# This tag requires that the tag GENERATE_PERLMOD is set to YES.\n\nPERLMOD_PRETTY         = YES\n\n# The names of the make variables in the generated doxyrules.make file are\n# prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. This is useful\n# so different doxyrules.make files included by the same Makefile don't\n# overwrite each other's variables.\n# This tag requires that the tag GENERATE_PERLMOD is set to YES.\n\nPERLMOD_MAKEVAR_PREFIX =\n\n#---------------------------------------------------------------------------\n# Configuration options related to the preprocessor\n#---------------------------------------------------------------------------\n\n# If the ENABLE_PREPROCESSING tag is set to YES, doxygen will evaluate all\n# C-preprocessor directives found in the sources and include files.\n# The default value is: YES.\n\nENABLE_PREPROCESSING   = YES\n\n# If the MACRO_EXPANSION tag is set to YES, doxygen will expand all macro names\n# in the source code. If set to NO, only conditional compilation will be\n# performed. 
Macro expansion can be done in a controlled way by setting\n# EXPAND_ONLY_PREDEF to YES.\n# The default value is: NO.\n# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.\n\nMACRO_EXPANSION        = YES\n\n# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then\n# the macro expansion is limited to the macros specified with the PREDEFINED and\n# EXPAND_AS_DEFINED tags.\n# The default value is: NO.\n# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.\n\nEXPAND_ONLY_PREDEF     = NO\n\n# If the SEARCH_INCLUDES tag is set to YES, the include files in the\n# INCLUDE_PATH will be searched if a #include is found.\n# The default value is: YES.\n# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.\n\nSEARCH_INCLUDES        = YES\n\n# The INCLUDE_PATH tag can be used to specify one or more directories that\n# contain include files that are not input files but should be processed by the\n# preprocessor.\n# This tag requires that the tag SEARCH_INCLUDES is set to YES.\n\nINCLUDE_PATH           =\n\n# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard\n# patterns (like *.h and *.hpp) to filter out the header-files in the\n# directories. If left blank, the patterns specified with FILE_PATTERNS will be\n# used.\n# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.\n\nINCLUDE_FILE_PATTERNS  =\n\n# The PREDEFINED tag can be used to specify one or more macro names that are\n# defined before the preprocessor is started (similar to the -D option of e.g.\n# gcc). The argument of the tag is a list of macros of the form: name or\n# name=definition (no spaces). If the definition and the \"=\" are omitted, \"=1\"\n# is assumed. 
To prevent a macro definition from being undefined via #undef or\n# recursively expanded use the := operator instead of the = operator.\n# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.\n\nPREDEFINED             =\n\n# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this\n# tag can be used to specify a list of macro names that should be expanded. The\n# macro definition that is found in the sources will be used. Use the PREDEFINED\n# tag if you want to use a different macro definition that overrules the\n# definition found in the source code.\n# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.\n\nEXPAND_AS_DEFINED      =\n\n# If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will\n# remove all references to function-like macros that are alone on a line, have\n# an all uppercase name, and do not end with a semicolon. Such function macros\n# are typically used for boiler-plate code, and will confuse the parser if not\n# removed.\n# The default value is: YES.\n# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.\n\nSKIP_FUNCTION_MACROS   = YES\n\n#---------------------------------------------------------------------------\n# Configuration options related to external references\n#---------------------------------------------------------------------------\n\n# The TAGFILES tag can be used to specify one or more tag files. For each tag\n# file the location of the external documentation should be added. The format of\n# a tag file without this location is as follows:\n# TAGFILES = file1 file2 ...\n# Adding location for the tag files is done as follows:\n# TAGFILES = file1=loc1 \"file2 = loc2\" ...\n# where loc1 and loc2 can be relative or absolute paths or URLs. See the\n# section \"Linking to external documentation\" for more information about the use\n# of tag files.\n# Note: Each tag file must have a unique name (where the name does NOT include\n# the path). 
If a tag file is not located in the directory in which doxygen is\n# run, you must also specify the path to the tagfile here.\n\nTAGFILES               =\n\n# When a file name is specified after GENERATE_TAGFILE, doxygen will create a\n# tag file that is based on the input files it reads. See section \"Linking to\n# external documentation\" for more information about the usage of tag files.\n\nGENERATE_TAGFILE       =\n\n# If the ALLEXTERNALS tag is set to YES, all external class will be listed in\n# the class index. If set to NO, only the inherited external classes will be\n# listed.\n# The default value is: NO.\n\nALLEXTERNALS           = NO\n\n# If the EXTERNAL_GROUPS tag is set to YES, all external groups will be listed\n# in the modules index. If set to NO, only the current project's groups will be\n# listed.\n# The default value is: YES.\n\nEXTERNAL_GROUPS        = YES\n\n# If the EXTERNAL_PAGES tag is set to YES, all external pages will be listed in\n# the related pages index. If set to NO, only the current project's pages will\n# be listed.\n# The default value is: YES.\n\nEXTERNAL_PAGES         = YES\n\n#---------------------------------------------------------------------------\n# Configuration options related to the dot tool\n#---------------------------------------------------------------------------\n\n# If the CLASS_DIAGRAMS tag is set to YES, doxygen will generate a class diagram\n# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to\n# NO turns the diagrams off. Note that this option also works with HAVE_DOT\n# disabled, but it is recommended to install and use dot, since it yields more\n# powerful graphs.\n# The default value is: YES.\n\nCLASS_DIAGRAMS         = NO\n\n# You can include diagrams made with dia in doxygen documentation. Doxygen will\n# then run dia to produce the diagram and insert it in the documentation. 
The\n# DIA_PATH tag allows you to specify the directory where the dia binary resides.\n# If left empty dia is assumed to be found in the default search path.\n\nDIA_PATH               =\n\n# If set to YES the inheritance and collaboration graphs will hide inheritance\n# and usage relations if the target is undocumented or is not a class.\n# The default value is: YES.\n\nHIDE_UNDOC_RELATIONS   = YES\n\n# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is\n# available from the path. This tool is part of Graphviz (see:\n# http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent\n# Bell Labs. The other options in this section have no effect if this option is\n# set to NO\n# The default value is: YES.\n\nHAVE_DOT               = NO\n\n# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is allowed\n# to run in parallel. When set to 0 doxygen will base this on the number of\n# processors available in the system. You can set it explicitly to a value\n# larger than 0 to get control over the balance between CPU load and processing\n# speed.\n# Minimum value: 0, maximum value: 32, default value: 0.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nDOT_NUM_THREADS        = 0\n\n# When you want a differently looking font in the dot files that doxygen\n# generates you can specify the font name using DOT_FONTNAME. 
You need to make\n# sure dot is able to find the font, which can be done by putting it in a\n# standard location or by setting the DOTFONTPATH environment variable or by\n# setting DOT_FONTPATH to the directory containing the font.\n# The default value is: Helvetica.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nDOT_FONTNAME           = Helvetica\n\n# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of\n# dot graphs.\n# Minimum value: 4, maximum value: 24, default value: 10.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nDOT_FONTSIZE           = 10\n\n# By default doxygen will tell dot to use the default font as specified with\n# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set\n# the path where dot can find it using this tag.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nDOT_FONTPATH           =\n\n# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for\n# each documented class showing the direct and indirect inheritance relations.\n# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO.\n# The default value is: YES.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nCLASS_GRAPH            = YES\n\n# If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a\n# graph for each documented class showing the direct and indirect implementation\n# dependencies (inheritance, containment, and class references variables) of the\n# class with other documented classes.\n# The default value is: YES.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nCOLLABORATION_GRAPH    = YES\n\n# If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for\n# groups, showing the direct groups dependencies.\n# The default value is: YES.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nGROUP_GRAPHS           = YES\n\n# If the UML_LOOK tag is set to YES, doxygen will generate inheritance and\n# 
collaboration diagrams in a style similar to the OMG's Unified Modeling\n# Language.\n# The default value is: NO.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nUML_LOOK               = NO\n\n# If the UML_LOOK tag is enabled, the fields and methods are shown inside the\n# class node. If there are many fields or methods and many nodes the graph may\n# become too big to be useful. The UML_LIMIT_NUM_FIELDS threshold limits the\n# number of items for each type to make the size more manageable. Set this to 0\n# for no limit. Note that the threshold may be exceeded by 50% before the limit\n# is enforced. So when you set the threshold to 10, up to 15 fields may appear,\n# but if the number exceeds 15, the total amount of fields shown is limited to\n# 10.\n# Minimum value: 0, maximum value: 100, default value: 10.\n# This tag requires that the tag UML_LOOK is set to YES.\n\nUML_LIMIT_NUM_FIELDS   = 10\n\n# If the DOT_UML_DETAILS tag is set to NO, doxygen will show attributes and\n# methods without types and arguments in the UML graphs. If the DOT_UML_DETAILS\n# tag is set to YES, doxygen will add type and arguments for attributes and\n# methods in the UML graphs. If the DOT_UML_DETAILS tag is set to NONE, doxygen\n# will not generate fields with class member information in the UML graphs. The\n# class diagrams will look similar to the default class diagrams but using UML\n# notation for the relationships.\n# Possible values are: NO, YES and NONE.\n# The default value is: NO.\n# This tag requires that the tag UML_LOOK is set to YES.\n\nDOT_UML_DETAILS        = NO\n\n# The DOT_WRAP_THRESHOLD tag can be used to set the maximum number of characters\n# to display on a single line. If the actual line length exceeds this threshold\n# significantly it will be wrapped across multiple lines. 
Some heuristics are applied\n# to avoid ugly line breaks.\n# Minimum value: 0, maximum value: 1000, default value: 17.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nDOT_WRAP_THRESHOLD     = 17\n\n# If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and\n# collaboration graphs will show the relations between templates and their\n# instances.\n# The default value is: NO.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nTEMPLATE_RELATIONS     = NO\n\n# If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to\n# YES then doxygen will generate a graph for each documented file showing the\n# direct and indirect include dependencies of the file with other documented\n# files.\n# The default value is: YES.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nINCLUDE_GRAPH          = YES\n\n# If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are\n# set to YES then doxygen will generate a graph for each documented file showing\n# the direct and indirect include dependencies of the file with other documented\n# files.\n# The default value is: YES.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nINCLUDED_BY_GRAPH      = YES\n\n# If the CALL_GRAPH tag is set to YES then doxygen will generate a call\n# dependency graph for every global function or class method.\n#\n# Note that enabling this option will significantly increase the time of a run.\n# So in most cases it will be better to enable call graphs for selected\n# functions only using the \\callgraph command. 
Disabling a call graph can be\n# accomplished by means of the command \\hidecallgraph.\n# The default value is: NO.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nCALL_GRAPH             = NO\n\n# If the CALLER_GRAPH tag is set to YES then doxygen will generate a caller\n# dependency graph for every global function or class method.\n#\n# Note that enabling this option will significantly increase the time of a run.\n# So in most cases it will be better to enable caller graphs for selected\n# functions only using the \\callergraph command. Disabling a caller graph can be\n# accomplished by means of the command \\hidecallergraph.\n# The default value is: NO.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nCALLER_GRAPH           = NO\n\n# If the GRAPHICAL_HIERARCHY tag is set to YES then doxygen will generate a graphical\n# hierarchy of all classes instead of a textual one.\n# The default value is: YES.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nGRAPHICAL_HIERARCHY    = YES\n\n# If the DIRECTORY_GRAPH tag is set to YES then doxygen will show the\n# dependencies a directory has on other directories in a graphical way. The\n# dependency relations are determined by the #include relations between the\n# files in the directories.\n# The default value is: YES.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nDIRECTORY_GRAPH        = YES\n\n# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images\n# generated by dot. 
For an explanation of the image formats see the section\n# output formats in the documentation of the dot tool (Graphviz (see:\n# http://www.graphviz.org/)).\n# Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order\n# to make the SVG files visible in IE 9+ (other browsers do not have this\n# requirement).\n# Possible values are: png, png:cairo, png:cairo:cairo, png:cairo:gd, png:gd,\n# png:gd:gd, jpg, jpg:cairo, jpg:cairo:gd, jpg:gd, jpg:gd:gd, gif, gif:cairo,\n# gif:cairo:gd, gif:gd, gif:gd:gd, svg, png:gd, png:gd:gd, png:cairo,\n# png:cairo:gd, png:cairo:cairo, png:cairo:gdiplus, png:gdiplus and\n# png:gdiplus:gdiplus.\n# The default value is: png.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nDOT_IMAGE_FORMAT       = png\n\n# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to\n# enable generation of interactive SVG images that allow zooming and panning.\n#\n# Note that this requires a modern browser other than Internet Explorer. Tested\n# and working are Firefox, Chrome, Safari, and Opera.\n# Note: For IE 9+ you need to set HTML_FILE_EXTENSION to xhtml in order to make\n# the SVG files visible. Older versions of IE do not have SVG support.\n# The default value is: NO.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nINTERACTIVE_SVG        = NO\n\n# The DOT_PATH tag can be used to specify the path where the dot tool can be\n# found. 
If left blank, it is assumed the dot tool can be found in the path.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nDOT_PATH               =\n\n# The DOTFILE_DIRS tag can be used to specify one or more directories that\n# contain dot files that are included in the documentation (see the \\dotfile\n# command).\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nDOTFILE_DIRS           =\n\n# The MSCFILE_DIRS tag can be used to specify one or more directories that\n# contain msc files that are included in the documentation (see the \\mscfile\n# command).\n\nMSCFILE_DIRS           =\n\n# The DIAFILE_DIRS tag can be used to specify one or more directories that\n# contain dia files that are included in the documentation (see the \\diafile\n# command).\n\nDIAFILE_DIRS           =\n\n# When using plantuml, the PLANTUML_JAR_PATH tag should be used to specify the\n# path where java can find the plantuml.jar file. If left blank, it is assumed\n# PlantUML is not used or called during a preprocessing step. Doxygen will\n# generate a warning when it encounters a \\startuml command in this case and\n# will not generate output for the diagram.\n\nPLANTUML_JAR_PATH      =\n\n# When using plantuml, the PLANTUML_CFG_FILE tag can be used to specify a\n# configuration file for plantuml.\n\nPLANTUML_CFG_FILE      =\n\n# When using plantuml, the specified paths are searched for files specified by\n# the !include statement in a plantuml block.\n\nPLANTUML_INCLUDE_PATH  =\n\n# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of nodes\n# that will be shown in the graph. If the number of nodes in a graph becomes\n# larger than this value, doxygen will truncate the graph, which is visualized\n# by representing a node as a red box. Note that if the number of direct\n# children of the root node in a graph is already larger than\n# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. 
Also note that\n# the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.\n# Minimum value: 0, maximum value: 10000, default value: 50.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nDOT_GRAPH_MAX_NODES    = 50\n\n# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the graphs\n# generated by dot. A depth value of 3 means that only nodes reachable from the\n# root by following a path via at most 3 edges will be shown. Nodes that lay\n# further from the root node will be omitted. Note that setting this option to 1\n# or 2 may greatly reduce the computation time needed for large code bases. Also\n# note that the size of a graph can be further restricted by\n# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.\n# Minimum value: 0, maximum value: 1000, default value: 0.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nMAX_DOT_GRAPH_DEPTH    = 0\n\n# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent\n# background. This is disabled by default, because dot on Windows does not seem\n# to support this out of the box.\n#\n# Warning: Depending on the platform used, enabling this option may lead to\n# badly anti-aliased labels on the edges of a graph (i.e. they become hard to\n# read).\n# The default value is: NO.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nDOT_TRANSPARENT        = NO\n\n# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output\n# files in one run (i.e. multiple -o and -T options on the command line). 
This\n# makes dot run faster, but since only newer versions of dot (>1.8.10) support\n# this, this feature is disabled by default.\n# The default value is: NO.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nDOT_MULTI_TARGETS      = NO\n\n# If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page\n# explaining the meaning of the various boxes and arrows in the dot generated\n# graphs.\n# The default value is: YES.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nGENERATE_LEGEND        = YES\n\n# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate\n# files that are used to generate the various graphs.\n#\n# Note: This setting is not only used for dot files but also for msc and\n# plantuml temporary files.\n# The default value is: YES.\n\nDOT_CLEANUP            = YES\n"
  },
  {
    "path": "docs/export.md",
    "content": "---\njupytext:\n  text_representation:\n    extension: .md\n    format_name: myst\n    format_version: 0.12\n    jupytext_version: 1.9.1\nkernelspec:\n  display_name: Python 3\n  language: python\n  name: python3\n---\n\n```{currentmodule} tskit\n```\n\n```{code-cell}\n:tags: [hide-input]\n\nfrom IPython.display import display\n```\n\n(sec_export)=\n\n# Data export\n\n(sec_export_vcf)=\n## Variant Call Format\n\nTskit supports exporting data to the standard\n[Variant Call Format](http://samtools.github.io/hts-specs/VCFv4.3.pdf)\nvia the `tskit vcf` {ref}`command line interface<sec_cli>` command\nand the {meth}`TreeSequence.write_vcf` method in the {ref}`sec_python_api`.\nConversion is quite efficient, with tskit producing VCF data at several\nhundred megabytes per second (for large files), which is usually as fast as\nit can be written to storage or consumed by programs in a pipeline.\n\n::::{tip}\nIf we have a tree sequence file the\n{ref}`command line interface<sec_cli>` is often the most\nconvenient way to convert to VCF:\n\n:::{code-block} bash\ntskit vcf example.trees > example.vcf\n:::\n\nSee the {ref}`sec_export_vcf_compression` section for information\non how to compress the VCF output.\n::::\n\nFor tree sequences produced by recent versions of programs such as\n``msprime``, ``SLiM``, ``fwdpy11`` or ``tsinfer``, VCF output will\n\"do the right thing\" and no further arguments are needed.\nFor example, here we simulate 3 diploid individuals\nwith mutations using ``msprime``, and convert to VCF.\n\n```{code-cell}\nimport sys\nimport msprime\nts = msprime.sim_ancestry(\n    samples=3, ploidy=2, sequence_length=10, random_seed=2)\nts = msprime.sim_mutations(ts, rate=0.1, random_seed=2)\nts.write_vcf(sys.stdout)\n```\n\nIn the output VCF we have 3 diploid samples\n(see the {ref}`sec_export_vcf_terminology` section)\ncorresponding to samples specified in the ancestry simulation\nwith IDs ``tsk_0``, ``tsk_1`` and ``tsk_2``\n(see the 
{ref}`sec_export_vcf_individual_names`\nsection for how to override these default labels).\nWe then have a line for every row\nin the {ref}`site table<sec_site_table_definition>`, and\nthe data is derived directly from the {meth}`TreeSequence.variants`\nmethod; e.g.\n\n```{code-cell}\nfor var in ts.variants():\n    print(var.site.position, var.site.id, var.alleles, var.genotypes, sep=\"\\t\")\n```\n\nWe can see the ``POS`` value is equal to the site's position\n(see the {ref}`sec_export_vcf_modifying_coordinates` for information\non how we deal with continuous coordinates), the ``ID`` value\nis the site's ID, and the ``REF`` and ``ALT`` values\nare derived from the variant's ``alleles``.\n\nThe ``GT`` values for the three diploid individuals are derived from the\nvariant's genotypes (see the {ref}`sec_export_vcf_terminology` section).\nFor this simulation, the diploid individuals correspond to\nadjacent sample nodes in order, and we can see there is a direct\ncorrespondence between the phased ``GT`` values and variant's genotypes.\nSee the {ref}`sec_export_vcf_constructing_gt` section for\nmore information on how this is done in general and for options\nto control the VCF sample and ``GT`` values.\n\n::::{important}\nIn these examples we write the VCF data to ``sys.stdout`` so that we can see\nthe output. 
Usually, however, you'd write to a file:\n\n:::{code-block}\nwith open(\"output.vcf\", \"w\") as vcf_file:\n    ts.write_vcf(vcf_file)\n:::\n\n:::{seealso}\nSee the {ref}`sec_export_vcf_compression` section for information\non how to compress the output or convert to BCF.\n:::\n\n::::\n\n(sec_export_vcf_terminology)=\n\n### Terminology\n\nThere are some mismatches between the terminology for tskit and VCF.\nIn VCF a \"sample\" is a multiploid individual, but in tskit a sample\nrefers to a single **node** (monoploid genome), and an individual\nconsists of one or more nodes (e.g., two nodes for a diploid).\nSimilarly, in VCF a \"genotype\" refers to the observed allelic state\nfor a sample **individual** at a particular site,\nwhereas in tskit a genotype is the observed allelic state\nfor a **node** (see {attr}`.Variant.genotypes`).\n\n:::{seealso}\nSee the {ref}`sec_glossary` for more details on tskit's data model\n and terminology.\n:::\n\n(sec_export_vcf_compression)=\n\n### Compressed output\n\nThe simplest way to compress the VCF output is to use the\n`tskit vcf` {ref}`command line interface<sec_cli>`\nand pipe the output to `bgzip`:\n\n:::{code-block} bash\ntskit vcf example.trees | bgzip -c > example.vcf.gz\n:::\nA general way to convert VCF data to various formats is to pipe the text\nproduced by ``tskit`` into ``bcftools`` using the command\nline interface:\n\n:::{code-block} bash\ntskit vcf example.trees | bcftools view -O b > example.bcf\n:::\n\nIf you need more control over the form of the output (or want to work\ndirectly in Python), the following recipe has the same effect:\n\n:::{code-block}\n\nimport os\nimport subprocess\n\nread_fd, write_fd = os.pipe()\nwrite_pipe = os.fdopen(write_fd, \"w\")\nwith open(\"output.bcf\", \"w\") as bcf_file:\n    proc = subprocess.Popen(\n        [\"bcftools\", \"view\", \"-O\", \"b\"], stdin=read_fd, stdout=bcf_file\n    )\n    ts.write_vcf(write_pipe)\n    write_pipe.close()\n    os.close(read_fd)\n    proc.wait()\n  
  if proc.returncode != 0:\n        raise RuntimeError(\"bcftools failed with status:\", proc.returncode)\n:::\n\n\nThe VCF output can also be compressed using the {mod}`gzip` Python module:\n\n:::{code-block}\n\nimport gzip\n\nwith gzip.open(\"output.vcf.gz\", \"wt\") as f:\n    ts.write_vcf(f)\n:::\n\nHowever, this gzipped VCF won't be fully compatible with downstream tools\nsuch as tabix, which usually require the VCF to use the specialised bgzip format.\n\n(sec_export_vcf_masking_output)=\n\n### Masking output\n\nThe {meth}`TreeSequence.write_vcf` method provides the\n``site_mask`` and ``sample_mask`` arguments to\nomit or mark parts of the output as missing.\n\n```{code-cell}\nts = msprime.sim_ancestry(\n    samples=3, ploidy=2, sequence_length=10, random_seed=2)\nts = msprime.sim_mutations(ts, rate=0.1, random_seed=2)\nts.tables.sites\n```\n\nThe ``sample_mask`` argument provides a general way to mask out\nparts of the output, which can be helpful when simulating missing\ndata. In this (contrived) example, we create a sample mask function\nthat marks one genotype missing in each variant in a regular\npattern:\n\n:::{code-block}\n\ndef sample_mask(variant):\n    sample_mask = np.zeros(ts.num_samples, dtype=bool)\n    sample_mask[variant.site.id % ts.num_samples] = 1\n    return sample_mask\n\n\nts.write_vcf(sys.stdout, sample_mask=sample_mask)\n:::\n\n(sec_export_vcf_constructing_gt)=\n\n### Constructing GT values\n\nThe core elements of the tskit\n{ref}`data model<sec_data_model>`\nare {ref}`nodes<sec_node_table_definition>`,\n{ref}`edges<sec_edge_table_definition>`,\n{ref}`sites<sec_site_table_definition>` and\n{ref}`mutations<sec_mutation_table_definition>`.\nThese four tables allow us to completely describe the\ngenetic ancestry of a set of sampled monoploid\ngenomes and their genetic variation.\nThe {ref}`individual table<sec_individual_table_definition>`\ndefines a set of individual *organisms*, and it can\nbe used to define the inheritance relationships 
between\nthem (the pedigree). An individual may be associated\nwith one or more nodes, and these nodes may or\nmay not be samples (see the {ref}`sec_glossary`\nfor clarification of these terms).\nThus, there is some complexity in how the per-individual GT values\nare generated, which we explain in this section.\n\n#### Without individuals\n\nWe start with an example in which there are no individuals\ndefined (which was the default in msprime before version 1.0):\n\n```{code-cell}\nimport tskit\ntables = tskit.Tree.generate_balanced(4, span=10).tree_sequence.dump_tables()\ntables.sites.add_row(3, ancestral_state=\"A\")\ntables.mutations.add_row(site=0, node=0, derived_state=\"T\")\nts = tables.tree_sequence()\ndisplay(ts.draw_svg())\ndisplay(ts)\nts.write_vcf(sys.stdout)\n```\n\nHere we define a tree sequence consisting of a single tree, which\nhas a variant site at position 3 and a mutation over node 0.\nThere is no information about individuals in this tree sequence,\nand so we assume that each of the nodes corresponds to a single\nhaploid individual.\n\nUsers of msprime simulations would often be interested in producing\nVCFs for diploid organisms. Because of the assumptions made\nby these simulations, this means arbitrarily combining the sample\nnodes into pairs. This is what the ``ploidy`` option does:\n\n```{code-cell}\nts.write_vcf(sys.stdout, ploidy=2)\n```\n\nThus, the ``GT`` values for the (synthetic) diploid individual ``tsk_0``\nare generated by combining nodes 0 and 1, and ``tsk_1``\nby combining nodes 2 and 3.\n\n:::{important}\nSoftware packages modelling multiploid individuals are encouraged to\nuse the individual table to make their assumptions explicit. Recent\nversions of simulators and inference methods should all do this,\nand so the ``ploidy`` argument is really only intended to support\nlegacy code. 
It is therefore an error to supply a value for ``ploidy``\nwhen individual information is present in a tree sequence.\n:::\n\n#### With individuals\n\nExtending the example in the previous section, we add some individual data\ndefining a pair of diploid sibs and their parents.\n\n:::{note}\nWe set the nodes for (e.g.) individual 2 to [1, 3] here to illustrate\nthat nodes for a given individual are not necessarily contiguous.\n:::\n\n```{code-cell}\ntables.individuals.add_row(parents=[-1, -1])\ntables.individuals.add_row(parents=[-1, -1])\ntables.individuals.add_row(parents=[0, 1])\ntables.individuals.add_row(parents=[0, 1])\nnode_individual = tables.nodes.individual\nnode_individual[[1, 3]] = 2\nnode_individual[[0, 2]] = 3\ntables.nodes.individual = node_individual\ndisplay(tables.individuals)\ndisplay(tables.nodes)\nts = tables.tree_sequence()\nts.write_vcf(sys.stdout)\n```\n\nIn this model we have four individuals defined, but only\nindividuals 2 and 3 are associated with nodes (more specifically,\n**sample** nodes). Thus, we output **two** VCF sample individuals\ncomposed of the linked nodes.\n\n:::{note}\nNote that the labels are ``tsk_0`` and ``tsk_1`` even though\nthe individual IDs are 2 and 3. See the\n{ref}`sec_export_vcf_individual_names` section for how to change\nthese default labels.\n:::\n\nIf some individuals have no associated nodes, they are omitted from the\nVCF output. 
By default, only nodes that are marked as samples contribute\nto the VCF genotypes; to include non-sample nodes as well (e.g., internal\nnodes that have been marked as individuals), set\n``include_non_sample_nodes=True`` when calling {meth}`TreeSequence.write_vcf`.\n\n:::{note}\nAt present, {meth}`TreeSequence.write_vcf` only supports sites with up to\n9 distinct alleles; attempting to write a site with more than 9 alleles\nwill result in a {class}`ValueError`.\n:::\n\n(sec_export_vcf_individual_names)=\n\n### Individual names\n\nBy default the VCF samples are given the labels ``tsk_0``, ``tsk_1``,\n..., ``tsk_{N - 1}``, where ``N`` is the number of individuals to\nbe output (see the {ref}`sec_export_vcf_constructing_gt` section).\n\nWe can change this default labelling using the ``individual_names``\nargument:\n\n```{code-cell}\nimport sys\nimport msprime\nts = msprime.sim_ancestry(\n    samples=3, ploidy=2, sequence_length=10, random_seed=2)\nts = msprime.sim_mutations(ts, rate=0.1, random_seed=2)\nts.write_vcf(sys.stdout, individual_names=[\"A\", \"B\", \"C\"])\n```\n\n#### Exporting to plink\n\nThe default VCF sample IDs produced by ``tskit`` do not work well\nwith plink because it parses the individual\nIDs based on a particular format, and does not allow ``0`` as a valid\nidentifier. 
We get an error like this:\n\n```\nError: Sample ID ends with \"_0\", which induces an invalid IID of '0`.\n```\n\nThis can be fixed by using the ``individual_names`` argument\nto set the names to anything where the first name doesn't end with ``_0``.\nAn example implementation for diploid individuals is:\n\n:::{code-block}\nn_dip_indv = int(ts.num_samples / 2)\nindv_names = [f\"tsk_{i}indv\" for i in range(n_dip_indv)]\nwith open(\"output.vcf\", \"w\") as vcf_file:\n    ts.write_vcf(vcf_file, individual_names=indv_names)\n:::\n\nAdding a second ``_`` (eg: ``tsk_0_indv``) is not recommended as\n``plink`` uses ``_`` as the default separator for separating family\nid and individual id, and two underscores will throw an error.\n\n(sec_export_vcf_modifying_coordinates)=\n\n### Modifying coordinates\n\nTree sequence site positions can be floating point values, whereas VCF\nrequires positive integers. The ``position_transform`` argument\ncontrols how tskit maps coordinates into VCF. Translating non-integer\npositions necessarily loses precision; by default we round to the nearest\ninteger, so multiple sites may share the same output position. 
\nFurthermore, tskit's coordinate system starts at zero,\nwhereas the VCF standard requires positions to be positive,\nand so a site at position 0 is not valid in the VCF standard.\nBecause VCF parsers differ, we do **not** do anything to account for this.\n\nThe simplest resolution of this discrepancy in convention between tskit and VCF\npositions is to deal with any site at position 0 as a special case (for instance,\nby discarding or ignoring it).\nA different interpretation of this difference between tskit's position\nand VCF's POS field\nis that they are different coordinate systems: tskit coordinates are\n\"distance to the right of the left end of the chromosome\",\nwhile VCF coordinates are \"which number site, counting from the left end\nof the chromosome and starting at one\".\nUnder this interpretation, the solution is to supply an explicit\n``position_transform`` that adds 1 to the coordinate when outputting\nto VCF (or using the ``\"legacy\"`` option described below). However, this can\neasily lead to off-by-one errors converting between the coordinate systems,\nso should only be used if you really are using 0-based coordinates for your\ntree sequence.\n\n:::{warning}\nMost VCF tools cannot deal with a POS value of 0. If your tree\nsequence contains a site with position 0, this will likely cause an error.\n:::\n\nInternally, the coordinates used in the VCF output are obtained by applying\nthe ``position_transform`` function to the array of site positions (and, for\nthe contig length, to the tree sequence {attr}`.TreeSequence.sequence_length`).\nThis function must return a one-dimensional array of the same length as its\ninput; otherwise a {class}`ValueError` is raised. 
In addition to accepting a\ncallable, tskit also supports the string value ``\"legacy\"`` here, which\nselects the pre-0.2.0 behaviour used by the original VCF exporter:\npositions are rounded to the nearest integer, starting at 1, and are forced\nto be strictly increasing by incrementing ties.\n\nThe VCF specification does not allow positions to be 0. By default, if any\ntransformed position is 0, {meth}`TreeSequence.write_vcf` will raise a\n{class}`ValueError`. To avoid this error you can either:\n\n- set ``allow_position_zero=True`` to write such sites anyway;\n- mask the offending sites using the ``site_mask`` argument; or\n- choose a ``position_transform`` that maps 0 to a valid positive position.\n\nFor example, to shift all coordinates by 1, we could define:\n\n```{code-cell}\ndef one_based_positions(positions):\n    return [int(round(x)) + 1 for x in positions]\n\nts.write_vcf(sys.stdout, position_transform=one_based_positions)\n```\n\n:::{note}\nThe msprime 0.x legacy API simulates using continuous coordinates. It may\nbe simpler to update your code to use the msprime 1.0 API (which uses\ndiscrete coordinates by default) than to work out how to transform\ncoordinates in a way that is suitable for your application.\n:::\n"
  },
  {
    "path": "docs/file-formats.md",
    "content": "---\njupytext:\n  text_representation:\n    extension: .md\n    format_name: myst\n    format_version: 0.12\n    jupytext_version: 1.9.1\nkernelspec:\n  display_name: Python 3\n  language: python\n  name: python3\n---\n\n:::{currentmodule} tskit\n:::\n\n(sec_file_formats)=\n\n# File formats\n\n\n(sec_tree_sequence_file_format)=\n\n## Tree sequence file format\n\nTo make tree sequence data as efficient and easy as possible to use, we store the\ndata on file in a columnar, binary format. The format is based on the\n[kastore](https://pypi.org/project/kastore/) package, which is a simple\nkey-value store for numerical data. There is a one-to-one correspondence\nbetween the tables described above and the arrays stored in these files.\n\nBy convention, these files are given the `.trees` suffix (although this\nis not enforced in any way), and we will sometimes refer to them as \".trees\"\nfiles. We also refer to them as \"tree sequence files\".\n\n:::{todo}\nLink to the documentation for kastore, and describe the arrays that are\nstored as well as the top-level metadata. Note that a structured listing of\nall the data stored in a tree sequence file can be shown using\ne.g. ``python -m kastore ls file.trees``.\n:::\n\n\n### Legacy Versions\n\nTree sequence files are versioned. This version of tskit can read\n``.trees`` files produced by earlier releases that use the same *major*\nfile format version (see ``format/version`` in a kastore listing).\nFiles written using the pre-kastore HDF5 format (for example, by\nmsprime < 0.6.0 or tskit < 0.6.2) cannot be read directly. To convert\nsuch legacy files, use the ``tskit upgrade`` command from an older\ntskit version (< 0.6.2) to produce a modern ``.trees`` file.\n\n\n(sec_text_file_format)=\n\n## Text file formats\n\nThe tree sequence text file format is based on a simple whitespace\ndelimited approach. Each table corresponds to a single file, and is\ncomposed of a number of whitespace delimited columns. 
The first\nline of each file must be a **header** giving the names of each column.\nSubsequent rows must contain data for each of these columns, following\nthe usual conventions. Each table has a set of mandatory and optional columns which are\ndescribed below. The columns can be provided in any order, and extra columns\ncan be included in the file. Note, in particular, that this means that\nan `id` column may be present in any of these files, but it will be\nignored (IDs are always determined by the position of the row in a table).\n\nThe {meth}`load_text` method can be used to read tables in text format. This has been\nused to create the following very simple tree sequence, with four nodes, two trees,\nand three mutations at two sites, both on the first tree:\n\n\n```{code-cell} ipython3\n:tags: [\"hide-input\"]\n# TODO once https://github.com/tskit-dev/tskit/issues/1824 is solved\n# change the individual table to include some with blank parents / locations\nimport io\n\nimport tskit\nfrom IPython.display import SVG\n\nindividuals = \"\"\"\\\nflags       location     parents\n0           0.5,1.2      -1,-1\n0           1.0,3.4      0,-1\n0           3.5,6.3      0,1\n0           0.5          -1,-1\n0           0.5,0.5      2,3\n\"\"\"\n\nnodes = \"\"\"\\\nis_sample   individual   time\n1           0            0.0\n1           0            0.0\n0           -1           2.0\n0           -1           3.0\n\"\"\"\nedges = \"\"\"\\\nleft   right   parent  child\n0.0    7.0     2       0\n0.0    7.0     2       1\n7.0    10.0    3       0\n7.0    10.0    3       1\n\"\"\"\n\nsites = \"\"\"\\\nposition      ancestral_state\n2.0           AT\n4.0           A\n\"\"\"\n\nmutations = \"\"\"\\\nsite   node    derived_state    time    parent\n0      0       A                0.5     -1\n1      1       A                1.0     -1\n1      0       T                1.5     -1\n\"\"\"\n\nmigrations = \"\"\"\\\nleft   right   node   source   dest   time\n0.0    0.7     5      2     
   3      1.0\n0.8    0.9     8      3        4      3.0\n\"\"\"\n\npopulations = \"\"\"\\\nid   metadata\n0    cG9wMQ==\n1    cG9wMg==\n\"\"\"\n\nts = tskit.load_text(\n    individuals=io.StringIO(individuals),\n    nodes=io.StringIO(nodes),\n    edges=io.StringIO(edges),\n    sites=io.StringIO(sites),\n    mutations=io.StringIO(mutations),\n    # migrations=io.StringIO(migrations),  # uncomment when https://github.com/tskit-dev/tskit/issues/19 fixed\n    populations=io.StringIO(populations),\n    strict=False\n)\nSVG(ts.draw_svg(y_axis=True))\n\n```\n\nA deletion from AT to A has occurred at position 2 on the branch leading to\nnode 0, and two mutations have occurred at position 4 on the branch leading to\nnode 1, first from A to T, then a back mutation to A. The genotypes of our two\nsamples, nodes 0 and 1, are therefore AA and ATA. Note that this tree sequence\nalso contains entries in the individual, population,\nand migration tables, but these are not shown in the plot above.\n\n\n(sec_individual_text_format)=\n\n### Individual text format\n\nThe individual text format must contain a `flags` column.\nOptionally, there may also be `location`, `parents` and\n`metadata` columns. See the\n{ref}`individual table definitions<sec_individual_table_definition>`\nfor details on these columns.\n\nNote that there are currently no globally defined `flags`, but the column\nis still required; a value of `0` means that there are no flags set.\n\nThe `location` and `parents` columns should be a sequence of comma-separated numeric\nvalues. They do not all have to be the same length.\n\n```{code-cell} python\n:tags: [\"hide-input\", \"output-wide-tabs\"]\nimport sys\nfrom IPython.display import display, HTML\n\ndisplay(HTML(\"An example individual table:\"))\nts.dump_text(individuals=sys.stdout)\n```\n\n(sec_node_text_format)=\n\n### Node text format\n\nThe node text format must contain the columns `is_sample` and\n`time`. 
Optionally, there may also be `population`, `individual`, and\n`metadata` columns. See the\n{ref}`node table definitions<sec_node_table_definition>` for details on these columns.\n\nNote that we do not have a `flags` column in the text file format, but\ninstead use `is_sample` (which may be 0 or 1). Currently, `NODE_IS_SAMPLE` is\nthe only flag value defined for nodes, and as more flags are defined we will\nallow for extra columns in the text format.\n\n```{code-cell} ipython3\n:tags: [\"hide-input\", \"output-wide-tabs\"]\ndisplay(HTML(\"An example node table:\"))\nts.dump_text(nodes=sys.stdout)\n```\n\n\n(sec_edge_text_format)=\n\n### Edge text format\n\nThe edge text format must contain the columns `left`,\n`right`, `parent` and `child`. Optionally, there may also be\na `metadata` column.\nSee the {ref}`edge table definitions <sec_edge_table_definition>`\nfor details on these columns.\n\n```{code-cell} ipython3\n:tags: [\"hide-input\", \"output-wide-tabs\"]\ndisplay(HTML(\"An example edge table:\"))\nts.dump_text(edges=sys.stdout)\n```\n\n(sec_site_text_format)=\n\n### Site text format\n\nThe site text format must contain the columns `position` and\n`ancestral_state`. The `metadata` column may also be optionally\npresent. See the\n{ref}`site table definitions <sec_site_table_definition>`\nfor details on these columns.\n\n```{code-cell} ipython3\n:tags: [\"hide-input\", \"output-wide-tabs\"]\ndisplay(HTML(\"An example site table:\"))\nts.dump_text(sites=sys.stdout)\n```\n\n\n(sec_mutation_text_format)=\n\n### Mutation text format\n\nThe mutation text format must contain the columns `site`,\n`node` and `derived_state`. The `time`, `parent` and `metadata` columns\nmay also be optionally present (but `parent` must be specified if\nmore than one mutation occurs at the same site). 
If the `time` column\nis absent, the mutation times in the resulting tree sequence are set to\n{data}`tskit.UNKNOWN_TIME`, which is a numeric value that behaves like NaN.\nUnknown mutation times written out by\n{meth}`TreeSequence.dump_text` are represented in the text file by the\nliteral string ``\"unknown\"`` in the `time` column, and\n{func}`tskit.load_text` treats this string as `UNKNOWN_TIME` on input.\nSee the\n{ref}`mutation table definitions <sec_mutation_table_definition>`\nfor details on these columns.\n\n```{code-cell} ipython3\n:tags: [\"hide-input\", \"output-wide-tabs\"]\ndisplay(HTML(\"An example mutation table:\"))\nts.dump_text(mutations=sys.stdout)\n```\n\n\n(sec_migration_text_format)=\n\n### Migration text format\n\nThe migration text format must contain the columns `left`,\n`right`, `node`, `source`, `dest` and `time`. The `metadata` column\nmay also be optionally present. See the\n{ref}`migration table definitions <sec_migration_table_definition>`\nfor details on these columns.\n\n```{code-cell} ipython3\n:tags: [\"hide-input\", \"output-wide-tabs\"]\ndisplay(HTML(\"An example migration table:\"))\nprint(migrations)  # fixme\n# ts.dump_text(migrations=sys.stdout)\n```\n\n\n(sec_population_text_format)=\n\n### Population text format\n\nPopulation tables only have a `metadata` column, so the text format for\na population table requires there to be a `metadata` column. See the\n{ref}`population table definitions <sec_population_table_definition>` for\ndetails.\n\n```{code-cell} ipython3\n:tags: [\"hide-input\", \"output-wide-tabs\"]\ndisplay(HTML(\"An example population table:\"))\nts.dump_text(populations=sys.stdout)\n```\n\nThe `metadata` contains base64-encoded data (in this case, the strings\n`pop1` and `pop2`).\n"
  },
  {
    "path": "docs/glossary.md",
    "content": "---\njupytext:\n  text_representation:\n    extension: .md\n    format_name: myst\n    format_version: 0.12\n    jupytext_version: 1.9.1\nkernelspec:\n  display_name: Python 3\n  language: python\n  name: python3\n---\n\n:::{currentmodule} tskit\n:::\n\n\n(sec_glossary)=\n\n# Glossary\n\n(sec_data_model_definitions)=\n\n## Definitions\n\nHere are some definitions of some key ideas encountered in this documentation.\n\n(sec_data_model_definitions_tree)=\n\ntree\n: A \"gene tree\", i.e., the genealogical tree describing how a collection of\n  genomes (usually at the tips of the tree) are related to each other at some\n  chromosomal {ref}`position <sec_data_model_definitions_position>` or location.\n  As the trees may vary depending on this location, they are also known as \"local\n  trees\". See {ref}`sec_nodes_or_individuals` for discussion of what a \"genome\" is.\n\n(sec_data_model_definitions_tree_sequence)=\n\ntree sequence\n: A \"succinct tree sequence\" (or tree sequence, for brevity) is an object\n  that stores the genetic ancestry and mutational history of a set of\n  aligned DNA sequences or genomes. The name reflects the idea that a common\n  way to treat genetic ancestry is as a sequence of correlated\n  {ref}`trees <sec_data_model_definitions_tree>` at different chromosomal\n  {ref}`positions <sec_data_model_definitions_position>`.\n  Branches that are shared between these trees are efficiently stored as a\n  single {ref}`edge <sec_data_model_definitions_edge>`, and adjacent trees\n  may differ by only a few such edges. These edges connect\n  {ref}`nodes <sec_data_model_definitions_node>` (genomes) in\n  the tree sequence, forming  a\n  network or graph. 
Graphs of this sort are sometimes called ancestral\n  recombination graphs (ARGs), hence tree sequences provide a\n  flexible way to encode multiple types of ARG.\n\n(sec_data_model_definitions_node)=\n\nnode\n: Any point in a tree can be associated with a particular genome\n  in a particular ancestor, called a \"node\".  Since each node represents a\n  specific genome it has a unique `time`, thought of as its birth time. Nodes\n  may or may not correspond to branching points, either in a local\n  {ref}`tree <sec_data_model_definitions_tree>` or in the whole graph.\n  However, a branching point must always be associated with a node.\n  See {ref}`sec_nodes_or_individuals` for discussion of what a \"node\"\n  represents.\n\n(sec_data_model_definitions_individual)=\n\nindividual\n: In certain situations we are interested in how nodes (representing\n  individual homologous genomes) are grouped together into individuals\n  (e.g. two nodes per diploid individual). For example, when we are working\n  with polyploid samples it is useful to associate metadata with a specific\n  individual rather than duplicate this information on the constituent nodes.\n  See {ref}`sec_nodes_or_individuals` for more discussion on this point.\n\n(sec_data_model_definitions_sample)=\n\nsample\n: The focal nodes of a tree sequence, usually thought of as those from which\n  we have obtained data. The specification of these affects various\n  methods: {meth}`TreeSequence.variants` and\n  {meth}`TreeSequence.haplotypes` will output the genotypes of the samples,\n  and {attr}`Tree.roots` only returns roots ancestral to at least one\n  sample.\n  (This can be checked with {meth}`~Node.is_sample`;\n  see the {ref}`node table definitions <sec_node_table_definition>`\n  for information on how the sample\n  status of a node is encoded in the `flags` column.)\n\n(sec_data_model_definitions_edge)=\n\nedge\n: The topology of a tree sequence is defined by a set of **edges**. 
Each\n  edge is a tuple `(left, right, parent, child)`, which records a\n  parent-child relationship among a pair of nodes\n  on the half-open interval `[left, right)` along the genome. The difference\n  between `left` and `right` is known as the \"span\" of the edge.\n\n(sec_data_model_definitions_site)=\n\nsite\n: Tree sequences can define the mutational state of nodes as well as their\n  topological relationships. A **site** is thought of as some\n  {ref}`position <sec_data_model_definitions_position>` along\n  the genome at which variation occurs. Each site is associated with\n  a unique position and ancestral state.\n\n(sec_data_model_definitions_mutation)=\n\nmutation\n: A mutation records the change of state at a particular site 'above'\n  a particular node (more precisely, along the branch between the node\n  in question and its parent). Each mutation is associated with a specific\n  site (which defines the position along the genome), a node (which defines\n  where it occurs within the tree at this position), and a derived state\n  (which defines the mutational state inherited by all nodes in the subtree\n  rooted at the focal node). In more complex situations in which we have\n  back or recurrent mutations, a mutation must also specify its 'parent'\n  mutation.\n\n(sec_data_model_definitions_migration)=\n\nmigration\n: An event at which a parent and child node were born in different populations.\n\n(sec_data_model_definitions_population)=\n\npopulation\n: A grouping of nodes, e.g., by sampling location.\n\n(sec_data_model_definitions_position)=\n\nposition\n: A location along the genome, from 0 to the \n  {ref}`sequence length<sec_data_model_definitions_sequence_length>`. 
In `tskit`\n  positions are stored as floating-point numbers, although it is common to\n  restrict positions to occur at discrete integer locations.\n\n(sec_data_model_definitions_provenance)=\n\nprovenance\n: An entry recording the origin and history of the data encoded in a tree sequence.\n\n(sec_data_model_definitions_ID)=\n\nID\n: In the set of interconnected tables that we define here, we refer\n  throughout to the IDs of particular entities. The ID of an\n  entity (e.g., a node) is defined by the position of the corresponding\n  row in the table. These positions are zero indexed. For example, if we\n  refer to node with ID zero, this corresponds to the node defined by the\n  first row in the node table.\n\n(sec_data_model_definitions_sequence_length)=\n\nsequence length\n: This value defines the coordinate space in which the edges and site positions\n  are defined. This is most often assumed to be equal to the largest\n  `right` coordinate in the edge table, but there are situations in which\n  we might wish to specify the sequence length explicitly.\n\n## Further discussion\n\n(sec_nodes_or_individuals)=\n\n### Nodes, Genomes, or Individuals?\n\nThe natural unit of biological analysis is (usually) the *individual*. However,\nmany organisms we study are diploid, and so each individual contains *two*\nhomologous copies of the entire genome, separately inherited from the two\nparental individuals. Since each monoploid copy of the genome is inherited separately,\neach diploid individual lies at the end of two distinct lineages, and so will\nbe represented by *two* places in any given genealogical tree. This makes it\ndifficult to precisely discuss tree sequences for diploids, as we have no\nsimple way to refer to the bundle of chromosomes that make up the \"copy of the\ngenome inherited from one particular parent\". 
For this reason, in this\ndocumentation we use the non-descriptive term \"node\" to refer to this concept\n-- and so, a diploid individual is composed of two nodes -- although we use the\nterm \"genome\" at times, for concreteness.\n\nSeveral properties naturally associated with individuals are in fact assigned\nto nodes in what follows: birth time and population. This is for two reasons:\nFirst, since coalescent simulations naturally lack a notion of polyploidy, earlier\nversions of `tskit` lacked the notion of an individual. Second, ancestral\nnodes are not naturally grouped together into individuals -- we know they must have\nexisted, but have no way of inferring this grouping, so in fact many nodes in\nan empirically-derived tree sequence will not be associated with individuals,\neven though their birth times might be inferred.\n\n"
  },
  {
    "path": "docs/ibd.md",
    "content": "---\njupytext:\n  text_representation:\n    extension: .md\n    format_name: myst\n    format_version: 0.12\n    jupytext_version: 1.9.1\nkernelspec:\n  display_name: Python 3\n  language: python\n  name: python3\n---\n\n```{currentmodule} tskit\n```\n\n\n(sec_identity)=\n\n# Identity by descent\n\nThe {meth}`.TreeSequence.ibd_segments` method allows us to compute\nsegments of identity by descent along a tree sequence.\n\n:::{note}\nThis documentation page is preliminary\n:::\n\n## Examples\n\nLet's take a simple tree sequence to illustrate the {meth}`.TreeSequence.ibd_segments`\nmethod and associated {ref}`sec_python_api_reference_identity`:\n\n```{code-cell}\n:tags: [hide-input]\n\nimport tskit\nimport io\nfrom IPython.display import SVG\n\nnodes = io.StringIO(\n    \"\"\"\\\n    id      is_sample   time\n    0       1           0\n    1       1           0\n    2       1           0\n    3       0           1\n    4       0           2\n    5       0           3\n    \"\"\"\n)\nedges = io.StringIO(\n    \"\"\"\\\n    left    right   parent  child\n    2     10     3       0\n    2     10     3       2\n    0     10     4       1\n    0     2      4       2\n    2     10     4       3\n    0     2      5       0\n    0     2      5       4\n    \"\"\"\n)\nts = tskit.load_text(nodes=nodes, edges=edges, strict=False)\n\nSVG(ts.draw_svg())\n```\n\n### Definition\n\nA pair of nodes ``(u, v)`` has an IBD segment with a left and right\ncoordinate ``[left, right)`` and ancestral node ``a`` iff the most\nrecent common ancestor of the segment ``[left, right)`` in nodes ``u``\nand ``v`` is ``a``, and the segment has been inherited along the same\ngenealogical path (ie. it has not been broken by recombination). 
The\ndefinition of a \"genealogical path\" used here is\nthe sequence of edges, rather than nodes.\nSo, for instance, if ``u`` inherits a segment ``[x, z)`` from ``a``,\nbut that inheritance is represented by two edges,\none spanning ``[x, y)`` and the other spanning ``[y, z)``,\nthen this represents two genealogical paths,\nand any IBD segments would be split at ``y``.\nIn other words, the method assumes that the end\nof an edge represents a recombination,\nan assumption that may not reflect how the tree sequence\nis used -- see below for more discussion.\n\nThis definition is purely genealogical: it depends only on the tree\nsequence topology and node times, and does not inspect allelic\nstates or mutations. In particular, if we compute the MRCA of ``(u, v)``\nin each tree along the sequence, then (up to the additional refinement\nby genealogical path) the IBD segments are those\nthat share the same ancestor and paths to that\nancestor. Intervals in which ``u`` and ``v`` lie in different roots\nhave no MRCA and therefore do not contribute IBD segments.\n\nConsider the IBD segments that we get from our example tree sequence:\n\n```{code-cell}\nsegments = ts.ibd_segments(store_segments=True)\nfor pair, segment_list in segments.items():\n    print(pair, list(segment_list))\n```\n\nEach of the sample pairs (0, 1), (0, 2) and (1, 2) is associated with\ntwo IBD segments, representing the different paths from these sample\npairs to their common ancestor. Note in particular that (1, 2) has\n**two** IBD segments rather than one: even though the MRCA is\n4 in both cases, the paths from the samples to the MRCA are different\nin the left and right trees.\n\n\n### Data structures\n\nThe result of calling {meth}`.TreeSequence.ibd_segments` is an\n{class}`.IdentitySegments` class:\n\n```{code-cell}\nsegments = ts.ibd_segments()\nprint(segments)\n```\n\nBy default this class only stores the high-level summaries of the\nIBD segments discovered. 
As we can see in this example, we have a\ntotal of six segments and\nthe total span (i.e., the sum lengths of the genomic intervals spanned\nby IBD segments) is 30. In this default mode the object does not\nstore information about individual sample pairs, and methods that\ninspect per-pair information (such as indexing with ``[(a, b)]`` or\niterating over the mapping) will raise an\n``IdentityPairsNotStoredError``.\n\nIf required, we can get more detailed information about particular\nsegment pairs and the actual segments using the ``store_pairs``\nand ``store_segments`` arguments.\n\n:::{warning}\nOnly use the ``store_pairs`` and ``store_segments`` arguments if you\nreally need this information! The number of IBD segments can be\nvery large and storing them all requires a lot of memory. It is\nalso much faster to just compute the overall summaries, without\nneeding to store the actual lists.\n:::\n\n\n```{code-cell}\nsegments = ts.ibd_segments(store_pairs=True)\nfor pair, value in segments.items():\n    print(pair, \"::\", value)\n```\n\nNow we can see the more detailed breakdown of how the identity segments\nare distributed among the sample pairs. The {class}`.IdentitySegments`\nclass behaves like a dictionary, such that ``segments[(a, b)]`` will return\nthe {class}`.IdentitySegmentList` instance for that pair of samples:\n\n```{code-cell}\nseglist = segments[(0, 1)]\nprint(seglist)\n```\n\nIf we want to access the detailed information about the actual\nidentity segments, we must use the ``store_segments`` argument:\n\n```{code-cell}\nsegments = ts.ibd_segments(store_pairs=True, store_segments=True)\nsegments[(0, 1)]\n```\n\nWhen ``store_segments=True``, the {class}`.IdentitySegmentList` behaves\nlike a Python list, where each element is an instance of\n{class}`.IdentitySegment`. 
When only ``store_pairs=True`` is specified,\nthe number of segments and their total span are still available, but\nattempting to iterate over the list or access the per-segment arrays\nwill raise an ``IdentitySegmentsNotStoredError``.\n\n:::{warning}\nThe order of segments in an {class}`.IdentitySegmentList`\nis arbitrary, and may change in future versions.\n:::\n\n\n```{eval-rst}\n.. todo:: More examples using the other bits of the IdentitySegments\n    API here\n```\n\n### Controlling the sample sets\n\nBy default we get the IBD segments between all pairs of\n{ref}`sample<sec_data_model_definitions_sample>` nodes.\n\n#### IBD within a sample set\n\nWe can reduce this to pairs within a specific set using the\n``within`` argument:\n\n```{code-cell}\nsegments = ts.ibd_segments(within=[0, 2], store_pairs=True)\nprint(list(segments.keys()))\n```\n\nHere we have restricted attention to the samples with node IDs 0 and 2,\nso only the pair ``(0, 2)`` appears in the result. In general:\n\n- ``within`` should be a one-dimensional array-like of node IDs\n  (typically sample nodes). All unordered pairs from this set are\n  considered.\n- If ``within`` is omitted (the default), all nodes flagged as samples\n  in the node table are used.\n\n#### IBD between sample sets\n\nWe can also compute IBD **between** sample sets:\n\n```{code-cell}\nsegments = ts.ibd_segments(between=[[0,1], [2]], store_pairs=True)\nprint(list(segments.keys()))\n```\n\nIn this example we have two sample sets, ``[0, 1]`` and ``[2]``, so the\nidentity segments are computed only for pairs in which one sample lies\nin the first set and the other lies in the second. 
More generally:\n\n- ``between`` should be a list of non-overlapping lists of node IDs.\n- All pairs ``(u, v)`` are considered such that ``u`` and ``v`` belong\n  to different sample sets.\n\nThe ``within`` and ``between`` arguments are mutually exclusive: passing\nboth at the same time raises a {exc}`ValueError`.\n\n:::{seealso}\nSee the {meth}`.TreeSequence.ibd_segments` documentation for\nmore details.\n:::\n\n### Constraints on the segments\n\nThe ``max_time`` and ``min_span`` arguments allow us to constrain the\nsegments that we consider.\n\nThe ``max_time`` argument specifies an upper bound on the time of the\ncommon ancestor node: only IBD segments whose MRCA node has a time\nno greater than ``max_time`` are returned.\n\nThe ``min_span`` argument filters by genomic length: only segments with\nspan strictly greater than ``min_span`` are included.\n\nFor example, working with ``ts2`` as the following tree sequence:\n\n```{code-cell}\n:tags: [hide-input]\n\nimport io\n\nnodes = io.StringIO(\n    \"\"\"\\\n    id      is_sample   time\n    0       1           0\n    1       1           0\n    2       0           1\n    3       0           3\n    \"\"\"\n)\nedges = io.StringIO(\n    \"\"\"\\\n    left    right   parent  child\n    0      4     2       0,1\n    4     10     3       0,1\n    \"\"\"\n)\nts2 = tskit.load_text(nodes=nodes, edges=edges, strict=False)\nSVG(ts2.draw_svg())\n```\n\nThere are two segments:\n```{code-cell}\nsegments = ts2.ibd_segments(store_segments=True)\nprint(\"all segments:\", list(segments.values())[0])\n```\n... but only the left-hand one is more recent than 2 time units ago:\n```{code-cell}\nsegments_recent = ts2.ibd_segments(max_time=2, store_segments=True)\nprint(\"max_time=2:\", list(segments_recent.values())[0])\n```\n... 
and only the right-hand one is longer than 5 units.\n```{code-cell}\n\nsegments_long = ts2.ibd_segments(min_span=5, store_segments=True)\nprint(\"min_span=5:\", list(segments_long.values())[0])\n```\n\nSo: the full result contains two IBD segments for the single sample\npair, one inherited via ancestor 2 over ``[0, 4)`` and one via\nancestor 3 over ``[4, 10)``. The ``max_time`` constraint removes the\nsegment inherited from the older ancestor (time 3), while the\n``min_span`` constraint keeps only the longer of the two segments.\n\n### More on the \"pathwise\" definition of IBD segments\n\nWe said above that the definition of IBD used by\n{meth}`.TreeSequence.ibd_segments` says that a given segment\nmust be inherited from the MRCA along a single genealogical path,\nand that \"genealogical paths\" are defined *edgewise*.\nThis can lead to surprising consequences.\n\nReturning to our example above:\n```{code-cell}\n:tags: [hide-input]\n\nSVG(ts.draw_svg())\n```\nthere are two IBD segments between ``1`` and ``2``:\n```{code-cell}\nsegments = ts.ibd_segments(within=[1, 2], store_pairs=True)\nfor pair, value in segments.items():\n    print(pair, \"::\", value)\n```\nThis might be surprising, because the MRCA of ``1`` and ``2``\nis node ``4`` over the entire sequence.\nIn fact, some definitions of IBD segments\nwould have this as a single segment,\nbecause the MRCA does not change,\neven if there are distinct genealogical paths.\n\nThe reason this is split into two segments\nis because the path from ``4`` to ``2`` changes:\non the left-hand segment ``[0, 2)``, the node ``2``\ninherits from node ``4`` directly,\nwhile on the right-hand segment ``[2, 10)``\nit inherits from node ``4`` via node ``3``.\nThe tree sequence doesn't say directly whether node ``2``\nalso inherits from node ``3`` on the left-hand segment,\nso whether or not this should be one IBD segment or two\ndepends on our interpretation\nof what's stored in the tree sequence.\nAs discussed in \n[Fritze et 
al](https://doi.org/10.1093/genetics/iyaf198),\nmost tree sequence simulators (at time of writing)\nwill produce this tree sequence even if node ``2``\ndoes in fact inherit from ``3`` over the entire sequence.\nUsing {meth}`.TreeSequence.extend_haplotypes` will\n\"put the unary nodes back\":\n```{code-cell}\nets = ts.extend_haplotypes()\nSVG(ets.draw_svg())\n```\nand once this is done, there is only a single IBD segment:\n```{code-cell}\nsegments = ets.ibd_segments(within=[1, 2], store_pairs=True)\nfor pair, value in segments.items():\n    print(pair, \"::\", value)\n```\nSo, extending haplotypes may produce IBD segments\nmore in line with theory, if the desired definition of IBD\nis the \"pathwise\" definition.\nHowever, this will also probably introduce erroneous\nportions of IBD segments,\nso caution is needed.\nAnother approach would be to merge adjacent segments of IBD\nthat have the same MRCA.\n\nSummarizing this section --\nthere is a confusing array of possible definitions\nof what it means to be \"an IBD segment\";\nand these may be extracted from a tree sequence\nin subtly different ways.\nHow much of a problem is this?\nThe answer depends on the precise situation,\nbut it seems likely that in practice,\ndifferences due to definition are small\nrelative to errors due to tree sequence inference.\nIndeed, empirical haplotype-matching methods\nfor identifying IBD segments can differ substantially\ndepending on the values of various hyperparameters.\nMore work is needed to develop a complete picture.\n"
  },
  {
    "path": "docs/installation.md",
    "content": "---\njupytext:\n  text_representation:\n    extension: .md\n    format_name: myst\n    format_version: 0.12\n    jupytext_version: 1.9.1\nkernelspec:\n  display_name: Python 3\n  language: python\n  name: python3\n---\n\n```{currentmodule} tskit\n```\n\n(sec_installation)=\n\n\n# Installation\n\nThere are two basic options for installing `tskit`: either through\npre-built binary packages using {ref}`sec_installation_conda` or\nby compiling locally using {ref}`sec_installation_pip`. We recommend using `conda`\nfor most users, although `pip` can be more convenient in certain cases.\nTskit is installed to provide succinct tree sequence functionality\nto other software (such as [msprime](https://github.com/tskit-dev/msprime)),\nso it may already be installed if you use such software.\n\n(sec_installation_requirements)=\n\n\n## Requirements\n\nTskit requires Python 3.8+. There are no external C library dependencies. Python\ndependencies are installed automatically by `pip` or `conda`.\n\n(sec_installation_conda)=\n\n\n## Conda\n\nPre-built binary packages for `tskit` are available through\n[conda](https://conda.io/docs/), and built using [conda-forge](https://conda-forge.org/).\nPackages for recent versions of Python are available for Linux, OSX and Windows. Install\nusing:\n\n```bash\nconda install -c conda-forge tskit\n```\n\n### Quick Start\n\n1. Install `conda` using [miniconda](https://conda.io/miniconda.html).\n   Make sure you follow the instructions to fully activate your `conda`\n   installation!\n2. Set up the [conda-forge channel](https://conda-forge.org/) using\n   `conda config --add channels conda-forge`.\n3. Install tskit: `conda install tskit`.\n4. Try it out: `tskit --version`.\n\n\nThere are several different ways to obtain `conda`. 
Please see the\n[anaconda installation documentation](https://docs.anaconda.com/anaconda/install/)\nfor full details.\n\n(sec_installation_pip)=\n\n\n## Pip\n\nInstalling using `pip` is somewhat more flexible than `conda` and\nmay result in code that is (slightly) faster on your specific hardware.\n`Pip` is the recommended method when using the system provided Python\ninstallations. Installation is straightforward:\n\n```bash\npython3 -m pip install tskit\n```\n\n(sec_installation_development_versions)=\n\n\n## Development versions\n\nFor general use, we do not recommend installing development versions.\nOccasionally pre-release versions are made available, which can be\ninstalled using `python3 -m pip install --pre tskit`. If you really need to install a\nbleeding-edge version, see {ref}`sec_development_installing`.\n"
  },
  {
    "path": "docs/introduction.md",
    "content": "---\njupytext:\n  text_representation:\n    extension: .md\n    format_name: myst\n    format_version: 0.12\n    jupytext_version: 1.9.1\nkernelspec:\n  display_name: Python 3\n  language: python\n  name: python3\n---\n\n```{currentmodule} tskit\n```\n\n(sec_introduction)=\n\n# Introduction\n\nThis is the documentation for `tskit`, the tree sequence toolkit.\nSuccinct tree sequences are an efficient way of representing the\ngenetic history - often technically referred to as an Ancestral\nRecombination Graph or ARG - of a set of DNA sequences.\n\nThe tree sequence format is output by a number of external software libraries\nand programs (such as [msprime](https://tskit.dev/msprime/docs), \n[SLiM](https://github.com/MesserLab/SLiM), \n[fwdpp](https://fwdpp.readthedocs.io/en/), and \n[tsinfer](https://tskit.dev/tsinfer/docs/)) that either simulate or\ninfer the evolutionary ancestry of genetic sequences. This library provides the\nunderlying functionality that such software uses to load, examine, and\nmanipulate ARGs in tree sequence format, including efficient access to the\ncorrelated sequence of trees along a genome and general methods to calculate\n{ref}`genetic statistics<sec_stats>`.\n\nFor a gentle introduction, you might like to read \"{ref}`tutorials:sec_what_is`\"\non our {ref}`tutorials site<tutorials:sec_intro>`. There you can also find further\ntutorial material to introduce you to key `tskit` concepts.\n\n:::{important}\nIf you use `tskit` in your work, please remember to cite it appropriately: see the {ref}`citations<sec_citation>` page for details.\n:::\n\n:::{note}\nThis documentation is under active development and may be incomplete\nin some areas. If you would like to help improve it, please open an issue or\npull request on [GitHub](https://github.com/tskit-dev/tskit).\n:::\n"
  },
  {
    "path": "docs/metadata.md",
    "content": "---\njupytext:\n  text_representation:\n    extension: .md\n    format_name: myst\n    format_version: 0.12\n    jupytext_version: 1.9.1\nkernelspec:\n  display_name: Python 3\n  language: python\n  name: python3\n---\n\n```{currentmodule} tskit\n```\n\n(sec_metadata)=\n\n# Metadata\n\nThe tree-sequence and all the entities within it (nodes, mutations, edges,  etc.) can\nhave metadata associated with them. This is intended for storing and passing on\ninformation that tskit itself does not use or interpret, for example information derived\nfrom a VCF INFO field, or administrative information (such as unique identifiers)\nrelating to samples and populations. Note that provenance information about how a tree\nsequence was created should not be stored in metadata, instead the provenance mechanisms\nin tskit should be used (see {ref}`sec_provenance`).\n\nThe metadata for each entity (e.g. row in a table) is described by a schema for each\nentity type (e.g. table). The schemas allow the tskit Python API to encode and decode\nmetadata automatically and, most importantly, tells downstream users and tools how to\ndecode and interpret the metadata. For example, the `msprime` schema for populations\nrequires both a `name` and a `description` for each defined population: these names and\ndescriptions can assist downstream users in understanding and using `msprime` tree\nsequences. It is best practice to populate such metadata fields if your files will be\nused by any third party, or if you wish to remember what the rows refer to some time\nafter making the file!\n\nTechnically, schemas describe what information is stored in each metadata record, and\nhow it is to be encoded, plus some optional rules about the types and ranges of data\nthat can be stored. Every node's metadata follows the node schema, every mutation's\nmetadata the mutation schema, and so on. 
Most users of tree-sequence files will not\nneed to modify the schemas: typically, as in the example of `msprime` above, schemas are\ndefined by the software which created the tree-sequence file. The exact metadata stored\ndepends on the use case; it is also possible for subsequent processes to add or modify\nthe schemas, if they wish to add to or modify the types (or encoding) of the metadata.\n\nThe metadata schemas are in the form of a\n[JSON Schema](http://json-schema.org/) (a good guide to JSON Schema is at\n[Understanding JSON Schema](https://json-schema.org/understanding-json-schema/)). The\nschema must specify an object with properties,\nthe keys and types of those properties are specified along with optional\nlong-form names, descriptions and validations such as min/max or regex matching for\nstrings, see the {ref}`sec_metadata_schema_examples` below.\n\nAs a convenience the simplest, permissive JSON schema is available as\n{meth}`MetadataSchema.permissive_json()`.\n\nThe {ref}`sec_tutorial_metadata` Tutorial shows how to use schemas and access metadata\nin the tskit Python API.\n\nNote that the C API simply provides byte-array binary access to the metadata,\nleaving the encoding and decoding to the user. The same can be achieved with the Python\nAPI, see {ref}`sec_tutorial_metadata_binary`.\n\n\n(sec_metadata_examples)=\n\n## Examples\n\nIn this section we give some examples of how to define metadata\nschemas and how to add metadata to various parts of a tree sequence\nusing the Python API. For simplicity, these initial examples use the JSON codec\n(see {ref}`sec_metadata_codecs`).\n\n(sec_metadata_examples_top_level)=\n\n### Top level\n\nTop level metadata is associated with the tree sequence as a whole, rather than\nany specific table. 
This is used, for example, by programs such as\n[SLiM](https://github.com/MesserLab/SLiM) to store information about the sort of\nmodel that was used to generate the tree sequence (but note that detailed information\nused to recreate the tree sequence is better stored in {ref}`sec_provenance`).\n\nHere's an example of adding your own top-level metadata to a tree sequence:\n\n```{code-cell}\nimport tskit\n# Define some top-level metadata you might want to add to a tree sequence\ntop_level_metadata = {\n    \"taxonomy\": {\"species\": \"Arabidopsis lyrata\", \"subspecies\": \"petraea\"},\n    \"generation_time\": 2,\n}\n\n# Generate a simple tree sequence of one random tree.\nts = tskit.Tree.generate_random_binary(8, branch_length=10, random_seed=9).tree_sequence\n\n# To edit a tree sequence, first dump it to tables.\ntables = ts.dump_tables()\n\n# Set the metadata schema for the top-level metadata\ntables.metadata_schema = tskit.MetadataSchema.permissive_json()  # simplest schema\n# Set the metadata itself\ntables.metadata = top_level_metadata\n\nts = tables.tree_sequence()\nprint(\n    \"The tree sequence is of\",\n    ts.metadata[\"taxonomy\"][\"species\"],\n    \"subsp.\",\n    ts.metadata[\"taxonomy\"][\"subspecies\"],\n)\n```\n\nIn this case, the species and subspecies name are self-explanatory, but\nthe interpretation of the `generation_time` field is less clear. 
Setting\na more precise schema will help other users of your tree sequence:\n\n```{code-cell}\nschema = {\n    \"codec\": \"json\",\n    \"type\": \"object\",\n    \"properties\": {\n        \"generation_time\": {\"type\": \"number\", \"description\": \"Generation time in years\"},\n    },\n    \"additionalProperties\": True,  # optional: True by default anyway\n}\ntables.metadata_schema = tskit.MetadataSchema(schema)\ntables.metadata = top_level_metadata  # put the metadata back in\nts = tables.tree_sequence()\nprint(ts.metadata)\n```\n\nNote that the schema here only describes the `generation_time` field. The\nmetadata also contains additional fields (such as the species) that are\nnot in the schema; this is allowed because `additionalProperties` is `True`\n(assumed by default in the {ref}`sec_metadata_codecs_json` codec, but shown\nabove for clarity). \n\nExplicitly specified fields are *validated* on input, helping to avoid errors.\nFor example, setting the generation time to a string will now raise an error:\n\n```{code-cell}\n:tags: [\"raises-exception\", \"output_scroll\"]\ntables.metadata = {\"generation_time\": \"two of your earth years\"}\n```\n\n:::{note}\nAlthough we have stored the generation time in metadata, the\ntime *units* of a tree sequence should be stored in the \n{attr}`~TreeSequence.time_units` attribute, not in\nmetadata. For example, we could set `tables.time_units = \"generations\"`.\n:::\n\n(sec_metadata_examples_reference_sequence)=\n\n### Reference sequence\n\nOften a genome will be associated with a\nreference sequence for that species. In this case, we might want to\nstore not just the species name, but also e.g. 
the build version of\nthe reference sequence, and possibly the reference sequence itself.\nThere is built-in support for this in tskit, via the\n{attr}`~tskit.ReferenceSequence.metadata` and\n{attr}`~tskit.ReferenceSequence.metadata_schema` properties\nof the {attr}`TreeSequence.reference_sequence` attribute\n(see the {ref}`sec_data_model_reference_sequence` documentation).\n\n:::{todo}\nAdd examples of reference sequence metadata when the API becomes less\npreliminary. This should\ninclude an example where we declare (or better, use one we define\nin the library) a standard metadata schema for a species, which\ndefines and documents accession numbers, genome builds, etc. e.g.\n\n```python\ntables.reference_sequence.metadata_schema = standard_schema\ntables.reference_sequence.metadata = {...}\nts = tables.tree_sequence()\n```\n:::\n\n(sec_metadata_examples_tables)=\n\n### Tables\n\nEach table in a tree sequence (apart from the provenance table)\ncan have its own metadata, and associated metadata schema.\n\n```{code-cell}\ntables.individuals.metadata_schema = tskit.MetadataSchema.permissive_json()\ntables.individuals.add_row(metadata={\"Accession ID\": \"ABC123\"})\nts = tables.tree_sequence()\nprint(\",\\n           \".join(str(ts.individual(0)).split(\", \")))\n\n```\n\nHowever, we might want something more descriptive than the default\n{meth}`~MetadataSchema.permissive_json()` schema. We could create\na new schema, or modify the existing one. 
Modification is useful\nif a nontrivial schema has been set already, for example in the\n{ref}`case of populations <msprime:sec_demography_populations_metadata>`\nwhen the tree sequence has been generated by\n{func}`msprime:msprime.sim_ancestry`.\n\n```{code-cell}\n# Modify an existing schema\nschema_as_python_dict = tables.individuals.metadata_schema.schema\nif \"properties\" not in schema_as_python_dict:\n    schema_as_python_dict[\"properties\"] = {}\nschema_as_python_dict[\"properties\"][\"Accession ID\"] = {\n    \"type\": \"string\", \"description\": \"An accession ID for this individual\"}\n\n# Optional: require an accession id to be specified for all individuals\nif \"required\" not in schema_as_python_dict:\n    schema_as_python_dict[\"required\"] = []\nschema_as_python_dict[\"required\"].append(\"Accession ID\")\n\n# Set the schema back on the table\ntables.individuals.metadata_schema = tskit.MetadataSchema(schema_as_python_dict)\n\n# Put all the metadata back in, using validate_and_encode_row to validate it\ntables.individuals.packset_metadata([\n    tables.individuals.metadata_schema.validate_and_encode_row(ind.metadata)\n    for ind in tables.individuals\n])\nprint(\"New schema:\", tables.individuals.metadata_schema)\n```\n\n\n### Defaults\n\nSince we specified that the `Accession ID` property was required in the\nexample above, the user *always* has to provide it, otherwise it will\nfail to validate:\n\n```{code-cell}\n:tags: [\"raises-exception\", \"output_scroll\"]\ntables.individuals.add_row(metadata={\"Comment\": \"This has no accession ID\"})\n```\n\nHowever, rather than require a user-specified value, we can provide a\ndefault, which will be returned if the field is absent. 
In this case the property\nshould not be marked as `required`.\n\n```{code-cell}\nnew_schema = {\n    \"codec\": \"json\",\n    \"type\": \"object\",\n    \"properties\": {\n        \"Accession ID\": {\n            \"type\": \"string\",\n            \"description\": \"An accession ID for this individual\",\n            \"default\": \"N/A\",  # Default if this property is absent\n        },\n    },\n    \"default\": {\"Accession ID\": \"N/A\"},  # Default if no metadata in this row\n}\ntables.individuals.metadata_schema = tskit.MetadataSchema(new_schema)\ntables.individuals.packset_metadata([\n    tables.individuals.metadata_schema.validate_and_encode_row(ind.metadata)\n    for ind in tables.individuals\n])\ntables.individuals.add_row(metadata={\"Comment\": \"This has no accession ID\"})\nts = tables.tree_sequence()\n\nprint(\"Newly added individual:\")\nprint(\",\\n           \".join(str(ts.individual(-1)).split(\", \")))\n```\n\n:::{note}\nIn the {ref}`sec_metadata_codecs_json` codec, defaults can only\nbe set for the shallowest level of the metadata object.\n:::\n\n(sec_metadata_codecs)=\n\n## Codecs\n\nThe underlying metadata is in raw binary (see\n{ref}`data model <sec_metadata_definition>`) and so it\nmust be encoded and decoded. The C API does not do this, but the Python API will\nuse the schema to decode the metadata to Python objects.\nThe encoding for doing this is specified in the top-level schema property `codec`.\nCurrently the Python API supports the `json` codec which encodes metadata as\n[JSON](https://www.json.org/json-en.html), and the `struct` codec which encodes\nmetadata in an efficient schema-defined binary format using {func}`python:struct.pack` .\n\n(sec_metadata_codecs_json)=\n\n### `json`\n\nWhen `json` is specified as the `codec` in the schema the metadata is encoded in\nthe human readable [JSON](https://www.json.org/json-en.html) format. 
As this format\nis human readable and encodes numbers as text it uses more bytes than the `struct`\nformat. However it is simpler to configure as it doesn't require any format specifier\nfor each type in the schema. Tskit deviates from standard JSON in that\nempty metadata is interpreted as an empty object. This is to allow setting of a schema\nto a table without the need to modify all existing empty rows.\n\n(sec_metadata_codecs_struct)=\n\n### `struct`\n\nWhen `struct` is specified as the `codec` in the schema the metadata is encoded\nusing {func}`python:struct.pack` which results in a compact binary representation which\nis much smaller and generally faster to encode/decode than JSON.\n\nThis codec places extra restrictions on the schema:\n\n1. Each property must have a `binaryFormat`.\n    This sets the binary encoding used for the property.\n\n2. All metadata objects must have fixed properties.\n    This means that additional properties not listed in the schema are disallowed. Any\n    property that does not have a `default` specified in the schema must be present.\n    Default values will be encoded.\n\n3. Arrays must be lists of homogeneous objects.\n    For example, this is not valid:\n    ```\n    {\"type\": \"array\", \"items\": [{\"type\": \"number\"}, {\"type\": \"string\"}]}\n    ```\n\n4. Types must be singular and not unions.\n    For example, this is not valid:\n    ```\n    {\"type\": [\"number\", \"string\"]}\n    ```\n    One exception is that the top-level can be a union of `object` and `null` to\n    support the case where some rows do not have metadata.\n\n5. The order that properties are encoded is by default alphabetically by name.\n    The order can be overridden by setting an optional numerical `index` on each\n    property. 
This is due to objects being unordered in JSON and Python `dicts`.\n\n\n#### binaryFormat\n\nTo determine the binary encoding of each property in the metadata the `binaryFormat` key is used.\nThis describes the encoding for each property using `struct`\n[format characters](https://docs.python.org/3/library/struct.html#format-characters).\nFor example an unsigned 8-byte integer can be specified with:\n\n```\n{\"type\": \"number\", \"binaryFormat\":\"Q\"}\n```\n\nAnd a length 10 string with:\n\n```\n{\"type\": \"string\", \"binaryFormat\":\"10p\"}\n```\n\nSome of the text below is copied from\n[the python docs](https://docs.python.org/3/library/struct.html).\n\n\n##### Numeric and boolean types\n\nThe supported numeric and boolean types are:\n\n\n```{list-table}\n:header-rows: 1\n* - Format\n  - C Type\n  - Python type\n  - Numpy type\n  - Size in bytes\n* - `?`\n  - *_Bool*\n  - bool\n  - bool\n  - 1\n* - `b`\n  - *signed char*\n  - integer\n  - int8\n  - 1\n* - `B`\n  - *unsigned char*\n  - integer\n  - uint8\n  - 1\n* - `h`\n  - *short*\n  - integer\n  - int16\n  - 2\n* - `H`\n  - *unsigned short*\n  - integer\n  - uint16\n  - 2\n* - `i`\n  - *int*\n  - integer\n  - int32\n  - 4\n* - `I`\n  - *unsigned int*\n  - integer\n  - uint32\n  - 4\n* - `l`\n  - *long*\n  - integer\n  - int32\n  - 4\n* - `L`\n  - *unsigned long*\n  - integer\n  - uint32\n  - 4\n* - `q`\n  - *long long*\n  - integer\n  - int64\n  - 8\n* - `Q`\n  - *unsigned long long*\n  - integer\n  - uint64\n  - 8\n* - `f`\n  - *float*\n  - float\n  - float32\n  - 4\n* - `d`\n  - *double*\n  - float\n  - float64\n  - 8\n```\n\nWhen attempting to pack a non-integer using any of the integer conversion\ncodes, if the non-integer has a `__index__` method then that method is\ncalled to convert the argument to an integer before packing.\n\nFor the `'f'` and `'d'` conversion codes, the packed\nrepresentation uses the IEEE 754 binary32 or binary64 format (for\n`'f'` or `'d'` respectively), regardless of the 
floating-point\nformat used by the platform.\n\nNote that endian-ness cannot be specified and is fixed at little endian.\n\nWhen encoding a value using one of the integer formats (`'b'`,\n`'B'`, `'h'`, `'H'`, `'i'`, `'I'`, `'l'`, `'L'`,\n`'q'`, `'Q'`), if the value is outside the valid range for that format\nthen {exc}`struct.error` is raised.\n\nFor the `'?'` format character, the decoded value will be either `True` or\n`False`. When encoding, the truth value of the input is used.\n\n\n##### Strings\n\n```{list-table}\n:header-rows: 1\n* - Format\n  - C Type\n  - Python type\n  - Size in bytes\n* - `x`\n  - pad byte\n  - no value\n  - as specified\n* - `c`\n  - *char*\n  - string of length 1\n  - 1\n* - `s`\n  - *char[]*\n  - string\n  - as specified\n* - `p`\n  - *char[]*\n  - string\n  - as specified\n```\n\nFor the `'s'` format character, the number prefixed is interpreted as the length in\nbytes, for example,\n`'10s'` means a single 10-byte string. For packing, the string is\ntruncated or padded with null bytes as appropriate to make it fit. For\nunpacking, the resulting bytes object always has exactly the specified number\nof bytes, unless `nullTerminated` is `true`, in which case it ends at the first\n`null`. As a special case, `'0s'` means a single, empty string.\n\nThe `'p'` format character encodes a \"Pascal string\", meaning a short\nvariable-length string stored in a fixed number of bytes, given by the count.\nThe first byte stored is the length of the string, or 255, whichever is\nsmaller.  The bytes of the string follow.  If the string to encode is too long\n(longer than the count minus 1), only the leading\n`count-1` bytes of the string are stored.  If the string is shorter than\n`count-1`, it is padded with null bytes so that exactly count bytes in all\nare used.  
Note that strings specified with this format cannot be longer than 255.\n\nStrings that are longer than the specified length will be silently truncated,\nnote that the length is in bytes, not characters.\n\nThe string encoding can be set with `stringEncoding` which defaults to `utf-8`.\nA list of possible encodings is\n[here](https://docs.python.org/3.7/library/codecs.html#standard-encodings).\n\nFor most cases, where there are no `null` characters in the metadata\n`{\"type\":\"string\", \"binaryFormat\": \"1024s\", \"nullTerminated\": True}` is a good option\nwith the size set to that appropriate for the strings to be encoded.\n\n\n##### Padding bytes\n\nUnused padding bytes (for compatibility) can be added with a schema entry like:\n\n```\n{\"type\": \"null\", \"binaryFormat\":\"5x\"} # 5 padding bytes\n```\n\n##### Arrays\n\nThe codec stores the length of the array before the array data. The format used for the\nlength of the array can be chosen with `arrayLengthFormat` which must be one\nof `B`, `H`, `I`, `L` or `Q` which have the same meaning as in the numeric\ntypes above. `L` is the default. As an example:\n\n```\n{\"type\": \"array\", \"items\": {\"type\":\"number\", \"binaryFormat\":\"h\"}, \"arrayLengthFormat\":\"B\"}\n```\n\nWill result in an array of 2 byte integers, prepended by a single-byte array-length.\n\nFor arrays with a known fixed size, you can specify the `length` property instead:\n```\n{\"type\": \"array\", \"length\": 3, \"items\": {\"type\":\"number\", \"binaryFormat\":\"i\"}}\n```\nThis creates a fixed-length array of exactly 3 integers, without storing the array length in the encoded data. \nFixed-length arrays are more space-efficient since they don't need to store the length prefix.\n\nWhen using fixed-length arrays:\n1. The `arrayLengthFormat` property should not be specified\n2. 
Arrays provided for encoding must match the specified length exactly\n\nFor dealing with legacy encodings that do not store the\nlength of the array, setting `noLengthEncodingExhaustBuffer` to `true` will read\nelements of the array until the metadata buffer is exhausted. As such an array\nwith this option must be the last type in the encoded struct.\n\n\n##### Union typed metadata\n\nAs a special case under the `struct` codec, the top-level type of metadata can be a\nunion of `object` and `null`. Set `\"type\": [\"object\", \"null\"]`. Properties should\nbe defined as normal, and will be ignored if the metadata is `None`.\n\n(sec_metadata_codecs_jsonstruct)=\n\n### `json+struct`\n\nAn additional codec provides the ability to store *both* JSON and binary-encoded data.\nThis is provided for the case where we want to store some arbitrary metadata\n(as JSON) along with a relatively large amount of data (as binary, for efficiency).\nFor instance, we might want to record a raster map of the sampled area\nalong with a few pieces of generic information (e.g., the name of the area).\n\nThe metadata schema for \"json+struct\" metadata basically just specifies both\na JSON metadata schema and a struct metadata schema.\nEach entry in the metadata is encoded with either the JSON or the struct codec.\n\nSpecifically, the schema must contain:\n\n1. a `\"json\"` entry that is a valid JSON metadata schema (except it does\n    not need to specify the codec), and\n2. a `\"struct\"` entry that is a valid struct metadata schema (except it also does\n    not need to specify the codec).\n\nFurthermore, these two sub-schemas must both define objects,\nand must not both define the same property:\nin other words, the  names of the properties in these must not overlap.\n\nWhen you use this codec, the decoded metadata in python is just a dictionary,\nas usual with either the JSON or struct codecs. 
There is no separation\nbetween the binary-encoded and JSON-encoded entries in metadata;\nthis happens purely under the hood when encoding (and decoding) the metadata.\nSee the example below.\n\n#### Binary representation\n\nThe underlying structure of the JSON+struct codec is as follows.\n(If you're just working with metadata in python via the tskit interface,\nyou don't need to worry about this; this is important if you need to write\nmetadata in C, for instance.)\n(1) four magic bytes, the ASCII characters `J`, `B`, `L`, and `B`;\n(2) a one-byte (`uint8_t`) version number (currently, `1`);\n(3) a 64-bit (`uint64_t`) length in bytes for the JSON data, in little-endian format;\n(4) a 64-bit length in bytes for the binary (struct) data, also in little-endian format;\n(5) the JSON data itself;\n(6) zero-ed \"padding\" bytes to bring the start of the binary section\ninto 8-byte alignment; and\n(7) the binary data.\nThe JSON data is encoded as ASCII, without a null terminating byte,\nand the format of the binary data is specified using the \"struct\" portion\nof the metadata schema, described {ref}`above <sec_metadata_codecs_struct>`.\n\n(sec_metadata_schema_examples)=\n\n## Schema examples\n\n### JSON codec\n\nThe JSON codec requires very little: for instance,\n``tskit.MetadataSchema.permissive_json()`` simply returns the schema\n``{\"codec\":\"json\"}``.\nUsing this schema allows you to include arbitrary data in an entry.\n\nHere is a more structured schema:\n\n```{code-cell}\nschema = {\n    \"codec\": \"json\",\n    \"title\": \"Example Metadata\",\n    \"type\": \"object\",\n    \"properties\": {\"name\": {\"type\": \"string\"}, \"size\": {\"type\": \"number\"}},\n    \"required\": [\"name\", \"size\"],\n    \"additionalProperties\": False,\n}\nms = tskit.MetadataSchema(schema)\nencoded = ms.validate_and_encode_row({\n    \"name\": \"abc\", \"size\": 123\n})\n```\n\nThis schema has two properties: \"name\" and \"size\";\n\"name\" is a string and \"size\" is a number;\nboth are required, 
and no additional properties are allowed.\n\n### Struct codec\n\nAs an example here is a schema using the `struct` codec which could apply, for example,\nto the individuals in a tree sequence:\n\n```python\ncomplex_struct_schema = {\n    \"codec\": \"struct\",\n    \"type\": \"object\",\n    \"properties\": {\n        \"accession_number\": {\"type\": \"integer\", \"binaryFormat\": \"i\"},\n        \"collection_date\": {\n            \"description\": \"Date of sample collection in ISO format\",\n            \"type\": \"string\",\n            \"binaryFormat\": \"10p\",\n            \"pattern\": \"^([1-9][0-9]{3})-(1[0-2]|0[1-9])-(3[01]|0[1-9]|[12][0-9])?$\",\n        },\n        \"phenotype\": {\n            \"description\": \"Phenotypic measurements on this individual\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"height\": {\n                    \"description\": \"Height in metres, or NaN if unknown\",\n                    \"type\": \"number\",\n                    \"binaryFormat\": \"f\",\n                    \"default\": float(\"NaN\"),\n                },\n                \"age\": {\n                    \"description\": \"Age in years at time of sampling, or -1 if unknown\",\n                    \"type\": \"number\",\n                    \"binaryFormat\": \"h\",\n                    \"default\": -1,\n                },\n            },\n            \"default\": {},\n        },\n    },\n    \"required\": [\"accession_number\", \"collection_date\"],\n    \"additionalProperties\": False,\n}\n\n# Demonstrate use\ntables.individuals.clear()\ntables.individuals.metadata_schema = tskit.MetadataSchema(complex_struct_schema)\ntables.individuals.add_row(\n    metadata={\"accession_number\": 123, \"collection_date\": \"2011-02-11\"}\n)\nts = tables.tree_sequence()\nprint(ts.individual(0).metadata)\n```\n\nThis schema states that the metadata for each row of the table is an object consisting\nof three properties. 
Property `accession_number` is a number (stored as a 4-byte int).\nProperty `collection_date` is a string which must satisfy a regex, which checks it is\na valid [ISO8601](https://www.iso.org/iso-8601-date-and-time-format.html) date. Property\n`phenotype` is itself an object consisting of the properties `height` (a single precision\nfloating point number) and `age` (a 2 byte signed integer).\nBecause this is a struct codec, and neither of the first two properties has a\ndefault set, they must be marked as \"required\" (in the JSON codec if no default is given,\nunspecified properties will simply be missing in the returned metadata dictionary).\nAlso because this is a struct codec, `additionalProperties` must be set to False. This\nis assumed by default in the struct codec, but has been shown above for clarity.\n\n### JSON+Struct codec\n\nA schema using the `\"json+struct\"` codec simply needs to specify\nthe JSON part and the struct part, and be sure the two do not share any keys.\nHere is a simple example:\n\n```{code-cell}\nschema = {\n    \"codec\": \"json+struct\",\n    \"json\": {\n        \"type\": \"object\",\n        \"properties\": {\n            \"label\": {\"type\": \"string\"},\n            \"id\": {\"type\": \"number\"},\n        },\n        \"required\": [\"label\"],\n    },\n    \"struct\": {\n        \"type\": \"object\",\n        \"properties\": {\n            \"values\": {\n                \"type\": \"array\",\n                \"arrayLengthFormat\": \"B\",\n                \"items\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n            }, \n        },\n    },\n}\nms = tskit.MetadataSchema(schema)\nrow = {\"label\": \"alpha\", \"id\": 7, \"values\": [5, 10, 2, 12]}\nencoded = ms.validate_and_encode_row(row)\nprint(\"Encoded:\", encoded)\nprint(\"Decoded:\", ms.decode_row(encoded))\n```\n\nThis encodes two things in JSON: a label and an ID number,\nand then uses the ``struct`` codec to encode an array of integers in 
binary.\n\n\n(sec_metadata_api_overview)=\n\n## Python Metadata API Overview\n\nSchemas are represented in the Python API by the {class}`tskit.MetadataSchema`\nclass which can be assigned to, and retrieved from, tables via their `metadata_schema`\nattribute (e.g. {attr}`tskit.IndividualTable.metadata_schema`). The schemas\nfor all tables can be retrieved from a {class}`tskit.TreeSequence` by the\n{attr}`tskit.TreeSequence.table_metadata_schemas` attribute.\n\nThe top-level tree sequence metadata schema is set via\n{attr}`tskit.TableCollection.metadata_schema` and can be accessed via\n{attr}`tskit.TreeSequence.metadata_schema`.\n\nEach table's `add_row` method (e.g. {meth}`tskit.IndividualTable.add_row`) will\nvalidate and encode the metadata using the schema. This encoding will also happen when\ntree sequence metadata is set (e.g. `table_collection.metadata = {...}`).\n\nMetadata will be lazily decoded if accessed via\n`tables.individuals[0].metadata`, `tree_sequence.individual(0).metadata` or\n`tree_sequence.metadata`.\n\nIn the interests of efficiency the bulk methods of `set_columns`\n(e.g. {meth}`tskit.IndividualTable.set_columns`)\nand `append_columns` (e.g. {meth}`tskit.IndividualTable.append_columns`) do not\nvalidate or encode metadata. See {ref}`sec_tutorial_metadata_bulk` for how to prepare\nmetadata for these methods.\n\nMetadata processing can be disabled and raw bytes stored/retrieved. See\n{ref}`sec_tutorial_metadata_binary`.\n\n(sec_structured_array_metadata)=\n\n## Structured array metadata\n\nIf the `struct` codec is used for metadata then the metadata can be very efficiently\naccessed via a `numpy` [structured array](https://numpy.org/doc/stable/user/basics.rec.html) via the `ts.X_metadata` property, e.g. {attr}`TreeSequence.individuals_metadata`. The codec must meet the following requirements for this to work:\n\n1. The metadata for a given object must be of a fixed size. 
This means that\n   variable length arrays are not permitted, such that the `length` property\n   must be set for all arrays.\n\n2. Each object for a given table must be present, i.e. at the top-level\n   the metadata must be an `object` and not a union of `object` and `null`.\n\n3. Strings must use the `s` format character, as the `p` pascal string format\n   is not supported by `numpy`.\n\nAs an example, let's make a tree sequence with a large amount of metadata:\n\n```{code-cell} ipython3\nimport msprime\nimport tskit\nimport itertools\nimport time\n\nts = msprime.sim_ancestry(1000, recombination_rate=1, sequence_length=1000)\nts = msprime.sim_mutations(ts, rate=20)\ntables = ts.dump_tables()\nmuts = tables.mutations.copy()\ntables.mutations.clear()\nschema = tskit.MetadataSchema({\n             \"codec\": \"struct\",\n             \"type\": \"object\",\n             \"properties\": {\n                 \"id\": {\"type\": \"integer\", \"binaryFormat\": \"i\"},\n                 \"name\": {\"type\": \"string\", \"binaryFormat\": \"15s\"},\n                 },\n             },\n)\ntables.mutations.metadata_schema = schema\nfor i, m in enumerate(muts):\n             tables.mutations.append(m.replace(metadata={\n                 \"id\":i,\n                 \"name\":f\"name_{i}\"\n             }))\nts = tables.tree_sequence()\nprint(f\"Tree sequence with {ts.num_mutations} mutations\")\n```\n\nAccessing the metadata row-by-row is slow:\n\n```{code-cell} ipython3\n%timeit [m.metadata for m in ts.mutations()]\n```\n\nBut accessing via the structured array is fast:\n\n```{code-cell} ipython3\n%timeit md = ts.mutations_metadata\n```\n\nArrays of a specific key are easily accessed by item. It is also trivial to create a pandas dataframe from the metadata:\n\n```{code-cell} ipython3\nprint(ts.mutations_metadata[\"id\"][:5])\n\nimport pandas as pd\ndf = pd.DataFrame(ts.mutations_metadata)\nprint(df.head())\n```\n\n\n(sec_metadata_schema_schema)=\n\n## Full metaschema\n\nThe 
schema for metadata schemas is formally defined using\n[JSON Schema](http://json-schema.org/) and given in full here. Any schema passed to\n{class}`tskit.MetadataSchema` is validated against this metaschema.\n\n```{eval-rst}\n.. literalinclude:: ../python/tskit/metadata_schema.schema.json\n    :language: json\n```\n"
  },
  {
    "path": "docs/numba.md",
    "content": "---\njupytext:\n  text_representation:\n    extension: .md\n    format_name: myst\n    format_version: 0.12\n    jupytext_version: 1.9.1\nkernelspec:\n  display_name: Python 3\n  language: python\n  name: python3\n---\n\n```{currentmodule} tskit.jit.numba\n```\n\n(sec_numba)=\n\n# Numba Integration\n\nThe `tskit.jit.numba` module provides classes for working with tree sequences\nfrom [Numba](https://numba.pydata.org/) jit-compiled Python code. Such code can run\nup to hundreds of times faster than normal Python, yet avoids the difficulties of writing\nC or other low-level code.\n\n:::{note}\nNumba is not a direct dependency of tskit, so will not be available unless installed:\n\n```bash\npip install numba\n```\n\nor\n\n```bash\nconda install numba\n```\n:::\n\n## Overview\n\nThe numba integration provides a {class}`tskit.TreeSequence` wrapper class {class}`NumbaTreeSequence`.\nThis class can be used directly in `numba.njit` compiled functions, and provides several efficient\nmethods for tree traversal:\n\n- **{meth}`~NumbaTreeSequence.tree_index`**: For efficient iteration through the trees in the sequence\n- **{meth}`~NumbaTreeSequence.parent_index`**: For efficient access to parent edge information, to\ntraverse upwards through the ARG.\n- **{meth}`~NumbaTreeSequence.child_index`**: For efficient access to child edge information, to\ntraverse downwards through the ARG.\n\nThese methods are optimised to work within Numba's `@njit` decorated functions,\nallowing you to write high-performance tree sequence analysis code in a plain Python style.\n\n## Basic Usage\n\nThe ``tskit.jit.numba`` module is not imported with normal `tskit` so must be imported explicitly:\n```{code-cell} python\nimport numpy as np\nimport tskit\nimport tskit.jit.numba as tskit_numba\n```\n\nNormal third-party classes such as {class}`tskit.TreeSequence` can't be used in `numba.njit` compiled\nfunctions so the {class}`tskit.TreeSequence` must be wrapped in a 
{class}`NumbaTreeSequence` by \n{meth}`jitwrap`. This must be done outside `njit` code:\n\n```{code-cell} python\nimport msprime\n\nts = msprime.sim_ancestry(\n    samples=50000,\n    sequence_length=100000,\n    recombination_rate=0.1,\n    random_seed=42\n)\nnumba_ts = tskit_numba.jitwrap(ts)\nprint(type(numba_ts))\n```\n\n## Tree Iteration\n\nTree iteration can be performed in `numba.njit` compiled functions using the {class}`TreeIndex` class.\nThis class provides `next()` and `prev()` methods for forward and backward iteration through the trees in a tree sequence. Its `in_range` and `out_range` attributes provide the edges that must be added or removed to form the current\ntree from the previous tree, along with the current tree `interval` and its sites and mutations through `site_range` and `mutation_range`.\n\nA `TreeIndex` instance can be obtained from a {class}`NumbaTreeSequence` using the {meth}`~NumbaTreeSequence.tree_index` method. The initial state of this is of a \"null\" tree outside the range of the tree sequence, the first call to `next()` or `prev()` will be to the first, or last tree sequence tree respectively. After that, the `in_range` and `out_range` attributes will provide the edges that must be added or removed to form the current tree from the previous tree. For example\n`tree_index.in_range.order[in_range.start:in_range.stop]` will give the edge ids that are new in the current tree, and `tree_index.out_range.order[out_range.start:out_range.stop]` will give the edge ids that are no longer present in the current tree. 
`tree_index.site_range` and\n`tree_index.mutation_range` give the indexes into the tree sequences site and mutation arrays.\n\nAs a simple example we can calculate the number of edges in each tree in a tree sequence:\n\n```{code-cell} python\nimport numba\n\n@numba.njit\ndef edges_per_tree(numba_ts):\n    tree_index = numba_ts.tree_index()\n    current_num_edges = 0\n    num_edges = []\n    \n    # Move forward through the trees\n    while tree_index.next():\n        # Access current tree information\n        in_range = tree_index.in_range\n        out_range = tree_index.out_range\n        \n        current_num_edges -= (out_range.stop - out_range.start)\n        current_num_edges += (in_range.stop - in_range.start)\n        num_edges.append(current_num_edges)\n    return num_edges\n```\n\n```{code-cell} python\n:tags: [hide-cell]\n# Warm up the JIT compiler\nedges = edges_per_tree(numba_ts)\n```\n\n\n```{code-cell} python\nimport time\n\nt = time.time()\njit_num_edges = edges_per_tree(numba_ts)\nprint(f\"JIT Time taken: {time.time() - t:.4f} seconds\")\n```\n\nDoing the same thing with the normal `tskit` API would be much slower:\n\n```{code-cell} python\nt = time.time()\npython_num_edges = []\nfor tree in ts.trees():\n    python_num_edges.append(tree.num_edges)\nprint(f\"Normal Time taken: {time.time() - t:.4f} seconds\")\n\nassert jit_num_edges == python_num_edges, \"JIT and normal results do not match!\"\n```\n\n### Example - diversity calculation\n\nAs a more interesting example we can calculate genetic diversity (also known as pi).\nFor this example we'll be calculating based on the distance in the tree between samples.\n(`mode=\"branch\"` in the tskit API.)\n\nThis example also shows the style of Python code that gives best performance under `numba`\nJIT compilation - using simple loops and fixed-size arrays with minimal object attribute access.\n\n```{code-cell} python\n    @numba.njit\n    def diversity(numba_ts):\n        # Cache arrays to avoid repeated 
attribute access in\n        # tight loops\n        edge_child = numba_ts.edges_child\n        edge_parent = numba_ts.edges_parent\n        node_times = numba_ts.nodes_time\n        node_flags = numba_ts.nodes_flags\n        \n        if numba_ts.num_samples <= 1:\n            return 0.0\n\n        parent = np.full(numba_ts.num_nodes, -1, dtype=np.int32)\n        branch_length = np.zeros(numba_ts.num_nodes, dtype=np.float64)\n        state = np.zeros(numba_ts.num_nodes, dtype=np.int32)\n        summary = np.zeros(numba_ts.num_nodes, dtype=np.float64)\n\n        n = float(numba_ts.num_samples)\n        two_over_denom = 2.0 / (n * (n - 1.0))\n        sample_summary = 2.0 / n\n\n        # Retrieve this constant outside the loop\n        # to avoid repeated attribute access\n        NODE_IS_SAMPLE = tskit.NODE_IS_SAMPLE\n        # Find the sample nodes and initialize their states\n        for node in range(numba_ts.num_nodes):\n            if node_flags[node] & NODE_IS_SAMPLE:\n                state[node] = 1.0\n                summary[node] = sample_summary\n\n        result = 0.0\n        running_sum = 0.0\n        tree_index = numba_ts.tree_index()\n\n        # Now iterate through the trees\n        while tree_index.next():\n            # Process the outgoing edges\n            for j in range(tree_index.out_range.start, tree_index.out_range.stop):\n                h = tree_index.out_range.order[j]\n                child = edge_child[h]\n                child_parent = edge_parent[h]\n\n                running_sum -= branch_length[child] * summary[child]\n                parent[child] = -1\n                branch_length[child] = 0.0\n\n                u = child_parent\n                parent_u = parent[u]\n                while u != -1:\n                    running_sum -= branch_length[u] * summary[u]\n                    state[u] -= state[child]\n                    summary[u] = state[u] * (n - state[u]) * two_over_denom\n                    running_sum += 
branch_length[u] * summary[u]\n                    u = parent_u\n                    if u != -1:\n                        parent_u = parent[u]\n\n            # Process the incoming edges\n            for j in range(tree_index.in_range.start, tree_index.in_range.stop):\n                h = tree_index.in_range.order[j]\n                child = edge_child[h]\n                child_parent = edge_parent[h]\n\n                parent[child] = child_parent\n                branch_length[child] = node_times[child_parent] - node_times[child]\n                running_sum += branch_length[child] * summary[child]\n\n                u = child_parent\n                parent_u = parent[u]\n                while u != -1:\n                    running_sum -= branch_length[u] * summary[u]\n                    state[u] += state[child]\n                    summary[u] = state[u] * (n - state[u]) * two_over_denom\n                    running_sum += branch_length[u] * summary[u]\n                    u = parent_u\n                    if u != -1:\n                        parent_u = parent[u]\n\n            result += running_sum * (\n                tree_index.interval[1] - tree_index.interval[0]\n            )\n\n        return result / numba_ts.sequence_length\n```\n\n```{code-cell} python\n:tags: [hide-cell]\n# Warm up the JIT\nd = diversity(numba_ts)\n```\n\n```{code-cell} python\nt = time.time()\nd = diversity(numba_ts)\nprint(\"Diversity:\", d)\nprint(\"Time taken:\", time.time() - t)\n```\n\nAs this code is written for this specific diversity calculation it is even faster\nthan the tskit C implementation, called here from Python:\n\n```{code-cell} python\nt = time.time()\nd_tskit = ts.diversity(mode=\"branch\")\nprint(\"Diversity (tskit):\", d_tskit)\nprint(\"Time taken:\", time.time() - t)\n```\n\n## ARG Traversal\n\nBeyond iterating through trees, you may need to traverse the ARG vertically. 
The {meth}`~NumbaTreeSequence.child_index` and {meth}`~NumbaTreeSequence.parent_index` methods provide efficient access to parent-child relationships in the edge table within `numba.njit` functions.\n\nThe {meth}`~NumbaTreeSequence.child_index` method returns an array that allows you to efficiently find all edges where a given node is the parent. Since edges are already sorted by parent in the tskit data model, this is implemented using simple range indexing. For any node `u`, the returned array `child_index[u]` gives a tuple of the start and stop indices in the tskit edge table where node `u` is the parent. The index is calculated on each call to `child_index()` so should be called once.\n\nThe {meth}`~NumbaTreeSequence.parent_index` method creates a {class}`ParentIndex` that allows you to efficiently find all edges where a given node is the child. Since edges are not sorted by child in the edge table, the returned class contains a custom index that sorts edge IDs by child node (and then by left coordinate). 
For any node `u`, `parent_index.index_range[u]` gives a tuple of the start and stop indices in the `parent_index.edge_index` array, and `parent_index.edge_index[start:stop]` gives the actual tskit edge IDs.\n\nBoth can be obtained from a {class}`NumbaTreeSequence`:\n\n```{code-cell} python\n# Get the indexes\nchild_index = numba_ts.child_index()\nparent_index = numba_ts.parent_index()\n\n# Example: find all left coordinates of edges where node 5 is the parent\nstart, stop = child_index[5]\nleft_coords = numba_ts.edges_left[start:stop]\nprint(left_coords)\n\n# Example: find all right coordinates of edges where node 3 is the child\n# (index_range indexes into edge_index, which holds the actual edge IDs)\nstart, stop = parent_index.index_range[3]\nedge_ids = parent_index.edge_index[start:stop]\nright_coords = numba_ts.edges_right[edge_ids]\nprint(right_coords)\n```\n\nThese indexes enable efficient algorithms that need to traverse parent-child relationships in the ARG, such as computing descendant sets, ancestral paths, or subtree properties.\n\n### Example - descendant span calculation\n\nHere's an example of using the ARG traversal indexes to calculate the total sequence length over which each node descends from a specified node:\n\n```{code-cell} python\n@numba.njit\ndef descendant_span(numba_ts, u):\n    \"\"\"\n    Calculate the total sequence length over which each node \n    descends from the specified node u.\n    \"\"\"\n    child_index = numba_ts.child_index()\n    edges_left = numba_ts.edges_left\n    edges_right = numba_ts.edges_right\n    edges_child = numba_ts.edges_child\n    \n    total_descending = np.zeros(numba_ts.num_nodes)\n    stack = [(u, 0.0, numba_ts.sequence_length)]\n    \n    while len(stack) > 0:\n        node, left, right = stack.pop()\n        total_descending[node] += right - left\n        \n        # Find all child edges for this node\n        for e in range(child_index[node, 0], child_index[node, 1]):\n            e_left = edges_left[e]\n            e_right = edges_right[e]\n            \n            # Check if edge overlaps with current interval\n     
       if e_right > left and right > e_left:\n                inter_left = max(e_left, left)\n                inter_right = min(e_right, right)\n                e_child = edges_child[e]\n                stack.append((e_child, inter_left, inter_right))\n    \n    return total_descending\n```\n\n```{code-cell} python\n:tags: [hide-cell]\n# Warm up the JIT\nresult = descendant_span(numba_ts, 0)\n```\n\n```{code-cell} python\n# Calculate descendant span for the root node (highest numbered node)\nroot_node = numba_ts.num_nodes - 1\nresult = descendant_span(numba_ts, root_node)\n\n# Show nodes that have non-zero descendant span\nnon_zero = result > 0\nprint(f\"Nodes descended from {root_node}:\")\nprint(f\"Node IDs: {np.where(non_zero)[0]}\")\nprint(f\"Span lengths: {result[non_zero]}\")\n```\n\nComparing performance with using the tskit Python API:\n\n```{code-cell} python\ndef descendant_span_tskit(ts, u):\n    \"\"\"Reference implementation using tskit trees\"\"\"\n    total_descending = np.zeros(ts.num_nodes)\n    for tree in ts.trees():\n        descendants = tree.preorder(u)\n        total_descending[descendants] += tree.span\n    return total_descending\n\nimport time\nt = time.time()\nnumba_result = descendant_span(numba_ts, root_node)\nprint(f\"Numba time: {time.time() - t:.6f} seconds\")\n\nt = time.time()\ntskit_result = descendant_span_tskit(ts, root_node)\nprint(f\"tskit time: {time.time() - t:.6f} seconds\")\n\nnp.testing.assert_array_almost_equal(numba_result, tskit_result, decimal=10)\nprint(\"Results match!\")\n```\n\n### Example - ARG descendant and ancestral edges calculation\n\nAs we have `child_index` and `parent_index`, we can efficiently find both descendant and ancestral sub-ARGs\nfor a given node. This first example shows how to find all edges in the ARG that are descendants of a given node. 
It returns a boolean array indicating which edges are part of the sub-ARG rooted at the specified node:\n\n```{code-cell} python\n@numba.njit\ndef descendant_edges(numba_ts, u):\n    \"\"\"\n    Returns a boolean array which is only True for edges that are descendants of node u.\n    \"\"\"\n    edge_select = np.zeros(numba_ts.num_edges, dtype=np.bool_)\n    child_index = numba_ts.child_index()\n    edges_left = numba_ts.edges_left\n    edges_right = numba_ts.edges_right\n    edges_child = numba_ts.edges_child\n    \n    # The stack stores (node_id, left_coord, right_coord)\n    stack = [(u, 0.0, numba_ts.sequence_length)]\n    \n    while len(stack) > 0:\n        node, left, right = stack.pop()\n        \n        # Find all edges where 'node' is the parent\n        start, stop = child_index[node]\n        for e in range(start, stop):\n            e_left = edges_left[e]\n            e_right = edges_right[e]\n            \n            # Check for genomic interval overlap\n            if e_right > left and right > e_left:\n                # This edge is part of the sub-ARG\n                edge_select[e] = True\n                \n                # Calculate the intersection for the next traversal step\n                inter_left = max(e_left, left)\n                inter_right = min(e_right, right)\n                e_child = edges_child[e]\n                stack.append((e_child, inter_left, inter_right))\n                \n    return edge_select\n```\n\n```{code-cell} python\n# Find descendant edges for a high-numbered node (likely near root)\ntest_node = max(0, numba_ts.num_nodes - 5)\nedge_select = descendant_edges(numba_ts, test_node)\n\n# Show which edges are descendants\ndescendant_edge_ids = np.where(edge_select)[0]\nprint(f\"Edges descended from node {test_node}: {descendant_edge_ids[:10]}...\")\nprint(f\"Total descendant edges: {np.sum(edge_select)}\")\n```\n\n```{code-cell} python\n:tags: [hide-cell]\n# Create a simple hard-coded example for consistent 
visualization\ntables = tskit.TableCollection(sequence_length=10.0)\n\ntables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)  # node 0\ntables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)  # node 1\ntables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)  # node 2\ntables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)  # node 3\ntables.nodes.add_row(flags=0, time=1)  # node 4\ntables.nodes.add_row(flags=0, time=2)  # node 5\ntables.nodes.add_row(flags=0, time=3)  # node 6\n\ntables.edges.add_row(left=0, right=5, parent=4, child=0)\ntables.edges.add_row(left=0, right=10, parent=4, child=1)\ntables.edges.add_row(left=5, right=10, parent=5, child=0)\ntables.edges.add_row(left=0, right=10, parent=5, child=2)\ntables.edges.add_row(left=0, right=7, parent=6, child=4)\ntables.edges.add_row(left=0, right=10, parent=6, child=5)\ntables.edges.add_row(left=7, right=10, parent=6, child=3)\n\ntables.sort()\nts_simple = tables.tree_sequence()\n```\n\nA tree sequence is easily made from the descendant edges array:\n\n```{code-cell} python\nnumba_ts_simple = tskit_numba.jitwrap(ts_simple)\nnode = 5\nE = descendant_edges(numba_ts_simple, node)\ntables_sub = ts_simple.dump_tables()\ntables_sub.edges.replace_with(tables_sub.edges[E])\nts_sub = tables_sub.tree_sequence()\n```\n\nAs an example, let's visualise the selection of a sub-ARG. 
Here is the full ARG\nwith a highlighted node:\n\n```{code-cell} python\ncss_style = f\".node.n{node} > .sym {{ fill: #c41e3a; }}\"\nts_simple.draw_svg(size=(400, 200), node_labels={}, y_axis=True, style=css_style)\n```\n\nAnd the sub-ARG from that node:\n\n```{code-cell} python\nts_sub.draw_svg(size=(400, 200), node_labels={}, y_axis=True, style=css_style)\n```\n\nIn the other direction, we can similarly find the sub-ARG that is ancestral to a given node:\n\n```{code-cell} python\n@numba.njit\ndef ancestral_edges(numba_ts, u):\n    \"\"\"\n    Returns a boolean array which is only True for edges that are ancestors of node u.\n    \"\"\"\n    edge_select = np.zeros(numba_ts.num_edges, dtype=np.bool_)\n    parent_index = numba_ts.parent_index()\n    edges_left = numba_ts.edges_left\n    edges_right = numba_ts.edges_right\n    edges_parent = numba_ts.edges_parent\n    \n    # The stack stores (node_id, left_coord, right_coord)\n    stack = [(u, 0.0, numba_ts.sequence_length)]\n    \n    while len(stack) > 0:\n        node, left, right = stack.pop()\n        \n        # Find all edges where 'node' is the child\n        start, stop = parent_index.index_range[node]\n        for i in range(start, stop):\n            e = parent_index.edge_index[i]\n            e_left = edges_left[e]\n            e_right = edges_right[e]\n            \n            # Check for genomic interval overlap\n            if e_right > left and right > e_left:\n                # This edge is part of the sub-ARG\n                edge_select[e] = True\n                \n                # Calculate the intersection for the next traversal step\n                inter_left = max(e_left, left)\n                inter_right = min(e_right, right)\n                e_parent = edges_parent[e]\n                stack.append((e_parent, inter_left, inter_right))\n\n    return edge_select\n```\n\n```{code-cell} python\n# Find ancestral edges for a sample node (low-numbered nodes are usually samples)\ntest_node = 
min(5, numba_ts.num_nodes - 1)\nedge_select = ancestral_edges(numba_ts, test_node)\n\n# Show which edges are ancestors\nancestral_edge_ids = np.where(edge_select)[0]\nprint(f\"Edges ancestral to node {test_node}: {ancestral_edge_ids[:10]}...\")\nprint(f\"Total ancestral edges: {np.sum(edge_select)}\")\n```\n\n```{code-cell} python\n:tags: [hide-cell]\n# Warm up the JIT for both functions\n_ = descendant_edges(numba_ts, 0)\n_ = ancestral_edges(numba_ts, 0)\n```\n\nComparing performance with using the tskit Python API shows significant speedup:\n\n```{code-cell} python\ndef descendant_edges_tskit(ts, start_node):\n    D = np.zeros(ts.num_edges, dtype=bool)\n    for tree in ts.trees():\n        for v in tree.preorder(start_node):\n            if v != start_node:\n                D[tree.edge(v)] = True\n    return D\n\ndef ancestral_edges_tskit(ts, start_node):\n    A = np.zeros(ts.num_edges, dtype=bool)\n    for tree in ts.trees():\n        curr_node = start_node\n        parent = tree.parent(curr_node)\n        while parent != tskit.NULL:\n            edge_id = tree.edge(curr_node)\n            A[edge_id] = True\n            curr_node = parent\n            parent = tree.parent(curr_node)\n    return A\n\nimport time\n\n# Test with root node for descendant edges\nroot_node = numba_ts.num_nodes - 1\nt = time.time()\nnumba_desc = descendant_edges(numba_ts, root_node)\nprint(f\"Numba descendant edges time: {time.time() - t:.6f} seconds\")\n\nt = time.time()\ntskit_desc = descendant_edges_tskit(ts, root_node)\nprint(f\"tskit descendant edges time: {time.time() - t:.6f} seconds\")\n\n# Test with sample node for ancestral edges  \nsample_node = 0\nt = time.time()\nnumba_anc = ancestral_edges(numba_ts, sample_node)\nprint(f\"Numba ancestral edges time: {time.time() - t:.6f} seconds\")\n\nt = time.time()\ntskit_anc = ancestral_edges_tskit(ts, sample_node)\nprint(f\"tskit ancestral edges time: {time.time() - t:.6f} seconds\")\n\n# Verify results 
match\nnp.testing.assert_array_equal(numba_desc, tskit_desc)\nnp.testing.assert_array_equal(numba_anc, tskit_anc)\nprint(\"Results match!\")\n```\n\n## API Reference\n\n```{eval-rst}\n.. currentmodule:: tskit.jit.numba\n\n.. autofunction:: jitwrap\n\n.. autoclass:: NumbaTreeSequence\n   :members:\n\n.. autoclass:: TreeIndex\n   :members:\n\n.. autoclass:: EdgeRange\n   :members:\n\n.. autoclass:: ParentIndex\n   :members:\n```"
  },
  {
    "path": "docs/provenance.md",
    "content": "---\njupytext:\n  text_representation:\n    extension: .md\n    format_name: myst\n    format_version: 0.12\n    jupytext_version: 1.9.1\nkernelspec:\n  display_name: Python 3\n  language: python\n  name: python3\n---\n\n```{currentmodule} tskit\n```\n\n(sec_provenance)=\n\n# Provenance\n\nEvery tree sequence has provenance information associated with it. The purpose of this\ninformation is to improve [reproducibility](https://en.wikipedia.org/wiki/Reproducibility):\ngiven the provenance associated with a given tree sequence, it should be possible to\nreproduce it. Provenance is split into three sections: the primary **software** used to\nproduce a tree sequence; the **parameters** provided to this software; and the computational\n**environment** where the software was run.\n\nThis documentation serves two distinct purposes:\n\n1. For developers using `tskit` in their own applications, it provides normative documentation\n   for how provenance information should be stored.\n2. For end-users of `tskit`, it provides documentation to allows them to inspect and interpret\n   the provenance information stored in `.trees` files.\n\nProvenance information is encoded using [JSON](https://www.json.org/).\nTo standardise the provenance information produced by different software and improve\ninteroperability we define a formal specification using [JSON Schema](http://json-schema.org/).\nThe full schema is provided {ref}`below <sec_provenance_schema>`, which may be used to\nautomatically validate input. In the following we describe the intention of the various\nsections in more detail.\n\nThis document defines specification version 1.0.0. 
Specification version numbers follow\n[SemVer](https://semver.org/) semantics.\n\n(sec_provenance_example)=\n\n## Example\n\nTo make things more concrete, let's consider an example:\n\n```json\n{\n  \"schema_version\": \"1.0.0\",\n  \"software\": {\n    \"name\": \"msprime\",\n    \"version\": \"0.6.1.dev123+ga252341.d20180820\"\n  },\n  \"parameters\": {\n    \"sample_size\": 5,\n    \"random_seed\": 12345,\n    \"command\": \"simulate\"\n  },\n  \"environment\": {\n    \"libraries\": {\n      \"gsl\": {\n        \"version\": \"2.1\"\n      },\n      \"kastore\": {\n        \"version\": \"0.1.0\"\n      }\n    },\n    \"python\": {\n      \"version\": \"3.5.2\",\n      \"implementation\": \"CPython\"\n    },\n    \"os\": {\n      \"system\": \"Linux\",\n      \"node\": \"powderfinger\",\n      \"release\": \"4.15.0-29-generic\",\n      \"version\": \"#31~16.04.1-Ubuntu SMP Wed Jul 18 08:54:04 UTC 2018\",\n      \"machine\": \"x86_64\"\n    }\n  },\n  \"resources\": {\n    \"elapsed_time\": 12.34,\n    \"user_time\": 10.56,\n    \"sys_time\": 1.78,\n    \"max_memory\": 1048576\n  }\n}\n```\n\nThis information records the provenance for a very simple msprime simulation. The record is a JSON\nobject with three mandatory fields (\"software\", \"parameters\" and \"environment\") and one optional field\n(\"resources\") which we discuss separately in the following sections.\n\n(sec_provenance_software)=\n\n\n## Software\n\n\nEvery tree sequence is produced by some piece of software. For example, this may be a\ncoalescent simulation produced by `msprime`, a forwards-time simulation from `SLiM`\nor a tree sequence inferred from data by `tsinfer`. 
The software provenance is\nintended to capture the details about this primary software.\n\n```{list-table}\n:header-rows: 1\n\n* - Field\n  - Type\n  - Description\n* - name\n  - string\n  - The name of the software.\n* - version\n  - string\n  - The software version.\n```\n\nNote that libraries that the primary software links against are considered part of the\n{ref}`sec_provenance_environment` and should be recorded there.\n\n(sec_provenance_parameters)=\n\n## Parameters\n\nThe parameters section of a provenance document records the input that was used to\nproduce a particular tree sequence. There are no requirements on what may be stored\nwithin it, but we make some recommendations here on how to encode such information.\n\nAs a general principle, sufficient information should be recorded in the parameters\nsection to allow the output tree sequence to be reproduced exactly. There will be instances,\nhowever, where this is not possible due to missing files, issues with numerical precision\nand so on.\n\n### API invocations\n\nConsider an API call like the following simple msprime simulation:\n\n```python\nts = msprime.simulate(sample_size=10, recombination_rate=2)\n```\n\nWe recommend encoding the parameters provenance as follows (other fields omitted\nfor clarity):\n\n```json\n{\n  \"parameters\": {\n    \"command\": \"simulate\",\n    \"sample_size\": 10,\n    \"recombination_rate\": 2,\n    \"random_seed\": 123456789\n  }\n}\n```\n\nSpecifically, we encode the name of the function using the `command` key and\nthe function parameters in the obvious way. 
Note that we include the `random_seed`\nhere even though it was automatically generated.\n\n\n### CLI invocations\n\nConsider the following invocation of a hypothetical command line program:\n\n```bash\n$ supersim --sample-size=10 --do-some-stuff -O out.trees\n```\n\nWe recommend encoding the parameters provenance as follows (other fields omitted\nfor clarity):\n\n```json\n{\n  \"parameters\": {\n    \"command\": \"supersim\",\n    \"args\": [\"--sample-size=10\", \"--do-some-stuff\", \"-O\", \"out.trees\"],\n    \"random_seed\": 56789\n  }\n}\n```\n\nHere we encode the name of the program using the `command` key\nand its command line arguments as a list of strings in the `args` key. We\nalso include the automatically generated random seed in the parameters list.\n\nIf parameters that affect the output tree sequence are derived from environment\nvariables these should also be recorded.\n\n(sec_provenance_environment)=\n\n## Environment\n\nThe environment section captures details about the computational environment in\nwhich the software was executed. Two optional fields are defined: `os`\nand `libraries`. We recommend including any additional relevant platform\ninformation here; for example, if using Python store the interpreter information\nas shown in the example above.\n\n### Operating system\n\nThe `os` section records details about the operating system on which the\nsoftware was executed. This section is optional and has no required internal\nstructure. 
We recommend the following structure based on the output of the\nPOSIX [uname](http://pubs.opengroup.org/onlinepubs/009695399/functions/uname.html)\nfunction:\n\n```json\n{\n  \"environment\": {\n    \"os\": {\n      \"system\": \"Linux\",\n      \"node\": \"powderfinger\",\n      \"release\": \"4.15.0-29-generic\",\n      \"version\": \"#31~16.04.1-Ubuntu SMP Wed Jul 18 08:54:04 UTC 2018\",\n      \"machine\": \"x86_64\"\n    }\n  }\n}\n```\n\n### Libraries\n\nThe `libraries` section captures information about important libraries that the\nprimary software links against. There is no required structure.\n\n\n## Resources\n\nThe resources section captures details about the computational resources used during the execution of the software. This section is optional and has the following fields, each of which is optional and may not be filled in depending on OS support:\n\n\n- `elapsed_time`: The total elapsed time in seconds.\n- `user_time`: The total user CPU time in seconds.\n- `sys_time`: The total system CPU time in seconds.\n- `max_memory`: The maximum memory usage in bytes.\n\nIncluding this information makes it easy for users of tree-sequence producing software to\naccount for resource usage across pipelines of tools.\n\n(sec_provenance_schema)=\n\n## Full schema\n\nThis schema is formally defined using [JSON Schema](http://json-schema.org/) and\ngiven in full here. Developers writing provenance information to `.trees` files\nshould validate the output JSON against this schema.\n\n```{eval-rst}\n.. literalinclude:: ../python/tskit/provenance.schema.json\n    :language: json\n```\n"
  },
  {
    "path": "docs/python-api.md",
    "content": "---\njupytext:\n  text_representation:\n    extension: .md\n    format_name: myst\n    format_version: 0.12\n    jupytext_version: 1.9.1\nkernelspec:\n  display_name: Python 3\n  language: python\n  name: python3\n---\n\n```{currentmodule} tskit\n```\n\n(sec_python_api)=\n\n# Python API\n\nThis page documents the full tskit Python API. Brief thematic summaries of common\nclasses and methods are presented first. The {ref}`sec_python_api_reference` section\nat the end then contains full details which aim to be concise, precise and exhaustive.\nNote that this may not therefore be the best place to start if you are new\nto a particular piece of functionality.\n\n(sec_python_api_trees_and_tree_sequences)=\n\n## Trees and tree sequences\n\nThe {class}`TreeSequence` class represents a sequence of correlated\nevolutionary trees along a genome. The {class}`Tree` class represents a\nsingle tree in this sequence. These classes are the interfaces used to\ninteract with the trees and mutational information stored in a tree sequence,\nfor example as returned from a simulation or inferred from a set of DNA\nsequences.\n\n\n(sec_python_api_tree_sequences)=\n\n### {class}`TreeSequence` API\n\n\n(sec_python_api_tree_sequences_properties)=\n\n#### General properties\n\n```{eval-rst}\n.. autosummary::\n  TreeSequence.time_units\n  TreeSequence.nbytes\n  TreeSequence.sequence_length\n  TreeSequence.max_root_time\n  TreeSequence.discrete_genome\n  TreeSequence.discrete_time\n  TreeSequence.metadata\n  TreeSequence.metadata_schema\n  TreeSequence.reference_sequence\n```\n\n#### Efficient table column access\n\nThe {class}`.TreeSequence` class provides access to underlying numerical\ndata defined in the {ref}`data model<sec_data_model>` in two ways:\n\n1. Via the {attr}`.TreeSequence.tables` property and the\n    {ref}`Tables API<sec_tables_api_accessing_table_data>`.\n   Since version 1.0 this provides a direct, zero-copy, immutable view of the\n   underlying memory.\n2. 
Via a set of properties on the ``TreeSequence`` class that provide\n   direct and efficient access to a single array in the underlying memory.\n\n\n```{eval-rst}\n.. autosummary::\n  TreeSequence.individuals_flags\n  TreeSequence.nodes_time\n  TreeSequence.nodes_flags\n  TreeSequence.nodes_population\n  TreeSequence.nodes_individual\n  TreeSequence.edges_left\n  TreeSequence.edges_right\n  TreeSequence.edges_parent\n  TreeSequence.edges_child\n  TreeSequence.sites_position\n  TreeSequence.sites_ancestral_state\n  TreeSequence.mutations_site\n  TreeSequence.mutations_node\n  TreeSequence.mutations_parent\n  TreeSequence.mutations_time\n  TreeSequence.mutations_derived_state\n  TreeSequence.mutations_metadata\n  TreeSequence.mutations_edge\n  TreeSequence.mutations_inherited_state\n  TreeSequence.migrations_left\n  TreeSequence.migrations_right\n  TreeSequence.migrations_node\n  TreeSequence.migrations_source\n  TreeSequence.migrations_dest\n  TreeSequence.migrations_time\n  TreeSequence.indexes_edge_insertion_order\n  TreeSequence.indexes_edge_removal_order\n```\n\n(sec_python_api_tree_sequences_loading_and_saving)=\n\n#### Loading and saving\n\nThere are several methods for loading data into a {class}`TreeSequence`\ninstance. The simplest and most convenient is to use the {func}`tskit.load`\nfunction to load a {ref}`tree sequence file <sec_tree_sequence_file_format>`. For small\nscale data and debugging, it is often convenient to use the {func}`tskit.load_text`\nfunction to read data in the {ref}`text file format<sec_text_file_format>`.\nThe {meth}`TableCollection.tree_sequence` function\nefficiently creates a {class}`TreeSequence` object from a\n{class}`collection of tables<TableCollection>`\nusing the {ref}`Tables API <sec_tables_api>`.\n\n```{eval-rst}\nLoad a tree sequence\n    .. autosummary::\n      load\n      load_text\n      TableCollection.tree_sequence\n\nSave a tree sequence\n    .. 
autosummary::\n      TreeSequence.dump\n```\n\n:::{seealso}\nTree sequences with a single simple topology can also be created from scratch by\n{ref}`generating<sec_python_api_trees_creating>` a {class}`Tree` and accessing its\n{attr}`~Tree.tree_sequence` property.\n:::\n\n(sec_python_api_tree_sequences_obtaining_trees)=\n\n#### Obtaining trees\n\nThe following properties and methods return information about the\n{class}`trees<Tree>` that are generated along a tree sequence.\n\n```{eval-rst}\n.. autosummary::\n\n  TreeSequence.num_trees\n  TreeSequence.trees\n  TreeSequence.breakpoints\n  TreeSequence.coiterate\n  TreeSequence.first\n  TreeSequence.last\n  TreeSequence.aslist\n  TreeSequence.at\n  TreeSequence.at_index\n```\n\n#### Obtaining other objects\n\n(sec_python_api_tree_sequences_obtaining_other_objects)=\n\nVarious components make up a tree sequence, such as nodes and edges, sites and\nmutations, and populations and individuals. These can be counted or converted into\nPython objects using the following classes, properties, and methods.\n\n##### Tree topology\n\n```{eval-rst}\nNodes\n    .. autosummary::\n      Node\n      TreeSequence.num_nodes\n      TreeSequence.nodes\n      TreeSequence.node\n      TreeSequence.num_samples\n      TreeSequence.samples\n\nEdges\n    .. autosummary::\n      Edge\n      TreeSequence.num_edges\n      TreeSequence.edges\n      TreeSequence.edge\n```\n\n##### Genetic variation\n\n```{eval-rst}\nSites\n    .. autosummary::\n      Site\n      TreeSequence.num_sites\n      TreeSequence.sites\n      TreeSequence.site\n      Variant\n      TreeSequence.variants\n      TreeSequence.genotype_matrix\n      TreeSequence.haplotypes\n      TreeSequence.alignments\n\nMutations\n    .. autosummary::\n      Mutation\n      TreeSequence.num_mutations\n      TreeSequence.mutations\n      TreeSequence.mutation\n```\n\n##### Demography\n\n```{eval-rst}\nPopulations\n    .. 
autosummary::\n      Population\n      TreeSequence.num_populations\n      TreeSequence.populations\n      TreeSequence.population\n\nMigrations\n    .. autosummary::\n      Migration\n      TreeSequence.num_migrations\n      TreeSequence.migrations\n      TreeSequence.migration\n```\n\n##### Other\n\n```{eval-rst}\nIndividuals\n    .. autosummary::\n      Individual\n      TreeSequence.num_individuals\n      TreeSequence.individuals\n      TreeSequence.individual\n\n\nProvenance entries (also see :ref:`sec_python_api_provenance`)\n    .. autosummary::\n      Provenance\n      TreeSequence.num_provenances\n      TreeSequence.provenances\n      TreeSequence.provenance\n```\n\n(sec_python_api_tree_sequences_modification)=\n\n#### Tree sequence modification\n\nAlthough tree sequences are immutable, several methods will take an existing tree\nsequence and return a modified version. These are thin wrappers around the\n{ref}`identically named methods of a TableCollection<sec_tables_api_modification>`,\nwhich perform the same actions but modify the {class}`TableCollection` in place.\n\n```{eval-rst}\n.. autosummary::\n  TreeSequence.simplify\n  TreeSequence.subset\n  TreeSequence.union\n  TreeSequence.concatenate\n  TreeSequence.keep_intervals\n  TreeSequence.delete_intervals\n  TreeSequence.delete_sites\n  TreeSequence.trim\n  TreeSequence.shift\n  TreeSequence.split_edges\n  TreeSequence.decapitate\n  TreeSequence.extend_haplotypes\n```\n\n(sec_python_api_tree_sequences_ibd)=\n\n#### Identity by descent\n\nThe {meth}`.TreeSequence.ibd_segments` method allows us to compute\nidentity relationships between pairs of samples. See the\n{ref}`sec_identity` section for more details and examples\nand the {ref}`sec_python_api_reference_identity` section for\nAPI documentation on the associated classes.\n\n```{eval-rst}\n.. 
autosummary::\n  TreeSequence.ibd_segments\n```\n\n(sec_python_api_tree_sequences_tables)=\n\n#### Tables\n\nThe underlying data in a tree sequence is stored in a\n{ref}`collection of tables<sec_tables_api>`. The following methods give access\nto tables and associated functionality. Since tables can be modified, this\nallows tree sequences to be edited: see the {ref}`sec_tables` tutorial for\nan introduction.\n\n```{eval-rst}\n.. autosummary::\n  TreeSequence.tables\n  TreeSequence.dump_tables\n  TreeSequence.table_metadata_schemas\n  TreeSequence.tables_dict\n```\n\n\n(sec_python_api_tree_sequences_statistics)=\n\n#### Statistics\n\n```{eval-rst}\n\nSingle site\n    .. autosummary::\n      TreeSequence.allele_frequency_spectrum\n      TreeSequence.divergence\n      TreeSequence.diversity\n      TreeSequence.f2\n      TreeSequence.f3\n      TreeSequence.f4\n      TreeSequence.Fst\n      TreeSequence.genealogical_nearest_neighbours\n      TreeSequence.genetic_relatedness\n      TreeSequence.genetic_relatedness_weighted\n      TreeSequence.genetic_relatedness_vector\n      TreeSequence.genetic_relatedness_matrix\n      TreeSequence.general_stat\n      TreeSequence.segregating_sites\n      TreeSequence.sample_count_stat\n      TreeSequence.mean_descendants\n      TreeSequence.Tajimas_D\n      TreeSequence.trait_correlation\n      TreeSequence.trait_covariance\n      TreeSequence.trait_linear_model\n      TreeSequence.Y2\n      TreeSequence.Y3\n\nComparative\n    .. autosummary::\n      TreeSequence.kc_distance\n```\n\n(sec_python_api_tree_sequences_topological_analysis)=\n\n#### Topological analysis\n\nThe topology of a tree in a tree sequence refers to the relationship among\nsamples ignoring branch lengths. Functionality as described in\n{ref}`sec_topological_analysis` is mainly provided via\n{ref}`methods on trees<sec_python_api_trees_topological_analysis>`, but more\nefficient methods sometimes exist for entire tree sequences:\n\n```{eval-rst}\n.. 
autosummary::\n  TreeSequence.count_topologies\n```\n\n(sec_python_api_tree_sequences_display)=\n\n#### Display\n\n```{eval-rst}\n.. autosummary::\n  TreeSequence.draw_svg\n  TreeSequence.draw_text\n  TreeSequence.__str__\n  TreeSequence._repr_html_\n```\n\n\n(sec_python_api_tree_sequences_export)=\n\n#### Export\n```{eval-rst}\n.. autosummary::\n  TreeSequence.as_fasta\n  TreeSequence.as_nexus\n  TreeSequence.dump_text\n  TreeSequence.to_macs\n  TreeSequence.write_fasta\n  TreeSequence.write_nexus\n  TreeSequence.write_vcf\n```\n\n\n(sec_python_api_trees)=\n\n### {class}`Tree<Tree>` API\n\nA tree is an instance of the {class}`Tree` class. These trees cannot exist\nindependently of the {class}`TreeSequence` from which they are generated.\nUsually, therefore, a {class}`Tree` instance is created by\n{ref}`sec_python_api_tree_sequences_obtaining_trees` from an existing tree\nsequence (although it is also possible to generate a new instance of a\n{class}`Tree` belonging to the same tree sequence using {meth}`Tree.copy`).\n\n:::{note}\nFor efficiency, each instance of a {class}`Tree` is a state-machine\nwhose internal state corresponds to one of the trees in the parent tree sequence:\n{ref}`sec_python_api_trees_moving_to` in the tree sequence does not require a\nnew instance to be created, but simply the internal state to be changed.\n:::\n\n(sec_python_api_trees_general_properties)=\n\n#### General properties\n\n\n```{eval-rst}\n.. autosummary::\n  Tree.tree_sequence\n  Tree.total_branch_length\n  Tree.root_threshold\n  Tree.virtual_root\n  Tree.num_edges\n  Tree.num_roots\n  Tree.has_single_root\n  Tree.has_multiple_roots\n  Tree.root\n  Tree.roots\n  Tree.index\n  Tree.interval\n  Tree.span\n```\n\n\n(sec_python_api_trees_creating)=\n\n#### Creating new trees\n\nIt is sometimes useful to create an entirely new tree sequence consisting\nof just a single tree (a \"one-tree sequence\"). 
The following methods create such an\nobject and return a {class}`Tree` instance corresponding to that tree.\nThe new tree sequence to which the tree belongs is available through the\n{attr}`~Tree.tree_sequence` property.\n\n```{eval-rst}\nCreating a new tree\n    .. autosummary::\n      Tree.generate_balanced\n      Tree.generate_comb\n      Tree.generate_random_binary\n      Tree.generate_star\n\nCreating a new tree from an existing tree\n    .. autosummary::\n      Tree.split_polytomies\n```\n\n:::{seealso}\n{meth}`Tree.unrank` for creating a new one-tree sequence from its\n{ref}`topological rank<sec_python_api_trees_topological_analysis>`.\n:::\n\n:::{note}\nSeveral of these methods are {func}`static<python:staticmethod>`, so should\nbe called e.g. as `tskit.Tree.generate_balanced(4)` rather than used on\na specific {class}`Tree` instance.\n:::\n\n\n(sec_python_api_trees_node_measures)=\n\n#### Node measures\n\nOften it is useful to access information pertinent to a specific node or set of nodes\nbut which might also change from tree to tree in the tree sequence. Examples include\nthe encoding of the tree via `parent`, `left_child`, etc.\n(see {ref}`sec_data_model_tree_structure`), the number of samples under a node,\nor the most recent common ancestor (MRCA) of two nodes. This sort of information is\navailable via simple and high performance {class}`Tree` methods.\n\n\n(sec_python_api_trees_node_measures_simple)=\n\n##### Simple measures\n\nThese return a simple number, or (usually) short list of numbers relevant to a specific\nnode or limited set of nodes.\n\n```{eval-rst}\nNode information\n    .. 
autosummary::\n      Tree.is_sample\n      Tree.is_isolated\n      Tree.is_leaf\n      Tree.is_internal\n      Tree.parent\n      Tree.num_children\n      Tree.time\n      Tree.branch_length\n      Tree.depth\n      Tree.population\n      Tree.right_sib\n      Tree.left_sib\n      Tree.right_child\n      Tree.left_child\n      Tree.children\n      Tree.edge\n\nDescendant nodes\n    .. autosummary::\n      Tree.leaves\n      Tree.samples\n      Tree.num_samples\n      Tree.num_tracked_samples\n\nNote that :meth:`Tree.num_samples` provides an efficient way to count samples under a node.\nHowever, samples and leaves are not always equivalent: some samples may be internal nodes,\nsome leaves may not be samples (in unsimplified tree sequences), and the same node can be\na leaf in one tree but internal in another. While ``tree.num_samples()`` often equals the\nleaf count (particularly in simplified tree sequences without internal samples), a strict\nleaf count requires tree traversal, e.g. via ``num_leaves = len(list(tree.leaves()))``.\n\n\nMultiple nodes\n    .. autosummary::\n      Tree.is_descendant\n      Tree.mrca\n      Tree.tmrca\n```\n\n\n(sec_python_api_trees_node_measures_array)=\n\n##### Array access\n\nThese all return a numpy array whose length corresponds to\nthe total number of nodes in the tree sequence. They provide direct access\nto the underlying memory structures, and are thus very efficient, providing a\nhigh performance interface which can be used in conjunction with the equivalent\n{ref}`traversal methods<sec_python_api_trees_traversal>`.\n\n```{eval-rst}\n.. autosummary::\n  Tree.parent_array\n  Tree.left_child_array\n  Tree.right_child_array\n  Tree.left_sib_array\n  Tree.right_sib_array\n  Tree.num_children_array\n  Tree.edge_array\n```\n\n\n(sec_python_api_trees_traversal)=\n\n#### Tree traversal\n\nMoving around within a tree usually involves visiting the tree nodes in some sort of\norder. 
Often, given a particular order, it is convenient to iterate over each node\nusing the {meth}`Tree.nodes` method. However, for high performance algorithms, it\nmay be more convenient to access the node indices for a particular order as\nan array, and use this, for example, to index into one of the node arrays (see\n{ref}`sec_topological_analysis_traversal`). Note that the most efficient of these\nmethods is {meth}`Tree.preorder`.\n\n```{eval-rst}\nIterator access\n    .. autosummary::\n\n      Tree.nodes\n      Tree.ancestors\n\nArray access\n    .. autosummary::\n\n      Tree.postorder\n      Tree.preorder\n      Tree.timeasc\n      Tree.timedesc\n```\n\n\n(sec_python_api_trees_topological_analysis)=\n\n#### Topological analysis\n\nThe topology of a tree refers to the simple relationship among samples\n(i.e. ignoring branch lengths), see {ref}`sec_combinatorics` for more details. These\nmethods provide ways to enumerate and count tree topologies.\n\nBriefly, the position of a tree in the enumeration `all_trees` can be obtained using\nthe tree's {meth}`~Tree.rank` method. Inversely, a {class}`Tree` can be constructed\nfrom a position in the enumeration with {meth}`Tree.unrank`.\n\n\n```{eval-rst}\nMethods of a tree\n    .. autosummary::\n      Tree.rank\n      Tree.count_topologies\n\nFunctions and static methods\n    .. autosummary::\n      Tree.unrank\n      all_tree_shapes\n      all_tree_labellings\n      all_trees\n```\n\n\n(sec_python_api_trees_comparing)=\n\n#### Comparing trees\n\n```{eval-rst}\n.. autosummary::\n  Tree.kc_distance\n  Tree.rf_distance\n```\n\n(sec_python_api_trees_balance)=\n\n#### Balance/imbalance indices\n\n```{eval-rst}\n.. autosummary::\n  Tree.colless_index\n  Tree.sackin_index\n  Tree.b1_index\n  Tree.b2_index\n```\n\n(sec_python_api_trees_sites_mutations)=\n\n#### Sites and mutations\n\n```{eval-rst}\n.. 
autosummary::\n  Tree.sites\n  Tree.num_sites\n  Tree.mutations\n  Tree.num_mutations\n  Tree.map_mutations\n```\n\n\n(sec_python_api_trees_moving_to)=\n\n#### Moving to other trees\n\n```{eval-rst}\n.. autosummary::\n\n  Tree.next\n  Tree.prev\n  Tree.first\n  Tree.last\n  Tree.seek\n  Tree.seek_index\n  Tree.clear\n```\n\n#### Display\n\n```{eval-rst}\n.. autosummary::\n\n  Tree.draw_svg\n  Tree.draw_text\n  Tree.__str__\n  Tree._repr_html_\n```\n\n#### Export\n\n```{eval-rst}\n.. autosummary::\n\n  Tree.as_dict_of_dicts\n  Tree.as_newick\n```\n\n\n(sec_tables_api)=\n\n## Tables and Table Collections\n\nThe information required to construct a tree sequence is stored in a collection\nof *tables*, each defining a different aspect of the structure of a tree\nsequence. These tables are described individually in\n{ref}`the next section<sec_tables_api_table>`. However, these are interrelated,\nand so many operations work\non the entire collection of tables, known as a *table collection*.\n\n(sec_tables_api_table_collection)=\n\n### `TableCollection` API\n\nThe {class}`TableCollection` and {class}`TreeSequence` classes are\ndeeply related. A `TreeSequence` instance is based on the information\nencoded in a `TableCollection`. Tree sequences are **immutable**, and\nprovide methods for obtaining trees from the sequence. A `TableCollection`\nis **mutable**, and does not have any methods for obtaining trees.\nThe `TableCollection` class thus allows creation and modification of\ntree sequences (see the {ref}`sec_tables` tutorial).\n\n\n#### General properties\n\nSpecific {ref}`tables<sec_tables_api_table>` in the {class}`TableCollection`\nare accessed using the plural version of their name, so that, for instance, the\nindividual table can be accessed using `table_collection.individuals`. 
A table\ncollection also has other properties containing, for example, the number of bytes taken\nto store it and the top-level metadata associated with the tree sequence as a whole.\n\n```{eval-rst}\nTable access\n    .. autosummary::\n      TableCollection.individuals\n      TableCollection.nodes\n      TableCollection.edges\n      TableCollection.migrations\n      TableCollection.sites\n      TableCollection.mutations\n      TableCollection.populations\n      TableCollection.provenances\n\nOther properties\n    .. autosummary::\n      TableCollection.file_uuid\n      TableCollection.indexes\n      TableCollection.nbytes\n      TableCollection.table_name_map\n      TableCollection.metadata\n      TableCollection.metadata_bytes\n      TableCollection.metadata_schema\n      TableCollection.sequence_length\n      TableCollection.time_units\n```\n\n\n(sec_tables_api_transformation)=\n\n#### Transformation\n\nThese methods act in-place to transform the contents of a {class}`TableCollection`,\neither by modifying the underlying tables (removing, editing, or adding to them) or\nby adjusting the table collection so that it meets the\n{ref}`sec_valid_tree_sequence_requirements`.\n\n\n(sec_tables_api_modification)=\n\n##### Modification\n\nThese methods modify the data stored in a {class}`TableCollection`. They also have\n{ref}`equivalent TreeSequence versions<sec_python_api_tree_sequences_modification>`\n(unlike the methods described below those do *not* operate in place, but rather act in\na functional way, returning a new tree sequence while leaving the original unchanged).\n\n```{eval-rst}\n.. 
autosummary::\n  TableCollection.clear\n  TableCollection.simplify\n  TableCollection.subset\n  TableCollection.delete_intervals\n  TableCollection.keep_intervals\n  TableCollection.delete_sites\n  TableCollection.trim\n  TableCollection.shift\n  TableCollection.union\n  TableCollection.delete_older\n```\n\n(sec_tables_api_creating_valid_tree_sequence)=\n\n##### Creating a valid tree sequence\n\nThese methods can be used to help reorganise or rationalise the\n{class}`TableCollection` so that it is in the form\n{ref}`required<sec_valid_tree_sequence_requirements>` for\nit to be {meth}`converted<TableCollection.tree_sequence>`\ninto a {class}`TreeSequence`. This may require sorting the tables,\nensuring they are logically consistent, and adding {ref}`sec_table_indexes`.\n\n:::{note}\nThese methods are not guaranteed to make valid a {class}`TableCollection` which is\nlogically inconsistent, for example if multiple edges have the same child at a\ngiven position on the genome or if non-existent node IDs are referenced.\n:::\n\n```{eval-rst}\nSorting\n    .. autosummary::\n      TableCollection.sort\n      TableCollection.sort_individuals\n      TableCollection.canonicalise\n\nLogical consistency\n    .. autosummary::\n      TableCollection.compute_mutation_parents\n      TableCollection.compute_mutation_times\n      TableCollection.deduplicate_sites\n\nIndexing\n    .. autosummary::\n      TableCollection.has_index\n      TableCollection.build_index\n      TableCollection.drop_index\n```\n\n#### Miscellaneous methods\n\n```{eval-rst}\n.. autosummary::\n  TableCollection.copy\n  TableCollection.equals\n  TableCollection.link_ancestors\n```\n\n#### Export\n```{eval-rst}\n.. autosummary::\n  TableCollection.tree_sequence\n  TableCollection.dump\n```\n\n(sec_tables_api_table)=\n\n### Table APIs\n\nHere we outline the table classes and the common methods and variables available for\neach. 
For a description and definition of each table's meaning\nand use, see {ref}`the table definitions <sec_table_definitions>`.\n\n```{eval-rst}\n.. autosummary::\n\n  IndividualTable\n  NodeTable\n  EdgeTable\n  MigrationTable\n  SiteTable\n  MutationTable\n  PopulationTable\n  ProvenanceTable\n```\n\n(sec_tables_api_accessing_table_data)=\n\n#### Accessing table data\n\nThe tables API provides an efficient way of working\nwith and interchanging {ref}`tree sequence data <sec_data_model>`. Each table class\n(e.g., {class}`NodeTable`, {class}`EdgeTable`, {class}`SiteTable`) has a specific set\nof columns with fixed types, and a set of methods for setting and getting the data\nin these columns. The number of rows in the table `t` is given by `len(t)`.\n\n```{code-cell} ipython3\nimport tskit\nt = tskit.EdgeTable()\nt.add_row(left=0, right=1, parent=10, child=11)\nt.add_row(left=1, right=2, parent=9, child=11)\nprint(\"The table contains\", len(t), \"rows\")\nprint(t)\n```\n\nEach table supports accessing the data either by row or column. To access the data in\na *column*, we can use standard attribute access which will\nreturn a copy of the column data as a numpy array:\n\n```{code-cell} ipython3\nt.left\n```\n\n```{code-cell} ipython3\nt.parent\n```\n\nTo access the data in a *row*, say row number `j` in table `t`, simply use `t[j]`:\n\n```{code-cell} ipython3\nt[0]\n```\n\nThis also works as expected with negative `j`, counting rows from the end of the table:\n\n```{code-cell} ipython3\nt[-1]\n```\n\nThe returned row has attributes allowing contents to be accessed by name, e.g.\n`site_table[0].position`, `site_table[0].ancestral_state`, `site_table[0].metadata`\netc.:\n\n```{code-cell} ipython3\nt[-1].right\n```\n\nRow attributes cannot be modified directly. Instead, the `replace` method of a row\nobject can be used to create a new row with one or more changed column\nvalues, which can then be used to replace the original. 
For example:\n\n```{code-cell} ipython3\nt[-1] = t[-1].replace(child=4, right=3)\nprint(t)\n```\n\nTables also support the {mod}`pickle` protocol, and so can be easily serialised and\ndeserialised. This can be useful, for example, when performing parallel computations\nusing the {mod}`multiprocessing` module (however, pickling will not be as efficient\nas storing tables in the native {ref}`format <sec_tree_sequence_file_format>`).\n\n```{code-cell} ipython3\nimport pickle\nserialised = pickle.dumps(t)\nt2 = pickle.loads(serialised)\nprint(t2)\n```\n\nTables support the equality operator `==` based on the data\nheld in the columns:\n\n```{code-cell} ipython3\nt == t2\n```\n\n```{code-cell} ipython3\nt is t2\n```\n\n```{code-cell} ipython3\nt2.add_row(0, 1, 2, 3)\nprint(t2)\nt == t2\n```\n\n:::{todo}\nMove some or all of these examples into a suitable alternative chapter.\n:::\n\n\n(sec_tables_api_text_columns)=\n\n##### Text columns\n\nAs described in the {ref}`sec_encoding_ragged_columns`, working with\nvariable length columns is somewhat more involved. Columns\nencoding text data store the **encoded bytes** of the flattened\nstrings, and the offsets into this column in two separate\narrays.\n\nConsider the following example:\n\n```{code-cell} ipython3\nt = tskit.SiteTable()\nt.add_row(0, \"A\")\nt.add_row(1, \"BB\")\nt.add_row(2, \"\")\nt.add_row(3, \"CCC\")\nprint(t)\nprint(t[0])\nprint(t[1])\nprint(t[2])\nprint(t[3])\n```\n\nHere we create a {class}`SiteTable` and add four rows, each with a different\n`ancestral_state`. We can then access this information from each\nrow in a straightforward manner. 
Working with columns of text data\nis a little trickier, however:\n\n```{code-cell} ipython3\nprint(t.ancestral_state)\nprint(t.ancestral_state_offset)\n```\n\n```{code-cell} ipython3\ntskit.unpack_strings(t.ancestral_state, t.ancestral_state_offset)\n```\n\nHere, the `ancestral_state` array is the UTF8 encoded bytes of the flattened\nstrings, and the `ancestral_state_offset` is the offset into this array\nfor each row. The {func}`tskit.unpack_strings` function, however, is a convenient\nway to recover the original strings from this encoding. We can also use the\n{func}`tskit.pack_strings` function to insert data using this approach:\n\n```{code-cell} ipython3\na, off = tskit.pack_strings([\"0\", \"12\", \"\"])\nt.set_columns(position=[0, 1, 2], ancestral_state=a, ancestral_state_offset=off)\nprint(t)\n```\n\nWhen inserting many rows with standard infinite sites mutations (i.e.,\nancestral state is \"0\"), it is more efficient to construct the\nnumpy arrays directly than to create a list of strings and use\n{func}`pack_strings`. 
When doing this, it is important to note that\nit is the **encoded** byte values that are stored; by default, we\nuse UTF8 (which corresponds to ASCII for simple printable characters):\n\n```{code-cell} ipython3\nimport numpy as np\nt_s = tskit.SiteTable()\nm = 10\na = ord(\"0\") + np.zeros(m, dtype=np.int8)\noff = np.arange(m + 1, dtype=np.uint32)\nt_s.set_columns(position=np.arange(m), ancestral_state=a, ancestral_state_offset=off)\nprint(t_s)\nprint(\"ancestral state data\", t_s.ancestral_state)\nprint(\"ancestral state offsets\", t_s.ancestral_state_offset)\n```\n\n\nIn the mutation table, the derived state of each mutation can be handled similarly:\n\n```{code-cell} ipython3\nt_m = tskit.MutationTable()\nsite = np.arange(m, dtype=np.int32)\nd, off = tskit.pack_strings([\"1\"] * m)\nnode = np.zeros(m, dtype=np.int32)\nt_m.set_columns(site=site, node=node, derived_state=d, derived_state_offset=off)\nprint(t_m)\n```\n\n:::{todo}\nMove some or all of these examples into a suitable alternative chapter.\n:::\n\n\n(sec_tables_api_binary_columns)=\n\n##### Binary columns\n\nColumns storing binary data take the same approach as\n{ref}`sec_tables_api_text_columns` to encoding\n{ref}`variable length data <sec_encoding_ragged_columns>`.\nThe difference between the two is that only raw {class}`bytes` values are accepted: no\ncharacter encoding or decoding is done on the data. Consider the following example\nwhere a table has no `metadata_schema` such that arbitrary bytes can be stored and\nno automatic encoding or decoding of objects is performed by the Python API and we can\nstore and retrieve raw `bytes` (see {ref}`sec_metadata` for details).\n\nBelow, we add two rows to a {class}`NodeTable`, with different\n{ref}`metadata <sec_metadata_definition>`. 
The first row contains a simple\nbyte string, and the second contains a Python dictionary serialised using\n{mod}`pickle`.\n\n```{code-cell} ipython3\nt = tskit.NodeTable()\nt.add_row(metadata=b\"these are raw bytes\")\nt.add_row(metadata=pickle.dumps({\"x\": 1.1}))\nprint(t)\n```\n\nNote that the pickled dictionary is encoded in 24 bytes containing unprintable\ncharacters. It appears to be unrelated to the original contents, because the binary\ndata is [base64 encoded](https://en.wikipedia.org/wiki/Base64) to ensure that it is\nprint-safe (and doesn't break your terminal). (See the\n{ref}`sec_metadata_definition` section for more information on the\nuse of base64 encoding.)\n\nWe can access the metadata in a row (e.g., `t[0].metadata`) which returns a Python\nbytes object containing precisely the bytes that were inserted.\n\n```{code-cell} ipython3\nprint(t[0].metadata)\nprint(t[1].metadata)\n```\n\nThe metadata containing the pickled dictionary can be unpickled using\n{func}`pickle.loads`:\n\n```{code-cell} ipython3\nprint(pickle.loads(t[1].metadata))\n```\n\nAs previously, the `replace` method can be used to change the metadata,\nby overwriting an existing row with an updated one:\n\n```{code-cell} ipython3\nt[0] = t[0].replace(metadata=b\"different raw bytes\")\nprint(t)\n```\n\nFinally, when we print the `metadata` column, we see the raw byte values\nencoded as signed integers. As for {ref}`sec_tables_api_text_columns`,\nthe `metadata_offset` column encodes the offsets into this array. So, we\nsee that the first metadata value is 19 bytes long and the second is 24.\n\n```{code-cell} ipython3\nprint(t.metadata)\nprint(t.metadata_offset)\n```\n\nThe {func}`tskit.pack_bytes` and {func}`tskit.unpack_bytes` functions are\nalso useful for encoding data in these columns.\n\n:::{todo}\nMove some or all of these examples into a suitable alternative chapter.\n:::\n\n\n\n#### Table functions\n\n```{eval-rst}\n.. 
autosummary::\n\n  parse_nodes\n  parse_edges\n  parse_sites\n  parse_mutations\n  parse_individuals\n  parse_populations\n  parse_migrations\n  pack_strings\n  unpack_strings\n  pack_bytes\n  unpack_bytes\n```\n\n\n(sec_python_api_metadata)=\n\n## Metadata API\n\nThe `metadata` module provides validation, encoding and decoding of metadata\nusing a schema. See {ref}`sec_metadata`, {ref}`sec_metadata_api_overview` and\n{ref}`sec_tutorial_metadata`.\n\n```{eval-rst}\n.. autosummary::\n  MetadataSchema\n  register_metadata_codec\n```\n\n:::{seealso}\nRefer to the top level metadata-related properties of TreeSequences and TableCollections,\nsuch as {attr}`TreeSequence.metadata` and {attr}`TreeSequence.metadata_schema`. Also the\nmetadata fields of\n{ref}`objects accessed<sec_python_api_tree_sequences_obtaining_other_objects>` through\nthe {class}`TreeSequence` API.\n:::\n\n\n(sec_python_api_provenance)=\n\n## Provenance\n\nWe provide some preliminary support for validating JSON documents against the\n{ref}`provenance schema <sec_provenance>`. Programmatic access to provenance\ninformation is planned for future versions.\n\n\n```{eval-rst}\n.. autosummary::\n  validate_provenance\n```\n\n(sec_utility_api)=\n\n## Utility functions\n\nMiscellaneous top-level utility functions.\n\n```{eval-rst}\n.. autosummary::\n  is_unknown_time\n  random_nucleotides\n```\n\n\n(sec_python_api_reference)=\n\n## Reference documentation\n\n(sec_python_api_constants)=\n\n### Constants\n\nThe following constants are used throughout the `tskit` API.\n\n```{eval-rst}\n.. automodule:: tskit\n   :members:\n```\n\n(sec_python_api_exceptions)=\n\n### Exceptions\n\n```{eval-rst}\n.. autoexception:: DuplicatePositionsError\n.. autoexception:: MetadataEncodingError\n.. autoexception:: MetadataSchemaValidationError\n.. autoexception:: MetadataValidationError\n.. autoexception:: ProvenanceValidationError\n```\n\n(sec_python_api_functions)=\n\n### Top-level functions\n\n```{eval-rst}\n.. 
autofunction:: all_trees\n.. autofunction:: all_tree_shapes\n.. autofunction:: all_tree_labellings\n.. autofunction:: is_unknown_time\n.. autofunction:: load\n.. autofunction:: load_text\n.. autofunction:: pack_bytes\n.. autofunction:: pack_strings\n.. autofunction:: parse_edges\n.. autofunction:: parse_individuals\n.. autofunction:: parse_mutations\n.. autofunction:: parse_nodes\n.. autofunction:: parse_populations\n.. autofunction:: parse_migrations\n.. autofunction:: parse_sites\n.. autofunction:: random_nucleotides\n.. autofunction:: register_metadata_codec\n.. autofunction:: validate_provenance\n.. autofunction:: unpack_bytes\n.. autofunction:: unpack_strings\n\n```\n\n### Tree and tree sequence classes\n\n#### The {class}`Tree` class\n\nAlso see the {ref}`sec_python_api_trees` summary.\n\n```{eval-rst}\n.. autoclass:: Tree()\n    :members:\n    :special-members: __str__\n    :private-members: _repr_html_\n```\n\n#### The {class}`TreeSequence` class\n\nAlso see the {ref}`sec_python_api_tree_sequences` summary.\n\n```{eval-rst}\n.. autoclass:: TreeSequence()\n    :members:\n    :special-members: __str__\n    :private-members: _repr_html_\n```\n\n### Simple container classes\n\n#### The {class}`Individual` class\n\n```{eval-rst}\n.. autoclass:: Individual()\n    :members:\n```\n\n#### The {class}`Node` class\n\n```{eval-rst}\n.. autoclass:: Node()\n    :members:\n```\n\n#### The {class}`Edge` class\n\n```{eval-rst}\n.. autoclass:: Edge()\n    :members:\n```\n\n#### The {class}`Site` class\n\n```{eval-rst}\n.. autoclass:: Site()\n    :members:\n```\n\n#### The {class}`Mutation` class\n\n```{eval-rst}\n.. autoclass:: Mutation()\n    :members:\n```\n\n#### The {class}`Variant` class\n\n```{eval-rst}\n.. autoclass:: Variant()\n    :members:\n```\n\n#### The {class}`Migration` class\n\n```{eval-rst}\n.. autoclass:: Migration()\n    :members:\n```\n\n#### The {class}`Population` class\n\n```{eval-rst}\n.. 
autoclass:: Population()\n    :members:\n```\n\n#### The {class}`Provenance` class\n\n```{eval-rst}\n.. autoclass:: Provenance()\n    :members:\n```\n\n#### The {class}`Interval` class\n\n```{eval-rst}\n.. autoclass:: Interval()\n    :members:\n```\n\n#### The {class}`Rank` class\n\n```{eval-rst}\n.. autoclass:: Rank()\n    :members:\n```\n\n### TableCollection and Table classes\n\n#### The {class}`TableCollection` class\n\nAlso see the {ref}`sec_tables_api_table_collection` summary.\n\n```{eval-rst}\n.. autoclass:: TableCollection\n    :inherited-members:\n    :members:\n```\n\n% Overriding the default signatures for the tables here as they will be\n% confusing to most users.\n\n\n#### {class}`IndividualTable` classes\n\n```{eval-rst}\n.. autoclass:: IndividualTable()\n    :members:\n    :inherited-members:\n    :special-members: __getitem__\n```\n\n##### Associated row class\n\nA row returned from an {class}`IndividualTable` is an instance of the following\nbasic class, where each attribute matches an identically named attribute in the\n{class}`Individual` class.\n\n```{eval-rst}\n.. autoclass:: IndividualTableRow()\n    :members:\n    :inherited-members:\n```\n\n\n#### {class}`NodeTable` classes\n\n```{eval-rst}\n.. autoclass:: NodeTable()\n    :members:\n    :inherited-members:\n    :special-members: __getitem__\n```\n\n##### Associated row class\n\nA row returned from a {class}`NodeTable` is an instance of the following\nbasic class, where each attribute matches an identically named attribute in the\n{class}`Node` class.\n\n```{eval-rst}\n.. autoclass:: NodeTableRow()\n    :members:\n    :inherited-members:\n```\n\n\n#### {class}`EdgeTable` classes\n\n```{eval-rst}\n.. 
autoclass:: EdgeTable()\n    :members:\n    :inherited-members:\n    :special-members: __getitem__\n```\n\n##### Associated row class\n\nA row returned from an {class}`EdgeTable` is an instance of the following\nbasic class, where each attribute matches an identically named attribute in the\n{class}`Edge` class.\n\n```{eval-rst}\n.. autoclass:: EdgeTableRow()\n    :members:\n    :inherited-members:\n```\n\n\n#### {class}`MigrationTable` classes\n\n```{eval-rst}\n.. autoclass:: MigrationTable()\n    :members:\n    :inherited-members:\n    :special-members: __getitem__\n```\n\n##### Associated row class\n\nA row returned from a {class}`MigrationTable` is an instance of the following\nbasic class, where each attribute matches an identically named attribute in the\n{class}`Migration` class.\n\n```{eval-rst}\n.. autoclass:: MigrationTableRow()\n    :members:\n    :inherited-members:\n```\n\n\n#### {class}`SiteTable` classes\n\n```{eval-rst}\n.. autoclass:: SiteTable()\n    :members:\n    :inherited-members:\n    :special-members: __getitem__\n```\n\n##### Associated row class\n\nA row returned from a {class}`SiteTable` is an instance of the following\nbasic class, where each attribute matches an identically named attribute in the\n{class}`Site` class.\n\n```{eval-rst}\n.. autoclass:: SiteTableRow()\n    :members:\n    :inherited-members:\n```\n\n\n#### {class}`MutationTable` classes\n\n```{eval-rst}\n.. autoclass:: MutationTable()\n    :members:\n    :inherited-members:\n    :special-members: __getitem__\n```\n\n##### Associated row class\n\nA row returned from a {class}`MutationTable` is an instance of the following\nbasic class, where each attribute matches an identically named attribute in the\n{class}`Mutation` class.\n\n```{eval-rst}\n.. autoclass:: MutationTableRow()\n    :members:\n    :inherited-members:\n```\n\n\n#### {class}`PopulationTable` classes\n\n```{eval-rst}\n.. 
autoclass:: PopulationTable()\n    :members:\n    :inherited-members:\n    :special-members: __getitem__\n```\n\n##### Associated row class\n\nA row returned from a {class}`PopulationTable` is an instance of the following\nbasic class, where each attribute matches an identically named attribute in the\n{class}`Population` class.\n\n```{eval-rst}\n.. autoclass:: PopulationTableRow()\n    :members:\n    :inherited-members:\n```\n\n\n#### {class}`ProvenanceTable` classes\n\nAlso see the {ref}`sec_provenance` and\n{ref}`provenance API methods<sec_python_api_provenance>`.\n\n```{eval-rst}\n.. autoclass:: ProvenanceTable()\n    :members:\n    :inherited-members:\n```\n\n##### Associated row class\n\nA row returned from a {class}`ProvenanceTable` is an instance of the following\nbasic class, where each attribute matches an identically named attribute in the\n{class}`Provenance` class.\n\n```{eval-rst}\n.. autoclass:: ProvenanceTableRow()\n    :members:\n    :inherited-members:\n```\n\n(sec_python_api_reference_identity)=\n\n### Identity classes\n\nThe classes documented in this section are associated with summarising\nidentity relationships between pairs of samples. See the {ref}`sec_identity`\nsection for more details and examples.\n\n#### The {class}`IdentitySegments` class\n\n```{eval-rst}\n.. autoclass:: IdentitySegments()\n    :members:\n```\n\n#### The {class}`IdentitySegmentList` class\n\n```{eval-rst}\n.. autoclass:: IdentitySegmentList()\n    :members:\n```\n\n#### The {class}`IdentitySegment` class\n\n```{eval-rst}\n.. autoclass:: IdentitySegment()\n    :members:\n```\n\n### Miscellaneous classes\n\n#### The {class}`ReferenceSequence` class\n\n```{eval-rst}\n.. todo:: Add a top-level summary section that we can link to from here.\n```\n\n```{eval-rst}\n.. autoclass:: ReferenceSequence()\n    :members:\n    :inherited-members:\n```\n\n#### The {class}`MetadataSchema` class\n\nAlso see the {ref}`sec_python_api_metadata` summary.\n\n```{eval-rst}\n.. 
autoclass:: MetadataSchema\n    :members:\n    :inherited-members:\n```\n\n#### The {class}`TableMetadataSchemas` class\n\n```{eval-rst}\n.. autoclass:: TableMetadataSchemas\n    :members:\n```\n\n#### The {class}`TopologyCounter` class\n\n```{eval-rst}\n.. autoclass:: TopologyCounter\n```\n\n#### The {class}`LdCalculator` class\n\n```{eval-rst}\n.. autoclass:: LdCalculator\n    :members:\n```\n\n#### The {class}`TableCollectionIndexes` class\n\n```{eval-rst}\n.. autoclass:: TableCollectionIndexes\n    :members:\n```\n\n#### The {class}`SVGString` class\n\n```{eval-rst}\n.. autoclass:: SVGString\n    :members:\n    :private-members: _repr_svg_\n```\n\n#### The {class}`PCAResult` class\n```{eval-rst}\n.. autoclass:: PCAResult\n\t:members:\n```\n"
  },
  {
    "path": "docs/quickstart.md",
    "content": "---\njupytext:\n  text_representation:\n    extension: .md\n    format_name: myst\n    format_version: 0.12\n    jupytext_version: 1.9.1\nkernelspec:\n  display_name: Python 3\n  language: python\n  name: python3\n---\n\n:::{currentmodule} tskit\n:::\n\n```{code-cell} ipython3\n:tags: [remove-cell]\nimport msprime\n\ndef basic_sim():\n    ts = msprime.sim_ancestry(\n        3,\n        population_size=1000,\n        model=\"dtwf\",\n        sequence_length=1e4,\n        recombination_rate=1e-7,\n        random_seed=665)\n    ts = msprime.sim_mutations(ts, rate=2e-7, random_seed=123)\n    ts.dump(\"data/basic_tree_seq.trees\")\n    \ndef create_notebook_data():\n    basic_sim()\n\n# create_notebook_data()  # uncomment to recreate the tree seqs used in this notebook\n```\n\n# Quickstart\n\nOur {ref}`tutorials site<tutorials:sec_intro>` has a more extensive tutorial on\n{ref}`sec_tskit_getting_started`. Below we just give a quick flavour of the\n{ref}`sec_python_api` (note that\nAPIs in {ref}`C <sec_c_api>` and Rust exist, and it is also possible to\n{ref}`interface to the Python library in R <tutorials:sec_tskit_r>`).\n\n## Basic properties\n\nAny tree sequence, such as one generated by {ref}`msprime <msprime:sec_intro>`, can be\nloaded, and a summary table printed. This example uses a small tree sequence, but the\n`tskit` library scales effectively to ones encoding millions of genomes and variable\nsites. \n\n```{code-cell}\nimport tskit\n\nts = tskit.load(\"data/basic_tree_seq.trees\")  # Or generate using e.g. msprime.sim_ancestry()\nts  # In a Jupyter notebook this displays a summary table. Otherwise use print(ts)\n```\n\n## Individual trees\n\nYou can get e.g. 
the first tree in the tree sequence and analyse it.\n\n```{code-cell}\nfirst_tree = ts.first()\nprint(\"Total branch length in first tree is\", first_tree.total_branch_length, ts.time_units)\nprint(\"The first of\", ts.num_trees, \"trees is plotted below\")\nfirst_tree.draw_svg(y_axis=True)  # plot the tree: only useful for small trees\n```\n\n## Extracting genetic data\n\nA tree sequence provides an extremely compact way to\n{ref}`store genetic variation data <tutorials:sec_what_is_dna_data>`. The trees allow\nthis data to be {meth}`decoded <Variant.decode>` at each site:\n\n```{code-cell}\nfor variant in ts.variants():\n    print(\n        \"Variable site\", variant.site.id,\n        \"at genome position\", variant.site.position,\n        \":\", [variant.alleles[g] for g in variant.genotypes],\n    )\n```\n\n## Analysis\n\nTree sequences enable {ref}`efficient analysis <tutorials:sec_what_is_analysis>`\nof genetic variation using a comprehensive range of built-in {ref}`sec_stats`:\n\n```{code-cell}\ngenetic_diversity = ts.diversity()\nprint(\"Av. genetic diversity across the genome is\", genetic_diversity)\n\nbranch_diversity = ts.diversity(mode=\"branch\")\nprint(\"Av. genealogical dist. between pairs of tips is\", branch_diversity,  ts.time_units)\n```\n\n## Plotting the whole tree sequence\n\nThis can give you a visual feel for small genealogies:\n\n```{code-cell}\nts.draw_svg(\n    size=(800, 300),\n    y_axis=True,\n    mutation_labels={m.id: m.derived_state for m in ts.mutations()},\n)\n```\n\n## Underlying data structures\n\nThe data that defines a tree sequence is stored in a set of tables. 
These tables\ncan be viewed, and copies of the tables can be edited to create a new tree sequence.\n\n```{code-cell}\n# The sites table is one of several tables that underlie a tree sequence\nts.tables.sites\n```\n\nThe rest of this documentation gives a comprehensive description of the entire `tskit`\nlibrary, including {ref}`descriptions and definitions <sec_table_definitions>` of all\nthe tables.\n\n"
  },
  {
    "path": "docs/stats.md",
    "content": "---\njupytext:\n  text_representation:\n    extension: .md\n    format_name: myst\n    format_version: 0.12\n    jupytext_version: 1.9.1\nkernelspec:\n  display_name: Python 3\n  language: python\n  name: python3\n---\n\n```{currentmodule} tskit\n```\n\n\n(sec_stats)=\n\n# Statistics\n\nThe `tskit` library provides a large number of functions for calculating population\ngenetic statistics from tree sequences. Statistics can reflect either the distribution\nof genetic variation or the underlying trees that generate it; the\n[duality](https://doi.org/10.1534/genetics.120.303253) between\nmutations and branch lengths on trees means that statistics based on genetic variation\noften have a corresponding version based on branch lengths in trees in a tree sequence.\n\nNote that `tskit` provides a unified interface for computing so-called\n\"single site statistics\" that are summaries across sites in windows of the genome, as\nwell as a standard method for specifying \"multi-way\" statistics that are calculated\nover many combinations of sets of samples simultaneously.\n\nPlease see the {ref}`tutorial <tutorials:sec_tutorial_stats>` for examples of the\nstatistics API in use.\n\n:::{warning}\nSite statistics defined here currently treat {ref}`missing data<sec_data_model_missing_data>`\nin the same way as in earlier versions of tskit: when computing site-based\nstatistics, isolated samples without mutations directly above them are treated\nas carrying the ancestral allele rather than as missing. Future versions of\ntskit may expose options to treat missing data differently in statistics; for\nnow, if you need explicit control over how missing data is handled you should\nuse the low-level genotype/variant APIs (for example with\n``isolated_as_missing=True``) together with your own summary logic.\n:::\n\n(sec_stats_available)=\n\n## Available statistics\n\nHere are the statistics that can be computed using `tskit`,\ngrouped by basic classification and type. 
Single-site statistics are ones that are\naverages across sites in windows of the genome, returning numpy arrays whose\ndimensions are determined by the parameters (see {ref}`sec_stats_output_dimensions`).\n\n{ref}`sec_stats_sample_sets_one_way` are defined over a single sample set, \nwhereas {ref}`sec_stats_sample_sets_multi_way` compare 2 or more sets of samples.\n\nSome of the methods below benefit from a little extra discussion, provided in the\n{ref}`sec_stats_notes` section at the end of this chapter: if so, a link to the note\nappears beside the listed method.\n\n* Single site\n    * One-way\n        * {meth}`~TreeSequence.allele_frequency_spectrum` (see {ref}`notes<sec_stats_notes_afs>`)\n        * {meth}`~TreeSequence.diversity`\n        * {meth}`~TreeSequence.segregating_sites`\n        * {meth}`~TreeSequence.trait_covariance`\n          {meth}`~TreeSequence.trait_correlation`\n          {meth}`~TreeSequence.trait_linear_model`\n          (see {ref}`sec_stats_notes_trait`)\n        * {meth}`~TreeSequence.Tajimas_D` (see {ref}`notes<sec_stats_notes_derived>`)\n    * Multi-way\n        * {meth}`~TreeSequence.divergence`\n        * {meth}`~TreeSequence.genetic_relatedness`\n          {meth}`~TreeSequence.genetic_relatedness_weighted`\n          {meth}`~TreeSequence.genetic_relatedness_vector`\n          {meth}`~TreeSequence.genetic_relatedness_matrix`\n        * {meth}`~TreeSequence.f4`\n          {meth}`~TreeSequence.f3`\n          {meth}`~TreeSequence.f2`\n          (see {ref}`sec_stats_notes_f`)\n        * {meth}`~TreeSequence.Y3`\n          {meth}`~TreeSequence.Y2`\n          (see {ref}`sec_stats_notes_y`)\n        * {meth}`~TreeSequence.genealogical_nearest_neighbours` (see {ref}`sec_stats_notes_gnn`)\n        * {meth}`~TreeSequence.Fst` (see {ref}`sec_stats_notes_derived`)\n* Multi site\n    * {meth}`~LdCalculator` (note this is soon to be deprecated)\n\n:::{note}\nThere is a general framework provided for calculating additional single site\nstatistics 
(see the {ref}`sec_stats_general_api` section). However, the\npre-implemented statistics in the table above will be faster than rederiving\nthem using the general framework directly, so the versions above should be preferred.\n:::\n\n\n(sec_stats_examples)=\n\n### Quick examples\n\n```{code-cell} ipython3\n:\"tags\": [\"hide-input\"]\nfrom IPython.display import Markdown\nimport msprime\nimport numpy as np\n\ndemography = msprime.Demography()\ndemography.add_population(name=\"A\", initial_size=10_000)\ndemography.add_population(name=\"B\", initial_size=10_000)\ndemography.set_symmetric_migration_rate([\"A\", \"B\"], 0.001)\nts = msprime.sim_ancestry(\n    samples={\"A\": 2, \"B\": 2},\n    sequence_length=1000,\n    demography=demography,\n    recombination_rate=2e-8,\n    random_seed=12)\nts = msprime.sim_mutations(ts, rate=2e-8, random_seed=12)\nMarkdown(\n    f\"These examples use a tree sequence of {ts.num_samples} samples \"\n    f\"in {ts.num_populations} populations, \"\n    f\"with a sequence length of {int(ts.sequence_length)}. 
\"\n    f\"There are {ts.num_trees} trees and \"\n    f\"{ts.num_sites} variable sites in the tree sequence.\"\n)\n```\n\n#### Basic calling convention\n\n```{code-cell} ipython3\npi = ts.diversity()\nprint(pi) # Genetic diversity within the sample set\n```\n\n#### Restrict to {ref}`sample sets<sec_stats_sample_sets>`\n\n```{code-cell} ipython3\npi_0 = ts.diversity(sample_sets=ts.samples(population=0))\nprint(pi_0)  # Genetic diversity within population 0\n```\n\n#### Summarise in genomic {ref}`windows<sec_stats_windows>`\n\n```{code-cell} ipython3\npi_window = ts.diversity(sample_sets=ts.samples(population=1), windows=[0, 400,  600, 1000])\nprint(pi_window)  # Genetic diversity within population 1 in three windows along the genome\n```\n\n#### Compare {ref}`between<sec_stats_sample_sets_multi_way>` sample sets\n\n```{code-cell} ipython3\ndxy = ts.divergence(sample_sets=[ts.samples(population=0), ts.samples(population=1)])\nprint(dxy)  # Av number of differences per bp between samples in population 0 and 1\n```\n\n#### Change the {ref}`mode<sec_stats_mode>`\n\n```{code-cell} ipython3\nbl = ts.divergence(\n    mode=\"branch\",  # Use branch lengths rather than genetic differences\n    sample_sets=[ts.samples(population=0), ts.samples(population=1)],\n)\nprint(bl)  # Av branch length separating samples in population 0 and 1\n```\n\n(sec_stats_single_site)=\n\n## Single site statistics\n\n\n(sec_stats_interface)=\n\n### Interface\n\nTskit offers a powerful and flexible interface for computing population genetic\nstatistics. Consequently, the interface is a little complicated and there are a\nlot of options. However, we provide sensible defaults for these options and\n`tskit` should do what you want in most cases. 
There are several major options\nshared by many statistics, which we describe in detail in the following subsections:\n\n{ref}`sec_stats_mode`\n: What are we summarising information about?\n\n{ref}`sec_stats_windows`\n: What section(s) of the genome are we interested in?\n\n{ref}`sec_stats_span_normalise`\n: Should the statistic calculated for each window be normalised by the span\n  (i.e. the sequence length) of that window?\n\nThe statistics functions are highly efficient and are based where possible\non numpy arrays. Each of these statistics will return the results as a numpy\narray, and the format of this array will depend on the statistic being\ncomputed (see the {ref}`sec_stats_output_format` section for details).\nA convenient feature of the statistics API is that the dimensions of the\noutput array are defined in a simple and intuitive manner by the\nparameters provided. The {ref}`sec_stats_output_dimensions` section\ndefines the rules that are used.\n\nPlease see the {ref}`tutorial <tutorials:sec_tutorial_stats>` for examples of the\nstatistics API in use.\n\n\n(sec_stats_mode)=\n\n#### Mode\n\nThere are three **modes** of statistic: `site`, `branch`, and `node`,\nthat each summarize aspects of the tree sequence in different but related ways.\nRoughly speaking, these answer the following sorts of question:\n\nsite\n: How many mutations differentiate these two genomes?\n\nbranch\n: How long since these genomes' common ancestor?\n\nnode\n: On how much of the genome is each node an ancestor of only one of these genomes, but not both?\n\nThese three examples can all be answered in the same way with the tree sequence:\nfirst, draw all the paths from one genome to the other through the tree sequence\n(back up to their common ancestor and back down in each marginal tree).\nThen,\n(`site`) count the number of mutations falling on the paths,\n(`branch`) measure the length of the paths, or\n(`node`) count how often the path goes through each node.\nThere is more discussion of 
this correspondence in the paper describing these statistics,\nand precise definitions are given in each statistic.\n\nHere's an example of using the {meth}`~TreeSequence.diversity` statistic to return the\naverage branch length between all pairs of samples:\n\n```{code-cell} ipython3\nts.diversity(mode=\"branch\")\n```\n\nOne important thing to know is that `node` statistics have somewhat different output.\nWhile `site` and `branch` statistics naturally return one number\nfor each portion of the genome (and thus incorporates information about many nodes: see below),\nthe `node` statistics return one number **for each node** in the tree sequence (and for each window).\nThere can be a lot of nodes in the tree sequence, so beware.\n\nAlso remember that in a tree sequence the \"sites\" are usually just the **variant** sites,\ne.g., the sites of the SNPs. Although the tree sequence may in principle have monomorphic\nsites, those produced by simulation usually don't.\n\n\n(sec_stats_sample_sets)=\n\n#### Sample sets and indexes\n\nMany standard population genetics statistics\nare defined with respect to some number of groups of genomes,\nusually called \"populations\".\n(However, it's clear from the correspondence to descriptors of tree shape\nthat the definitions can usefully describe *something*\neven if the groups of samples don't come from \"separate populations\" in some sense.)\nBasically, statistics defined in terms of sample sets can use the frequency of any allele\nin each of the sample sets when computing the statistic.\nFor instance, nucleotide divergence is defined for a *pair* of groups of samples,\nso if you wanted to compute pairwise divergences between some groups of samples,\nyou'd specify these as your `sample_sets`.\nThen, if `p[i]` is the derived allele frequency in sample set `i`,\nunder the hood we (essentially) compute the divergence between sample sets `i` and `j`\nby averaging `p[i] * (1 - p[j]) + (1 - p[i]) * p[j]` across the 
genome.\n\nConcretely, `sample_sets` specifies the IDs of the nodes to compute statistics of.\nImportantly, these nodes must be {ref}`samples <sec_data_model_definitions_sample>`.\n\nHere's an example of calculating the average\n{meth}`genetic diversity<TreeSequence.diversity>` within a specific population:\n\n```{code-cell} ipython3\nts.diversity(sample_sets=ts.samples(population=0))\n```\n\n\nSo, what if you\nhave samples from each of 10 populations,\nand want to compute **all** forty-five pairwise divergences?\nYou could call `divergence` forty-five times, but this would be tedious\nand also inefficient, because the allele frequencies for one population\nget used in computing many of those values.\nSo, statistics that take a `sample_sets` argument also take an `indexes` argument,\nwhich for a statistic that operates on `k` sample sets will be a list of `k`-tuples.\nIf `indexes` is a length `n` list of `k`-tuples,\nthen the output will have `n` columns,\nand if `indexes[j]` is a tuple `(i0, ..., ik)`,\nthen the `j`-th column will contain values of the statistic computed on\n`(sample_sets[i0], sample_sets[i1], ..., sample_sets[ik])`.\n\n\nHow multiple statistics are handled differs slightly between statistics\nthat operate on single sample sets and multiple sample sets.\n\n\n(sec_stats_sample_sets_one_way)=\n\n##### One-way methods\n\nOne-way statistics such as {meth}`TreeSequence.diversity` are defined over a single\nsample set. For these methods, `sample_sets` is interpreted in the following way:\n\n- If it is a single list of node IDs (e.g., `sample_sets=[0, 1, 2]`), this is\n  interpreted as running the calculation over one sample set and we remove\n  the last dimension in the result array as described in the\n  {ref}`sec_stats_output_dimensions` section.\n\n- If it is `None` (the default), this is equivalent to `sample_sets=ts.samples()`,\n  and we therefore compute the statistic over all samples in the tree sequence. 
**Note\n  that we also drop the last dimension of the result array in this case**.\n\n- If it is a list of lists of samples we return an array for each window in the output,\n  which contains the value of the statistic separately for each of `sample_sets`\n  in the order they are given.\n\n\n(sec_stats_sample_sets_multi_way)=\n\n##### Multi-way methods\n\nMulti-way statistics such as {meth}`TreeSequence.divergence` are defined over\n`k` sample sets. In this case, `sample_sets` must be a list of lists of sample IDs,\nand there is no default. For example, this finds the average\n{meth}`genetic divergence<TreeSequence.divergence>` between samples in populations\n0 and 1:\n\n```{code-cell} ipython3\nts.divergence(\n    sample_sets=[\n        ts.samples(population=0),\n        ts.samples(population=1),\n    ]\n)\n```\n\n\nThe `indexes` parameter is interpreted in the following way:\n\n- If it is a single `k`-tuple, this is interpreted as computing a single\n  statistic selecting the specified sample sets and we remove the last dimension\n  in the result array as described in the {ref}`sec_stats_output_dimensions` section.\n\n- If it is `None` and `sample_sets` contains exactly `k` sample sets,\n  this is equivalent to `indexes=range(k)`. 
**Note\n  that we also drop the last dimension of the result array in this case**.\n\n- If it is a list of `k`-tuples (each consisting of integers\n  between `0` and `len(sample_sets) - 1`) of length `n` we\n  compute `n` statistics based on these selections of sample sets.\n\n\n(sec_stats_windows)=\n\n#### Windows\n\nEach statistic has an argument, `windows`,\nwhich defines a collection of contiguous windows spanning the genome.\n`windows` should be a list of `n+1` increasing numbers beginning with 0\nand ending with the `sequence_length`.\nThe statistic will be computed separately in each of the `n` windows,\nand the `k`-th row of the output will report the values of the statistic\nin the `k`-th window, i.e., from (and including) `windows[k]` to\n(but not including) `windows[k+1]`. For example, this calculates\n{meth}`Tajima's D<TreeSequence.Tajimas_D>` in four evenly spaced windows along the\ngenome:\n\n```{code-cell} ipython3\nnum_windows = 4\nts.Tajimas_D(windows=np.linspace(0, ts.sequence_length, num_windows + 1))\n```\n\nMost windowed statistics by default return **averages** within each of the windows,\nso the values are comparable between windows, even of different spans.\n(However, shorter windows may be noisier.)\nSuppose for instance that you compute some statistic with `windows = [0, a, b]`\nfor some valid positions `0 < a < b`,\nand get an output array `S` with two rows.\nThen, computing the same statistic with `windows = [0, b]`\nwould be equivalent to averaging the rows of `S`,\nobtaining `((a - 0) * S[0] + (b - a) * S[1]) / (b - 0)`.\n\nThere are some shortcuts to other useful options:\n\n`windows = None`\n   This is the default and computes statistics in a single window over the whole\n   sequence. As the first dimension of the returned array would then contain only a single\n   entry, we drop this dimension as described in the\n   {ref}`output dimensions <sec_stats_output_dimensions>` section. 
**Note:** if you\n   really do want to have an array with a single value as the result, please use\n   `windows = [0.0, ts.sequence_length]`.\n\n`windows = \"trees\"`\n   This says that you want statistics computed separately on the portion of the genome\n   spanned by each tree, so is equivalent to passing `windows = ts.breakpoints()`.\n   (Beware: there can be a lot of trees!)\n\n`windows = \"sites\"`\n   This says to output one set of values for **each site**,\n   and is equivalent to passing `windows = [s.position for s in ts.sites()] + [ts.sequence_length]`.\n   This will return one statistic for each site (beware!);\n   since the windows are all different sizes you probably want to also pass\n   `span_normalise=False` (see below).\n\n\n(sec_stats_span_normalise)=\n\n#### Span normalise\n\nIn addition to windowing there is an option, `span_normalise` (which defaults to `True`),\nthat controls whether the value for each window is divided by the span of that window.\nAll the primary statistics defined here are *sums* across locations in the genome:\nsomething is computed for each position, and these values are added up across all positions in each window.\nWhether the total span of the window is then taken into account is determined by the option `span_normalise`:\nif it is `True` (the default), the sum for each window is converted into an *average*,\nby dividing by the window's *span* (i.e. 
the length of the genome that it covers).\nOtherwise, the sum itself is returned.\nThe default is `span_normalise=True`,\nbecause this makes the values comparable across windows of different sizes.\nTo make this more concrete: {meth}`pairwise sequence divergence <.TreeSequence.divergence>`\nbetween two samples with `mode=\"site\"` is the density of sites that differ between the samples;\nthis is computed for each window by counting up the number of sites\nat which the two differ, and dividing by the total span of the window.\nIf we wanted the number of sites at which the two differed in each window,\nwe'd calculate divergence with `span_normalise=False`.\n\nFollowing on from above, suppose we computed the statistic `S` with\n`windows = [0, a, b]` and `span_normalise=True`,\nand then computed `T` in just the same way except with `span_normalise=False`.\nThen `S[0]` would be equal to `T[0] / a` and `S[1] = T[1] / (b - a)`.\nFurthermore, the value obtained with `windows = [0, b]` and `span_normalise=False` would be equal to `T[0] + T[1]`.\nHowever, you probably usually want the (default) normalised version:\ndon't get unnormalised values unless you're sure that's what you want.\nThe exception is when computing a site statistic with `windows = \"sites\"`:\nin this case, we compute a statistic from the pattern of genotypes at each site,\nand normalising would divide these statistics by the distance to the previous variant site\n(probably not what you want to do).\n\n:::{note}\nThe resulting values are scaled \"per unit of sequence length\" - for instance, pairwise\nsequence divergence is measured in \"differences per unit of sequence length\". 
Functions\nsuch as {func}`msprime:msprime.sim_mutations` will by default add mutations in discrete\ncoordinates, usually interpreted as base pairs, in which\ncase span normalised statistics are in units of \"per base pair\".\n:::\n\n(sec_stats_output_format)=\n\n#### Output format\n\nEach of the statistics methods returns a `numpy` ndarray.\nSuppose that the output is named `out`.\nIf `windows` has been specified, the number of rows of the output is equal to the\nnumber of windows, so that `out.shape[0]` is equal to `len(windows) - 1`\nand `out[i]` is an array of statistics describing the portion of the tree sequence\nfrom `windows[i]` to `windows[i + 1]` (including the left but not the right endpoint).\nWhat is returned within each window depends on the {ref}`mode <sec_stats_mode>`:\n\n`mode=\"site\"` or `mode=\"branch\"`\n   The output is a two-dimensional array,\n   with columns corresponding to the different statistics computed: `out[i, j]` is the `j`-th statistic\n   in the `i`-th window.\n\n`mode=\"node\"`\n   The output is a three-dimensional array,\n   with the second dimension corresponding to node id.\n   In other words, `out.shape[1]` is equal to `ts.num_nodes`,\n   and `out[i,j]` is an array of statistics computed for node `j` on the `i`-th window.\n\nThe final dimension of the arrays in other cases is specified by the method.\n\nNote, however, that empty dimensions can optionally be dropped,\nas described in the {ref}`sec_stats_output_dimensions` section.\n\nA note about **default values** and **division by zero**:\nUnder the hood, statistics computation fills in zeros everywhere, then updates these\n(since statistics are all **additive**, this makes sense).\nBut now suppose that you've got a statistic that returns `nan`\n(\"not a number\") sometimes, like if you're taking the diversity of a sample set with only `n=1` sample,\nwhich involves dividing by `n * (n - 1)`.\nUsually, you'll just get `nan` everywhere that the division by zero happens.\nBut 
there are a couple of caveats.\nFor `site` statistics, any windows without any sites in them never get touched,\nso they will have a value of 0.\nFor `branch` statistics, any windows with **no branches** will similarly remain 0.\nThat said, you should **not** rely on the specific behavior of whether `0` or `nan` is returned\nfor \"empty\" cases like these: it is subject to change.\n\n\n(sec_stats_output_dimensions)=\n\n#### Output dimensions\n\nIn the general case, tskit outputs two dimensional (or three dimensional, in the case of node\nstats) numpy arrays, as described in the {ref}`sec_stats_output_format` section.\nThe first dimension corresponds to the window along the genome\nsuch that for some result array `x`, `x[j]` contains information about the jth window.\nThe last dimension corresponds to the statistics being computed, so that `x[j, k]` is the\nvalue of the kth statistic in the jth window (in the two dimensional case). This is\na powerful and general interface, but in many cases we will not use this full generality\nand the extra dimensions in the numpy arrays are inconvenient.\n\nTskit optionally removes empty dimensions from the output arrays following a few\nsimple rules.\n\n1. If `windows` is None we are computing over the single window covering the\n   full sequence. We therefore drop the first dimension of the array.\n\n2. In one-way stats, if the `sample_sets` argument is a 1D array we interpret\n   this as specifying a single sample set (and therefore a single statistic), and\n   drop the last dimension of the output array. If `sample_sets` is None\n   (the default), we use the sample set `ts.samples()`, invoking\n   this rule (we therefore drop the last dimension by default).\n\n3. In k-way stats, if the `indexes` argument is a 1D array of length k\n   we interpret this as specifying a single statistic and drop the last\n   dimension of the array. 
If `indexes` is None (the default) and\n   there are k sample sets, we compute the statistic from these sample sets\n   and drop the last dimension.\n\n4. If, after dropping these dimensions, the resulting array has dimension 0, we return a numpy\n   scalar (instead of an array of dimension 0).\n\nRules 2 and 3 can be summarised by \"the dimensions of the input determine\nthe dimensions of the output\". Note that dropping these dimensions is\n**optional**: it is always possible to keep the full dimensions of the\noutput arrays.\n\nPlease see the {ref}`tutorial <sec_tutorial_stats>` for examples of the\nvarious output dimension options.\n\n\n(sec_stats_general_api)=\n\n### General API\n\nThe methods {meth}`TreeSequence.general_stat` and {meth}`TreeSequence.sample_count_stat`\nprovide access to the general-purpose algorithm for computing statistics.\nHere is a bit more discussion of how to use these.\n\n\n(sec_stats_polarisation)=\n\n#### Polarisation\n\nMany statistics calculated from genome sequence treat all alleles on equal footing,\nas one must without knowledge of the ancestral state and sequence of mutations that produced the data.\nSeparating out the *ancestral* allele (e.g., as inferred using an outgroup)\nis known as *polarisation*.\nFor instance, in the allele frequency spectrum, a site with alleles at 20% and 80% frequency\nis no different than another whose alleles are at 80% and 20%,\nunless we know in each case which allele is ancestral,\nand so while the unpolarised allele frequency spectrum gives the distribution of frequencies of *all* alleles,\nthe *polarised* allele frequency spectrum gives the distribution of frequencies of only *derived* alleles.\n\nThis concept is extended to more general statistics as follows.\nFor site statistics, summary functions are applied to the total weight or number of samples\nassociated with each allele; but if polarised, then the ancestral allele is left out of this sum.\nFor branch or node statistics, summary functions are applied 
to the total weight or number of samples\nbelow and above each branch or node; if polarised, then only the weight below is used.\n\n(sec_stats_strictness)=\n\n### Strictness, and which branches count?\n\nMost statistics are not affected by invariant sites,\nand hence do not depend on any part of the tree that is not ancestral to any of the sample sets.\nHowever, some statistics are different: for instance, \ngiven a pair of samples, {meth}`TreeSequence.genetic_relatedness`\nwith `centre=False` and `polarised=False`\nadds up the total number of alleles (or total area of branches) that is\neither ancestral to both samples *or ancestral to neither*.\nSo, it depends on what else is in the tree sequence.\n(For this reason, we don't recommend actually *using* this combination of options for genetic\nrelatedness; the default for that method is `polarised=True`.)\n\nIn terms of the summary function {math}`f(x)`, \"not affected by invariant sites\" translates to\n{math}`f(0) = f(n) = 0`, where {math}`n` is the vector of sample set sizes.\nBy default, {meth}`TreeSequence.general_stat` checks if the summary function satisfies this condition,\nand throws an error if not; this check can be disabled by setting `strict=False`.\n\n\n(sec_stats_summary_functions)=\n\n#### Summary functions\n\nFor convenience, here are the summary functions used for many of the statistics.\nBelow, {math}`x` denotes the number of samples in a sample set below a node,\n{math}`n` denotes the total size of a sample set, {math}`p = x / n`,\nand boolean expressions (e.g., {math}`(x > 0)`) are interpreted as 0/1.\n\n`diversity`\n: {math}`f(x) = \\frac{x (n - x)}{n (n-1)}`\n\n  For an unpolarized statistic with biallelic loci, this calculates\n  {math}`2 p (1-p)`.\n\n`segregating_sites`\n: {math}`f(x) =  (x > 0) (1 - x / n)`\n\n  (Note: this works because if {math}`\\sum_{i=1}^k p_i = 1` then {math}`\\sum_{i=1}^k (1-p_i) = k-1`.)\n\n`Y1`\n: {math}`f(x) = \\frac{x (n - x) (n - x - 1)}{n (n-1) (n-2)}`\n\n`divergence`\n: 
{math}`f(x_1, x_2) = \\frac{x_1 (n_2 - x_2)}{n_1 n_2}`,\n\n  unless the two indices are the same, when the diversity function is used.\n\n  For an unpolarised statistic with biallelic loci, this calculates\n  {math}`p_1 (1-p_2) + (1 - p_1) p_2`.\n\n`genetic_relatedness, centre=True`\n: {math}`f(x_i, x_j) = (x_i / n_i - m)(x_j / n_j - m)`,\n\n  where {math}`m = \\frac{1}{n}\\sum_{k=1}^n x_k` with {math}`n` the total number\n  of sample sets.\n  For a polarised statistic (the default) with biallelic loci, this calculates\n  {math}`(p_1 - \\bar{p}) (p_2 - \\bar{p})`, where {math}`\\bar{p}` is the average\n  derived allele frequency across sample sets.\n\n`genetic_relatedness, centre=False`\n: {math}`f(x_i, x_j) = (x_i / n_i) (x_j / n_j)`.\n\n  For a polarised statistic (the default) with biallelic loci, this calculates\n  {math}`p_1 p_2`.\n\n`genetic_relatedness_weighted, centre=True`\n: {math}`f(w_i, w_j, x_i, x_j) = (x_i - w_i p) (x_j - w_j p)`,\n\n  where {math}`p` is the proportion of all samples below the focal node,\n  and {math}`w_j = \\sum_{k=1}^n W_{kj}` is the sum of the weights in the {math}`j`th column of the weight matrix.\n\n`genetic_relatedness_weighted, centre=False`\n: {math}`f(w_i, w_j, x_i, x_j) = x_i x_j`.\n\n\n`Y2`\n: {math}`f(x_1, x_2) = \\frac{x_1 (n_2 - x_2) (n_2 - x_2 - 1)}{n_1 n_2 (n_2 - 1)}`\n\n`f2`\n: {math}`f(x_1, x_2) = \\frac{x_1 (x_1 - 1) (n_2 - x_2) (n_2 - x_2 - 1)}{n_1 (n_1 - 1) n_2 (n_2 - 1)} - \\frac{x_1 (n_1 - x_1) (n_2 - x_2) x_2}{n_1 (n_1 - 1) n_2 (n_2 - 1)}`\n\n  For an unpolarized statistic with biallelic loci, this calculates\n  {math}`((p_1 - p_2)^2 - (p_1 (1-p_2)^2 + (1-p_1) p_2^2)/n_1 - (p_1^2 (1-p_2) + (1-p_1)^2 p_2)/n_2`\n  {math}`+ (p_1 p_2 + (1-p_1)(1-p_2))/ n_1 n_2)(1 + \\frac{1}{n_1 - 1})(1 + \\frac{1}{n_2 - 1})`,\n  which is the unbiased estimator for {math}`(p_1 - p_2)^2` from a finite sample.\n\n`Y3`\n: {math}`f(x_1, x_2, x_3) = \\frac{x_1 (n_2 - x_2) (n_3 - x_3)}{n_1 n_2 n_3}`\n\n`f3`\n: {math}`f(x_1, x_2, x_3) = 
\\frac{x_1 (x_1 - 1) (n_2 - x_2) (n_3 - x_3)}{n_1 (n_1 - 1) n_2 n_3} - \\frac{x_1 (n_1 - x_1) (n_2 - x_2) x_3}{n_1 (n_1 - 1) n_2 n_3}`\n\n  For an unpolarized statistic with biallelic loci, this calculates\n  {math}`((p_1 - p_2)(p_1 - p_3) - p_1 (1-p_2)(1-p_3)/n_1 - (1-p_1) p_2 p_3/n_1)(1 + \\frac{1}{n_1 - 1})`,\n  which is the unbiased estimator for {math}`(p_1 - p_2)(p_1 - p_3)` from a finite sample.\n\n`f4`\n: {math}`f(x_1, x_2, x_3, x_4) = \\frac{x_1 x_3 (n_2 - x_2) (n_4 - x_4)}{n_1 n_2 n_3 n_4} - \\frac{x_1 x_4 (n_2 - x_2) (n_3 - x_3)}{n_1 n_2 n_3 n_4}`\n\n  For an unpolarized statistic with biallelic loci, this calculates\n  {math}`(p_1 - p_2)(p_3 - p_4)`.\n\n`trait_covariance`\n: {math}`f(w) = \\frac{w^2}{2 (n-1)^2}`,\n\n  where {math}`w` is the sum of all trait values of the samples below the node.\n\n`trait_correlation`\n: {math}`f(w, x) = \\frac{w^2}{2 x (1 - x/n) (n - 1)}`,\n\n  where as before {math}`x` is the total number of samples below the node,\n  and {math}`n` is the total number of samples.\n\n`trait_linear_model`\n: {math}`f(w, z, x) = \\frac{1}{2}\\left( \\frac{w - \\sum_{j=1}^k z_j v_j}{x - \\sum_{j=1}^k z_j^2} \\right)^2`,\n\n  where {math}`w` and {math}`x` are as before,\n  {math}`z_j` is the sum of the j-th normalised covariate values below the node,\n  and {math}`v_j` is the covariance of the trait with the j-th covariate.\n\n\n(sec_stats_multi_site)=\n\n## Multi site statistics\n\n(sec_stats_two_locus)=\n\n### Two-locus statistics\n\nThe {meth}`~TreeSequence.ld_matrix` method provides an interface to\na collection of two-locus statistics with predefined summary functions (see\n{ref}`sec_stats_two_locus_summary_functions`).\nThe LD matrix method differs from other\nstatistics methods in that it provides a unified API with an argument to\nspecify different two-locus summaries of the data. 
It otherwise behaves\nsimilarly to most other functions with respect to `sample_sets` and `indexes`.\n\nTwo-locus statistics can be computed using two {ref}`modes <sec_stats_mode>`,\neither `site` or `branch`, and these should be interpreted in the same way as\nthese modes in the single-site statistics. That is, the `site` mode computes LD\nover observed alleles at pairs of sites, while the `branch` mode computes\nexpected LD conditioned on pairs of trees.\n\n(sec_stats_two_locus_site)=\n\n#### Site mode\n\nThe `\"site\"` mode computes two-locus statistics summarized over alleles between\nall pairs of specified sites. The default behavior, leaving `sites`\nunspecified, will compute a matrix for all pairs of sites, with one row and\ncolumn for each site in the tree sequence (i.e., an {math}`n \\times n` matrix\nwhere {math}`n` is the number of sites in the tree sequence). We can also\nrestrict the output to a subset of sites, either by specifying a single vector\nof site indexes for both rows and columns or a pair of vectors for the row\nsites and column sites separately.\n\nThe following computes a matrix of the {math}`r^2` measure of linkage\ndisequilibrium (LD) computed pairwise between the first 4 sites in the tree\nsequence among all samples. The `sites` must be given as a list of lists, and\nwith a single list of sites specified, we obtain a symmetric square matrix.\n\n```{code-cell} ipython3\nld = ts.ld_matrix(sites=[[0, 1, 2, 3]])\nprint(ld)\n```\n\nIf a list of two lists of site indexes is provided, these specify the row and\ncolumn sites. For instance, here we specify 2 rows and 3 columns, which\ncomputes a subset of the matrix shown above.\n\n```{code-cell} ipython3\nld = ts.ld_matrix(sites=[[1, 2], [1, 2, 3]])\nprint(ld)\n```\n\n#### Computational details\n\nBecause we allow for two-locus statistics to be computed for multi-allelic\ndata, we need to be able to combine statistical results from each pair of\nalleles into one summary for a pair of sites. 
This does not affect biallelic\ndata (and so this section can be skipped on first reading).\nWe use two implementations for\ncombining results from multiple alleles: `hap_weighted` and `total_weighted`.\nThese are statistic-specific and not chosen by the user, with choices motivated\nby [Zhao (2007)](https://doi.org/10.1017/S0016672307008634).\n\nBriefly, consider a pair of sites with {math}`n` alleles at the first locus and\n{math}`m` alleles at the second. (Whether this includes the ancestral allele\ndepends on whether the statistic is polarised.) Write {math}`f_{ij}` as the\nstatistic computed for focal alleles {math}`A_i` and {math}`B_j`. Then the\nweighting schemes are defined as:\n\n- `hap_weighted`: {math}`\\sum_{i=1}^{n}\\sum_{j=1}^{m}p(A_{i}B_{j})f_{ij}`,\n  where {math}`p(A_{i}B_{j})` is the frequency of haplotype {math}`A_{i}B_{j}`.\n  This method was first introduced in [Karlin\n  (1981)](https://doi.org/10.1111/j.1469-1809.1981.tb00308.x) and reviewed in\n  [Zhao (2007)](https://doi.org/10.1017/S0016672307008634).\n\n- `total_weighted`: {math}`\\frac{1}{n m}\\sum_{i=1}^{n}\\sum_{j=1}^{m}f_{ij}`.\n  This method assigns equal weight to each of the possible pairs of focal\n  alleles at the two sites, taking the arithmetic mean of statistics over\n  focal haplotypes.\n\nOut of all of the available summary functions, only {math}`r^2` uses\n`hap_weighted` normalisation, with the remainder using uniform weighting\n(`total_weighted`).\n\nWithin this framework, statistics may be either polarised or unpolarised. For\nstatistics that are polarised, we compute statistic values for pairs of derived\nalleles. (For this purpose, the \"derived\" alleles at a site are all alleles\nexcept that stored as the ``ancestral_state`` for the site.) Unpolarised\nstatistics compute statistics over all pairs of alleles, derived and ancestral.\nIn either case, the result is averaged over these values, using one of the\nweighting schemes (described below for each statistic). 
The option for\npolarisation is not exposed to the user, and we list which statistics are\npolarised below.\n\n(sec_stats_two_locus_branch)=\n\n#### Branch mode\n\nThe `\"branch\"` mode computes expected two-locus statistics between pairs of\ntrees, conditioned on the marginal topologies and branch lengths of those\ntrees. The trees for which we compute statistics are specified by positions,\nand for a pair of positions we consider all possible haplotypes that could be\ngenerated by a single mutation occurring on each of the two trees.\n\nFor two trees, one with {math}`n` branches and the other with {math}`m`\nbranches, there are {math}`nm` possible pairs of branches that may carry the\npair of mutations. For each pair, we compute the two-locus statistic, and then\nsum these values weighted by the product of the two branch lengths. Given that\nthe two mutations occur, this accounts for the relative probability that the\ntwo mutations fall on any pair of branches.\n\nIn other words, imagine we place two mutations uniformly, one on each tree, and\nthen compute the statistic. The branch mode computes the expected value of the\nstatistic over this process, multiplied by the product of the total branch\nlengths of each tree. This weighting accounts for mutational opportunity, so that\nthe sum of the branch-mode statistic over all positions in a genomic region,\nmultiplied by a mutation rate,  is equal to the expected sum of the two-locus site\nstatistic over all mutations falling in that region under an infinite-sites model.\n\nThe time complexity of this method is quadratic in the number of samples,\ndue to the pairwise comparisons of branches from each pair of trees.\nBy default, this method computes\na symmetric matrix for all pairs of trees, with rows and columns representing\neach tree in the tree sequence. 
Similar to the site method, we can restrict the\noutput to a subset of trees, either by specifying a vector of positions or\na pair of vectors for row and column positions separately. To select a specific\ntree, the specified positions must land in the tree span (`[start, end)`).\n\nIn the following, we compute a matrix of expected {math}`r^2` within and\nbetween the first 4 trees in the tree sequence. The tree breakpoints are\na convenient way to specify those first four trees.\n\n```{code-cell} ipython3\nld = ts.ld_matrix(\n    mode=\"branch\",\n    positions=[ts.breakpoints(as_array=True)[0:4]]\n)\nprint(ld)\n```\n\nWe note that these values are quite large: as described above, the statistic is\nscaled by the product of the total branch lengths of each pair of trees. To\ncompute the expected {math}`r^2` value for a pair of mutations that each land\nuniformly on the pair of trees, we can divide by the product of the total\nbranch lengths:\n\n```{code-cell} ipython3\ntotal_branch_lengths = [tree.total_branch_length for tree in ts.trees()]\nprod_branch_lengths = np.outer(total_branch_lengths, total_branch_lengths)\nprint(ld / prod_branch_lengths[0:4, 0:4])\n```\n\nTo compute the average {math}`r^2` for a uniformly chosen pair of mutations, we also\nweight by tree span:\n\n```{code-cell} ipython3\ntree_spans = np.array([t.span for t in ts.trees()])\ntotal_opportunity = np.sum(tree_spans * total_branch_lengths)\nall_ld = ts.ld_matrix(mode=\"branch\")\nmean_ld = np.sum(all_ld * np.outer(tree_spans, tree_spans)) / total_opportunity ** 2\nprint(\"mean infinite-sites LD:\", mean_ld)\n```\n\nAs with the `\"site\"` mode above, we can specify the row and column trees\nseparately.\n\n```{code-cell} ipython3\nbreakpoints = ts.breakpoints(as_array=True)\nld = ts.ld_matrix(\n    mode=\"branch\",\n    positions=[breakpoints[[0]], breakpoints[0:4]]\n)\nprint(ld)\n```\n\n(sec_stats_two_locus_sample_sets)=\n\n#### Sample Sets\n\nWithout specifying `sample_sets` or `indexes`, the 
`ld_matrix()` method\ncomputes statistics over a single sample set that includes all samples in the\ntree sequence. The API allows for the specification of a subset or multiple\nsubsets of samples, so that a separate LD matrix can be computed for each. If\n`sample_sets` is specified as a single list of samples, then a single LD matrix\nis returned. A list of lists of samples will return a 3D array containing an LD\nmatrix for each list of samples.\n\nSome LD statistics can be computed between sample sets (two-way statistics are\nspecified below), in which case `indexes` must be specified that reference the\nindexes of the `sample_sets`, which must be a list of lists of sample nodes.\nThis results in an LD matrix computed for each list of indexes. The statistics\nare selected in the same way (with the `stat` argument), and these are limited\nto a handful of statistics (see\n{ref}`sec_stats_two_locus_summary_functions_two_way`). The dimension-dropping\nrules for the result follow the rest of the tskit stats API in that a single\nlist or tuple will produce a single two-dimensional matrix, while a list of\nthese will produce a three-dimensional array, with the first dimension of\nlength equal to the length of the list.\n\nFor example, to compute the {math}`r^2` LD matrix over a subset of samples in\nthe tree sequence (such as sample nodes 0 through 7), we would specify the\nsamples as follows:\n\n```{code-cell} ipython3\nts = msprime.sim_ancestry(\n    20,\n    population_size=10000,\n    sequence_length=1000,\n    recombination_rate=2e-8,\n    random_seed=12)\nts = msprime.sim_mutations(ts, rate=2e-8, random_seed=12)\n\nld = ts.ld_matrix(mode=\"site\", sample_sets=range(8))\nprint(ld)\n```\n\nWe would get the following dimensions with the specified\n`sample_sets` and `indexes` arguments.\n\n```\n# one-way\nts.ld_matrix(sample_sets=None) # -> 2 dimensions\nts.ld_matrix(sample_sets=[0, 1, 2, 3]) # -> 2 dimensions\nts.ld_matrix(sample_sets=[[0, 1, 2, 3]]) # -> 3 
dimensions\n# two-way\nts.ld_matrix(sample_sets=[[0, 1, 2, 3], [4, 5, 6, 7]], indexes=(0, 1)) # -> 2 dimensions\nts.ld_matrix(sample_sets=[[0, 1, 2, 3], [4, 5, 6, 7]], indexes=[(0, 1)]) # -> 3 dimensions\n```\n\n#### Why are there `nan` values in the LD matrix?\n\nFor some statistics, it is possible to observe `nan` entries in the LD matrix,\nwhich can be surprising and may numerically impact downstream analyses. A `nan`\nentry occurs if the denominator of a ratio statistic (including {math}`r` and\n{math}`r^2`) is zero, indicating that one or both of the alleles in the pair is\nfixed or absent in the given sample set(s). This can happen for\na number of reasons:\n\n- Some mutation models allow for reversible mutations, so a back mutation at\n  a site can result in a single allele despite multiple mutations in the\n  history of the sample.\n- LD is computed for a subsample of individuals, and some sites are not\n  variable among the sample nodes in the subsample.\n- A mutation exists above the root of the local tree, so that all samples carry\n  the mutation, and one or more sites are not variable.\n\nThe `branch` mode will also return `nan` values for ratio statistics if there\nare branches in either tree on which a mutation would not result in\na polymorphism within a sample set.\n\n:::{warning}\nThis means there are two common situations in which many or all LD values will be `nan`.\nThese are:\n\n1. A branch-mode ratio statistic computed on less than the full set of samples\n    will always be `nan`, since part of the trees are ancestral to none of the samples.\n2. 
A site-mode ratio statistic will be `nan` at any sites at which there are alleles found\n    in the entire set of samples that are not seen in the provided sample set.\n\nThis behavior **may change in the future**,\nbecause possibly more natural behavior not currently implemented\nwould be to ignore the branches/alleles not ancestral\nto any of the provided samples.\n:::\n\n(sec_stats_two_locus_sample_one_way_stats)=\n\n#### One-way Statistics\n\nOne-way statistics are summaries of two loci in a single sample set, using\na triple of haplotype counts {math}`\\{n_{AB}, n_{Ab}, n_{aB}\\}` and the size of\nthe sample set {math}`n`, where the capitalized and lowercase letters in our\nnotation represent alternate alleles.\n\n(sec_stats_two_locus_sample_two_way_stats)=\n\n#### Two-way Statistics\n\nTwo-way statistics are summaries of haplotype counts between two sample sets,\nwhich operate on the three haplotype counts (as in one-way stats, above)\ncomputed from each sample set, indexed by `(i, j)`. These statistics take on\na different meaning from their one-way counterparts. For instance `stat=\"D2\"`\nover a pair of sample sets computes {math}`D_i D_j`, which is the product of\nthe covariance measure of LD within each sample set and is related to the\ncovariance of {math}`D` between sample sets.\n\nOnly a subset of our summary functions are two-way statistics (see\n{ref}`sec_two_locus_summary_functions_two_way`). Note that the unbiased two-way\nstatistics expect non-overlapping sample sets (see [Ragsdale and Gravel\n(2020)](https://doi.org/10.1093/molbev/msz265)), and we do not make any\nassertions about the sample sets and assume that `i` and `j` represent disjoint\nsets of samples (see also the note in {meth}`~TreeSequence.divergence`).\n\n(sec_stats_two_locus_summary_functions)=\n\n#### Summary Functions\n\n(sec_stats_two_locus_summary_functions_one_way)=\n\n##### One-way\n\nThe two-locus summary functions all take haplotype counts and sample set size\nas input. 
Suppose that at the first site there are alleles\n{math}`(a_1, a_2, ...)`, and at the second site there are alleles\n{math}`(b_1, b_2, ...)`. For a pair of focal alleles {math}`a_i` and\n{math}`b_j`, we define two-locus counts\n{math}`(n(a_i,b_j), n(a_i,\\sim b_j), n(\\sim a_i, b_j))`, where\n{math}`n(a_i,b_j)` is the number of two-locus haplotypes in the sample set that\ncarry both alleles {math}`a_i` and {math}`b_j`,\n{math}`n(a_i,\\sim b_j)` is the number that carry the allele {math}`a_i`\nand do not carry the allele {math}`b_j`, and\n{math}`n(\\sim a_i, b_j)` is the number that carry the allele {math}`b_j`\nand do not carry the allele {math}`a_i`. That is,\n{math}`n(\\sim a_i, b_j) = \\sum_{k\\not=i} n(a_k, b_j)`, and\n{math}`n(a_i, \\sim b_j) = \\sum_{l\\not=j} n(a_i, b_l)`.\n\nWe informally refer to focal alleles as {math}`A,B` and the above sets of\nhaplotypes as {math}`(AB, Ab, aB)`, so that {math}`Ab` refers to the set\nof all haplotypes {math}`(a_i, \\sim b_j)` and {math}`aB` refers to\n{math}`(\\sim a_i, b_j)`.\nTheir counts are labeled similarly: {math}`n_{AB} = n(A,B)`,\n{math}`n_{Ab} = n(A, \\sim B)`, and {math}`n_{aB} = n(\\sim A, B)`.\nThen each of our summary functions has the signature\n{math}`f(n_{AB}, n_{Ab}, n_{aB}, n)`, converting to haplotype frequencies\n{math}`\\{p_{AB}, p_{Ab}, p_{aB}\\}` by dividing by the number {math}`n` of\nsamples in the sample set. Then\n{math}`n_{ab} = n - n_{AB} - n_{Ab} - n_{aB}`, {math}`n_A = n_{AB} + n_{Ab}`\nand {math}`n_B = n_{AB} + n_{aB}`, with frequencies {math}`p` found by dividing\nby {math}`n`.\n\nFor polarised statistics, we compute the statistic using all pairs of\nnon-ancestral alleles as focal alleles: so, we do not compute the summary\nfunction with haplotype counts for which the focal alleles are the ancestral\nallele at either of the two loci.\nFor unpolarised statistics, we compute the summary function over all\npairs of alleles. 
Thus, for polarised statistics, the summary function is\ncalled {math}`(n_1-1)\\times(n_2-1)` times, where {math}`n_1` and {math}`n_2`\nare the total number of alleles at the first and second locus, respectively.\nFor unpolarised statistics, the summary function is called {math}`n_1 n_2`\ntimes. The result is then averaged over the results computed for\neach pair of focal alleles, using the specified weighting approach for a\ngiven summary function.\n\n`D`\n: {math}`f(n_{AB}, n_{Ab}, n_{aB}, n) = p_{AB}p_{ab} - p_{Ab}p_{aB} \\, (=p_{AB} - p_A p_B)`\n\n  This statistic is polarised, as the unpolarised result, which averages over\n  allele labelings, is zero. Uses the `total` weighting method.\n\n`D_prime`\n: {math}`f(n_{AB}, n_{Ab}, n_{aB}, n) = \\frac{D}{D_{\\max}}`,\n\n  where {math}`D_{\\max} = \\begin{cases}\n            \\min\\{p_A (1-p_B), (1-p_A) p_B\\} & \\textrm{if }D \\geq 0 \\\\\n            \\min\\{p_A p_B, (1-p_A) (1-p_B)\\} & \\textrm{if }D<0\n        \\end{cases}`\n\n  and {math}`D` is defined above. Polarised, `total` weighted.\n\n`D2`\n: {math}`f(n_{AB}, n_{Ab}, n_{aB}, n) = D^2`\n\n  and {math}`D` is defined above. Unpolarised, `total` weighted.\n\n`Dz`\n:  {math}`f(n_{AB}, n_{Ab}, n_{aB}, n) = D (1 - 2 p_A) (1 - 2 p_B)`,\n\n  where {math}`D` is defined above. Unpolarised, `total` weighted.\n\n`pi2`\n: {math}`f(n_{AB}, n_{Ab}, n_{aB}, n) = p_A (1-p_A) p_B (1-p_B)`\n\n  Unpolarised, `total` weighted.\n\n`r`\n: {math}`f(n_{AB}, n_{Ab}, n_{aB}, n) = \\frac{D}{\\sqrt{p_A (1-p_A) p_B (1-p_B)}}`,\n\n  where {math}`D` is defined above. Polarised, `total` weighted.\n\n`r2`\n: {math}`f(n_{AB}, n_{Ab}, n_{aB}, n) = \\frac{D^{2}}{p_A (1-p_A) p_B (1-p_B)}`,\n\n  where {math}`D` is defined above. Unpolarised, `haplotype` weighted.\n\nUnbiased two-locus statistics from the Hill-Robertson (1968) system are\ncomputed from haplotype counts. Definitions of these unbiased estimators can\nbe found in [Ragsdale and Gravel\n(2020)](https://doi.org/10.1093/molbev/msz265). 
They require at least 4 samples\nto be valid and are specified as `stat=\"D2_unbiased\"`, `\"Dz_unbiased\"`, or\n`\"pi2_unbiased\"`.\n\n(sec_two_locus_summary_functions_two_way)=\n\n(sec_stats_two_locus_summary_functions_two_way)=\n\n##### Two-way\n\nTwo-way statistics are indexed by sample sets {math}`i, j` and compute values\nusing haplotype counts within pairs of sample sets.\n\n`D2`\n: {math}`f(n_{AB}, n_{Ab}, n_{aB}, n) = D_i D_j`,\n\n  where {math}`D_i` denotes {math}`D` computed within sample set {math}`i`,\n  and {math}`D` is defined above. Unpolarised, `total` weighted.\n\n`r2`\n: {math}`f(n_{AB}, n_{Ab}, n_{aB}, n) = r_i r_j`,\n\n  where {math}`r_i` denotes {math}`r` computed within sample set {math}`i`,\n  and {math}`r` is defined above. Unpolarised, `haplotype` weighted.\n\nAnd `D2_unbiased`, which can be found in [Ragsdale and Gravel\n(2020)](https://doi.org/10.1093/molbev/msz265).\n\n\n(sec_stats_notes)=\n\n## Notes\n\n\n(sec_stats_notes_afs)=\n\n### Allele frequency spectrum\n\nMost single site statistics are based on the summaries of the allele frequency spectra\n(AFS). 
The `tskit` AFS interface includes windowed and joint spectra,\nusing the same general pattern as other statistics,\nbut some of the details about how it is defined,\nespecially in the presence of multiple alleles per site, need to be explained.\nIf all sites are biallelic, then the result is just as you'd expect:\nsee the method documentation at {meth}`~TreeSequence.allele_frequency_spectrum` \nfor the description.\nNote that with `mode=\"site\"`, we really do tabulate *allele* counts:\nif more than one mutation on different parts of the tree produce the same allele,\nit is the total number with this allele (i.e., inheriting *either* mutation)\nthat goes into the AFS.\nThe AFS with `mode=\"branch\"` is the expected value for the Site AFS\nwith infinite-sites, biallelic mutation, so there is nothing surprising there,\neither.\n\nBut, how do we deal with sites at which there are more than two alleles?\nAt each site, we iterate over the distinct alleles at that site,\nand for each allele, count how many samples in each sample set\nhave inherited that allele.\nFor a concrete example, suppose that we are computing the AFS of a single\nsample set with 10 samples, and are considering a site with three alleles:\n*a*, *b*, and *c*,\nwhich have been inherited by 6, 3, and 1 samples, respectively,\nand that allele *a* is ancestral.\nWhat we do at this site depends on if the AFS is polarised or not.\n\nIf we are computing the *polarised* AFS,\nwe add 1 to each entry of the output corresponding to each allele count\n*except* the ancestral allele.\nIn our example, we'd add 1 to both `AFS[3]` and `AFS[1]`.\nThis means that the sum of all entries of a polarised, site AFS\nshould equal the total number of non-ancestral alleles in the tree sequence\nthat are ancestral to at least one of the samples in the tree sequence\nbut not ancestral to all of them.\nThe reason for this last caveat is that like with most statistics,\nmutations that are not ancestral to *any* samples (not just 
those in the sample sets)\nare not counted (and so don't even enter into `AFS[0]`),\nand similarly for those alleles inherited by *all* samples.\n\nNow, if we are computing the *unpolarised* AFS,\nwe add *one half* to each entry of the *folded* output\ncorresponding to each allele count *including* the ancestral allele.\nWhat does this mean?\nWell, `polarised=False` means that we cannot distinguish between an\nallele count of 6 and an allele count of 4.\nSo, *folding* means that we would add our allele that is seen in 6 samples\nto `AFS[4]` instead of `AFS[6]`.\nSo, in total, we will add 0.5 to each of `AFS[4]`, `AFS[3]`, and `AFS[1]`.\nThis means that the sum of an unpolarised AFS\nwill be equal to the total number of alleles that are inherited\nby any of the samples in the tree sequence, divided by two.\nWhy one-half? Well, suppose that the mutation that produced the *c*\nallele had instead produced an *a* allele,\nso that the site had only two alleles, with counts 7 and 3.\nThen, we would have added 0.5 to `AFS[3]` *twice*.\n\n\n(sec_stats_notes_trait)=\n\n### Trait correlations\n\n{meth}`~TreeSequence.trait_covariance`, {meth}`~TreeSequence.trait_correlation`, and\n{meth}`~TreeSequence.trait_linear_model` compute correlations and covariances of traits\n(i.e., an arbitrary vector) with allelic state, possibly in the context of a multivariate\nlinear model with other covariates (as in GWAS).\n\n\n(sec_stats_notes_f)=\n\n### Patterson's f statistics\n\n{meth}`~TreeSequence.f4`, {meth}`~TreeSequence.f3`, and {meth}`~TreeSequence.f2`\nare the `f` statistics (also called `F` statistics) introduced in\n[Reich et al (2009)](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2842210/).\nSee the documentation (link below) for the definition,\nand [Peter (2016)](https://www.genetics.org/content/202/4/1485) for readable\ndiscussion of their use.\n\n\n(sec_stats_notes_y)=\n\n### Y statistics\n\n{meth}`~TreeSequence.Y3` and {meth}`~TreeSequence.Y2` are the `Y` 
statistics introduced\nby [Ashander et al (2018)](https://www.biorxiv.org/content/10.1101/354530v1)\nas a three-sample intermediate between diversity/divergence (which are\npairwise) and Patterson's f statistics (which are four-way).\n\n\n(sec_stats_notes_derived)=\n\n### Derived statistics\n\nMost statistics have the property that `mode=\"branch\"` and\n`mode=\"site\"` are \"dual\" in the sense that they are equal, on average, under\na high neutral mutation rate. {meth}`~TreeSequence.Fst` and {meth}`~TreeSequence.Tajimas_D`\ndo not have this property (since both are ratios of statistics that do have this property).\n\n\n(sec_stats_notes_gnn)=\n\n### Genealogical nearest neighbours\n\nThe {meth}`~TreeSequence.genealogical_nearest_neighbours` statistic is not based on branch\nlengths, but on topologies. Therefore it currently has a slightly different interface to\nthe other single site statistics. This may be revised in the future.\n"
  },
  {
    "path": "docs/substitutions/linear_traversal_warning.rst",
    "content": ".. warning:: The current implementation of this operation is linear in the number of\n    trees, so may be inefficient for large tree sequences. See\n    `this issue <https://github.com/tskit-dev/tskit/issues/684>`_ for more\n    information.\n"
  },
  {
    "path": "docs/substitutions/table_edit_warning.rst",
    "content": ".. warning:: The numpy arrays returned by table attribute accesses are copies\n    of the underlying data. In particular, this means that editing\n    individual values in the arrays will not change the table data.\n    Instead, you should set entire columns or rows at once\n    (see :ref:`sec_tables_api_accessing_table_data`).\n"
  },
  {
    "path": "docs/substitutions/table_keep_rows_main.rst",
    "content": "Updates this table in-place according to the specified boolean\narray, and returns the resulting mapping from old to new row IDs.\nFor each row ``j``, if ``keep[j]`` is True, that row will be\nretained in the output; otherwise, the row will be deleted.\nRows are retained in their original ordering.\n\nThe returned ``id_map`` is an array of the same length as\nthis table before the operation, such that ``id_map[j] = -1``\n(:data:`tskit.NULL`) if row ``j`` was deleted, and ``id_map[j]``\nis the new ID of that row, otherwise.\n\n.. todo::\n    This needs some examples to link to. See\n    https://github.com/tskit-dev/tskit/issues/2708\n"
  },
  {
    "path": "docs/substitutions/tree_array_warning.rst",
    "content": ".. warning:: This is a high-performance interface which\n    provides zero-copy access to memory used in the C library.\n    As a consequence, the values stored in this array will change as\n    the Tree state is modified as we move along the tree sequence. See the\n    :class:`.Tree` documentation for more details. Therefore, if you want to\n    compare arrays representing different trees along the sequence, you must\n    take **copies** of the arrays.\n"
  },
  {
    "path": "docs/substitutions/virtual_root_array_note.rst",
    "content": ".. note:: The length of these arrays is\n    equal to the number of nodes in the tree sequence plus 1, with the\n    final element corresponding to the tree's :meth:`~.Tree.virtual_root`.\n    Please see the :ref:`tree roots <sec_data_model_tree_roots>` section\n    for more details.\n"
  },
  {
    "path": "docs/topological-analysis.md",
    "content": "---\njupytext:\n  text_representation:\n    extension: .md\n    format_name: myst\n    format_version: 0.12\n    jupytext_version: 1.9.1\nkernelspec:\n  display_name: Python 3\n  language: python\n  name: python3\n---\n\n```{currentmodule} tskit\n```\n\n(sec_topological_analysis)=\n\n# Topological analysis\n\nThe branching structure of a tree is known as the tree *topology*. Dealing with the\ntopological structure of trees is of key importance in dealing with genealogies\nsuch as those encoded in the tree sequence structure.\n\n\n(sec_topological_analysis_traversal)=\n\n## Visiting nodes\n\n\n(sec_topological_analysis_descending)=\n\n### Descendant traversal\n\nA basic thing to want to do is to visit all the nodes in a tree under a focal node\n(i.e. the focal node and all its descendants). This can be done in various\n*traversal orders*. The `tskit` library provides several methods to do this, such\nas {meth}`Tree.nodes` in the {ref}`sec_python_api`. By default, this method\niterates over all the descendant nodes of the root(s) of the tree, and hence\nvisits all the tree's nodes:\n\n```{code-cell} ipython3\nimport tskit\nfrom IPython.display import SVG, display\n\ntree = tskit.Tree.generate_balanced(10, arity=3)\ndisplay(SVG(tree.draw_svg()))\n\nfor order in [\"preorder\", \"inorder\", \"postorder\"]:\n    print(f\"{order}:\\t\", list(tree.nodes(order=order)))\n```\n\nProviding a focal node ID allows us to traverse through that node's descendants.\nFor instance, here we visit node 13 and its descendants:\n\n```{code-cell} ipython3\nprint(\"Nodes under node 13:\", [u for u in tree.nodes(13)])\n```\n\nThe node IDs returned by traversal methods allow us to access node information.\nBelow, for example, we use the {meth}`Tree.num_children` method to find the number of\nchildren (the \"arity\") of every non-leaf node, and take the average. 
Since the\nspecific ordering of descendant nodes is not important in this case, we can leave\nit out (defaulting to preorder traversal, the most efficient order):\n\n```{code-cell} ipython3\nimport numpy as np\nav_arity = np.mean([tree.num_children(u) for u in tree.nodes() if not tree.is_leaf(u)])\nprint(f\"Average arity of internal nodes: {av_arity}\")\n```\n\n:::{note}\nIn `tskit`, a tree can have multiple {ref}`sec_data_model_tree_roots`, each with a\ndescendant topology.  However, for some algorithms, instead of traversing through\nthe descendants of each root of a tree in turn, it can be helpful to start at a\nsingle node and traverse downwards through the entire genealogy. The\n{ref}`virtual root<sec_data_model_tree_virtual_root>` is provided for this purpose.\n:::\n\n#### Array methods\n\nThe {meth}`Tree.nodes` iterator provides a convenient way of looping over descendant\nnode IDs, but it can be more efficient to deal with all the IDs at once, as a\nsingle array of values. This can be combined with\n{ref}`direct memory access<sec_python_api_trees_node_measures_array>` resulting in a\nhigh performance approach. Here, for example, is an equivalent\narray-based method to find the average arity of internal nodes, by counting\nhow many times a node is referenced as a parent:\n\n```{code-cell} ipython3\nparent_id, count = np.unique(tree.parent_array[tree.preorder()], return_counts=True)\nprint(f\"Average arity is {count[parent_id != tskit.NULL].mean()}\")\n```\n\n:::{seealso}\nThe {ref}`sec_analysing_trees` tutorial provides a number of additional examples\nof tree traversal techniques, with different performance characteristics.\n:::\n\n\n(sec_topological_analysis_ascending)=\n\n### Ascending traversal\n\nFor many applications it is useful to be able to traverse upwards from a node or set\nof nodes, such as the leaves. We can do this by iterating over parents. 
Here, for\nexample, we traverse upwards from each of the samples in the tree:\n\n```{code-cell} ipython3\nfor u in tree.samples():\n    path = []\n    v = u\n    while v != tskit.NULL:\n        path.append(v)\n        v = tree.parent(v)\n    print(u, \"->\", path)\n```\n\n:::{todo}\nIndicate that this can be made performant using `numba`, and link out to a tutorial\non high performance methods including the `numba` approach.\n:::\n\n\n(sec_combinatorics)=\n\n## Identifying and counting topologies\n\ntskit uses a combinatorial approach to identify unique topologies of\nrooted, leaf-labelled trees. It provides methods\nfor enumerating all possible tree topologies, as well as converting\nback and forth between a tree and its position, or rank, in the\nenumeration of all possible topologies.\nThese methods do not only apply to binary trees;\nrather, they cover general, rooted trees without unary nodes.\n\n```{list-table} \n* - {meth}`Tree.rank`\n  - Return the rank of this tree.\n* - {meth}`Tree.unrank`\n  - Return a Tree given its rank and a number of leaves.\n* - {func}`tskit.all_trees`\n  - Return a generator over all leaf-labelled trees of n leaves.\n* - {func}`tskit.all_tree_shapes`\n  - Return a generator over all tree shapes of n leaves.\n* - {func}`tskit.all_tree_labellings`\n  - Return a generator over all labellings of the given tree's shape.\n```\n\n:::{note}\nAs the number of nodes increases, the number of different topologies rises\nextremely rapidly (see its entry in the\n[On-Line Encyclopedia of Integer Sequences](https://oeis.org/A000311)). This\ncombinatorial explosion is a major limitation in any analysis that attempts to\nexplore possible topologies. For example, although the {func}`tskit.all_trees`\nfunction above will happily start generating topologies for (say) a tree of 50\nleaves, the total number of possible topologies is over $6 \\times 10^{81}$, which is of\nthe same order as the number of atoms in the observable universe. 
Generating\nall the topologies of a tree with anything much more than 10 tips is likely\nto be impracticable.\n:::\n\n\n(sec_tree_ranks)=\n\n### Interpreting Tree Ranks\n\nTo understand tree ranks we must look at how leaf-labelled tree topologies\nare enumerated. For example, we can use {func}`tskit.all_trees`\nto generate all possible topologies of three leaves:\n\n```{code-cell} ipython3\nimport tskit\nfrom IPython.display import display, SVG\n\nfor t in tskit.all_trees(num_leaves=3):\n    display(SVG(t.draw_svg(node_labels={0: 0, 1: 1, 2: 2}, order=\"tree\", size=(120, 120))))\n```\n\nIn this sequence, there exist two distinct tree shapes and each shape\ncan be labelled in at least one unique way. Given that topologies are\nordered first by their shape and then by their labelling, a tree\ntopology can be uniquely identified by\n\n1. The shape of the tree\n2. The labelling of the tree's shape\n\nWe can refer to the first tree in the above enumeration as the\nfirst labelling of the first shape of trees with three leaves, or tree\n$(0, 0)$. The second tree can be identified as the first labelling\nof the second shape, or $(1, 0)$, and so on.\nThis pair of indexes for the shape and labelling of a tree is referred\nto as the rank of the tree, and can be computed using the\n{meth}`Tree.rank` method.\n\n```{code-cell} ipython3\nranks = [t.rank() for t in tskit.all_trees(num_leaves=3)]\nprint(\"Ranks of 3-leaf trees:\", ranks)\n```\n\n```{note}\nRanks in combinatorics are typically natural numbers. However,\nwe refer to this tuple of shape and label rank as a rank because\nit serves the same purpose of indexing trees in an enumeration.\n```\n\nFor details on how shapes and labellings are ordered, see\n{ref}`sec_enumerating_topologies`.\n\nWe can also reconstruct a leaf-labelled tree given its rank. 
This process\nis known as unranking, and can be performed using the {meth}`Tree.unrank`\nmethod.\n\n```{code-cell} ipython3\nfor rank in [(0, 0), (1, 0), (1, 1), (1, 2)]:\n    t = tskit.Tree.unrank(num_leaves=3, rank=rank)\n    display(SVG(t.draw_svg(node_labels={0: 0, 1: 1, 2: 2}, order=\"tree\", size=(120, 120))))\n```\n\n#### Examples\n\nOne application of tree ranks is to count the different\nleaf-labelled topologies in a tree sequence. Since the ranks\nare just tuples, we can use a Python ``Counter`` to track them.\nHere, we count and unrank the most frequently seen\ntopology in a tree sequence. For brevity, this example assumes\nsamples are synonymous with leaves.\n\n```{code-cell} ipython3\nimport collections\nimport msprime\n# Simulate a tree sequence with 2 diploid individuals (i.e. 4 samples)\nts = msprime.sim_ancestry(2, sequence_length=1e8, recombination_rate=1e-7, random_seed=1)\nrank_counts = collections.Counter(t.rank() for t in ts.trees())\nmost_freq_rank, count = rank_counts.most_common(1)[0]\nmost_freq_topology = tskit.Tree.unrank(ts.num_samples, most_freq_rank)\nprint(\"Most frequent topology\")\ndisplay(SVG(most_freq_topology.draw_svg(node_labels={0: 0, 1: 1, 2: 2, 3: 3})))\n```\n\n(sec_enumerating_topologies)=\n\n### Enumerating Topologies\n\nThis section expands briefly on the approach used to enumerate\ntree topologies that serves as the basis for {meth}`Tree.rank`\nand {meth}`Tree.unrank`.\nTo enumerate all rooted, leaf-labelled tree topologies, we first\nformulate a system of ordering and enumerating tree shapes. Then\nwe define an enumeration of labellings given an arbitrary tree shape.\n\n#### Enumerating Tree Shapes\n\nStarting with $n = 1$, we see that the only shape for a tree\nwith a single leaf is a single root leaf. 
A tree with $n > 1$\nleaves can be obtained by joining at least two trees whose number of\nleaves sum to $n$.\nThis maps very closely to the concept of integer partitions.\nEach tree shape of $n$ leaves can be represented by taking a\nnondecreasing integer partition of $n$ (elements of the partition\nare sorted in nondecreasing order) and recursively partitioning its\nelements. The order in which we select partitions of $n$ is\ndetermined by the efficient\n[rule_asc](http://jeromekelleher.net/generating-integer-partitions.html)\nalgorithm for generating them.\n\nAll tree shapes with four leaves, and the partitions that generate\nthem, are:\n\n```{image} _static/four_leaf_tree_shapes.png\n:alt: All four-leaf tree shapes and their generating partitions\n```\n\nNote that the middle column reflects all tree shapes of three leaves\nin the right subtree!\n\n`*` This excludes the partition $[n]$, since this would create a unary node\nand trees with unary nodes are innumerable (and potentially infinite).\n\n```{note}\nUsing nondecreasing integer partitions enforces a\n*canonical orientation* on the tree shapes, where children under a node are\nordered by the number of leaves below them.\nThis is important because it prevents us from repeating trees that are\ntopologically the same but whose children are ordered differently.\n```\n\n#### Labelling Tree Shapes\n\nTree shapes are useful in and of themselves, but we can use the enumeration\nformulated above to go further and assign labels to the leaves of each shape.\n\nSay we are given a tree $T$ with $n$ leaves, whose left-most\nsubtree, $T_l$, has $k$ leaves. For each of the $n \\choose k$\nways to select labels to assign to $T_l$, we produce a unique labelling\nof $T$. 
This process of choosing labels is repeated for the other\nchildren of $T$ and then recursively for the subtrees.\n\nLooking back to the example from {ref}`sec_tree_ranks`, we can see\nthe second tree shape, where the tree is a strictly bifurcating tree of\nthree leaves, can be labelled in 3 different unique ways.\n\n```{code-cell} ipython3\nsecond_tree = tskit.Tree.unrank(num_leaves=3, rank=(1, 0))\nfor t in tskit.all_tree_labellings(second_tree):\n    display(SVG(t.draw_svg(node_labels={0: 0, 1: 1, 2: 2}, order=\"tree\", size=(120, 120))))\n```\n\nThe order of the tree labellings is a direct result of the way in which\ncombinations of labels are chosen. The implementation in tskit uses a\nstandard lexicographic ordering to choose labels. See how the trees\nare sorted by the order in which the left leaf's label was chosen.\n\n```{note}\nThere is a caveat here regarding symmetry, similar to that of repeating\ntree shapes. Symmetrical trees run the risk of creating redundant labellings\nif all combinations of labels were exhausted. To prevent redundant labellings\nwe impose a *canonical labelling*. In the case of two symmetrical subtrees,\nthe left subtree must receive the minimum label from the label set. Notice\nhow this is the case in the right subtrees above.\n```\n\nThese two enumerations create a complete ordering of topologies where trees are\nordered first by size (number of leaves), then by shape, then by their minimum\nlabel. It is this canonical order that enables efficient ranking and unranking\nof topologies.\n\n"
  },
  {
    "path": "prek.toml",
    "content": "# The prek configuration defining linting requirements. This\n# setup is optimised for long-term stability and determinism,\n# and therefore only uses either \"builtin\" rules or \"local\"\n# rules implementing lint workflows.  We do not use any remote\n# workflow repos.\n\nexclude = { glob = [\"c/subprojects/**\"]}\n\n[[repos]]\nrepo = \"builtin\"\nhooks = [\n    { id = \"check-added-large-files\" },\n    { id = \"check-merge-conflict\" },\n    { id = \"mixed-line-ending\" },\n    { id = \"check-case-conflict\" },\n    { id = \"check-yaml\" },\n    { id = \"check-toml\" },\n]\n\n[[repos]]\nrepo = \"local\"\nhooks = [\n  {\n    id = \"ruff-check\",\n    name = \"ruff check\",\n    language = \"system\",\n    entry = \"uv run --project=python --only-group=lint ruff check --fix --force-exclude\",\n    types = [\"python\"],\n  },\n]\n\n[[repos]]\nrepo = \"local\"\nhooks = [\n  {\n    id = \"ruff-format\",\n    name = \"ruff format\",\n    language = \"system\",\n    entry = \"uv run --project=python --only-group=lint ruff format --force-exclude\",\n    types = [\"python\"],\n  },\n]\n\n[[repos]]\nrepo = \"local\"\nhooks = [\n  {\n    id = \"clang-format\",\n    name = \"clang-format\",\n    language = \"system\",\n    entry = \"uv run --project=python --only-group=lint clang-format -i\",\n    types = [\"c\"],\n    verbose = true,\n  },\n]\n"
  },
  {
    "path": "python/.gitignore",
    "content": "*.pyc\n*.so\n*.egg-info\nbuild\n.*.swp\n.*.swo\n*/.ipynb_checkpoints\n"
  },
  {
    "path": "python/CHANGELOG.rst",
    "content": "--------------------\n[1.0.3] - 2026-XX-XX\n--------------------\n\nIn development\n\n- Add ``json+struct`` metadata codec that allows storing binary data using a struct\n  schema alongside JSON metadata. (:user:`benjeffery`, :pr:`3306`)\n\n**Features**\n\n- Add ``TreeSequence.ld_matrix`` stats method and documentation, for computing\n  two-locus statistics in site and branch mode.\n  (:user:`lkirk`, :user:`apragsdale`, :pr:`3416`)\n- Add `node_labels` parameter to `write_nexus`. (:user:`kaathewisegit`, :pr:`3442`)\n- Document ``TableCollection.load`` (:user:`hyanwong`, :issue:`3406` :pr:`3453`)\n\n**Bugfixes**\n\n- Fix a Y-axis positioning bug in `draw_svg` when a title was provided.\n  (:user:`hyanwong`, :issue:`3451`, :pr:`3452`)\n\n--------------------\n[1.0.2] - 2026-03-06\n--------------------\n\nMaintenance release.\n\n- Require Python >= 3.11\n\n--------------------\n[1.0.1] - 2026-02-06\n--------------------\n\nMaintenance release.\n\n**Bugfixes**\n\n- ``ts.samples(population=...)`` now raises a ``ValueError`` if the population\n  ID is e.g. a population name, rather than silently returning no samples.\n  (:user:`hyanwong`, :pr:`3344`)\n\n**Maintenance**\n\n- Add support for Python 3.14\n\n--------------------\n[1.0.0] - 2025-11-27\n--------------------\n\n**Breaking changes**\n\n- The ``reference_sequence`` argument to ``TreeSequence.alignments`` is now\n  required to be the same length as the tree sequence. 
Previously it was\n  required to be the length of the requested interval.\n  (:user:`benjeffery`, :pr:`3317`)\n\n- ``TreeSequence.tables`` now returns a zero-copy immutable view of the tables.\n  To get a mutable copy, use ``TreeSequence.dump_tables()``.\n  (:user:`benjeffery`, :pr:`3288`, :issue:`760`)\n\n- For a tree sequence to be valid, the mutation parents in the table collection\n  must be correct and consistent with the topology of the tree at each mutation site.\n  ``TableCollection.tree_sequence()`` will raise a ``_tskit.LibraryError`` if this\n  is not the case.\n  (:user:`benjeffery`, :issue:`2729`, :issue:`2732`, :pr:`3212`).\n\n- Drop Python 3.9 support and require Python >= 3.10.\n  (:pr:`3267`, :user:`benjeffery`)\n\n- ``ltrim``, ``rtrim``, ``trim`` and ``shift`` raise an error if they are\n  used on a tree sequence containing a reference sequence.\n  (:user:`hyanwong`, :pr:`3210`, :issue:`2091`)\n\n**Features**\n\n- Add ``tskit.jit.numba.jitwrap`` and ``NumbaTreeSequence`` to allow simplified\n  use and development of Numba-jitted functions with tree sequences. See the\n  `documentation <https://tskit.dev/tskit/docs/stable/numba.html>`_ for details.\n  (:user:`andrewkern`, :pr:`3295`, :issue:`3294`)\n\n- ``TreeSequence.map_to_vcf_model`` now also returns the transformed positions and\n  contig length. (:user:`benjeffery`, :pr:`3174`, :issue:`3173`)\n\n- ``draw_svg()`` methods now associate tree branches with edge IDs.\n  (:user:`hyanwong`, :pr:`3193`, :issue:`557`)\n\n- ``draw_svg()`` methods now allow the y-axis to be placed on the right-hand side\n  using ``y_axis=\"right\"``. (:user:`hyanwong`, :pr:`3201`)\n\n- Add ``contig_id`` and ``isolated_as_missing`` to ``VcfModelMapping``\n  (:user:`benjeffery`, :pr:`3219`, :issue:`3177`).\n\n- Add ``TreeSequence.mutations_edge``, which returns the edge ID for each mutation's\n  edge. 
(:user:`benjeffery`, :pr:`3226`, :issue:`3189`)\n\n- Add ``TreeSequence.sites_ancestral_state``, ``TreeSequence.mutations_derived_state`` and\n  ``TreeSequence.mutations_inherited_state`` properties to return the ancestral state of sites,\n  the derived state of mutations and the inherited state of mutations as NumPy arrays of\n  the new NumPy 2.0 ``StringDType``.\n  (:user:`benjeffery`, :pr:`3228`, :issue:`2632`, :pr:`3276`, :issue:`2631`)\n\n- Tskit now requires NumPy version 2 or later. However, you can still use\n  tskit with NumPy 1.x by building tskit from source with NumPy 1.x using\n  ``pip install tskit --no-binary tskit``. With NumPy 1.x, any use of the new\n  ``StringDType`` properties will result in a ``RuntimeError``. If you try to\n  use another Python module that was compiled against NumPy 1.x with NumPy 2.x\n  you may see the error \"A module that was compiled using NumPy 1.x cannot be\n  run in NumPy 2.0.0 as it may crash.\". If no newer version of the module is\n  available you will have to use the NumPy 1.x build as above.\n\n- Add ``Mutation.inherited_state`` property which returns the inherited state\n  for a single mutation. (:user:`benjeffery`, :pr:`3277`, :issue:`2631`)\n\n- Add ``all_mutations`` and ``all_edges`` options to ``TreeSequence.union``,\n  allowing greater flexibility in \"disjoint union\" situations.\n  (:user:`hyanwong`, :user:`petrelharp`, :issue:`3181`)\n\n- Add ``TreeSequence.divergence_matrix``, which was previously undocumented.\n\n- ``TreeSequence.variants``, ``.genotype_matrix``, ``.haplotypes``, and ``.alignments`` methods\n  now fully support ``isolated_as_missing`` behaviour with internal nodes. ``.alignments`` is\n  also around 10% faster.\n  (:user:`benjeffery`, :pr:`3313`, :pr:`3317`, :issue:`1896`)\n\n\n**Bugfixes**\n\n- In some tables with mutations out-of-order ``TableCollection.sort`` did not re-order\n  the mutations so they formed a valid TreeSequence. 
``TableCollection.sort`` and\n  ``TableCollection.canonicalise`` now sort mutations by site, then time (if known),\n  then the mutation's node's time, then number of descendant mutations\n  (ensuring that parent mutations occur before children), then node, then\n  their original order in the tables. (:user:`benjeffery`, :pr:`3257`, :issue:`3253`)\n\n- Fix bug in ``TreeSequence.genetic_relatedness_vector`` that previously ignored\n  ``span_normalise``: previously, ``span_normalise`` was always set to ``False``;\n  now the default is ``True`` in agreement with other statistics, so the returned\n  values will change. (:user:`petrelharp`, :pr:`3300`, :issue:`3241`)\n\n- Fix bug in ``TreeSequence.pair_coalescence_counts`` when ``span_normalise=True``\n  and a window breakpoint falls within an internal missing interval.\n  (:user:`nspope`, :pr:`3176`, :issue:`3175`)\n\n- Fix metadata schemas that are equal but have different byte representations not\n  being considered equal when using ``TableCollection.assert_equals`` and\n  ``Table.assert_equals``.\n  (:user:`benjeffery`, :pr:`3246`, :issue:`3244`)\n\n- k-way statistics no longer require k sample sets, allowing in particular\n  \"self\" comparisons for ``TreeSequence.genetic_relatedness``. 
This changes the\n  error code returned in some situations.\n  (:user:`andrewkern`, :user:`petrelharp`, :pr:`3235`, :issue:`3055`)\n\n- Fix ``UnboundLocalError`` in ``draw_svg()`` when using numeric ``max_time``\n  values with mutations over roots.\n  (:user:`benjeffery`, :pr:`3274`, :issue:`3273`)\n\n- Prevent iterating over a ``TopologyCounter``.\n  (:user:`benjeffery`, :pr:`3202`, :issue:`1462`)\n\n- Fix ``TreeSequence.concatenate()`` to work with internal samples by using the\n  ``all_mutations`` and ``all_edges`` parameters in ``union()``.\n  (:user:`hyanwong`, :pr:`3283`, :issue:`3181`)\n\n--------------------\n[0.6.4] - 2025-05-21\n--------------------\n\n**Features**\n\n- Add ``TreeSequence.sample_nodes_by_ploidy`` method to return the sample nodes\n  in a tree sequence, grouped by a ploidy value.\n  (:user:`benjeffery`, :pr:`3157`)\n\n- Add ``TreeSequence.individuals_nodes`` attribute to return the nodes\n  associated with each individual as a numpy array.\n  (:user:`benjeffery`, :pr:`3153`)\n\n- Add ``shift`` method to both ``TableCollection`` and ``TreeSequence`` classes\n  allowing the coordinate system to be shifted, and ``TreeSequence.concatenate``\n  so a set of tree sequence can be added to the right of an existing one.\n  (:user:`hyanwong`, :pr:`3165`, :issue:`3164`)\n\n- Add ``TreeSequence.map_to_vcf_model`` method to return a mapping of\n  the tree sequence to the VCF model.\n  (:user:`benjeffery`, :pr:`3163`)\n\n- Use a thin space as the thousands separator in HTML output,\n  and a comma in CLI output.\n  (:user:`hossam26644`, :pr:`3167`, :issue:`2951`)\n\n**Fixes**\n\n- Correct assertion message when tables are compared with metadata ignored.\n  (:user:`benjeffery`, :pr:`3162`, :issue:`3161`)\n\n**Breaking changes**\n\n- ``TreeSequence.write_vcf`` now filters non-sample nodes from individuals\n  by default, instead of raising an error. 
These nodes can be included using the\n  new ``include_non_sample_nodes`` argument.\n  By default individual names (sample IDs) in VCF output are now of the form\n  ``tsk_{individual.id}``. Previously these were always\n  ``\"tsk_{j}\" for j in range(num_individuals)``. This may break some downstream\n  code if individuals are specified. To fix, manually specify ``individual_names``\n  to the required pattern.\n  (:user:`benjeffery`, :pr:`3163`)\n\n\n--------------------\n[0.6.3] - 2025-04-28\n--------------------\n\n**Bugfixes**\n\n- ``TreeSequence.draw_svg(path=...)`` was failing due to a missing\n  `import xml.dom.minidom` (:user:`petrelharp`, :issue:`3144`, :pr:`3145`)\n\n\n--------------------\n[0.6.2] - 2025-04-01\n--------------------\n\n**Bugfixes**\n\n- Metadata.schema was returning a modified schema, this is fixed to return a copy of\n  the original schema instead (:user:`benjeffery`, :issue:`3129`, :pr:`3130`)\n\n**Breaking Changes**\n\n- Legacy formats from msprime<0.6 (HDF5 formats) support is dropped. This includes the\n  support for ``tskit upgrade``  (:user:`hossam26644`, :issue:`2812`, :pr:`3138`)\n\n--------------------\n[0.6.1] - 2025-03-31\n--------------------\n\n**Bugfixes**\n\n- Fix to ``TreeSequence.pair_coalescence_counts`` output dimension when\n  provided with time windows containing no nodes (:user:`nspope`,\n  :issue:`3046`, :pr:`3058`)\n\n- Fix to ``TreeSequence.pair_coalescence_counts`` to normalise by non-missing\n  span if ``span_normalise=True``. This resolves a bug where\n  ``TreeSequence.pair_coalescence_rates`` would return incorrect values for\n  intervals with missing trees.  
(:user:`natep`, :issue:`3053`, :pr:`3059`)\n\n- Fix to ``TreeSequence.pair_coalescence_rates`` causing an\n  assertion to be triggered by floating point error, when all coalescence events are inside a single time window (:user:`natep`, :issue:`3035`, :pr:`3038`)\n\n**Features**\n\n- Add support for fixed-length arrays in metadata struct codec using the ``length`` property.\n  (:user:`benjeffery`, :issue:`3088`, :pr:`3090`)\n\n- Add a new ``TreeSequence.pca`` method that uses randomized linear algebra\n  to find the top eigenvectors/values of the genetic relatedness matrix\n  (:user:`hanbin973`, :user:`petrelharp`, :pr:`3008`)\n\n- Add methods on `TreeSequence` to efficiently get table metadata as a\n  numpy structured array. (:user:`benjeffery`, :pr:`3098`)\n\n- Add Python 3.13 support (:user:`benjeffery`, :pr:`3107`)\n\n- Add a `preamble` argument to `draw_svg()` methods to allow adding arbitrary extra\n  graphics (e.g. legends) to SVG plots (:user:`hyanwong`, :issue:`3086`, :pr:`3121`)\n\n--------------------\n[0.6.0] - 2024-10-16\n--------------------\n\n**Breaking Changes**\n\n- The definitions of ``TreeSequence.genetic_relatedness`` and\n  ``TreeSequence.genetic_relatedness_weighted`` are changed\n  to *average* over sample sets, rather than summing over them.\n  For computation with diploid sample sets, this will change the result\n  by a factor of four; for larger sample sets it will now produce\n  sensible values that are comparable between sample sets of different sizes.\n  The default for these methods is also changed to ``polarised=True``,\n  but the output is unchanged for ``centre=True`` (the default).\n  See the documentation for these methods for more discussion.\n  (:user:`petrelharp`, :user:`mmosmond`, :pr:`1623`)\n\n**Bugfixes**\n\n- Fix to ``TreeSequence.genetic_relatedness`` with ``indexes=None`` and\n  ``proportion=True``. 
(:user:`petrelharp`, :issue:`2984`, :pr:`1623`)\n\n- Fix to ``TreeSequence.general_stat`` when using non-strict summary functions\n  in the presence of non-ancestral material (very rare).\n  (:user:`petrelharp`, :issue:`2983`, :pr:`1623`)\n\n- Printing ``tskit.MetadataSchema(schema=None)`` now shows ``\"Null_schema\"`` rather\n  than ``None``, to avoid confusion (:user:`hyanwong`, :pr:`2720`)\n\n- Limit output HTML when a tree sequence is displayed that has a large amount of metadata.\n  (:user:`benjeffery`, :pr:`2999`)\n\n- Fix warning in `draw_svg` to use correct warnings module.\n  (:user:`duncanMR`, :issue:`2870`, :pr:`2871`)\n\n**Features**\n\n- Add the ``centre`` option to ``TreeSequence.genetic_relatedness`` and\n  ``TreeSequence.genetic_relatedness_weighted``.\n  (:user:`petrelharp`, :user:`mmosmond`, :pr:`1623`)\n\n- Edges now have an ``.interval`` attribute returning a ``tskit.Interval`` object.\n  (:user:`hyanwong`, :pr:`2531`)\n\n- Variants now have a `states()` method that returns the genotypes as an\n  (inefficient) array of strings, rather than integer indexes, to\n  aid comparison of genetic variation (:user:`hyanwong`, :pr:`2617`)\n\n- Added ``distance_between`` that calculates the total distance between two nodes in a tree.\n  (:user:`Billyzhang1229`, :pr:`2771`)\n\n- Added ``genetic_relatedness_matrix`` method to compute\n  pairwise genetic relatedness between sample sets.\n  (:user:`jeromekelleher`, :user:`petrelharp`, :pr:`2823`)\n\n- Add ``TreeSequence.extend_haplotypes`` method that extends ancestral haplotypes\n  using recombination information, leading to unary nodes in many trees and\n  fewer edges. 
(:user:`petrelharp`, :user:`hfr1tz3`, :user:`nspope`,\n  :user:`avabamf`, :pr:`2651`, :pr:`2938`)\n\n- Add ``Table.drop_metadata`` to make clearing metadata from tables easy.\n  (:user:`jeromekelleher`, :pr:`2944`)\n\n- Add ``Interval.mid`` and ``Tree.mid`` properties to return the midpoint of the interval.\n  (:user:`currocam`, :pr:`2960`)\n\n- Added ``genetic_relatedness_vector`` method to compute product of genetic relatedness\n  matrix and weight vector.\n  (:user:`petrelharp`, :pr:`2980`)\n\n- Added ``pair_coalescence_counts`` method to calculate coalescence events per node or time\n  interval, ``pair_coalescence_quantiles`` method to estimate quantiles of pair\n  coalescence times using empirical CDF inversion, and ``pair_coalescence_rates`` method to\n  estimate instantaneous rates of pair coalescence within time intervals from the empirical CDF.\n  (:user:`nspope`, :pr:`2915`, :pr:`2976`, :pr:`2985`)\n\n- Add provenance information to the HTML notebook representation of a tree sequence.\n  (:user:`benjeffery`, :pr:`3001`)\n\n- The ``.draw_svg()`` methods can add annotated genomic regions (e.g. genes) to the\n  x-axis. (:user:`hyanwong`, :pr:`3002`)\n\n- Added a ``node_titles`` and a ``mutation_titles`` parameter to ``.draw_svg()`` methods\n  which assigns a string to node and mutation symbols, commonly shown on mouseover. This\n  can reduce label clutter while retaining useful info (:user:`hyanwong`, :pr:`3007`)\n\n- Added (currently undocumented) use of the `order` parameter in ``Tree.draw_svg()`` to\n  pass a subset of nodes, so subtrees can be visually collapsed. Additionally, an option\n  ``pack_untracked_polytomies`` allows large polytomies involving untracked samples to\n  be summarised as a dotted line (:user:`hyanwong`, :issue:`3011`, :pr:`3010`, :pr:`3012`)\n\n- Added a ``title`` parameter to ``.draw_svg()`` methods (:user:`hyanwong`, :pr:`3015`)\n\n- Add comma separation to all display numbers. 
(:user:`benjeffery`, :issue:`3017`, :pr:`3018`)\n\n- Added ``Tree.ancestors(u)`` method. (:user:`hyanwong`, :issue:`2706`, :pr:`3021`)\n\n- Add ``resources`` section to provenance schema. (:user:`benjeffery`, :pr:`3016`)\n\n- Add ``Tree.rf_distance`` method to calculate the unweighted Robinson-Foulds distance\n  between two trees. (:user:`Billyzhang1229`, :issue:`995`, :pr:`2643`, :pr:`3032`)\n\n\n--------------------\n[0.5.8] - 2024-06-27\n--------------------\n\n- Add support for numpy 2 (:user:`jeromekelleher`, :user:`benjeffery`, :pr:`2964`)\n\n\n--------------------\n[0.5.7] - 2024-06-17\n--------------------\n\n**Breaking Changes**\n\n- The VCF writing methods (`ts.write_vcf`, `ts.as_vcf`) now error if a site with\n  position zero is encountered. The VCF spec does not allow zero position sites.\n  Suppress this error with the `allow_position_zero` argument.\n  (:user:`benjeffery`, :pr:`2901`, :issue:`2838`)\n\n**Bugfixes**\n\n- Fix to the folded, expected allele frequency spectrum (i.e.,\n  `TreeSequence.allele_frequency_spectrum(mode=\"branch\", polarised=False)`),\n  which was half as big as it should have been. (:user:`petrelharp`,\n  :user:`nspope`, :pr:`2933`)\n\n--------------------\n[0.5.6] - 2023-10-10\n--------------------\n\n**Breaking Changes**\n\n- tskit now requires Python 3.8, as Python 3.7 became end-of-life on 2023-06-27\n\n**Features**\n\n- Tree.tmrca now accepts >2 nodes and returns nicer errors\n  (:user:`hyanwong`, :pr:`2808`, :issue:`2801`, :issue:`2070`, :issue:`2611`)\n\n- Add ``TreeSequence.genetic_relatedness_weighted`` stats method.\n  (:user:`petrelharp`, :user:`brieuclehmann`, :user:`jeromekelleher`,\n  :pr:`2785`, :pr:`1246`)\n\n- Add ``TreeSequence.impute_unknown_mutations_time`` method to return an\n  array of mutation times based on the times of associated nodes\n  (:user:`duncanMR`, :pr:`2760`, :issue:`2758`)\n\n- Add ``asdict`` to all dataclasses. These are returned when you access a row or\n  other tree sequence object. 
(:user:`benjeffery`, :pr:`2759`, :issue:`2719`)\n\n**Bugfixes**\n\n- Fix incompatibility with ``jsonschema>4.18.6`` which caused\n  ``AttributeError: module jsonschema has no attribute _validators``\n  (:user:`benjeffery`, :pr:`2844`, :issue:`2840`)\n\n--------------------\n[0.5.5] - 2023-05-17\n--------------------\n\n**Performance improvements**\n\n- Methods like ts.at() which seek to a specified position on the sequence from\n  a new Tree instance are now much faster (:user:`molpopgen`, :pr:`2661`).\n\n**Features**\n\n- Add ``__repr__`` for variants to return a string representation of the raw data\n  without spewing megabytes of text (:user:`chriscrsmith`, :pr:`2695`, :issue:`2694`)\n\n**Breaking Changes**\n\n**Bugfixes**\n\n- Fix `UnicodeDecodeError` when calling `Variant.alleles` on the `emscripten` platform.\n  (:user:`benjeffery`, :pr:`2754`, :issue:`2737`)\n\n--------------------\n[0.5.4] - 2023-01-13\n--------------------\n\n**Features**\n\n- A new ``Tree.is_root`` method avoids the need to search the potentially\n  large list of ``Tree.roots`` (:user:`hyanwong`, :pr:`2669`, :issue:`2620`)\n\n- The ``TreeSequence`` object now has the attributes ``min_time`` and ``max_time``,\n  which are the minimum and maximum among the node times and mutation times,\n  respectively. (:user:`szhan`, :pr:`2612`, :issue:`2271`)\n\n- The ``draw_svg`` methods now have a ``max_num_trees`` parameter to truncate\n  the total number of trees shown, giving a readable display for tree\n  sequences with many trees (:user:`hyanwong`, :pr:`2652`)\n\n- The ``draw_svg`` methods now accept a ``canvas_size`` parameter to allow\n  extra room on the canvas e.g. for long labels or repositioned graphical\n  elements (:user:`hyanwong`, :pr:`2646`, :issue:`2645`)\n\n- The ``Tree`` object now has the method ``siblings`` to get\n   the siblings of a node. 
It returns an empty tuple if the node\n   has no siblings, is not a node in the tree, is the virtual root,\n   or is an isolated non-sample node.\n   (:user:`szhan`, :pr:`2618`, :issue:`2616`)\n\n- The ``msprime.RateMap`` class has been ported into tskit: functionality should\n  be identical to the version in msprime, apart from minor changes in the formatting\n  of tabular text output (:user:`hyanwong`, :user:`jeromekelleher`, :pr:`2678`)\n\n- Tskit now supports and has wheels for Python 3.11. This Python version has a significant\n  performance boost (:user:`benjeffery`, :pr:`2624`, :issue:`2248`)\n\n- Add the `update_sample_flags` option to `simplify` which ensures\n  no node sample flags are changed to allow calling code to manage sample status.\n  (:user:`jeromekelleher`, :issue:`2662`, :pr:`2663`).\n\n**Breaking Changes**\n\n - the ``filter_populations``, ``filter_individuals``, and ``filter_sites``\n   parameters to simplify previously defaulted to ``True`` but now default\n   to ``None``, which is treated as ``True``. Previously, passing ``None``\n   would result in an error. (:user:`hyanwong`, :pr:`2609`, :issue:`2608`)\n\n--------------------\n[0.5.3] - 2022-10-03\n--------------------\n\n**Fixes**\n\n - The ``Variant`` object can now be initialized with 64 bit numpy ints as\n   returned e.g. 
from np.where (:user:`hyanwong`, :pr:`2518`, :issue:`2514`)\n\n - Fix `tree.mrca` for the case of a tree with multiple roots.\n   (:user:`benjeffery`, :pr:`2533`, :issue:`2521`)\n\n**Features**\n\n - The ``ts.nodes`` method now takes an ``order`` parameter so that nodes\n   can be visited in time order (:user:`hyanwong`, :pr:`2471`, :issue:`2370`)\n\n - Add ``samples`` argument to ``TreeSequence.genotype_matrix``.\n   Default is ``None``, where all the sample nodes are selected.\n   (:user:`szhan`, :pr:`2493`, :issue:`678`)\n\n - ``ts.draw`` and the ``draw_svg`` methods now have an optional ``omit_sites``\n   parameter, aiding drawing large trees with many sites and mutations\n   (:user:`hyanwong`, :pr:`2519`, :issue:`2516`)\n\n**Breaking Changes**\n\n - Single statistics computed with ``TreeSequence.general_stat`` are now\n   returned as numpy scalars if windows=None, AND; samples is a single\n   list or None (for a 1-way stat), OR indexes is None or a single list of\n   length k (instead of a list of length-k lists).\n   (:user:`gtsambos`, :pr:`2417`, :issue:`2308`)\n\n - Accessor methods such as ts.edge(n) and ts.node(n) now allow negative\n   indexes (:user:`hyanwong`, :pr:`2478`, :issue:`1008`)\n\n - ``ts.subset()`` produces valid tree sequences even if nodes are shuffled\n   out of time order (:user:`hyanwong`, :pr:`2479`, :issue:`2473`), and the\n   same for ``tables.subset()`` (:user:`hyanwong`, :pr:`2489`). 
This involves\n   sorting the returned tables, potentially changing the returned edge order.\n\n**Performance improvements**\n\n - TreeSequence.link_ancestors no longer continues to process edges once all\n   of the sample and ancestral nodes have been accounted for, improving memory\n   overhead and overall performance\n   (:user:`gtsambos`, :pr:`2456`, :issue:`2442`)\n\n--------------------\n[0.5.2] - 2022-07-29\n--------------------\n\n**Fixes**\n\n- Iterating over ``ts.variants()`` could cause a segfault in tree sequences\n  with large numbers of alleles or very long alleles\n  (:user:`jeromekelleher`, :pr:`2437`, :issue:`2429`).\n\n- Various circular references fixed, lowering peak memory usage\n  (:user:`jeromekelleher`, :pr:`2424`, :issue:`2423`, :issue:`2427`).\n\n- Fix bugs in VCF output when there isn't a 1-1 mapping between individuals\n  and sample nodes (:user:`jeromekelleher`, :pr:`2442`, :issue:`2257`,\n  :issue:`2446`, :issue:`2448`).\n\n**Performance improvements**\n\n- TreeSequence.site position search performance greatly improved, with much lower\n  memory overhead (:user:`jeromekelleher`, :pr:`2424`).\n\n- TreeSequence.samples time/population search performance greatly improved, with\n  much lower memory overhead (:user:`jeromekelleher`, :pr:`2424`, :issue:`1916`).\n\n- The ``timeasc`` and ``timedesc`` orders for ``Tree.nodes`` have much\n  improved performance and lower memory overhead\n  (:user:`jeromekelleher`, :pr:`2424`, :issue:`2423`).\n\n**Features**\n\n- Variant objects now have a ``.num_missing`` attribute and ``.counts()`` and\n  ``.frequencies`` methods (:user:`hyanwong`, :issue:`2390` :pr:`2393`).\n\n- Add the `Tree.num_lineages(t)` method to return the number of lineages present\n  at time t in the tree (:user:`jeromekelleher`, :issue:`386`, :pr:`2422`)\n\n- Efficient array access to table data now provided via attributes like\n  `TreeSequence.nodes_time`, etc (:user:`jeromekelleher`, :pr:`2424`).\n\n**Breaking Changes**\n\n- 
Previously, accessing (e.g.) ``tables.edges`` returned a different instance of\n  EdgeTable each time. This has been changed to return the same instance\n  for the lifetime of a given TableCollection instance. This is technically\n  a breaking change, although it's difficult to see how code would depend\n  on the property that (e.g.) ``tables.edges is not tables.edges``.\n  (:user:`jeromekelleher`, :pr:`2441`, :issue:`2080`).\n\n\n--------------------\n[0.5.1] - 2022-07-14\n--------------------\n\n**Fixes**\n\n- Copies of a `Variant` object would cause a segfault when ``.samples`` was accessed.\n  (:user:`benjeffery`, :issue:`2400`, :pr:`2401`)\n\n\n**Changes**\n\n- Tables in a table collection can be replaced using the replace_with method\n  (:user:`hyanwong`, :issue:`1489` :pr:`2389`)\n\n- SVG drawing routines now return a special string object that is automatically\n  rendered in a Jupyter notebook (:user:`hyanwong`, :pr:`2377`)\n\n**Features**\n\n- New ``Site.alleles()`` method (:user:`hyanwong`, :issue:`2380`, :pr:`2385`)\n\n- The ``variants()``, ``haplotypes()`` and ``alignments()`` methods can now\n  take a list of sample ids and a left and right position, to restrict the\n  size of the output (:user:`hyanwong`, :issue:`2092`, :pr:`2397`)\n\n\n--------------------\n[0.5.0] - 2022-06-22\n--------------------\n\n**Changes**\n\n- A ``min_time`` parameter in ``draw_svg`` enables the youngest node as the y axis min\n  value, allowing negative times.\n  (:user:`hyanwong`, :issue:`2197`, :pr:`2215`)\n\n- ``VcfWriter.write`` now prints the site ID of variants in the ID field of the\n  output VCF files.\n  (:user:`roohy`, :issue:`2103`, :pr:`2107`)\n\n- Make dumping of tables and tree sequences to disk a zero-copy operation.\n  (:user:`benjeffery`, :issue:`2111`, :pr:`2124`)\n\n- Add ``copy`` argument to ``TreeSequence.variants`` which if False reuses the\n  returned ``Variant`` object for improved performance. 
Defaults to True.\n  (:user:`benjeffery`, :issue:`605`, :pr:`2172`)\n\n- ``tree.mrca`` now takes 2 or more arguments and gives the common ancestor of them all.\n  (:user:`savitakartik`, :issue:`1340`, :pr:`2121`)\n\n- Add an ``edge`` attribute to the ``Mutation`` class that gives the ID of the\n  edge that the mutation falls on.\n  (:user:`jeromekelleher`, :issue:`685`, :pr:`2279`).\n\n- Add the ``TreeSequence.split_edges`` operation which inserts nodes into\n  edges at a specific time.\n  (:user:`jeromekelleher`, :issue:`2276`, :pr:`2296`).\n\n- Add the ``TreeSequence.decapitate`` (and closely related\n  ``TableCollection.delete_older``) operation to remove topology and mutations\n  older than a given time.\n  (:user:`jeromekelleher`, :issue:`2236`, :pr:`2302`, :pr:`2331`).\n\n- Add the ``TreeSequence.individuals_time`` and ``TreeSequence.individuals_population``\n  methods to return arrays of per-individual times and populations, respectively.\n  (:user:`petrelharp`, :issue:`1481`, :pr:`2298`).\n\n- Add the ``sample_mask`` and ``site_mask`` to ``write_vcf`` to allow parts\n  of an output VCF to be omitted or marked as missing data. Also add the\n  ``as_vcf`` convenience function, to return VCF as a string.\n  (:user:`jeromekelleher`, :pr:`2300`).\n\n- Add support for missing data to ``write_vcf``, and add the ``isolated_as_missing``\n  argument. (:user:`jeromekelleher`, :pr:`2329`, :issue:`447`).\n\n- Add ``Tree.num_children_array`` and ``Tree.num_children``. 
Returns the counts of\n  the number of child nodes for each or a single node in the tree respectively.\n  (:user:`GertjanBisschop`, :issue:`2318`, :issue:`2319`, :pr:`2332`)\n\n- Add ``Tree.path_length``.\n  (:user:`jeremyguez`, :issue:`2249`, :pr:`2259`).\n\n- Add B1 tree balance index.\n  (:user:`jeremyguez`, :user:`jeromekelleher`, :issue:`2251`, :pr:`2281`, :pr:`2346`).\n\n- Add B2 tree balance index.\n  (:user:`jeremyguez`, :user:`jeromekelleher`, :issue:`2252`, :pr:`2353`, :pr:`2354`).\n\n- Add Sackin tree imbalance index.\n  (:user:`jeremyguez`, :user:`jeromekelleher`, :pr:`2246`, :pr:`2258`).\n\n- Add Colless tree imbalance index.\n  (:user:`jeremyguez`, :user:`jeromekelleher`, :issue:`2250`, :pr:`2266`, :pr:`2344`).\n\n- Add ``direction`` argument to ``TreeSequence.edge_diffs``, allowing iteration\n  over diffs in the reverse direction. NOTE: this comes with a ~10% performance\n  regression as the implementation was moved from C to Python for simplicity\n  and maintainability. Please open an issue if this affects your application.\n  (:user:`jeromekelleher`, :user:`benjeffery`, :pr:`2120`).\n\n- Add ``Tree.edge_array`` and ``Tree.edge``. Returns the edge id of the edge encoding\n  the relationship of each node with its parent.\n  (:user:`GertjanBisschop`, :issue:`2361`, :pr:`2357`)\n\n- Add ``position`` argument to ``TreeSequence.site``. Returns a ``Site`` object if there is\n  one at the specified position. If not, it raises ``ValueError``.\n  (:user:`szhan`, :issue:`2234`, :pr:`2235`)\n\n**Breaking Changes**\n\n- The JSON metadata codec now interprets the empty string as an empty object. This means\n  that applying a schema to an existing table will no longer necessitate modifying the\n  existing rows. 
(:user:`benjeffery`, :issue:`2064`, :pr:`2104`)\n\n- Remove the previously deprecated ``as_bytes`` argument to ``TreeSequence.variants``.\n  If you need genotypes in byte form this can be done following the code in the\n  ``to_macs`` method on line ``5573`` of ``trees.py``.\n  This argument was initially deprecated more than 3 years ago when the code was part of\n  ``msprime``.\n  (:user:`benjeffery`, :issue:`605`, :pr:`2172`)\n\n- Arguments after ``ploidy`` in ``write_vcf`` marked as keyword only\n  (:user:`jeromekelleher`, :pr:`2329`, :issue:`2315`).\n\n- When metadata equal to ``b''`` is printed to text or HTML tables it will render as\n  an empty string rather than ``\"b''\"``. (:user:`hyanwong`, :issue:`2349`, :pr:`2351`)\n\n----------------------\n[0.4.1] - 2022-01-11\n----------------------\n\n**Changes**\n\n- ``TableCollection.name_map`` has been deprecated in favour of ``table_name_map``.\n  (:user:`benjeffery`, :issue:`1981`, :pr:`2086`)\n\n\n**Fixes**\n\n- ``TreeSequence.dump_text`` now prints decoded metadata if there is a schema.\n  (:user:`benjeffery`, :issue:`1860`, :issue:`1527`)\n\n- Add missing ``ReferenceSequence.__eq__`` method.\n  (:user:`benjeffery`, :issue:`2063`, :pr:`2085`)\n\n\n----------------------\n[0.4.0] - 2021-12-10\n----------------------\n\n**Breaking changes**\n\n- The ``Tree.num_nodes`` method is now deprecated with a warning, because it confusingly\n  returns the number of nodes in the entire tree sequence, rather than in the tree. Text\n  summaries of trees (e.g. ``str(tree)``) now return the number of nodes in the tree,\n  not in the entire tree sequence (:user:`hyanwong`, :issue:`1966` :pr:`1968`)\n\n- The CLI ``info`` command now gives more detailed information on the tree sequence\n  (:user:`benjeffery`, :pr:`1611`)\n\n- 64 bits are now used to store the sizes of ragged table columns such as metadata,\n  allowing them to hold more data. 
This change is fully backwards and forwards compatible\n  for all tree-sequences whose ragged column sizes fit into 32 bits. New tree-sequences with\n  large offset arrays that require 64 bits will fail to load in previous versions with\n  error ``_tskit.FileFormatError: An incompatible type for a column was found in the\n  file``.\n  (:user:`jeromekelleher`, :issue:`343`, :issue:`1527`, :issue:`1528`, :issue:`1530`,\n  :issue:`1554`, :issue:`1573`, :issue:`1589`,:issue:`1598`,:issue:`1628`, :pr:`1571`,\n  :pr:`1579`, :pr:`1585`, :pr:`1590`, :pr:`1602`, :pr:`1618`, :pr:`1620`, :pr:`1652`).\n\n- The Tree class now conceptually has an extra node, the \"virtual root\" whose\n  children are the roots of the tree. The quintuply linked tree arrays\n  (parent_array, left_child_array, right_child_array, left_sib_array and right_sib_array)\n  all have one extra element.\n  (:user:`jeromekelleher`, :issue:`1691`, :pr:`1704`).\n\n- Tree traversal orders returned by the ``nodes`` method have changed when there\n  are multiple roots. Previously orders were defined locally for each root, but\n  are now globally across all roots. (:user:`jeromekelleher`, :pr:`1704`).\n\n- Individuals are no longer guaranteed or required to be topologically sorted in a tree sequence.\n  ``TableCollection.sort`` no longer sorts individuals.\n  (:user:`benjeffery`, :issue:`1774`, :pr:`1789`)\n\n- Metadata encoding errors now raise ``MetadataEncodingError``\n  (:user:`benjeffery`, :issue:`1505`, :pr:`1827`).\n\n- For ``TreeSequence.samples`` all arguments after ``population`` are now keyword only\n  (:user:`benjeffery`, :issue:`1715`, :pr:`1831`).\n\n- Remove the method ``TreeSequence.to_nexus`` and replace with ``TreeSequence.as_nexus``.\n  As the old method was not generating standards-compliant output, it seems unlikely\n  that it was used by anyone. Calls to ``to_nexus`` will result in a\n  NotImplementedError, informing users of the change. 
See below for details on\n  ``as_nexus``.\n\n- Change default value for ``missing_data_char`` in the ``TreeSequence.haplotypes``\n  method from \"-\" to \"N\". This is a more idiomatic usage to indicate\n  missing data rather than a gap in an alignment. (:user:`jeromekelleher`,\n  :issue:`1893`, :pr:`1894`)\n\n**Features**\n\n- Add the ``ibd_segments`` method and associated classes to compute, summarise\n  and store segments of identity by descent from a tree sequence\n  (:user:`gtsambos`, :user:`jeromekelleher`).\n\n- Allow skipping of site and mutation tables in ``TableCollection.sort``\n  (:user:`benjeffery`, :issue:`1475`, :pr:`1826`).\n\n- Add ``TableCollection.sort_individuals`` to sort the individuals as this is no longer done by the\n  default sort (:user:`benjeffery`, :issue:`1774`, :pr:`1789`).\n\n- Add ``__setitem__`` to all tables allowing single rows to be updated. For example\n  ``tables.nodes[0] = tables.nodes[0].replace(flags=tskit.NODE_IS_SAMPLE)``\n  (:user:`jeromekelleher`, :user:`benjeffery`, :issue:`1545`, :pr:`1600`).\n\n- Added a new parameter ``time`` to ``TreeSequence.samples()`` allowing to select\n  samples at a specific time point or time interval.\n  (:user:`mufernando`, :user:`petrelharp`, :issue:`1692`, :pr:`1700`)\n\n- Add ``table.metadata_vector`` to all table classes to allow easy extraction of a single\n  metadata key into an array\n  (:user:`petrelharp`, :issue:`1676`, :pr:`1690`).\n\n- Add ``time_units`` to ``TreeSequence`` to describe the units of the time dimension of the\n  tree sequence. This is then used to generate an error if ``time_units`` is ``uncalibrated`` when\n  using the branch lengths in statistics. 
(:user:`benjeffery`, :issue:`1644`, :pr:`1760`, :pr:`1832`)\n\n- Add the ``virtual_root`` property to the Tree class (:user:`jeromekelleher`, :pr:`1704`).\n\n- Add the ``num_edges`` property to the Tree class (:user:`jeromekelleher`, :pr:`1704`).\n\n- Improved performance for tree traversal methods in the ``nodes`` iterator.\n  Roughly a 10X performance increase for \"preorder\", \"postorder\", \"timeasc\"\n  and \"timedesc\" (:user:`jeromekelleher`, :pr:`1704`).\n\n- Substantial performance improvement for ``Tree.total_branch_length``\n  (:user:`jeromekelleher`, :issue:`1794`, :pr:`1799`)\n\n- Add the ``discrete_genome`` property to the TreeSequence class which is true if\n  all coordinates are discrete (:user:`jeromekelleher`, :issue:`1144`, :pr:`1819`)\n\n- Add a ``random_nucleotides`` function. (:user:`jeromekelleher`, :pr:`1825`)\n\n- Add the ``TreeSequence.alignments`` method. (:user:`jeromekelleher`, :pr:`1825`)\n\n- Add alignment export in the FASTA and nexus formats using the\n  ``TreeSequence.write_nexus`` and ``TreeSequence.write_fasta`` methods.\n  (:user:`jeromekelleher`, :user:`hyanwong`, :pr:`1894`)\n\n- Add the ``discrete_time`` property to the TreeSequence class which is true if\n  all time coordinates are discrete or unknown (:user:`benjeffery`, :issue:`1839`, :pr:`1890`)\n\n- Add the ``skip_tables`` option to ``load`` to support only loading\n  top-level information from a file. Also add the ``ignore_tables`` option to\n  ``TableCollection.equals`` and ``TableCollection.assert_equals`` to\n  compare only top-level information. (:user:`clwgg`, :pr:`1882`, :issue:`1854`).\n\n- Add the ``skip_reference_sequence`` option to ``load``. Also add the\n  ``ignore_reference_sequence`` option to ``equals`` to compare two table\n  collections without comparing their reference sequence. 
(:user:`clwgg`,\n  :pr:`2019`, :issue:`1971`).\n\n- tskit now supports python 3.10 (:user:`benjeffery`, :issue:`1895`, :pr:`1949`)\n\n\n**Fixes**\n\n- `dump_tables` omitted individual parents. (:user:`benjeffery`, :issue:`1828`, :pr:`1884`)\n\n- Add the ``Tree.as_newick`` method and deprecate ``Tree.newick``. The\n  ``as_newick`` method by default labels samples with the pattern ``\"n{node_id}\"``\n  which is much more useful than the behaviour of ``Tree.newick`` (which mimics\n  ``ms`` output). (:user:`jeromekelleher`, :issue:`1671`, :pr:`1838`.)\n\n- Add the ``as_nexus`` and ``write_nexus`` methods to the TreeSequence class,\n  replacing the broken ``to_nexus`` method (see above). This uses the same\n  sample labelling pattern as ``as_newick``.\n  (:user:`jeetsukumaran`, :user:`jeromekelleher`, :issue:`1785`, :pr:`1835`,\n  :pr:`1836`, :pr:`1838`)\n\n- `load_text` created additional populations even if the population table was specified,\n  and didn't strip newlines from input text (:user:`hyanwong`, :issue:`1909`, :pr:`1910`)\n\n\n--------------------\n[0.3.7] - 2021-07-08\n--------------------\n\n**Features**\n\n- ``map_mutations`` now allows the ancestral state to be specified\n  (:user:`hyanwong`, :user:`jeromekelleher`, :issue:`1542`, :pr:`1550`)\n\n--------------------\n[0.3.6] - 2021-05-14\n--------------------\n\n**Breaking changes**\n\n- ``Mutation.position`` and ``Mutation.index`` which were deprecated in 0.2.2 (Sep '19) have\n  been removed.\n\n**Features**\n\n- Add direct, copy-free access to the arrays representing the quintuply-linked structure\n  of ``Tree`` (e.g. ``left_child_array``). Allows performant algorithms over the tree\n  structure using, for example, numba\n  (:user:`jeromekelleher`, :issue:`1299`, :pr:`1320`).\n\n- Add fancy indexing to tables. E.g. ``table[6:86]`` returns a new table with the\n  specified rows. 
Supports slices, index arrays and boolean masks\n  (:user:`benjeffery`, :issue:`1221`, :pr:`1348`, :pr:`1342`).\n\n- Add ``Table.append`` method for adding rows from classes such as ``SiteTableRow`` and\n  ``Site`` (:user:`benjeffery`, :issue:`1111`, :pr:`1254`).\n\n- SVG visualization of a tree sequence can be restricted to displaying between left\n  and right genomic coordinates using the ``x_lim`` parameter. The default settings\n  now mean that if the left or right flanks of a tree sequence are entirely empty,\n  these regions will not be plotted in the SVG (:user:`hyanwong`, :pr:`1288`).\n\n- SVG visualization of a single tree allows all mutations on an edge to be plotted\n  via the ``all_edge_mutations`` param (:user:`hyanwong`,:issue:`1253`, :pr:`1258`).\n\n- Entity classes such as ``Mutation``, ``Node`` are now python dataclasses\n  (:user:`benjeffery`, :pr:`1261`).\n\n- Metadata decoding for table row access is now lazy (:user:`benjeffery`, :pr:`1261`).\n\n- Add html notebook representation for ``Tree`` and change ``Tree.__str__`` from dict\n  representation to info table. (:user:`benjeffery`, :issue:`1269`, :pr:`1304`).\n\n- Improve display of tables when ``print``ed, limiting lines set via\n  ``tskit.set_print_options`` (:user:`benjeffery`,:issue:`1270`, :pr:`1300`).\n\n- Add ``Table.assert_equals`` and ``TableCollection.assert_equals`` which give an exact\n  report of any differences. (:user:`benjeffery`,:issue:`1076`, :pr:`1328`)\n\n**Changes**\n\n- In drawing methods ``max_tree_height`` and ``tree_height_scale`` have been deprecated\n  in favour of ``max_time`` and ``time_scale``\n  (:user:`benjeffery`,:issue:`1262`, :pr:`1331`).\n\n**Fixes**\n\n- Tree sequences were not properly init'd after unpickling\n  (:user:`benjeffery`, :issue:`1297`, :pr:`1298`)\n\n--------------------\n[0.3.5] - 2021-03-16\n--------------------\n\n**Features**\n\n- SVG visualization plots mutations at the correct time, if it exists, and a y-axis,\n  with label can be drawn. 
Both x- and y-axes can be plotted on trees as well as\n  tree sequences (:user:`hyanwong`,:issue:`840`, :issue:`580`, :pr:`1236`)\n\n- SVG visualization now uses squares for sample nodes and red crosses for mutations,\n  with the site/mutation positions marked on the x-axis. Additionally, an x-axis\n  label can be set (:user:`hyanwong`,:issue:`1155`, :issue:`1194`, :pr:`1182`, :pr:`1213`)\n\n- Add ``parents`` column to the individual table to allow recording of pedigrees\n  (:user:`ivan-krukov`, :user:`benjeffery`, :issue:`852`, :pr:`1125`, :pr:`866`, :pr:`1153`, :pr:`1177`, :pr:`1192`, :pr:`1199`).\n\n- Added ``Tree.generate_random_binary`` static method to create random\n  binary trees (:user:`hyanwong`, :user:`jeromekelleher`, :pr:`1037`).\n\n- Change the default behaviour of Tree.split_polytomies to generate\n  the shortest possible branch lengths instead of a fixed epsilon of\n  1e-10. (:user:`jeromekelleher`, :issue:`1089`, :pr:`1090`)\n\n- Default value metadata in ``add_row`` functions is now schema-dependent, so that\n  ``metadata={}`` is no longer needed as an argument when a schema is present\n  (:user:`benjeffery`, :issue:`1084`).\n\n- ``default`` in metadata schemas is used to fill in missing values when encoding for\n  the struct codec. 
(:user:`benjeffery`, :issue:`1073`, :pr:`1116`).\n\n- Added ``canonical`` option to table collection sorting (:user:`mufernando`,\n  :user:`petrelharp`, :issue:`705`)\n\n- Added various arguments to ``TreeSequence.subset``, to allow for stable\n  population indexing and lossless node reordering with subset.\n  (:user:`petrelharp`, :pr:`1097`)\n\n**Changes**\n\n- Allow mutations that have the same derived state as their parent mutation.\n  (:user:`benjeffery`, :issue:`1180`, :pr:`1233`)\n\n- File minor version change to support individual parents\n\n**Breaking changes**\n\n- tskit now requires Python 3.7 (:user:`benjeffery`, :pr:`1235`)\n\n--------------------\n[0.3.4] - 2020-12-02\n--------------------\n\nMinor bugfix release.\n\n\n**Bugfixes**\n\n- Reinstate the unused zlib_compression option to tskit.dump, as msprime < 1.0\n  still uses it (:user:`jeromekelleher`, :issue:`1067`).\n\n--------------------\n[0.3.3] - 2020-11-27\n--------------------\n\n**Features**\n\n- Add ``TreeSequence.genetic_relatedness`` for calculating genetic relatedness between\n  pairs of sets of nodes (:user:`brieuclehmann`, :issue:`1021`, :pr:`1023`, :issue:`974`,\n  :issue:`973`, :pr:`898`).\n\n- Expose ``TreeSequence.coiterate()`` method to allow iteration over 2 sequences\n  simultaneously, aiding comparison of trees from two sequences\n  (:user:`jeromekelleher`, :user:`hyanwong`, :issue:`1021`, :pr:`1022`).\n\n- tskit is now supported on, and has wheels for, python3.9\n  (:user:`benjeffery`, :issue:`982`, :pr:`907`).\n\n- ``Tree.newick()`` now has extra option ``include_branch_lengths`` to allow branch\n  lengths to be omitted (:user:`hyanwong`, :pr:`931`).\n\n- Added ``Tree.generate_star`` static method to create star-topologies (:user:`hyanwong`,\n  :pr:`934`).\n\n- Added ``Tree.generate_comb`` and ``Tree.generate_balanced`` methods to create\n  example trees. 
(:user:`jeromekelleher`, :pr:`1026`).\n\n- Added ``equals`` method to TreeSequence, TableCollection and each of the tables which\n  provides more flexible equality comparisons, for example, allowing\n  users to ignore metadata or provenance in the comparison\n  (:user:`mufernando`, :user:`jeromekelleher`, :issue:`896`, :pr:`897`,\n  :issue:`913`, :pr:`917`).\n\n- Added ``__eq__`` to TreeSequence\n  (:user:`benjeffery`, :issue:`1011`, :pr:`1020`).\n\n- ``ts.dump`` and ``tskit.load`` now support reading and writing file objects such as\n  FIFOs and sockets (:user:`benjeffery`, :issue:`657`, :pr:`909`).\n\n- Added ``tskit.write_ms`` for writing to MS format\n  (:user:`saurabhbelsare`, :issue:`727`, :pr:`854`).\n\n- Added ``TableCollection.indexes`` for access to the edge insertion/removal order indexes\n  (:user:`benjeffery`, :issue:`4`, :pr:`916`).\n\n- The dictionary representation of a TableCollection now contains its index\n  (:user:`benjeffery`, :issue:`870`, :pr:`921`).\n\n- Added ``TreeSequence._repr_html_`` for use in jupyter notebooks\n  (:user:`benjeffery`, :issue:`872`, :pr:`923`).\n\n- Added ``TreeSequence.__str__`` to display a summary for terminal usage\n  (:user:`benjeffery`, :issue:`938`, :pr:`985`).\n\n- Added ``TableCollection.dump`` and ``TableCollection.load``. 
This allows table\n  collections that are not valid tree sequences to be manipulated\n  (:user:`benjeffery`, :issue:`14`, :pr:`986`).\n\n- Added ``nbytes`` method to tables, ``TableCollection`` and ``TreeSequence`` which\n  reports the size in bytes of those objects\n  (:user:`jeromekelleher`, :user:`benjeffery`, :issue:`54`, :pr:`871`).\n\n- Added ``TableCollection.clear`` to clear data table rows and optionally\n  provenances, table schemas and tree-sequence level metadata and schema\n  (:user:`benjeffery`, :issue:`929`, :pr:`1001`).\n\n**Bugfixes**\n\n- ``LightWeightTableCollection.asdict`` and ``TableCollection.asdict`` now return copies\n  of arrays (:user:`benjeffery`, :issue:`1025`, :pr:`1029`).\n\n- The ``map_mutations`` method previously used the Fitch parsimony method, but this\n  does not produce parsimonious results on non-binary trees. We now use the\n  Hartigan parsimony algorithm, which does (:user:`jeromekelleher`,\n  :issue:`987`, :pr:`1030`).\n\n- The ``flag`` argument to tables' ``add_row`` was treating the value as signed\n  (:user:`benjeffery`, :issue:`1027`, :pr:`1031`).\n\n**Breaking changes**\n\n- The argument to ``ts.dump`` and ``tskit.load`` has been renamed `file` from `path`.\n- All arguments to ``Tree.newick()`` except precision are now keyword-only.\n- Renamed ``ts.trait_regression`` to ``ts.trait_linear_model``.\n\n--------------------\n[0.3.2] - 2020-09-29\n--------------------\n\n**Breaking changes**\n\n- The argument order of ``Tree.unrank`` and ``combinatorics.num_labellings`` now\n  positions the number of leaves before the tree rank\n  (:user:`daniel-goldstein`, :issue:`950`, :pr:`978`)\n\n- Change several methods (``simplify()``, ``trees()``, ``Tree()``) so most parameters\n  are keyword only, not positional. This allows reordering of parameters, so\n  that deprecated parameters can be moved, and the parameter order in similar functions,\n  e.g. 
``TableCollection.simplify`` and ``TreeSequence.simplify()`` can be made\n  consistent (:user:`hyanwong`, :issue:`374`, :issue:`846`, :pr:`851`)\n\n\n**Features**\n\n- Add ``split_polytomies`` method to the Tree class\n  (:user:`hyanwong`, :user:`jeromekelleher`, :issue:`809`, :pr:`815`)\n\n- Tree accessor functions (e.g. ``ts.first()``, ``ts.at()``) pass extra parameters such as\n  ``sample_indexes`` to the underlying ``Tree`` constructor; also ``root_threshold`` can\n  be specified when calling ``ts.trees()`` (:user:`hyanwong`, :issue:`847`, :pr:`848`)\n\n- Genomic intervals returned by python functions are now namedtuples, allowing ``.left``,\n  ``.right`` and ``.span`` usage (:user:`hyanwong`, :issue:`784`, :pr:`786`, :pr:`811`)\n\n- Added ``include_terminal`` parameter to edge diffs iterator, to output the last edges\n  at the end of a tree sequence (:user:`hyanwong`, :issue:`783`, :pr:`787`)\n\n- :issue:`832` - Add ``metadata_bytes`` method to allow access to raw\n  TableCollection metadata (:user:`benjeffery`, :pr:`842`)\n\n- New ``tree.is_isolated(u)`` method (:user:`hyanwong`, :pr:`443`).\n\n- ``tskit.is_unknown_time`` can now check arrays. (:user:`benjeffery`, :pr:`857`).\n\n--------------------\n[0.3.1] - 2020-09-04\n--------------------\n\n**Bugfixes**\n\n- :issue:`823` - Fix mutation time error when using\n  ``simplify(keep_input_roots=True)`` (:user:`petrelharp`, :pr:`823`).\n\n- :issue:`821` - Fix mutation rows with unknown time never being\n  equal (:user:`petrelharp`, :pr:`822`).\n\n--------------------\n[0.3.0] - 2020-08-27\n--------------------\n\nMajor feature release for metadata schemas, set-like operations, mutation times,\nSVG drawing improvements and many others.\n\n**Breaking changes**\n\n- The default display order for tree visualisations has been changed to ``minlex``\n  (see below) to stabilise the node ordering and to make trees more readily\n  comparable. 
The old behaviour is still available with ``order=\"tree\"``.\n\n- File system operations such as dump/load now raise an appropriate OSError\n  instead of ``tskit.FileFormatError``. Loading from an empty file now raises\n  an ``EOFError``.\n\n- Bad tree topologies are detected earlier, so that it is no longer possible\n  to create a ``TreeSequence`` object which contains a parent with contradictory\n  children on an interval. Previously an error was thrown when some operation\n  building the trees was attempted (:user:`jeromekelleher`, :pr:`709`).\n\n- The ``TableCollection`` object no longer implements the iterator protocol.\n  Previously ``list(tables)`` returned a sequence of (table_name, table_instance)\n  tuples. This has been replaced with the more intuitive and future-proof\n  ``TableCollection.name_map`` and ``TreeSequence.tables_dict`` attributes, which\n  perform the same function (:user:`jeromekelleher`, :issue:`500`,\n  :pr:`694`).\n\n- The arguments to ``TreeSequence.genotype_matrix``, ``TreeSequence.haplotypes``\n  and ``TreeSequence.variants`` must now be keyword arguments, not positional. This\n  is to support the change from ``impute_missing_data`` to ``isolated_as_missing``\n  in the arguments to these methods. (:user:`benjeffery`, :issue:`716`, :pr:`794`)\n\n**New features**\n\n- New methods to perform set operations on TableCollections and TreeSequences.\n  ``TableCollection.subset`` subsets and reorders table collections by nodes\n  (:user:`mufernando`, :user:`petrelharp`, :pr:`663`, :pr:`690`).\n  ``TableCollection.union`` forms the node-wise union of two table collections\n  (:user:`mufernando`, :user:`petrelharp`, :issue:`381`, :pr:`623`).\n\n- Mutations now have an optional double-precision floating-point ``time`` column.\n  If not specified, this defaults to a particular ``NaN`` value (``tskit.UNKNOWN_TIME``)\n  indicating that the time is unknown. 
For a tree sequence to be considered valid\n  it must meet new criteria for mutation times, see :ref:`sec_mutation_requirements`.\n  Also added function ``TableCollection.compute_mutation_times``. Table sorting orders\n  mutations by non-increasing time per-site, which is also a requirement for a valid tree\n  sequence (:user:`benjeffery`, :pr:`672`).\n\n- Add support for trees with internal samples for the Kendall-Colijn tree distance\n  metric. (:user:`daniel-goldstein`, :pr:`610`)\n\n- Add background shading to SVG tree sequences to reflect tree position along the\n  sequence (:user:`hyanwong`, :pr:`563`).\n\n- Tables with a metadata column now have a ``metadata_schema`` that is used to\n  validate and encode metadata that is passed to ``add_row`` and decode metadata\n  on calls to ``table[j]`` and e.g. ``tree_sequence.node(j)`` See :ref:`sec_metadata`\n  (:user:`benjeffery`, :pr:`491`, :pr:`542`, :pr:`543`, :pr:`601`).\n\n- The tree-sequence now has top-level metadata with a schema\n  (:user:`benjeffery`, :pr:`666`, :pr:`644`, :pr:`642`).\n\n- Add classes to SVG drawings to allow easy adjustment and styling, and document the new\n  ``tskit.Tree.draw_svg()`` and ``tskit.TreeSequence.draw_svg()`` methods. This also fixes\n  :issue:`467` for duplicate SVG entity ``id`` s in Jupyter notebooks\n  (:user:`hyanwong`, :pr:`555`).\n\n- Add a ``to_nexus`` function that outputs a tree sequence in Nexus format\n  (:user:`saunack`, :pr:`550`).\n\n- Add extension of Kendall-Colijn tree distance metric for tree sequences\n  computed by ``TreeSequence.kc_distance``\n  (:user:`daniel-goldstein`, :pr:`548`).\n\n- Add an optional node traversal order in ``tskit.Tree`` that uses the minimum\n  lexicographic order of leaf nodes visited. 
This ordering (``\"minlex_postorder\"``)\n  adds more determinism because it constrains the order in which children of\n  a node are visited (:user:`brianzhang01`, :pr:`411`).\n\n- Add an ``order`` argument to the tree visualisation functions which supports\n  two node orderings: ``\"tree\"`` (the previous default) and ``\"minlex\"``\n  which stabilises the node ordering (making it easier to compare trees).\n  The default node ordering is changed to ``\"minlex\"``\n  (:user:`brianzhang01`, :user:`jeromekelleher`, :issue:`389`, :pr:`566`).\n\n- Add ``_repr_html_`` to tables, so that jupyter notebooks render them as\n  html tables (:user:`benjeffery`, :pr:`514`).\n\n- Remove support for ``kc_distance`` on trees with unary nodes\n  (:user:`daniel-goldstein`, :pr:`508`).\n\n- Improve Kendall-Colijn tree distance algorithm to operate in O(n^2) time\n  instead of O(n^2 * log(n)) where n is the number of samples\n  (:user:`daniel-goldstein`, :pr:`490`).\n\n- Add a metadata column to the migrations table. Works similarly to existing\n  metadata columns on other tables (:user:`benjeffery`, :pr:`505`).\n\n- Add a metadata column to the edges table. Works similarly to existing\n  metadata columns on other tables (:user:`benjeffery`, :pr:`496`).\n\n- Allow sites with missing data to be output by the ``haplotypes`` method, by\n  default replacing with ``-``. Errors are no longer raised for missing data\n  with ``isolated_as_missing=True``; the error types returned for bad alleles\n  (e.g. 
multiletter or non-ascii) have also changed from ``_tskit.LibraryError``\n  to TypeError, or ValueError if the missing data character clashes\n  (:user:`hyanwong`, :pr:`426`).\n\n- Access the number of children of a node in a tree directly using\n  ``tree.num_children(u)`` (:user:`hyanwong`, :pr:`436`).\n\n- User specified allele mapping for genotypes in ``variants`` and\n  ``genotype_matrix`` (:user:`jeromekelleher`, :pr:`430`).\n\n- New ``root_threshold`` option for the Tree class, which allows\n  us to efficiently iterate over 'real' roots when we have\n  missing data (:user:`jeromekelleher`, :pr:`462`).\n\n- Add pickle support for ``TreeSequence`` (:user:`terhorst`, :pr:`473`).\n\n- Add ``tree.as_dict_of_dicts()`` function to enable use with networkx. See\n  :ref:`sec_tutorial_networkx` (:user:`winni2k`, :pr:`457`).\n\n- Add ``tree_sequence.to_macs()`` function to convert tree sequence to MACS\n  format (:user:`winni2k`, :pr:`727`)\n\n- Add a ``keep_input_roots`` option to simplify which, if enabled, adds edges\n  from the MRCAs of samples in the simplified tree sequence back to the roots\n  in the input tree sequence (:user:`jeromekelleher`, :issue:`775`, :pr:`782`).\n\n**Bugfixes**\n\n- :issue:`453` - Fix LibraryError when ``tree.newick()`` is called with large node time\n  values (:user:`jeromekelleher`, :pr:`637`).\n\n- :issue:`777` - Mutations over isolated samples were incorrectly decoded as\n  missing data. (:user:`jeromekelleher`, :pr:`778`)\n\n- :issue:`776` - Fix a segfault when a partial list of samples\n  was provided to the ``variants`` iterator. (:user:`jeromekelleher`, :pr:`778`)\n\n**Deprecated**\n\n- The ``sample_counts`` feature has been deprecated and is now\n  ignored. Sample counts are now always computed.\n\n- For ``TreeSequence.genotype_matrix``, ``TreeSequence.haplotypes``\n  and ``TreeSequence.variants`` the ``impute_missing_data`` argument is deprecated\n  and replaced with ``isolated_as_missing``. 
Note that to get the same behaviour\n  ``impute_missing_data=True`` should be replaced with ``isolated_as_missing=False``.\n  (:user:`benjeffery`, :issue:`716`, :pr:`794`)\n\n--------------------\n[0.2.3] - 2019-11-22\n--------------------\n\nMinor feature release, providing a tree distance metric and various\nmethods to manipulate tree sequence data.\n\n**New features**\n\n- Kendall-Colijn tree distance metric computed by ``Tree.kc_distance``\n  (:user:`awohns`, :pr:`172`).\n- New \"timeasc\" and \"timedesc\" orders for tree traversals\n  (:user:`benjeffery`, :issue:`246`, :pr:`399`).\n- Up to 2X performance improvements to tree traversals (:user:`benjeffery`,\n  :pr:`400`).\n- Add ``trim``, ``delete_sites``, ``keep_intervals`` and ``delete_intervals``\n  methods to edit tree sequence data. (:user:`hyanwong`, :pr:`364`,\n  :pr:`372`, :pr:`377`, :pr:`390`).\n- Initial online documentation for CLI (:user:`hyanwong`, :pr:`414`).\n- Various documentation improvements (:user:`hyanwong`, :user:`jeromekelleher`,\n  :user:`petrelharp`).\n- Rename the ``map_ancestors`` function to ``link_ancestors``\n  (:user:`hyanwong`, :user:`gtsambos`; :pr:`406`,\n  :issue:`262`). The original function is retained as a deprecated alias.\n\n**Bugfixes**\n\n- Fix height scaling issues with SVG tree drawing (:user:`jeromekelleher`,\n  :pr:`407`, :issue:`383`, :pr:`378`).\n- Do not reuse buffers in ``LdCalculator`` (:user:`jeromekelleher`). See :pr:`397` and\n  :issue:`396`.\n\n--------------------\n[0.2.2] - 2019-09-01\n--------------------\n\nMinor bugfix release.\n\nRelaxes overly-strict input requirements on individual location data that\ncaused some SLiM tree sequences to fail loading in version 0.2.1\n(see :issue:`351`).\n\n**New features**\n\n- Add log_time height scaling option for drawing SVG trees\n  (:user:`marianne-aspbury`). See :pr:`324` and :issue:`303`.\n\n**Bugfixes**\n\n- Allow 4G metadata columns (:user:`jeromekelleher`). 
See :pr:`342` and\n  :issue:`341`.\n\n\n--------------------\n[0.2.1] - 2019-08-23\n--------------------\n\nMajor feature release, adding support for population genetic statistics,\nimproved VCF output and many other features.\n\n**Note:** Version 0.2.0 was skipped because of an error uploading to PyPI\nwhich could not be undone.\n\n**Breaking changes**\n\n- Genotype arrays returned by ``TreeSequence.variants`` and\n  ``TreeSequence.genotype_matrix`` have changed from unsigned 8 bit values\n  to signed 8 bit values to accommodate missing data (see :issue:`144` for\n  discussion). Specifically, the dtype of the genotypes arrays has changed\n  from numpy \"u8\" to \"i8\". This should not affect client code in any way\n  unless it specifically depends on the type of the returned numpy array.\n\n- The VCF written by ``write_vcf`` is no longer compatible with previous\n  versions, which had significant shortcomings. Position values are now rounded\n  to the nearest integer by default, REF and ALT values are derived from the\n  actual allelic states (rather than always being A and T). Sample names\n  are now of the form ``tsk_j`` for sample ID j. Most of the legacy behaviour\n  can be recovered with new options, however.\n\n- The positional parameter ``reference_sets`` in ``genealogical_nearest_neighbours``\n  and ``mean_descendants`` TreeSequence methods has been renamed to\n  ``sample_sets``.\n\n**New features**\n\n- Support for general windowed statistics. Implementations of diversity,\n  divergence, segregating sites, Tajima's D, Fst, Patterson's F statistics,\n  Y statistics, trait correlations and covariance, and k-dimensional allele\n  frequency spectra (:user:`petrelharp`, :user:`jeromekelleher`, :user:`molpopgen`).\n\n- Add the ``keep_unary`` option to simplify (:user:`gtsambos`). See :issue:`1`\n  and :pr:`143`.\n\n- Add the ``map_ancestors`` method to TableCollection (:user:`gtsambos`). 
See :pr:`175`.\n\n- Add the ``squash`` method to EdgeTable (:user:`gtsambos`). See :issue:`59` and\n  :pr:`285`.\n\n- Add support for individuals to VCF output, and fix major issues with output\n  format (:user:`jeromekelleher`). Position values are transformed in a much\n  more straightforward manner and output has been generalised substantially.\n  Adds ``individual_names`` and ``position_transform`` arguments.\n  See :pr:`286`, and issues :issue:`2`, :issue:`30` and :issue:`73`.\n\n- Control height scale in SVG trees using 'tree_height_scale' and 'max_tree_height'\n  (:user:`hyanwong`, :user:`jeromekelleher`). See :issue:`167`, :pr:`168`.\n  Various other improvements to tree drawing (:pr:`235`, :pr:`241`, :pr:`242`,\n  :pr:`252`, :pr:`259`).\n\n- Add ``Tree.max_root_time`` property (:user:`hyanwong`, :user:`jeromekelleher`).\n  See :pr:`170`.\n\n- Improved input checking on various methods taking numpy arrays as parameters\n  (:user:`hyanwong`). See :issue:`8` and :pr:`185`.\n\n- Define the branch length over roots in trees to be zero (previously raise\n  an error; :user:`jeromekelleher`). See :issue:`188` and :pr:`191`.\n\n- Implementation of the genealogical nearest neighbours statistic\n  (:user:`hyanwong`, :user:`jeromekelleher`).\n\n- New ``delete_intervals`` and ``keep_intervals`` method for the TableCollection\n  to allow slicing out of topology from specific intervals (:user:`hyanwong`,\n  :user:`andrewkern`, :user:`petrelharp`, :user:`jeromekelleher`). 
See\n  :pr:`225` and :pr:`261`.\n\n- Support for missing data via a topological definition (:user:`jeromekelleher`).\n  See :issue:`270` and :pr:`272`.\n\n- Add ability to set columns directly in the Tables API (:user:`jeromekelleher`).\n  See :issue:`12` and :pr:`307`.\n\n- Various documentation improvements from :user:`brianzhang01`, :user:`hyanwong`,\n  :user:`petrelharp` and :user:`jeromekelleher`.\n\n**Deprecated**\n\n- Deprecate ``Tree.length`` in favour of ``Tree.span`` (:user:`hyanwong`).\n  See :pr:`169`.\n\n- Deprecate ``TreeSequence.pairwise_diversity`` in favour of the new\n  ``diversity`` method. See :issue:`215`, :pr:`312`.\n\n**Bugfixes**\n\n- Catch NaN and infinity values within tables (:user:`hyanwong`).\n  See :issue:`293` and :pr:`294`.\n\n--------------------\n[0.1.5] - 2019-03-27\n--------------------\n\nThis release removes support for Python 2, adds more flexible tree access and a\nnew ``tskit`` command line interface.\n\n**New features**\n\n- Remove support for Python 2 (:user:`hugovk`). See :issue:`137` and :pr:`140`.\n- More flexible tree API (:pr:`121`). Adds ``TreeSequence.at`` and\n  ``TreeSequence.at_index`` methods to find specific trees, and efficient support\n  for backwards traversal using ``reversed(ts.trees())``.\n- Add initial ``tskit`` CLI (:issue:`80`)\n- Add ``tskit info`` CLI command (:issue:`66`)\n- Enable drawing SVG trees with coloured edges (:user:`hyanwong`; :issue:`149`).\n- Add ``Tree.is_descendant`` method (:issue:`120`)\n- Add ``Tree.copy`` method (:issue:`122`)\n\n**Bugfixes**\n\n- Fixes to the low-level C API (:issue:`132` and :issue:`157`)\n\n\n--------------------\n[0.1.4] - 2019-02-01\n--------------------\n\n\nMinor feature update. 
Using the C API 0.99.1.\n\n**New features**\n\n- Add interface for setting TableCollection.sequence_length:\n  https://github.com/tskit-dev/tskit/issues/107\n- Add support for building and dropping TableCollection indexes:\n  https://github.com/tskit-dev/tskit/issues/108\n\n\n--------------------\n[0.1.3] - 2019-01-14\n--------------------\n\nBugfix release.\n\n**Bugfixes**\n\n- Fix missing provenance schema: https://github.com/tskit-dev/tskit/issues/81\n\n--------------------\n[0.1.2] - 2019-01-14\n--------------------\n\nBugfix release.\n\n**Bugfixes**\n\n- Fix memory leak in table collection. https://github.com/tskit-dev/tskit/issues/76\n\n--------------------\n[0.1.1] - 2019-01-11\n--------------------\n\nFixes broken distribution tarball for 0.1.0.\n\n--------------------\n[0.1.0] - 2019-01-11\n--------------------\n\nInitial release after separation from msprime 0.6.2. Code that reads tree sequence\nfiles and processes them should be able to work without changes.\n\n**Breaking changes**\n\n- Removal of the previously deprecated ``sort_tables``, ``simplify_tables``\n  and ``load_tables`` functions. All code should change to using corresponding\n  TableCollection methods.\n\n- Rename ``SparseTree`` class to ``Tree``.\n\n----------------------\n[1.1.0a1] - 2019-01-10\n----------------------\n\nInitial alpha version posted to PyPI for bootstrapping.\n\n--------------------\n[0.0.0] - 2019-01-10\n--------------------\n\nInitial extraction of tskit code from msprime. Relicense to MIT.\n\nCode copied at hash 29921408661d5fe0b1a82b1ca302a8b87510fd23\n"
  },
  {
    "path": "python/MANIFEST.in",
    "content": "include LICENSE\ninclude lwt_interface/tskit_lwt_interface.h\ninclude lib/subprojects/kastore/kastore.h\ninclude lib/tskit.h\ninclude lib/tskit/*.h\ninclude tskit/_version.py\ninclude tskit/provenance.schema.json\n"
  },
  {
    "path": "python/Makefile",
    "content": "all: ext3\n\nallchecks: _tskitmodule.c\n\tCFLAGS=\"-std=c99 --coverage -Wall -Wextra -Werror -Wno-unused-parameter -Wno-cast-function-type\" \\\n\tuv run python setup.py build_ext --inplace\n\next3: _tskitmodule.c\n\tuv run python setup.py build_ext --inplace\n\nctags:\n\tctags lib/*.c lib/*.h tskit/*.py\n\nclean:\n\trm -f *.so *.o tags\n\trm -fR build\n"
  },
  {
    "path": "python/README.rst",
    "content": "\nThe tree sequence toolkit.\n\nTskit is a cross-platform library for the storage and analysis of large-scale\ngenetic genealogy and variation data.\nPlease see the `documentation <https://tskit.dev/tskit/docs/stable/>`_\nfor further details.\n\nTskit is highly portable, and provides a number of\n`installation options <https://tskit.dev/tskit/docs/stable/installation.html>`_.\n"
  },
  {
    "path": "python/_tskitmodule.c",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2025 Tskit Developers\n * Copyright (c) 2015-2018 University of Oxford\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n#define TSK_BUG_ASSERT_MESSAGE                                                          \\\n    \"Please open an issue on\"                                                           \\\n    \" GitHub, ideally with a reproducible example.\"                                     \\\n    \" (https://github.com/tskit-dev/tskit/issues)\"\n\n#define PY_SSIZE_T_CLEAN\n#include <Python.h>\n#include <numpy/numpyconfig.h>\n\n#if defined(NPY_2_0_API_VERSION) && NPY_API_VERSION >= NPY_2_0_API_VERSION\n#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION\n#undef NPY_FEATURE_VERSION\n#define NPY_FEATURE_VERSION NPY_2_0_API_VERSION\n#define HAVE_NUMPY_2        1\n#else\n#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION\n#define HAVE_NUMPY_2          0\n#endif\n#include 
<numpy/arrayobject.h>\n\n#include <structmember.h>\n#include <float.h>\n\n#include \"kastore.h\"\n#include \"tskit.h\"\n\n#define SET_COLS    0\n#define APPEND_COLS 1\n\n/* TskitException is the superclass of all exceptions that can be thrown by\n * tskit. We define it here in the low-level library so that exceptions defined\n * here and in the high-level library can inherit from it.\n */\nstatic PyObject *TskitException;\nstatic PyObject *TskitLibraryError;\nstatic PyObject *TskitFileFormatError;\nstatic PyObject *TskitVersionTooOldError;\nstatic PyObject *TskitVersionTooNewError;\nstatic PyObject *TskitIdentityPairsNotStoredError;\nstatic PyObject *TskitIdentitySegmentsNotStoredError;\nstatic PyObject *TskitNoSampleListsError;\n\n#include \"tskit_lwt_interface.h\"\n\n// clang-format off\n\n/* The XTable classes each have 'lock' attribute, which is used to\n * raise an error if a Python thread attempts to access a table\n * while another Python thread is operating on it. Because tables\n * allocate memory dynamically, we cannot guarantee safety otherwise.\n * The locks are set before the GIL is released and unset afterwards.\n * Because C code executed here represents atomic Python operations\n * (while the GIL is held), this should be safe */\n\ntypedef struct _TableCollection {\n    PyObject_HEAD\n    tsk_table_collection_t *tables;\n} TableCollection;\n\n /* The table pointer in each of the Table classes either points to locally\n  * allocated memory or to the table stored in a tbl_collection_t. If we're\n  * using the memory in a tbl_collection_t, we keep a reference to the\n  * TableCollection object to ensure that the memory isn't free'd while a\n  * reference to the table itself is live. 
*/\ntypedef struct {\n    PyObject_HEAD\n    bool locked;\n    tsk_individual_table_t *table;\n    TableCollection *tables;\n} IndividualTable;\n\ntypedef struct {\n    PyObject_HEAD\n    bool locked;\n    tsk_node_table_t *table;\n    TableCollection *tables;\n} NodeTable;\n\ntypedef struct {\n    PyObject_HEAD\n    bool locked;\n    tsk_edge_table_t *table;\n    TableCollection *tables;\n} EdgeTable;\n\ntypedef struct {\n    PyObject_HEAD\n    bool locked;\n    tsk_site_table_t *table;\n    TableCollection *tables;\n} SiteTable;\n\ntypedef struct {\n    PyObject_HEAD\n    bool locked;\n    tsk_mutation_table_t *table;\n    TableCollection *tables;\n} MutationTable;\n\ntypedef struct {\n    PyObject_HEAD\n    bool locked;\n    tsk_migration_table_t *table;\n    TableCollection *tables;\n} MigrationTable;\n\ntypedef struct {\n    PyObject_HEAD\n    bool locked;\n    tsk_population_table_t *table;\n    TableCollection *tables;\n} PopulationTable;\n\ntypedef struct {\n    PyObject_HEAD\n    bool locked;\n    tsk_provenance_table_t *table;\n    TableCollection *tables;\n} ProvenanceTable;\n\ntypedef struct {\n    PyObject_HEAD\n    tsk_treeseq_t *tree_sequence;\n} TreeSequence;\n\ntypedef struct {\n    PyObject_HEAD\n    TreeSequence *tree_sequence;\n    tsk_tree_t *tree;\n} Tree;\n\ntypedef struct {\n    PyObject_HEAD\n    TreeSequence *tree_sequence;\n    tsk_variant_t *variant;\n} Variant;\n\ntypedef struct {\n    PyObject_HEAD\n    TreeSequence *tree_sequence;\n    tsk_ld_calc_t *ld_calc;\n} LdCalculator;\n\ntypedef struct {\n    PyObject_HEAD\n    TreeSequence *tree_sequence;\n    tsk_ls_hmm_t *ls_hmm;\n} LsHmm;\n\ntypedef struct {\n    PyObject_HEAD\n    TreeSequence *tree_sequence;\n    tsk_compressed_matrix_t *compressed_matrix;\n} CompressedMatrix;\n\ntypedef struct {\n    PyObject_HEAD\n    TreeSequence *tree_sequence;\n    tsk_viterbi_matrix_t *viterbi_matrix;\n} ViterbiMatrix;\n\ntypedef struct {\n    PyObject_HEAD\n    PyObject *owner;\n    bool 
read_only;\n    tsk_reference_sequence_t *reference_sequence;\n} ReferenceSequence;\n\ntypedef struct {\n    PyObject_HEAD\n    tsk_identity_segments_t *identity_segments;\n} IdentitySegments;\n\ntypedef struct {\n    PyObject_HEAD\n    /* Keep a reference to the parent object to ensure that the memory\n     * behind the segment list is always valid */\n    IdentitySegments *identity_segments;\n    tsk_identity_segment_list_t *segment_list;\n} IdentitySegmentList;\n\n/* A named tuple of metadata schemas for a tree sequence */\nstatic PyTypeObject MetadataSchemas;\n\nstatic PyStructSequence_Field metadata_schemas_fields[] = {\n    { \"node\", \"The node metadata schema\" },\n    { \"edge\", \"The edge metadata schema\" },\n    { \"site\", \"The site metadata schema\" },\n    { \"mutation\", \"The mutation metadata schema\" },\n    { \"migration\", \"The migration metadata schema\" },\n    { \"individual\", \"The individual metadata schema\" },\n    { \"population\", \"The population metadata schema\" },\n    { NULL }\n};\n\nstatic PyStructSequence_Desc metadata_schemas_desc = {\n    .name = \"MetadataSchemas\",\n    .doc = \"Namedtuple of metadata schemas for this tree sequence\",\n    .fields = metadata_schemas_fields,\n    .n_in_sequence = 7\n};\n\n// clang-format on\n\nstatic void\nhandle_library_error(int err)\n{\n    int kas_err;\n    const char *not_kas_format_msg\n        = \"File not in kastore format. Either the file is corrupt or it is not a \"\n          \"tskit tree sequence file. It may be a legacy HDF file upgradable with \"\n          \"`tskit upgrade` from tskit version<0.6.2 or a compressed tree sequence file \"\n          \"that can be decompressed \"\n          \"with `tszip`.\";\n    const char *ibd_pairs_not_stored_msg\n        = \"Sample pairs are not stored by default \"\n          \"in the IdentitySegments object returned by ibd_segments(), and you have \"\n          \"attempted to access functionality that requires them. 
Please use the \"\n          \"store_pairs=True option to identity_segments (but beware this will need more \"\n          \"time and memory).\";\n    const char *identity_segments_not_stored_msg\n        = \"The individual IBD segments are not \"\n          \"stored by default in the IdentitySegments object returned by ibd_segments(), \"\n          \"and you have attempted to access functionality that requires them. \"\n          \"Please use the store_segments=True option to ibd_segments \"\n          \"(but beware this will need more time and memory).\";\n    const char *no_sample_lists_msg\n        = \"This method requires that sample lists are stored in the Tree object. \"\n          \"Please pass sample_lists=True option to the function that created the \"\n          \"Tree object. For example ts.trees(sample_lists=True).\";\n    if (tsk_is_kas_error(err)) {\n        kas_err = tsk_get_kas_error(err);\n        switch (kas_err) {\n            case KAS_ERR_BAD_FILE_FORMAT:\n                PyErr_SetString(TskitFileFormatError, not_kas_format_msg);\n                break;\n            default:\n                PyErr_SetString(TskitFileFormatError, tsk_strerror(err));\n        }\n    } else {\n        switch (err) {\n            case TSK_ERR_FILE_VERSION_TOO_NEW:\n                PyErr_SetString(TskitVersionTooNewError, tsk_strerror(err));\n                break;\n            case TSK_ERR_FILE_VERSION_TOO_OLD:\n                PyErr_SetString(TskitVersionTooOldError, tsk_strerror(err));\n                break;\n            case TSK_ERR_FILE_FORMAT:\n                PyErr_SetString(TskitFileFormatError, tsk_strerror(err));\n                break;\n            case TSK_ERR_BAD_COLUMN_TYPE:\n                PyErr_SetString(TskitFileFormatError, tsk_strerror(err));\n                break;\n            case TSK_ERR_IBD_PAIRS_NOT_STORED:\n                PyErr_SetString(\n                    TskitIdentityPairsNotStoredError, ibd_pairs_not_stored_msg);\n                
break;\n            case TSK_ERR_IBD_SEGMENTS_NOT_STORED:\n                PyErr_SetString(TskitIdentitySegmentsNotStoredError,\n                    identity_segments_not_stored_msg);\n                break;\n            case TSK_ERR_NO_SAMPLE_LISTS:\n                PyErr_SetString(TskitNoSampleListsError, no_sample_lists_msg);\n                break;\n            case TSK_ERR_IO:\n                /* Note this case isn't covered by tests because it's actually\n                 * quite hard to provoke. Attempting to write to a read-only\n                 * file etc errors are caught before we go down to the C API.\n                 */\n                PyErr_SetFromErrno(PyExc_OSError);\n                break;\n            case TSK_ERR_EOF:\n                PyErr_Format(PyExc_EOFError, \"End of file\");\n                break;\n            default:\n                PyErr_SetString(TskitLibraryError, tsk_strerror(err));\n        }\n    }\n}\n\nstatic PyObject *\nconvert_node_id_list(tsk_id_t *children, tsk_size_t num_children)\n{\n    PyObject *ret = NULL;\n    PyObject *t;\n    PyObject *py_int;\n    tsk_size_t j;\n\n    t = PyTuple_New(num_children);\n    if (t == NULL) {\n        goto out;\n    }\n    for (j = 0; j < num_children; j++) {\n        py_int = Py_BuildValue(\"i\", (int) children[j]);\n        if (py_int == NULL) {\n            Py_DECREF(t);\n            goto out;\n        }\n        PyTuple_SET_ITEM(t, j, py_int);\n    }\n    ret = t;\nout:\n    return ret;\n}\n\nstatic PyObject *\nmake_metadata(const char *metadata, Py_ssize_t length)\n{\n    const char *m = metadata == NULL ? 
\"\" : metadata;\n    return PyBytes_FromStringAndSize(m, length);\n}\n\nstatic PyObject *\nmake_mutation_row(const tsk_mutation_t *mutation)\n{\n    PyObject *ret = NULL;\n    PyObject *metadata = NULL;\n\n    metadata = make_metadata(mutation->metadata, (Py_ssize_t) mutation->metadata_length);\n    if (metadata == NULL) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"iis#iOd\", mutation->site, mutation->node,\n        mutation->derived_state, (Py_ssize_t) mutation->derived_state_length,\n        mutation->parent, metadata, mutation->time);\nout:\n    Py_XDECREF(metadata);\n    return ret;\n}\n\nstatic PyObject *\nmake_mutation_object(const tsk_mutation_t *mutation)\n{\n    PyObject *ret = NULL;\n    PyObject *metadata = NULL;\n\n    metadata = make_metadata(mutation->metadata, (Py_ssize_t) mutation->metadata_length);\n    if (metadata == NULL) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"iis#iOdis#\", mutation->site, mutation->node,\n        mutation->derived_state, (Py_ssize_t) mutation->derived_state_length,\n        mutation->parent, metadata, mutation->time, mutation->edge,\n        mutation->inherited_state, (Py_ssize_t) mutation->inherited_state_length);\nout:\n    Py_XDECREF(metadata);\n    return ret;\n}\n\nstatic PyObject *\nmake_mutation_id_list(const tsk_mutation_t *mutations, tsk_size_t length)\n{\n    PyObject *ret = NULL;\n    PyObject *t;\n    PyObject *item;\n    tsk_size_t j;\n\n    t = PyTuple_New(length);\n    if (t == NULL) {\n        goto out;\n    }\n    for (j = 0; j < length; j++) {\n        item = Py_BuildValue(\"i\", mutations[j].id);\n        if (item == NULL) {\n            Py_DECREF(t);\n            goto out;\n        }\n        PyTuple_SET_ITEM(t, j, item);\n    }\n    ret = t;\nout:\n    return ret;\n}\n\nstatic PyObject *\nmake_population(const tsk_population_t *population)\n{\n    PyObject *ret = NULL;\n    PyObject *metadata\n        = make_metadata(population->metadata, (Py_ssize_t) 
population->metadata_length);\n    if (metadata == NULL) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"(O)\", metadata);\nout:\n    Py_XDECREF(metadata);\n    return ret;\n}\n\nstatic PyObject *\nmake_provenance(const tsk_provenance_t *provenance)\n{\n    PyObject *ret = NULL;\n\n    ret = Py_BuildValue(\"s#s#\", provenance->timestamp,\n        (Py_ssize_t) provenance->timestamp_length, provenance->record,\n        (Py_ssize_t) provenance->record_length);\n    return ret;\n}\n\nstatic PyObject *\nmake_individual_row(const tsk_individual_t *r)\n{\n    PyObject *ret = NULL;\n    PyObject *metadata = make_metadata(r->metadata, (Py_ssize_t) r->metadata_length);\n    PyArrayObject *location = NULL;\n    PyArrayObject *parents = NULL;\n    npy_intp dims;\n\n    dims = (npy_intp) r->location_length;\n    location = (PyArrayObject *) PyArray_SimpleNew(1, &dims, NPY_FLOAT64);\n    if (metadata == NULL || location == NULL) {\n        goto out;\n    }\n    memcpy(PyArray_DATA(location), r->location, r->location_length * sizeof(double));\n    dims = (npy_intp) r->parents_length;\n    parents = (PyArrayObject *) PyArray_SimpleNew(1, &dims, NPY_INT32);\n    if (metadata == NULL || parents == NULL) {\n        goto out;\n    }\n    memcpy(PyArray_DATA(parents), r->parents, r->parents_length * sizeof(tsk_id_t));\n    ret = Py_BuildValue(\"IOOO\", (unsigned int) r->flags, location, parents, metadata);\nout:\n    Py_XDECREF(location);\n    Py_XDECREF(parents);\n    Py_XDECREF(metadata);\n    return ret;\n}\n\nstatic PyObject *\nmake_individual_object(const tsk_individual_t *r)\n{\n    PyObject *ret = NULL;\n    PyObject *metadata = make_metadata(r->metadata, (Py_ssize_t) r->metadata_length);\n    PyArrayObject *location = NULL;\n    PyArrayObject *parents = NULL;\n    PyArrayObject *nodes = NULL;\n    npy_intp dims;\n\n    dims = (npy_intp) r->location_length;\n    location = (PyArrayObject *) PyArray_SimpleNew(1, &dims, NPY_FLOAT64);\n    dims = (npy_intp) 
r->parents_length;\n    parents = (PyArrayObject *) PyArray_SimpleNew(1, &dims, NPY_INT32);\n    dims = (npy_intp) r->nodes_length;\n    nodes = (PyArrayObject *) PyArray_SimpleNew(1, &dims, NPY_INT32);\n    if (metadata == NULL || location == NULL || parents == NULL || nodes == NULL) {\n        goto out;\n    }\n    memcpy(PyArray_DATA(location), r->location, r->location_length * sizeof(double));\n    memcpy(PyArray_DATA(parents), r->parents, r->parents_length * sizeof(tsk_id_t));\n    memcpy(PyArray_DATA(nodes), r->nodes, r->nodes_length * sizeof(tsk_id_t));\n    ret = Py_BuildValue(\n        \"IOOOO\", (unsigned int) r->flags, location, parents, metadata, nodes);\nout:\n    Py_XDECREF(location);\n    Py_XDECREF(parents);\n    Py_XDECREF(metadata);\n    Py_XDECREF(nodes);\n    return ret;\n}\n\nstatic PyObject *\nmake_node(const tsk_node_t *r)\n{\n    PyObject *ret = NULL;\n    PyObject *metadata = make_metadata(r->metadata, (Py_ssize_t) r->metadata_length);\n    if (metadata == NULL) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"IdiiO\", (unsigned int) r->flags, r->time, (int) r->population,\n        (int) r->individual, metadata);\nout:\n    Py_XDECREF(metadata);\n    return ret;\n}\n\nstatic PyObject *\nmake_edge(const tsk_edge_t *edge, bool include_id)\n{\n    PyObject *ret = NULL;\n    PyObject *metadata\n        = make_metadata(edge->metadata, (Py_ssize_t) edge->metadata_length);\n    if (metadata == NULL) {\n        goto out;\n    }\n    if (include_id) {\n        ret = Py_BuildValue(\"ddiiOi\", edge->left, edge->right, (int) edge->parent,\n            (int) edge->child, metadata, edge->id);\n    } else {\n        ret = Py_BuildValue(\"ddiiO\", edge->left, edge->right, (int) edge->parent,\n            (int) edge->child, metadata);\n    }\nout:\n    Py_XDECREF(metadata);\n    return ret;\n}\n\nstatic PyObject *\nmake_migration(const tsk_migration_t *r)\n{\n    int source = r->source == TSK_NULL ? 
-1 : r->source;\n    int dest = r->dest == TSK_NULL ? -1 : r->dest;\n    PyObject *ret = NULL;\n    PyObject *metadata = make_metadata(r->metadata, (Py_ssize_t) r->metadata_length);\n    if (metadata == NULL) {\n        goto out;\n    }\n    ret = Py_BuildValue(\n        \"ddiiidO\", r->left, r->right, (int) r->node, source, dest, r->time, metadata);\nout:\n    Py_XDECREF(metadata);\n    return ret;\n}\n\nstatic PyObject *\nmake_site_row(const tsk_site_t *site)\n{\n    PyObject *ret = NULL;\n    PyObject *metadata = NULL;\n\n    metadata = make_metadata(site->metadata, (Py_ssize_t) site->metadata_length);\n    if (metadata == NULL) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"ds#O\", site->position, site->ancestral_state,\n        (Py_ssize_t) site->ancestral_state_length, metadata);\nout:\n    Py_XDECREF(metadata);\n    return ret;\n}\n\nstatic PyObject *\nmake_site_object(const tsk_site_t *site)\n{\n    PyObject *ret = NULL;\n    PyObject *mutations = NULL;\n    PyObject *metadata = NULL;\n\n    metadata = make_metadata(site->metadata, (Py_ssize_t) site->metadata_length);\n    if (metadata == NULL) {\n        goto out;\n    }\n    mutations = make_mutation_id_list(site->mutations, site->mutations_length);\n    if (mutations == NULL) {\n        goto out;\n    }\n    /* TODO should reorder this tuple, as it's not very logical. 
*/\n    ret = Py_BuildValue(\"ds#OnO\", site->position, site->ancestral_state,\n        (Py_ssize_t) site->ancestral_state_length, mutations, (Py_ssize_t) site->id,\n        metadata);\nout:\n    Py_XDECREF(mutations);\n    Py_XDECREF(metadata);\n    return ret;\n}\n\nstatic PyObject *\nmake_alleles(tsk_variant_t *variant)\n{\n    PyObject *ret = NULL;\n    PyObject *item, *t;\n    tsk_size_t j;\n\n    t = PyTuple_New(variant->num_alleles + variant->has_missing_data);\n    if (t == NULL) {\n        goto out;\n    }\n    for (j = 0; j < variant->num_alleles; j++) {\n        item = Py_BuildValue(\n            \"s#\", variant->alleles[j], (Py_ssize_t) variant->allele_lengths[j]);\n        if (item == NULL) {\n            Py_DECREF(t);\n            goto out;\n        }\n        PyTuple_SET_ITEM(t, j, item);\n    }\n    if (variant->has_missing_data) {\n        item = Py_BuildValue(\"\");\n        if (item == NULL) {\n            Py_DECREF(t);\n            goto out;\n        }\n        PyTuple_SET_ITEM(t, variant->num_alleles, item);\n    }\n    ret = t;\nout:\n    return ret;\n}\n\nstatic PyObject *\nmake_samples(tsk_variant_t *variant)\n{\n    PyObject *ret = NULL;\n\n    PyArrayObject *samples = NULL;\n    npy_intp dims;\n\n    dims = (npy_intp) variant->num_samples;\n    samples = (PyArrayObject *) PyArray_SimpleNew(1, &dims, NPY_INT32);\n    if (samples == NULL) {\n        goto out;\n    }\n    memcpy(PyArray_DATA(samples), variant->samples,\n        variant->num_samples * sizeof(tsk_id_t));\n    ret = (PyObject *) samples;\nout:\n    return ret;\n}\n\nstatic PyObject *\nconvert_sites(const tsk_site_t *sites, tsk_size_t num_sites)\n{\n    PyObject *ret = NULL;\n    PyObject *l = NULL;\n    PyObject *py_site = NULL;\n    tsk_size_t j;\n\n    l = PyList_New(num_sites);\n    if (l == NULL) {\n        goto out;\n    }\n    for (j = 0; j < num_sites; j++) {\n        py_site = make_site_object(&sites[j]);\n        if (py_site == NULL) {\n            Py_DECREF(l);\n       
     goto out;\n        }\n        PyList_SET_ITEM(l, j, py_site);\n    }\n    ret = l;\nout:\n    return ret;\n}\n\nstatic PyObject *\nconvert_transitions(tsk_state_transition_t *transitions, tsk_size_t num_transitions)\n{\n    PyObject *ret = NULL;\n    PyObject *l = NULL;\n    PyObject *py_transition = NULL;\n    tsk_size_t j;\n\n    l = PyList_New(num_transitions);\n    if (l == NULL) {\n        goto out;\n    }\n    for (j = 0; j < num_transitions; j++) {\n        py_transition = Py_BuildValue(\n            \"iii\", transitions[j].node, transitions[j].parent, transitions[j].state);\n        if (py_transition == NULL) {\n            Py_DECREF(l);\n            goto out;\n        }\n        PyList_SET_ITEM(l, j, py_transition);\n    }\n    ret = l;\nout:\n    return ret;\n}\n\n/* TODO: this should really be a dict we're returning */\nstatic PyObject *\nconvert_compressed_matrix_site(tsk_compressed_matrix_t *matrix, unsigned int site)\n{\n    PyObject *ret = NULL;\n    PyObject *list = NULL;\n    PyObject *item = NULL;\n    tsk_size_t j, num_values;\n\n    if (site >= matrix->num_sites) {\n        PyErr_SetString(PyExc_ValueError, \"Site index out of bounds\");\n        goto out;\n    }\n\n    num_values = matrix->num_transitions[site];\n    list = PyList_New(num_values);\n    if (list == NULL) {\n        goto out;\n    }\n    for (j = 0; j < num_values; j++) {\n        item = Py_BuildValue(\"id\", matrix->nodes[site][j], matrix->values[site][j]);\n        if (item == NULL) {\n            goto out;\n        }\n        PyList_SET_ITEM(list, j, item);\n        item = NULL;\n    }\n    ret = list;\n    list = NULL;\nout:\n    Py_XDECREF(item);\n    Py_XDECREF(list);\n    return ret;\n}\n\nstatic PyObject *\ndecode_compressed_matrix(tsk_compressed_matrix_t *matrix)\n{\n    int err;\n    PyObject *ret = NULL;\n    PyArrayObject *decoded = NULL;\n    npy_intp dims[2];\n\n    dims[0] = tsk_treeseq_get_num_sites(matrix->tree_sequence);\n    dims[1] = 
tsk_treeseq_get_num_samples(matrix->tree_sequence);\n    decoded = (PyArrayObject *) PyArray_SimpleNew(2, dims, NPY_FLOAT64);\n    if (decoded == NULL) {\n        goto out;\n    }\n    err = tsk_compressed_matrix_decode(matrix, PyArray_DATA(decoded));\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) decoded;\n    decoded = NULL;\nout:\n    Py_XDECREF(decoded);\n    return ret;\n}\n\nstatic const char **\nparse_allele_list(PyObject *allele_tuple)\n{\n    const char **ret = NULL;\n    const char **alleles = NULL;\n    PyObject *str;\n    Py_ssize_t j, num_alleles;\n\n    if (!PyTuple_Check(allele_tuple)) {\n        PyErr_SetString(PyExc_TypeError, \"Fixed allele list must be a tuple\");\n        goto out;\n    }\n\n    num_alleles = PyTuple_Size(allele_tuple);\n    if (num_alleles == 0) {\n        PyErr_SetString(PyExc_ValueError, \"Must specify at least one allele\");\n        goto out;\n    }\n    /* Leave space for the sentinel, and initialise to NULL */\n    alleles = PyMem_Calloc(num_alleles + 1, sizeof(*alleles));\n    if (alleles == NULL) {\n        PyErr_NoMemory();\n        goto out;\n    }\n    for (j = 0; j < num_alleles; j++) {\n        str = PyTuple_GetItem(allele_tuple, j);\n        if (str == NULL) {\n            goto out;\n        }\n        if (!PyUnicode_Check(str)) {\n            PyErr_SetString(PyExc_TypeError, \"alleles must be strings\");\n            goto out;\n        }\n        /* PyUnicode_AsUTF8AndSize caches the UTF8 representation of the string\n         * within the object, and we're not responsible for freeing it. Thus,\n         * once we're sure the string object stays alive for the lifetime of the\n         * returned string, we can be sure it's safe. 
These strings are immediately\n         * copied during tsk_vargen_init, so the operation is safe.\n         */\n        alleles[j] = PyUnicode_AsUTF8AndSize(str, NULL);\n        if (alleles[j] == NULL) {\n            goto out;\n        }\n    }\n    ret = alleles;\n    alleles = NULL;\nout:\n    PyMem_Free(alleles);\n    return ret;\n}\n\nstatic int\nparse_sample_sets(PyObject *sample_set_sizes, PyArrayObject **ret_sample_set_sizes_array,\n    PyObject *sample_sets, PyArrayObject **ret_sample_sets_array,\n    tsk_size_t *ret_num_sample_sets)\n{\n    int ret = -1;\n    PyArrayObject *sample_set_sizes_array = NULL;\n    PyArrayObject *sample_sets_array = NULL;\n    npy_intp *shape;\n    tsk_size_t num_sample_sets = 0;\n    tsk_size_t j, sum;\n    uint64_t *a;\n\n    sample_set_sizes_array = (PyArrayObject *) PyArray_FROMANY(\n        sample_set_sizes, NPY_UINT64, 1, 1, NPY_ARRAY_IN_ARRAY);\n    if (sample_set_sizes_array == NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(sample_set_sizes_array);\n    num_sample_sets = shape[0];\n\n    /* The sum of the lengths in sample_set_sizes must be equal to the length\n     * of the sample_sets array */\n    sum = 0;\n    a = PyArray_DATA(sample_set_sizes_array);\n    for (j = 0; j < num_sample_sets; j++) {\n        if (sum + a[j] < sum) {\n            PyErr_SetString(PyExc_ValueError, \"Overflow in sample set sizes sum\");\n            goto out;\n        }\n        sum += a[j];\n    }\n\n    sample_sets_array = (PyArrayObject *) PyArray_FROMANY(\n        sample_sets, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);\n    if (sample_sets_array == NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(sample_sets_array);\n    if (sum != (tsk_size_t) shape[0]) {\n        PyErr_SetString(PyExc_ValueError,\n            \"Sum of sample_set_sizes must equal length of sample_sets array\");\n        goto out;\n    }\n    ret = 0;\nout:\n    *ret_sample_set_sizes_array = sample_set_sizes_array;\n    *ret_sample_sets_array = 
sample_sets_array;\n    *ret_num_sample_sets = num_sample_sets;\n    return ret;\n}\n\nstatic PyObject *\ntable_get_column_array(\n    tsk_size_t num_rows, void *data, int npy_type, size_t element_size)\n{\n    PyObject *ret = NULL;\n    PyArrayObject *array;\n    npy_intp dims = (npy_intp) num_rows;\n\n    array = (PyArrayObject *) PyArray_EMPTY(1, &dims, npy_type, 0);\n    if (array == NULL) {\n        goto out;\n    }\n    memcpy(PyArray_DATA(array), data, num_rows * element_size);\n    ret = (PyObject *) array;\nout:\n    return ret;\n}\n\nstatic PyObject *\ntable_get_offset_array(tsk_size_t num_rows, tsk_size_t *data)\n{\n    PyObject *ret = NULL;\n    PyArrayObject *array;\n    npy_intp dims = (npy_intp) num_rows + 1;\n\n    array = (PyArrayObject *) PyArray_EMPTY(1, &dims, NPY_UINT64, 0);\n    if (array == NULL) {\n        goto out;\n    }\n    memcpy(PyArray_DATA(array), data, dims * sizeof(*data));\n\n    ret = (PyObject *) array;\nout:\n    return ret;\n}\n\nstatic FILE *\nmake_file(PyObject *fileobj, const char *mode)\n{\n    FILE *ret = NULL;\n    FILE *file = NULL;\n    int fileobj_fd, new_fd;\n\n    fileobj_fd = PyObject_AsFileDescriptor(fileobj);\n    if (fileobj_fd == -1) {\n        goto out;\n    }\n    new_fd = dup(fileobj_fd);\n    if (new_fd == -1) {\n        PyErr_SetFromErrno(PyExc_OSError);\n        goto out;\n    }\n    file = fdopen(new_fd, mode);\n    if (file == NULL) {\n        (void) close(new_fd);\n        PyErr_SetFromErrno(PyExc_OSError);\n        goto out;\n    }\n    ret = file;\nout:\n    return ret;\n}\n\nstatic int\nuint32_converter(PyObject *py_obj, uint32_t *uint_out)\n{\n    long long temp_long;\n    int ret = 0;\n\n    if (!PyArg_Parse(py_obj, \"L\", &temp_long)) {\n        goto out;\n    }\n    if (temp_long > UINT32_MAX) {\n        PyErr_SetString(PyExc_OverflowError, \"unsigned int32 >= than 2^32\");\n        goto out;\n    }\n    if (temp_long < 0) {\n        PyErr_SetString(\n            PyExc_ValueError, \"Can't 
convert negative value to unsigned int\");\n        goto out;\n    }\n\n    uint_out[0] = (uint32_t) temp_long;\n    ret = 1;\nout:\n    return ret;\n}\n\nstatic int\ntsk_id_converter(PyObject *py_obj, tsk_id_t *id_out)\n{\n    long long temp_long;\n    int ret = 0;\n\n    if (!PyArg_Parse(py_obj, \"L\", &temp_long)) {\n        goto out;\n    }\n    if (temp_long > TSK_MAX_ID) {\n        PyErr_SetString(PyExc_OverflowError, \"Value too large for tskit id type\");\n        goto out;\n    }\n    if (temp_long < TSK_NULL) {\n        PyErr_SetString(\n            PyExc_ValueError, \"tskit ids must be NULL(-1), 0 or a positive number\");\n        goto out;\n    }\n\n    id_out[0] = (tsk_id_t) temp_long;\n    ret = 1;\nout:\n    return ret;\n}\n\nstatic int\narray_converter(int type, PyObject *py_obj, PyArrayObject **array_out)\n{\n    int ret = 0;\n    PyArrayObject *temp_array;\n\n    temp_array = (PyArrayObject *) PyArray_FromAny(\n        py_obj, PyArray_DescrFromType(type), 1, 1, NPY_ARRAY_IN_ARRAY, NULL);\n\n    if (temp_array == NULL) {\n        goto out;\n    }\n    *array_out = temp_array;\n    ret = 1;\nout:\n    return ret;\n}\n\nstatic int\nint32_array_converter(PyObject *py_obj, PyArrayObject **array_out)\n{\n    return array_converter(NPY_INT32, py_obj, array_out);\n}\n\nstatic int\nbool_array_converter(PyObject *py_obj, PyArrayObject **array_out)\n{\n    return array_converter(NPY_BOOL, py_obj, array_out);\n}\n\n/* Note: it doesn't seem to be possible to cast pointers to the actual\n * table functions to this type because the first argument must be a\n * void *, so the simplest option is to put in a small shim that\n * wraps the library function and casts to the correct table type.\n */\ntypedef int keep_row_func_t(\n    void *self, const tsk_bool_t *keep, tsk_flags_t options, tsk_id_t *id_map);\n\nstatic PyObject *\ntable_keep_rows(\n    PyObject *args, void *table, tsk_size_t num_rows, keep_row_func_t keep_row_func)\n{\n\n    PyObject *ret = NULL;\n    
PyArrayObject *keep = NULL;\n    PyArrayObject *id_map = NULL;\n    npy_intp n = (npy_intp) num_rows;\n    npy_intp array_len;\n    int err;\n\n    if (!PyArg_ParseTuple(args, \"O&\", &bool_array_converter, &keep)) {\n        goto out;\n    }\n    array_len = PyArray_DIMS(keep)[0];\n    if (array_len != n) {\n        PyErr_SetString(PyExc_ValueError, \"keep array must be of length Table.num_rows\");\n        goto out;\n    }\n    id_map = (PyArrayObject *) PyArray_SimpleNew(1, &n, NPY_INT32);\n    if (id_map == NULL) {\n        goto out;\n    }\n    err = keep_row_func(table, PyArray_DATA(keep), 0, PyArray_DATA(id_map));\n\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) id_map;\n    id_map = NULL;\nout:\n    Py_XDECREF(keep);\n    Py_XDECREF(id_map);\n    return ret;\n}\n\n/*===================================================================\n * Table Macros\n *===================================================================\n * These macros generate boilerplate code for the table classes, which are identical\n * other than the table type and the columns they contain.\n */\n\n#define DEFINE_TABLE_COLUMN_GETTER(table_class, column_name, numpy_type, c_type)        \\\n    static PyObject *table_class##Table_get_##column_name(                              \\\n        table_class##Table *self, void *closure)                                        \\\n    {                                                                                   \\\n        PyObject *ret = NULL;                                                           \\\n        if (table_class##Table_check_state(self) != 0) {                                \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        ret = table_get_column_array(self->table->num_rows, self->table->column_name,   \\\n            
numpy_type, sizeof(c_type));                                                \\\n    out:                                                                                \\\n        return ret;                                                                     \\\n    }\n\n/* Macro for ragged column getters (generates both data and offset getters) */\n#define DEFINE_TABLE_RAGGED_COLUMN_GETTER(                                              \\\n    table_class, column_name, numpy_type, c_type, length_field)                         \\\n    static PyObject *table_class##Table_get_##column_name(                              \\\n        table_class##Table *self, void *closure)                                        \\\n    {                                                                                   \\\n        PyObject *ret = NULL;                                                           \\\n        if (table_class##Table_check_state(self) != 0) {                                \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        ret = table_get_column_array(self->table->length_field,                         \\\n            self->table->column_name, numpy_type, sizeof(c_type));                      \\\n    out:                                                                                \\\n        return ret;                                                                     \\\n    }                                                                                   \\\n    static PyObject *table_class##Table_get_##column_name##_offset(                     \\\n        table_class##Table *self, void *closure)                                        \\\n    {                                                                                   \\\n        PyObject *ret = NULL;                                                      
     \\\n        if (table_class##Table_check_state(self) != 0) {                                \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        ret = table_get_offset_array(                                                   \\\n            self->table->num_rows, self->table->column_name##_offset);                  \\\n    out:                                                                                \\\n        return ret;                                                                     \\\n    }\n\n#define DEFINE_TABLE_METADATA_SCHEMA_GETTER(table_class)                                \\\n    static PyObject *table_class##Table_get_metadata_schema(                            \\\n        table_class##Table *self, void *closure)                                        \\\n    {                                                                                   \\\n        PyObject *ret = NULL;                                                           \\\n        if (table_class##Table_check_state(self) != 0) {                                \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        ret = make_Py_Unicode_FromStringAndLength(                                      \\\n            self->table->metadata_schema, self->table->metadata_schema_length);         \\\n    out:                                                                                \\\n        return ret;                                                                     \\\n    }\n\n#define DEFINE_TABLE_METADATA_SCHEMA_SETTER(table_class, table_type)                    \\\n    static int table_class##Table_set_metadata_schema(                                  \\\n        table_class##Table *self, 
PyObject *arg, void *closure)                         \\\n    {                                                                                   \\\n        int ret = -1;                                                                   \\\n        int err;                                                                        \\\n        const char *metadata_schema;                                                    \\\n        Py_ssize_t metadata_schema_length;                                              \\\n                                                                                        \\\n        if (table_class##Table_check_state(self) != 0) {                                \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        metadata_schema = parse_unicode_arg(arg, &metadata_schema_length);              \\\n        if (metadata_schema == NULL) {                                                  \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        err = tsk_##table_type##_table_set_metadata_schema(                             \\\n            self->table, metadata_schema, metadata_schema_length);                      \\\n        if (err != 0) {                                                                 \\\n            handle_library_error(err);                                                  \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        ret = 0;                                                                        \\\n    out:                                                                                \\\n        
return ret;                                                                     \\\n    }\n\n#define DEFINE_TABLE_EQUALS(table_class, table_type)                                    \\\n    static PyObject *table_class##Table_equals(                                         \\\n        table_class##Table *self, PyObject *args, PyObject *kwds)                       \\\n    {                                                                                   \\\n        PyObject *ret = NULL;                                                           \\\n        table_class##Table *other = NULL;                                               \\\n        tsk_flags_t options = 0;                                                        \\\n        int ignore_metadata = false;                                                    \\\n        static char *kwlist[] = { \"other\", \"ignore_metadata\", NULL };                   \\\n        if (table_class##Table_check_state(self) != 0) {                                \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        if (!PyArg_ParseTupleAndKeywords(args, kwds, \"O!|i\", kwlist,                    \\\n                &table_class##TableType, &other, &ignore_metadata)) {                   \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        if (table_class##Table_check_state(other) != 0) {                               \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        if (ignore_metadata) {                                                          \\\n            options |= TSK_CMP_IGNORE_METADATA;              
                           \\\n        }                                                                               \\\n        ret = Py_BuildValue(                                                            \\\n            \"i\", tsk_##table_type##_table_equals(self->table, other->table, options));  \\\n    out:                                                                                \\\n        return ret;                                                                     \\\n    }\n\n/* Macro to generate common table methods */\n#define DEFINE_TABLE_METHODS(table_class, table_type, row_type, make_row_func)          \\\n    static int table_class##Table_check_state(table_class##Table *self)                 \\\n    {                                                                                   \\\n        int ret = -1;                                                                   \\\n        if (self->table == NULL) {                                                      \\\n            PyErr_SetString(PyExc_SystemError, #table_class \"Table not initialised\");   \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        if (self->locked) {                                                             \\\n            PyErr_SetString(                                                            \\\n                PyExc_RuntimeError, #table_class \"Table in use by other thread.\");      \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        ret = 0;                                                                        \\\n    out:                                                                                \\\n        return ret;                                                 
                    \\\n    }                                                                                   \\\n                                                                                        \\\n    static void table_class##Table_dealloc(table_class##Table *self)                    \\\n    {                                                                                   \\\n        if (self->tables != NULL) {                                                     \\\n            Py_DECREF(self->tables);                                                    \\\n        } else if (self->table != NULL) {                                               \\\n            tsk_##table_type##_table_free(self->table);                                 \\\n            PyMem_Free(self->table);                                                    \\\n            self->table = NULL;                                                         \\\n        }                                                                               \\\n        Py_TYPE(self)->tp_free((PyObject *) self);                                      \\\n    }                                                                                   \\\n                                                                                        \\\n    static int table_class##Table_init(                                                 \\\n        table_class##Table *self, PyObject *args, PyObject *kwds)                       \\\n    {                                                                                   \\\n        int ret = -1;                                                                   \\\n        int err;                                                                        \\\n        static char *kwlist[] = { \"max_rows_increment\", NULL };                         \\\n        Py_ssize_t max_rows_increment = 0;                                              \\\n        self->table = NULL;               
                                              \\\n        self->locked = false;                                                           \\\n        self->tables = NULL;                                                            \\\n        if (!PyArg_ParseTupleAndKeywords(                                               \\\n                args, kwds, \"|n\", kwlist, &max_rows_increment)) {                       \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        if (max_rows_increment < 0) {                                                   \\\n            PyErr_SetString(PyExc_ValueError, \"max_rows_increment must be positive\");   \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        self->table = PyMem_Malloc(sizeof(tsk_##table_type##_table_t));                 \\\n        if (self->table == NULL) {                                                      \\\n            PyErr_NoMemory();                                                           \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        err = tsk_##table_type##_table_init(self->table, 0);                            \\\n        if (err != 0) {                                                                 \\\n            handle_library_error(err);                                                  \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        tsk_##table_type##_table_set_max_rows_increment(                                \\\n            
self->table, max_rows_increment);                                           \\\n        ret = 0;                                                                        \\\n    out:                                                                                \\\n        return ret;                                                                     \\\n    }                                                                                   \\\n                                                                                        \\\n    static PyObject *table_class##Table_get_row(                                        \\\n        table_class##Table *self, PyObject *args)                                       \\\n    {                                                                                   \\\n        PyObject *ret = NULL;                                                           \\\n        int err;                                                                        \\\n        Py_ssize_t row_id;                                                              \\\n        tsk_##row_type##_t row_type;                                                    \\\n        if (table_class##Table_check_state(self) != 0) {                                \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        if (!PyArg_ParseTuple(args, \"n\", &row_id)) {                                    \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        err = tsk_##table_type##_table_get_row(                                         \\\n            self->table, (tsk_id_t) row_id, &row_type);                                 \\\n        if (err != 0) {                                                       
          \\\n            handle_library_error(err);                                                  \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        ret = make_row_func;                                                            \\\n    out:                                                                                \\\n        return ret;                                                                     \\\n    }                                                                                   \\\n                                                                                        \\\n    static PyObject *table_class##Table_parse_dict_arg(                                 \\\n        table_class##Table *self, PyObject *args, bool clear_table)                     \\\n    {                                                                                   \\\n        int err;                                                                        \\\n        PyObject *ret = NULL;                                                           \\\n        PyObject *dict = NULL;                                                          \\\n        if (table_class##Table_check_state(self) != 0) {                                \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        if (!PyArg_ParseTuple(args, \"O!\", &PyDict_Type, &dict)) {                       \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        err = parse_##table_type##_table_dict(self->table, dict, clear_table);          \\\n        if (err != 0) {                             
                                    \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        ret = Py_BuildValue(\"\");                                                        \\\n    out:                                                                                \\\n        return ret;                                                                     \\\n    }                                                                                   \\\n                                                                                        \\\n    static PyObject *table_class##Table_append_columns(                                 \\\n        table_class##Table *self, PyObject *args)                                       \\\n    {                                                                                   \\\n        return table_class##Table_parse_dict_arg(self, args, false);                    \\\n    }                                                                                   \\\n    static PyObject *table_class##Table_set_columns(                                    \\\n        table_class##Table *self, PyObject *args)                                       \\\n    {                                                                                   \\\n        return table_class##Table_parse_dict_arg(self, args, true);                     \\\n    }                                                                                   \\\n                                                                                        \\\n    static PyObject *table_class##Table_clear(table_class##Table *self)                 \\\n    {                                                                                   \\\n        PyObject *ret = NULL;                                                           \\\n        int err;          
                                                              \\\n        if (table_class##Table_check_state(self) != 0) {                                \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        err = tsk_##table_type##_table_clear(self->table);                              \\\n        if (err != 0) {                                                                 \\\n            handle_library_error(err);                                                  \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        ret = Py_BuildValue(\"\");                                                        \\\n    out:                                                                                \\\n        return ret;                                                                     \\\n    }                                                                                   \\\n                                                                                        \\\n    static PyObject *table_class##Table_truncate(                                       \\\n        table_class##Table *self, PyObject *args)                                       \\\n    {                                                                                   \\\n        PyObject *ret = NULL;                                                           \\\n        Py_ssize_t num_rows;                                                            \\\n        int err;                                                                        \\\n        if (table_class##Table_check_state(self) != 0) {                                \\\n            goto out;                                                                   
\\\n        }                                                                               \\\n        if (!PyArg_ParseTuple(args, \"n\", &num_rows)) {                                  \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        if (num_rows < 0 || num_rows > (Py_ssize_t) self->table->num_rows) {            \\\n            PyErr_SetString(PyExc_ValueError, \"num_rows out of bounds\");                \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        err = tsk_##table_type##_table_truncate(self->table, (tsk_size_t) num_rows);    \\\n        if (err != 0) {                                                                 \\\n            handle_library_error(err);                                                  \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        ret = Py_BuildValue(\"\");                                                        \\\n    out:                                                                                \\\n        return ret;                                                                     \\\n    }                                                                                   \\\n                                                                                        \\\n    static PyObject *table_class##Table_extend(                                         \\\n        table_class##Table *self, PyObject *args, PyObject *kwds)                       \\\n    {                                                                                   \\\n        PyObject *ret = NULL;                             
                              \\\n        table_class##Table *other = NULL;                                               \\\n        PyArrayObject *row_indexes = NULL;                                              \\\n        int err;                                                                        \\\n        static char *kwlist[] = { \"other\", \"row_indexes\", NULL };                       \\\n        if (table_class##Table_check_state(self) != 0) {                                \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        if (!PyArg_ParseTupleAndKeywords(args, kwds, \"O!O&\", kwlist,                    \\\n                &table_class##TableType, &other, &int32_array_converter,                \\\n                &row_indexes)) {                                                        \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        if (table_class##Table_check_state(other) != 0) {                               \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        err = tsk_##table_type##_table_extend(self->table, other->table,                \\\n            PyArray_DIMS(row_indexes)[0], PyArray_DATA(row_indexes), 0);                \\\n        if (err != 0) {                                                                 \\\n            handle_library_error(err);                                                  \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        ret = 
Py_BuildValue(\"\");                                                        \\\n    out:                                                                                \\\n        Py_XDECREF(row_indexes);                                                        \\\n        return ret;                                                                     \\\n    }                                                                                   \\\n                                                                                        \\\n    static int table_type##_table_keep_rows_generic(                                    \\\n        void *table, const tsk_bool_t *keep, tsk_flags_t options, tsk_id_t *id_map)     \\\n    {                                                                                   \\\n        return tsk_##table_type##_table_keep_rows(                                      \\\n            (tsk_##table_type##_table_t *) table, keep, options, id_map);               \\\n    }                                                                                   \\\n    static PyObject *table_class##Table_keep_rows(                                      \\\n        table_class##Table *self, PyObject *args)                                       \\\n    {                                                                                   \\\n        PyObject *ret = NULL;                                                           \\\n        if (table_class##Table_check_state(self) != 0) {                                \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        ret = table_keep_rows(args, (void *) self->table, self->table->num_rows,        \\\n            table_type##_table_keep_rows_generic);                                      \\\n    out:                                                                        
        \\\n        return ret;                                                                     \\\n    }                                                                                   \\\n                                                                                        \\\n    static PyObject *table_class##Table_get_max_rows_increment(                         \\\n        table_class##Table *self, void *closure)                                        \\\n    {                                                                                   \\\n        PyObject *ret = NULL;                                                           \\\n        if (table_class##Table_check_state(self) != 0) {                                \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        ret = Py_BuildValue(\"n\", (Py_ssize_t) self->table->max_rows_increment);         \\\n    out:                                                                                \\\n        return ret;                                                                     \\\n    }                                                                                   \\\n    static PyObject *table_class##Table_get_num_rows(                                   \\\n        table_class##Table *self, void *closure)                                        \\\n    {                                                                                   \\\n        PyObject *ret = NULL;                                                           \\\n        if (table_class##Table_check_state(self) != 0) {                                \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        ret = Py_BuildValue(\"n\", (Py_ssize_t) 
self->table->num_rows);                   \\\n    out:                                                                                \\\n        return ret;                                                                     \\\n    }                                                                                   \\\n    static PyObject *table_class##Table_get_max_rows(                                   \\\n        table_class##Table *self, void *closure)                                        \\\n    {                                                                                   \\\n        PyObject *ret = NULL;                                                           \\\n        if (table_class##Table_check_state(self) != 0) {                                \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        ret = Py_BuildValue(\"n\", (Py_ssize_t) self->table->max_rows);                   \\\n    out:                                                                                \\\n        return ret;                                                                     \\\n    }\n\n/*===================================================================\n * IndividualTable\n *===================================================================\n */\n\nstatic PyTypeObject IndividualTableType;\nDEFINE_TABLE_METHODS(\n    Individual, individual, individual, make_individual_row(&individual))\nDEFINE_TABLE_EQUALS(Individual, individual)\nDEFINE_TABLE_METADATA_SCHEMA_GETTER(Individual)\nDEFINE_TABLE_METADATA_SCHEMA_SETTER(Individual, individual)\n\nDEFINE_TABLE_COLUMN_GETTER(Individual, flags, NPY_UINT32, uint32_t)\nDEFINE_TABLE_RAGGED_COLUMN_GETTER(\n    Individual, location, NPY_FLOAT64, double, location_length)\nDEFINE_TABLE_RAGGED_COLUMN_GETTER(\n    Individual, parents, NPY_INT32, tsk_id_t, 
parents_length)\nDEFINE_TABLE_RAGGED_COLUMN_GETTER(Individual, metadata, NPY_INT8, char, metadata_length)\n\nstatic PyObject *\nIndividualTable_add_row(IndividualTable *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    int err;\n    unsigned int flags = 0;\n    PyObject *py_metadata = Py_None;\n    PyObject *py_location = Py_None;\n    PyObject *py_parents = Py_None;\n    PyArrayObject *location_array = NULL;\n    double *location_data = NULL;\n    tsk_size_t location_length = 0;\n    PyArrayObject *parents_array = NULL;\n    tsk_id_t *parents_data = NULL;\n    tsk_size_t parents_length = 0;\n    char *metadata = \"\";\n    Py_ssize_t metadata_length = 0;\n    npy_intp *shape;\n    static char *kwlist[] = { \"flags\", \"location\", \"parents\", \"metadata\", NULL };\n\n    if (IndividualTable_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"|O&OOO\", kwlist, &uint32_converter,\n            &flags, &py_location, &py_parents, &py_metadata)) {\n        goto out;\n    }\n    if (py_metadata != Py_None) {\n        if (PyBytes_AsStringAndSize(py_metadata, &metadata, &metadata_length) < 0) {\n            goto out;\n        }\n    }\n    if (py_location != Py_None) {\n        /* This ensures that only 1D arrays are accepted. */\n        location_array = (PyArrayObject *) PyArray_FromAny(py_location,\n            PyArray_DescrFromType(NPY_FLOAT64), 1, 1, NPY_ARRAY_IN_ARRAY, NULL);\n        if (location_array == NULL) {\n            goto out;\n        }\n        shape = PyArray_DIMS(location_array);\n        location_length = (tsk_size_t) shape[0];\n        location_data = PyArray_DATA(location_array);\n    }\n    if (py_parents != Py_None) {\n        /* This ensures that only 1D arrays are accepted. 
*/\n        parents_array = (PyArrayObject *) PyArray_FromAny(py_parents,\n            PyArray_DescrFromType(NPY_INT32), 1, 1, NPY_ARRAY_IN_ARRAY, NULL);\n        if (parents_array == NULL) {\n            goto out;\n        }\n        shape = PyArray_DIMS(parents_array);\n        parents_length = (tsk_size_t) shape[0];\n        parents_data = PyArray_DATA(parents_array);\n    }\n    err = tsk_individual_table_add_row(self->table, (tsk_flags_t) flags, location_data,\n        location_length, parents_data, parents_length, metadata,\n        (tsk_size_t) metadata_length);\n    if (err < 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"i\", err);\nout:\n    Py_XDECREF(location_array);\n    Py_XDECREF(parents_array);\n    return ret;\n}\n\nstatic PyObject *\nIndividualTable_update_row(IndividualTable *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    int err;\n    tsk_id_t row_index = -1;\n    unsigned int flags = 0;\n    PyObject *py_metadata = Py_None;\n    PyObject *py_location = Py_None;\n    PyObject *py_parents = Py_None;\n    PyArrayObject *location_array = NULL;\n    double *location_data = NULL;\n    tsk_size_t location_length = 0;\n    PyArrayObject *parents_array = NULL;\n    tsk_id_t *parents_data = NULL;\n    tsk_size_t parents_length = 0;\n    char *metadata = \"\";\n    Py_ssize_t metadata_length = 0;\n    npy_intp *shape;\n    static char *kwlist[]\n        = { \"row_index\", \"flags\", \"location\", \"parents\", \"metadata\", NULL };\n\n    if (IndividualTable_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"O&|O&OOO\", kwlist, &tsk_id_converter,\n            &row_index, &uint32_converter, &flags, &py_location, &py_parents,\n            &py_metadata)) {\n        goto out;\n    }\n    if (py_metadata != Py_None) {\n        if (PyBytes_AsStringAndSize(py_metadata, &metadata, &metadata_length) < 0) {\n            goto out;\n        }\n 
   }\n    if (py_location != Py_None) {\n        /* This ensures that only 1D arrays are accepted. */\n        location_array = (PyArrayObject *) PyArray_FromAny(py_location,\n            PyArray_DescrFromType(NPY_FLOAT64), 1, 1, NPY_ARRAY_IN_ARRAY, NULL);\n        if (location_array == NULL) {\n            goto out;\n        }\n        shape = PyArray_DIMS(location_array);\n        location_length = (tsk_size_t) shape[0];\n        location_data = PyArray_DATA(location_array);\n    }\n    if (py_parents != Py_None) {\n        /* This ensures that only 1D arrays are accepted. */\n        parents_array = (PyArrayObject *) PyArray_FromAny(py_parents,\n            PyArray_DescrFromType(NPY_INT32), 1, 1, NPY_ARRAY_IN_ARRAY, NULL);\n        if (parents_array == NULL) {\n            goto out;\n        }\n        shape = PyArray_DIMS(parents_array);\n        parents_length = (tsk_size_t) shape[0];\n        parents_data = PyArray_DATA(parents_array);\n    }\n    err = tsk_individual_table_update_row(self->table, row_index, (tsk_flags_t) flags,\n        location_data, location_length, parents_data, parents_length, metadata,\n        (tsk_size_t) metadata_length);\n    if (err < 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    Py_XDECREF(location_array);\n    Py_XDECREF(parents_array);\n    return ret;\n}\n\nstatic PyGetSetDef IndividualTable_getsetters[] = {\n    { .name = \"max_rows_increment\",\n        .get = (getter) IndividualTable_get_max_rows_increment,\n        .doc = \"The size increment\" },\n    { .name = \"num_rows\",\n        .get = (getter) IndividualTable_get_num_rows,\n        .doc = \"The number of rows in the table.\" },\n    { .name = \"max_rows\",\n        .get = (getter) IndividualTable_get_max_rows,\n        .doc = \"The current maximum number of rows in the table.\" },\n    { .name = \"flags\",\n        .get = (getter) IndividualTable_get_flags,\n        .doc = \"The flags array\" },\n    { 
.name = \"location\",\n        .get = (getter) IndividualTable_get_location,\n        .doc = \"The location array\" },\n    { .name = \"location_offset\",\n        .get = (getter) IndividualTable_get_location_offset,\n        .doc = \"The location offset array\" },\n    { .name = \"parents\",\n        .get = (getter) IndividualTable_get_parents,\n        .doc = \"The parents array\" },\n    { .name = \"parents_offset\",\n        .get = (getter) IndividualTable_get_parents_offset,\n        .doc = \"The parents offset array\" },\n    { .name = \"metadata\",\n        .get = (getter) IndividualTable_get_metadata,\n        .doc = \"The metadata array\" },\n    { .name = \"metadata_offset\",\n        .get = (getter) IndividualTable_get_metadata_offset,\n        .doc = \"The metadata offset array\" },\n    { .name = \"metadata_schema\",\n        .get = (getter) IndividualTable_get_metadata_schema,\n        .set = (setter) IndividualTable_set_metadata_schema,\n        .doc = \"The metadata schema\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyMethodDef IndividualTable_methods[] = {\n    { .ml_name = \"add_row\",\n        .ml_meth = (PyCFunction) IndividualTable_add_row,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Adds a new row to this table.\" },\n    { .ml_name = \"update_row\",\n        .ml_meth = (PyCFunction) IndividualTable_update_row,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Updates an existing row in this table.\" },\n    { .ml_name = \"get_row\",\n        .ml_meth = (PyCFunction) IndividualTable_get_row,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the kth row in this table.\" },\n    { .ml_name = \"equals\",\n        .ml_meth = (PyCFunction) IndividualTable_equals,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Returns true if the specified individual table is equal.\" },\n    { .ml_name = \"append_columns\",\n        .ml_meth = (PyCFunction) 
IndividualTable_append_columns,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Appends the data in the specified arrays into the columns.\" },\n    { .ml_name = \"set_columns\",\n        .ml_meth = (PyCFunction) IndividualTable_set_columns,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Copies the data in the specified arrays into the columns.\" },\n    { .ml_name = \"clear\",\n        .ml_meth = (PyCFunction) IndividualTable_clear,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Clears this table.\" },\n    { .ml_name = \"truncate\",\n        .ml_meth = (PyCFunction) IndividualTable_truncate,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Truncates this table to the specified number of rows.\" },\n    { .ml_name = \"extend\",\n        .ml_meth = (PyCFunction) IndividualTable_extend,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Extend this table from another using specified row_indexes\" },\n    { .ml_name = \"keep_rows\",\n        .ml_meth = (PyCFunction) IndividualTable_keep_rows,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Keep rows in this table according to boolean array\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyTypeObject IndividualTableType = {\n    // clang-format off\n    PyVarObject_HEAD_INIT(NULL, 0)\n    .tp_name = \"_tskit.IndividualTable\",\n    .tp_basicsize = sizeof(IndividualTable),\n    .tp_dealloc = (destructor) IndividualTable_dealloc,\n    .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,\n    .tp_doc = \"IndividualTable objects\",\n    .tp_methods = IndividualTable_methods,\n    .tp_getset = IndividualTable_getsetters,\n    .tp_init = (initproc) IndividualTable_init,\n    .tp_new = PyType_GenericNew,\n    // clang-format on\n};\n\n/*===================================================================\n * NodeTable\n *===================================================================\n */\n\nstatic PyTypeObject 
NodeTableType;\nDEFINE_TABLE_METHODS(Node, node, node, make_node(&node))\nDEFINE_TABLE_EQUALS(Node, node)\nDEFINE_TABLE_METADATA_SCHEMA_GETTER(Node)\nDEFINE_TABLE_METADATA_SCHEMA_SETTER(Node, node)\n\nDEFINE_TABLE_COLUMN_GETTER(Node, time, NPY_FLOAT64, double)\nDEFINE_TABLE_COLUMN_GETTER(Node, flags, NPY_UINT32, uint32_t)\nDEFINE_TABLE_COLUMN_GETTER(Node, population, NPY_INT32, int32_t)\nDEFINE_TABLE_COLUMN_GETTER(Node, individual, NPY_INT32, int32_t)\nDEFINE_TABLE_RAGGED_COLUMN_GETTER(Node, metadata, NPY_INT8, char, metadata_length)\n\nstatic PyObject *\nNodeTable_add_row(NodeTable *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    int err;\n    unsigned int flags = 0;\n    double time = 0;\n    tsk_id_t population = TSK_NULL;\n    tsk_id_t individual = TSK_NULL;\n    PyObject *py_metadata = Py_None;\n    char *metadata = \"\";\n    Py_ssize_t metadata_length = 0;\n    static char *kwlist[]\n        = { \"flags\", \"time\", \"population\", \"individual\", \"metadata\", NULL };\n\n    if (NodeTable_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"|O&dO&O&O\", kwlist, &uint32_converter,\n            &flags, &time, &tsk_id_converter, &population, &tsk_id_converter,\n            &individual, &py_metadata)) {\n        goto out;\n    }\n    if (py_metadata != Py_None) {\n        if (PyBytes_AsStringAndSize(py_metadata, &metadata, &metadata_length) < 0) {\n            goto out;\n        }\n    }\n    err = tsk_node_table_add_row(self->table, (tsk_flags_t) flags, time, population,\n        individual, metadata, (tsk_size_t) metadata_length);\n    if (err < 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"i\", err);\nout:\n    return ret;\n}\n\nstatic PyObject *\nNodeTable_update_row(NodeTable *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    int err;\n    tsk_id_t row_index = -1;\n    unsigned int flags = 0;\n    double time = 
0;\n    tsk_id_t population = -1;\n    tsk_id_t individual = -1;\n    PyObject *py_metadata = Py_None;\n    char *metadata = \"\";\n    Py_ssize_t metadata_length = 0;\n    static char *kwlist[]\n        = { \"row_index\", \"flags\", \"time\", \"population\", \"individual\", \"metadata\", NULL };\n\n    if (NodeTable_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"O&|O&dO&O&O\", kwlist,\n            &tsk_id_converter, &row_index, &uint32_converter, &flags, &time,\n            &tsk_id_converter, &population, &tsk_id_converter, &individual,\n            &py_metadata)) {\n        goto out;\n    }\n    if (py_metadata != Py_None) {\n        if (PyBytes_AsStringAndSize(py_metadata, &metadata, &metadata_length) < 0) {\n            goto out;\n        }\n    }\n    err = tsk_node_table_update_row(self->table, row_index, (tsk_flags_t) flags, time,\n        population, individual, metadata, (tsk_size_t) metadata_length);\n    if (err < 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\nstatic PyGetSetDef NodeTable_getsetters[] = {\n    { .name = \"max_rows_increment\",\n        .get = (getter) NodeTable_get_max_rows_increment,\n        .doc = \"The size increment\" },\n    { .name = \"num_rows\",\n        .get = (getter) NodeTable_get_num_rows,\n        .doc = \"The number of rows in the table.\" },\n    { .name = \"max_rows\",\n        .get = (getter) NodeTable_get_max_rows,\n        .doc = \"The current maximum number of rows in the table.\" },\n    { .name = \"time\", .get = (getter) NodeTable_get_time, .doc = \"The time array\" },\n    { .name = \"flags\", .get = (getter) NodeTable_get_flags, .doc = \"The flags array\" },\n    { .name = \"population\",\n        .get = (getter) NodeTable_get_population,\n        .doc = \"The population array\" },\n    { .name = \"individual\",\n        .get = (getter) NodeTable_get_individual,\n        .doc 
= \"The individual array\" },\n    { .name = \"metadata\",\n        .get = (getter) NodeTable_get_metadata,\n        .doc = \"The metadata array\" },\n    { .name = \"metadata_offset\",\n        .get = (getter) NodeTable_get_metadata_offset,\n        .doc = \"The metadata offset array\" },\n    { .name = \"metadata_schema\",\n        .get = (getter) NodeTable_get_metadata_schema,\n        .set = (setter) NodeTable_set_metadata_schema,\n        .doc = \"The metadata schema\" },\n    { NULL } /* Sentinel */\n};\n\n/* Python-visible methods of _tskit.NodeTable. The flags for append_columns and\n * set_columns match the registrations used by the other table types. */\nstatic PyMethodDef NodeTable_methods[] = {\n    { .ml_name = \"add_row\",\n        .ml_meth = (PyCFunction) NodeTable_add_row,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Adds a new row to this table.\" },\n    { .ml_name = \"update_row\",\n        .ml_meth = (PyCFunction) NodeTable_update_row,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Updates an existing row in this table.\" },\n    { .ml_name = \"equals\",\n        .ml_meth = (PyCFunction) NodeTable_equals,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Returns True if the specified NodeTable is equal to this one.\" },\n    { .ml_name = \"get_row\",\n        .ml_meth = (PyCFunction) NodeTable_get_row,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the kth row in this table.\" },\n    { .ml_name = \"append_columns\",\n        .ml_meth = (PyCFunction) NodeTable_append_columns,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Appends the data in the specified arrays into the columns.\" },\n    { .ml_name = \"set_columns\",\n        .ml_meth = (PyCFunction) NodeTable_set_columns,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Copies the data in the specified arrays into the columns.\" },\n    { .ml_name = \"clear\",\n        .ml_meth = (PyCFunction) NodeTable_clear,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Clears this table.\" },\n    { .ml_name = \"truncate\",\n        .ml_meth = 
(PyCFunction) NodeTable_truncate,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Truncates this table to the specified number of rows.\" },\n    { .ml_name = \"extend\",\n        .ml_meth = (PyCFunction) NodeTable_extend,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Extend this table from another using specified row_indexes\" },\n    { .ml_name = \"keep_rows\",\n        .ml_meth = (PyCFunction) NodeTable_keep_rows,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Keep rows in this table according to boolean array\" },\n\n    { NULL } /* Sentinel */\n};\n\nstatic PyTypeObject NodeTableType = {\n    // clang-format off\n    PyVarObject_HEAD_INIT(NULL, 0)\n    .tp_name = \"_tskit.NodeTable\",\n    .tp_basicsize = sizeof(NodeTable),\n    .tp_dealloc = (destructor) NodeTable_dealloc,\n    .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,\n    .tp_doc = \"NodeTable objects\",\n    .tp_methods = NodeTable_methods,\n    .tp_getset = NodeTable_getsetters,\n    .tp_init = (initproc) NodeTable_init,\n    .tp_new = PyType_GenericNew,\n    // clang-format on\n};\n\n/*===================================================================\n * EdgeTable\n *===================================================================\n */\n\nstatic PyTypeObject EdgeTableType;\nDEFINE_TABLE_METHODS(Edge, edge, edge, make_edge(&edge, false))\nDEFINE_TABLE_EQUALS(Edge, edge)\nDEFINE_TABLE_METADATA_SCHEMA_GETTER(Edge)\nDEFINE_TABLE_METADATA_SCHEMA_SETTER(Edge, edge)\n\nDEFINE_TABLE_COLUMN_GETTER(Edge, left, NPY_FLOAT64, double)\nDEFINE_TABLE_COLUMN_GETTER(Edge, right, NPY_FLOAT64, double)\nDEFINE_TABLE_COLUMN_GETTER(Edge, parent, NPY_INT32, int32_t)\nDEFINE_TABLE_COLUMN_GETTER(Edge, child, NPY_INT32, int32_t)\nDEFINE_TABLE_RAGGED_COLUMN_GETTER(Edge, metadata, NPY_INT8, char, metadata_length)\n\nstatic PyObject *\nEdgeTable_add_row(EdgeTable *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    int err;\n    double left, right;\n    
tsk_id_t parent, child;\n    PyObject *py_metadata = Py_None;\n    char *metadata = \"\";\n    Py_ssize_t metadata_length = 0;\n    static char *kwlist[] = { \"left\", \"right\", \"parent\", \"child\", \"metadata\", NULL };\n\n    if (EdgeTable_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"ddO&O&|O\", kwlist, &left, &right,\n            &tsk_id_converter, &parent, &tsk_id_converter, &child, &py_metadata)) {\n        goto out;\n    }\n    if (py_metadata != Py_None) {\n        if (PyBytes_AsStringAndSize(py_metadata, &metadata, &metadata_length) < 0) {\n            goto out;\n        }\n    }\n    err = tsk_edge_table_add_row(\n        self->table, left, right, parent, child, metadata, (tsk_size_t) metadata_length);\n    if (err < 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"i\", err);\nout:\n    return ret;\n}\n\nstatic PyObject *\nEdgeTable_update_row(EdgeTable *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    int err;\n    tsk_id_t row_index = -1;\n    double left, right;\n    tsk_id_t parent, child;\n    PyObject *py_metadata = Py_None;\n    char *metadata = \"\";\n    Py_ssize_t metadata_length = 0;\n    static char *kwlist[]\n        = { \"row_index\", \"left\", \"right\", \"parent\", \"child\", \"metadata\", NULL };\n\n    if (EdgeTable_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"O&ddO&O&|O\", kwlist, &tsk_id_converter,\n            &row_index, &left, &right, &tsk_id_converter, &parent, &tsk_id_converter,\n            &child, &py_metadata)) {\n        goto out;\n    }\n    if (py_metadata != Py_None) {\n        if (PyBytes_AsStringAndSize(py_metadata, &metadata, &metadata_length) < 0) {\n            goto out;\n        }\n    }\n    err = tsk_edge_table_update_row(self->table, row_index, left, right, parent, child,\n        metadata, (tsk_size_t) metadata_length);\n    if 
(err < 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\nstatic PyObject *\nEdgeTable_squash(EdgeTable *self)\n{\n    PyObject *ret = NULL;\n    int err;\n\n    if (EdgeTable_check_state(self) != 0) {\n        goto out;\n    }\n    err = tsk_edge_table_squash(self->table);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\nstatic PyGetSetDef EdgeTable_getsetters[] = {\n    { .name = \"max_rows_increment\",\n        .get = (getter) EdgeTable_get_max_rows_increment,\n        .doc = \"The size increment\" },\n    { .name = \"num_rows\",\n        .get = (getter) EdgeTable_get_num_rows,\n        .doc = \"The number of rows in the table.\" },\n    { .name = \"max_rows\",\n        .get = (getter) EdgeTable_get_max_rows,\n        .doc = \"The current maximum number of rows in the table.\" },\n    { .name = \"left\", .get = (getter) EdgeTable_get_left, .doc = \"The left array\" },\n    { .name = \"right\", .get = (getter) EdgeTable_get_right, .doc = \"The right array\" },\n    { .name = \"parent\",\n        .get = (getter) EdgeTable_get_parent,\n        .doc = \"The parent array\" },\n    { .name = \"child\", .get = (getter) EdgeTable_get_child, .doc = \"The child array\" },\n    { .name = \"metadata\",\n        .get = (getter) EdgeTable_get_metadata,\n        .doc = \"The metadata array\" },\n    { .name = \"metadata_offset\",\n        .get = (getter) EdgeTable_get_metadata_offset,\n        .doc = \"The metadata offset array\" },\n    { .name = \"metadata_schema\",\n        .get = (getter) EdgeTable_get_metadata_schema,\n        .set = (setter) EdgeTable_set_metadata_schema,\n        .doc = \"The metadata schema\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyMethodDef EdgeTable_methods[] = {\n    { .ml_name = \"add_row\",\n        .ml_meth = (PyCFunction) EdgeTable_add_row,\n        .ml_flags = 
METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Adds a new row to this table.\" },\n    { .ml_name = \"update_row\",\n        .ml_meth = (PyCFunction) EdgeTable_update_row,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Updates an existing row in this table.\" },\n    { .ml_name = \"equals\",\n        .ml_meth = (PyCFunction) EdgeTable_equals,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Returns True if the specified EdgeTable is equal to this one.\" },\n    { .ml_name = \"get_row\",\n        .ml_meth = (PyCFunction) EdgeTable_get_row,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the kth row in this table.\" },\n    { .ml_name = \"set_columns\",\n        .ml_meth = (PyCFunction) EdgeTable_set_columns,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Copies the data in the specified arrays into the columns.\" },\n    { .ml_name = \"append_columns\",\n        .ml_meth = (PyCFunction) EdgeTable_append_columns,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Appends the data in the specified arrays into the columns.\" },\n    { .ml_name = \"clear\",\n        .ml_meth = (PyCFunction) EdgeTable_clear,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Clears this table.\" },\n    { .ml_name = \"truncate\",\n        .ml_meth = (PyCFunction) EdgeTable_truncate,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Truncates this table to the specified number of rows.\" },\n    { .ml_name = \"extend\",\n        .ml_meth = (PyCFunction) EdgeTable_extend,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Extend this table from another using specified row_indexes\" },\n    { .ml_name = \"squash\",\n        .ml_meth = (PyCFunction) EdgeTable_squash,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Squashes sets of edges with adjacent L,R and identical P,C values.\" },\n    { .ml_name = \"keep_rows\",\n        .ml_meth = 
(PyCFunction) EdgeTable_keep_rows,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Keep rows in this table according to boolean array\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyTypeObject EdgeTableType = {\n    // clang-format off\n    PyVarObject_HEAD_INIT(NULL, 0)\n    .tp_name = \"_tskit.EdgeTable\",\n    .tp_basicsize = sizeof(EdgeTable),\n    .tp_dealloc = (destructor) EdgeTable_dealloc,\n    .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,\n    .tp_doc = \"EdgeTable objects\",\n    .tp_methods = EdgeTable_methods,\n    .tp_getset = EdgeTable_getsetters,\n    .tp_init = (initproc) EdgeTable_init,\n    .tp_new = PyType_GenericNew,\n    // clang-format on\n};\n\n/*===================================================================\n * MigrationTable\n *===================================================================\n */\n\nstatic PyTypeObject MigrationTableType;\nDEFINE_TABLE_METHODS(Migration, migration, migration, make_migration(&migration))\nDEFINE_TABLE_EQUALS(Migration, migration)\nDEFINE_TABLE_METADATA_SCHEMA_GETTER(Migration)\nDEFINE_TABLE_METADATA_SCHEMA_SETTER(Migration, migration)\n\nDEFINE_TABLE_COLUMN_GETTER(Migration, left, NPY_FLOAT64, double)\nDEFINE_TABLE_COLUMN_GETTER(Migration, right, NPY_FLOAT64, double)\nDEFINE_TABLE_COLUMN_GETTER(Migration, time, NPY_FLOAT64, double)\nDEFINE_TABLE_COLUMN_GETTER(Migration, node, NPY_INT32, int32_t)\nDEFINE_TABLE_COLUMN_GETTER(Migration, source, NPY_INT32, int32_t)\nDEFINE_TABLE_COLUMN_GETTER(Migration, dest, NPY_INT32, int32_t)\nDEFINE_TABLE_RAGGED_COLUMN_GETTER(Migration, metadata, NPY_INT8, char, metadata_length)\n\nstatic PyObject *\nMigrationTable_add_row(MigrationTable *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    int err;\n    double left, right, time;\n    tsk_id_t node, source, dest;\n    PyObject *py_metadata = Py_None;\n    char *metadata = \"\";\n    Py_ssize_t metadata_length = 0;\n    static char *kwlist[]\n        = { \"left\", \"right\", 
\"node\", \"source\", \"dest\", \"time\", \"metadata\", NULL };\n\n    if (MigrationTable_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"ddO&O&O&d|O\", kwlist, &left, &right,\n            &tsk_id_converter, &node, &tsk_id_converter, &source, &tsk_id_converter,\n            &dest, &time, &py_metadata)) {\n        goto out;\n    }\n    if (py_metadata != Py_None) {\n        if (PyBytes_AsStringAndSize(py_metadata, &metadata, &metadata_length) < 0) {\n            goto out;\n        }\n    }\n    err = tsk_migration_table_add_row(self->table, left, right, node, source, dest, time,\n        metadata, (tsk_size_t) metadata_length);\n    if (err < 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"i\", err);\nout:\n    return ret;\n}\n\nstatic PyObject *\nMigrationTable_update_row(MigrationTable *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    int err;\n    tsk_id_t row_index = -1;\n    double left, right, time;\n    tsk_id_t node, source, dest;\n    PyObject *py_metadata = Py_None;\n    char *metadata = \"\";\n    Py_ssize_t metadata_length = 0;\n    static char *kwlist[] = { \"row_index\", \"left\", \"right\", \"node\", \"source\", \"dest\",\n        \"time\", \"metadata\", NULL };\n\n    if (MigrationTable_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"O&ddO&O&O&d|O\", kwlist,\n            &tsk_id_converter, &row_index, &left, &right, &tsk_id_converter, &node,\n            &tsk_id_converter, &source, &tsk_id_converter, &dest, &time, &py_metadata)) {\n        goto out;\n    }\n    if (py_metadata != Py_None) {\n        if (PyBytes_AsStringAndSize(py_metadata, &metadata, &metadata_length) < 0) {\n            goto out;\n        }\n    }\n    err = tsk_migration_table_update_row(self->table, row_index, left, right, node,\n        source, dest, time, metadata, (tsk_size_t) metadata_length);\n    
if (err < 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\nstatic PyGetSetDef MigrationTable_getsetters[] = {\n    { .name = \"max_rows_increment\",\n        .get = (getter) MigrationTable_get_max_rows_increment,\n        .doc = \"The size increment\" },\n    { .name = \"num_rows\",\n        .get = (getter) MigrationTable_get_num_rows,\n        .doc = \"The number of rows in the table.\" },\n    { .name = \"max_rows\",\n        .get = (getter) MigrationTable_get_max_rows,\n        .doc = \"The current maximum number of rows in the table.\" },\n    { .name = \"left\", .get = (getter) MigrationTable_get_left, .doc = \"The left array\" },\n    { .name = \"right\",\n        .get = (getter) MigrationTable_get_right,\n        .doc = \"The right array\" },\n    { .name = \"node\", .get = (getter) MigrationTable_get_node, .doc = \"The node array\" },\n    { .name = \"source\",\n        .get = (getter) MigrationTable_get_source,\n        .doc = \"The source array\" },\n    { .name = \"dest\", .get = (getter) MigrationTable_get_dest, .doc = \"The dest array\" },\n    { .name = \"time\", .get = (getter) MigrationTable_get_time, .doc = \"The time array\" },\n    { .name = \"metadata\",\n        .get = (getter) MigrationTable_get_metadata,\n        .doc = \"The metadata array\" },\n    { .name = \"metadata_offset\",\n        .get = (getter) MigrationTable_get_metadata_offset,\n        .doc = \"The metadata offset array\" },\n    { .name = \"metadata_schema\",\n        .get = (getter) MigrationTable_get_metadata_schema,\n        .set = (setter) MigrationTable_set_metadata_schema,\n        .doc = \"The metadata schema\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyMethodDef MigrationTable_methods[] = {\n    { .ml_name = \"add_row\",\n        .ml_meth = (PyCFunction) MigrationTable_add_row,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Adds a new row to this table.\" },\n    
{ .ml_name = \"update_row\",\n        .ml_meth = (PyCFunction) MigrationTable_update_row,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Updates an existing row in this table.\" },\n    { .ml_name = \"equals\",\n        .ml_meth = (PyCFunction) MigrationTable_equals,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Returns True if the specified MigrationTable is equal to this one.\" },\n    { .ml_name = \"get_row\",\n        .ml_meth = (PyCFunction) MigrationTable_get_row,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the kth row in this table.\" },\n    { .ml_name = \"set_columns\",\n        .ml_meth = (PyCFunction) MigrationTable_set_columns,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Copies the data in the specified arrays into the columns.\" },\n    { .ml_name = \"append_columns\",\n        .ml_meth = (PyCFunction) MigrationTable_append_columns,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Appends the data in the specified arrays into the columns.\" },\n    { .ml_name = \"clear\",\n        .ml_meth = (PyCFunction) MigrationTable_clear,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Clears this table.\" },\n    { .ml_name = \"truncate\",\n        .ml_meth = (PyCFunction) MigrationTable_truncate,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Truncates this table to the specified number of rows.\" },\n    { .ml_name = \"extend\",\n        .ml_meth = (PyCFunction) MigrationTable_extend,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Extend this table from another using specified row_indexes\" },\n    { .ml_name = \"keep_rows\",\n        .ml_meth = (PyCFunction) MigrationTable_keep_rows,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Keep rows in this table according to boolean array\" },\n\n    { NULL } /* Sentinel */\n};\n\nstatic PyTypeObject MigrationTableType = {\n    // clang-format off\n  
  PyVarObject_HEAD_INIT(NULL, 0)\n    .tp_name = \"_tskit.MigrationTable\",\n    .tp_basicsize = sizeof(MigrationTable),\n    .tp_dealloc = (destructor) MigrationTable_dealloc,\n    .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,\n    .tp_doc = \"MigrationTable objects\",\n    .tp_methods = MigrationTable_methods,\n    .tp_getset = MigrationTable_getsetters,\n    .tp_init = (initproc) MigrationTable_init,\n    .tp_new = PyType_GenericNew,\n    // clang-format on\n};\n\n/*===================================================================\n * SiteTable\n *===================================================================\n */\n\nstatic PyTypeObject SiteTableType;\nDEFINE_TABLE_METHODS(Site, site, site, make_site_row(&site))\nDEFINE_TABLE_EQUALS(Site, site)\nDEFINE_TABLE_METADATA_SCHEMA_GETTER(Site)\nDEFINE_TABLE_METADATA_SCHEMA_SETTER(Site, site)\n\nDEFINE_TABLE_COLUMN_GETTER(Site, position, NPY_FLOAT64, double)\nDEFINE_TABLE_RAGGED_COLUMN_GETTER(\n    Site, ancestral_state, NPY_INT8, char, ancestral_state_length)\nDEFINE_TABLE_RAGGED_COLUMN_GETTER(Site, metadata, NPY_INT8, char, metadata_length)\n\nstatic PyObject *\nSiteTable_add_row(SiteTable *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    int err;\n    double position;\n    char *ancestral_state = NULL;\n    Py_ssize_t ancestral_state_length = 0;\n    PyObject *py_metadata = Py_None;\n    char *metadata = NULL;\n    Py_ssize_t metadata_length = 0;\n    static char *kwlist[] = { \"position\", \"ancestral_state\", \"metadata\", NULL };\n\n    if (SiteTable_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"ds#|O\", kwlist, &position,\n            &ancestral_state, &ancestral_state_length, &py_metadata)) {\n        goto out;\n    }\n    if (py_metadata != Py_None) {\n        if (PyBytes_AsStringAndSize(py_metadata, &metadata, &metadata_length) < 0) {\n            goto out;\n        }\n    }\n    err = 
tsk_site_table_add_row(self->table, position, ancestral_state,\n        (tsk_size_t) ancestral_state_length, metadata, (tsk_size_t) metadata_length);\n    if (err < 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"i\", err);\nout:\n    return ret;\n}\n\nstatic PyObject *\nSiteTable_update_row(SiteTable *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    int err;\n    tsk_id_t row_index = -1;\n    double position;\n    char *ancestral_state = NULL;\n    Py_ssize_t ancestral_state_length = 0;\n    PyObject *py_metadata = Py_None;\n    char *metadata = NULL;\n    Py_ssize_t metadata_length = 0;\n    static char *kwlist[]\n        = { \"row_index\", \"position\", \"ancestral_state\", \"metadata\", NULL };\n\n    if (SiteTable_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"O&ds#|O\", kwlist, &tsk_id_converter,\n            &row_index, &position, &ancestral_state, &ancestral_state_length,\n            &py_metadata)) {\n        goto out;\n    }\n    if (py_metadata != Py_None) {\n        if (PyBytes_AsStringAndSize(py_metadata, &metadata, &metadata_length) < 0) {\n            goto out;\n        }\n    }\n    err = tsk_site_table_update_row(self->table, row_index, position, ancestral_state,\n        (tsk_size_t) ancestral_state_length, metadata, (tsk_size_t) metadata_length);\n    if (err < 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\nstatic PyGetSetDef SiteTable_getsetters[] = {\n    { .name = \"max_rows_increment\",\n        .get = (getter) SiteTable_get_max_rows_increment,\n        .doc = \"The size increment\" },\n    { .name = \"num_rows\",\n        .get = (getter) SiteTable_get_num_rows,\n        .doc = \"The number of rows in the table.\" },\n    { .name = \"max_rows\",\n        .get = (getter) SiteTable_get_max_rows,\n        .doc = \"The current maximum number 
of rows in the table.\" },\n    { .name = \"position\",\n        .get = (getter) SiteTable_get_position,\n        .doc = \"The position array.\" },\n    { .name = \"ancestral_state\",\n        .get = (getter) SiteTable_get_ancestral_state,\n        .doc = \"The ancestral state array.\" },\n    { .name = \"ancestral_state_offset\",\n        .get = (getter) SiteTable_get_ancestral_state_offset,\n        .doc = \"The ancestral state offset array.\" },\n    { .name = \"metadata\",\n        .get = (getter) SiteTable_get_metadata,\n        .doc = \"The metadata array.\" },\n    { .name = \"metadata_offset\",\n        .get = (getter) SiteTable_get_metadata_offset,\n        .doc = \"The metadata offset array.\" },\n    { .name = \"metadata_schema\",\n        .get = (getter) SiteTable_get_metadata_schema,\n        .set = (setter) SiteTable_set_metadata_schema,\n        .doc = \"The metadata schema\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyMethodDef SiteTable_methods[] = {\n    { .ml_name = \"add_row\",\n        .ml_meth = (PyCFunction) SiteTable_add_row,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Adds a new row to this table.\" },\n    { .ml_name = \"update_row\",\n        .ml_meth = (PyCFunction) SiteTable_update_row,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Updates an existing row in this table.\" },\n    { .ml_name = \"equals\",\n        .ml_meth = (PyCFunction) SiteTable_equals,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Returns True if the specified SiteTable is equal to this one.\" },\n    { .ml_name = \"get_row\",\n        .ml_meth = (PyCFunction) SiteTable_get_row,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the kth row in this table.\" },\n    { .ml_name = \"set_columns\",\n        .ml_meth = (PyCFunction) SiteTable_set_columns,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Copies the data in the specified arrays into the 
columns.\" },\n    { .ml_name = \"append_columns\",\n        .ml_meth = (PyCFunction) SiteTable_append_columns,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Appends the data in the specified arrays into the columns.\" },\n    { .ml_name = \"clear\",\n        .ml_meth = (PyCFunction) SiteTable_clear,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Clears this table.\" },\n    { .ml_name = \"truncate\",\n        .ml_meth = (PyCFunction) SiteTable_truncate,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Truncates this table to the specified number of rows.\" },\n    { .ml_name = \"extend\",\n        .ml_meth = (PyCFunction) SiteTable_extend,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Extend this table from another using specified row_indexes\" },\n    { .ml_name = \"keep_rows\",\n        .ml_meth = (PyCFunction) SiteTable_keep_rows,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Keep rows in this table according to boolean array\" },\n\n    { NULL } /* Sentinel */\n};\n\nstatic PyTypeObject SiteTableType = {\n    // clang-format off\n    PyVarObject_HEAD_INIT(NULL, 0)\n    .tp_name = \"_tskit.SiteTable\",\n    .tp_basicsize = sizeof(SiteTable),\n    .tp_dealloc = (destructor) SiteTable_dealloc,\n    .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,\n    .tp_doc = \"SiteTable objects\",\n    .tp_methods = SiteTable_methods,\n    .tp_getset = SiteTable_getsetters,\n    .tp_init = (initproc) SiteTable_init,\n    .tp_new = PyType_GenericNew,\n    // clang-format on\n};\n\n/*===================================================================\n * MutationTable\n *===================================================================\n */\n\nstatic PyTypeObject MutationTableType;\nDEFINE_TABLE_METHODS(Mutation, mutation, mutation, make_mutation_row(&mutation))\nDEFINE_TABLE_EQUALS(Mutation, mutation)\nDEFINE_TABLE_METADATA_SCHEMA_GETTER(Mutation)\nDEFINE_TABLE_METADATA_SCHEMA_SETTER(Mutation, 
mutation)\n\nDEFINE_TABLE_COLUMN_GETTER(Mutation, site, NPY_INT32, int32_t)\nDEFINE_TABLE_COLUMN_GETTER(Mutation, node, NPY_INT32, int32_t)\nDEFINE_TABLE_COLUMN_GETTER(Mutation, parent, NPY_INT32, int32_t)\nDEFINE_TABLE_COLUMN_GETTER(Mutation, time, NPY_FLOAT64, double)\nDEFINE_TABLE_RAGGED_COLUMN_GETTER(\n    Mutation, derived_state, NPY_INT8, char, derived_state_length)\nDEFINE_TABLE_RAGGED_COLUMN_GETTER(Mutation, metadata, NPY_INT8, char, metadata_length)\n\nstatic PyObject *\nMutationTable_add_row(MutationTable *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    int err;\n    tsk_id_t site, node;\n    tsk_id_t parent = TSK_NULL;\n    double time = TSK_UNKNOWN_TIME;\n    char *derived_state;\n    Py_ssize_t derived_state_length;\n    PyObject *py_metadata = Py_None;\n    char *metadata = NULL;\n    Py_ssize_t metadata_length = 0;\n    static char *kwlist[]\n        = { \"site\", \"node\", \"derived_state\", \"parent\", \"metadata\", \"time\", NULL };\n\n    if (MutationTable_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"O&O&s#|O&Od\", kwlist,\n            &tsk_id_converter, &site, &tsk_id_converter, &node, &derived_state,\n            &derived_state_length, &tsk_id_converter, &parent, &py_metadata, &time)) {\n        goto out;\n    }\n    if (py_metadata != Py_None) {\n        if (PyBytes_AsStringAndSize(py_metadata, &metadata, &metadata_length) < 0) {\n            goto out;\n        }\n    }\n    err = tsk_mutation_table_add_row(self->table, site, node, parent, time,\n        derived_state, (tsk_size_t) derived_state_length, metadata,\n        (tsk_size_t) metadata_length);\n    if (err < 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"i\", err);\nout:\n    return ret;\n}\n\nstatic PyObject *\nMutationTable_update_row(MutationTable *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    int err;\n    tsk_id_t row_index, 
site, node;\n    tsk_id_t parent = TSK_NULL;\n    double time = TSK_UNKNOWN_TIME;\n    char *derived_state;\n    Py_ssize_t derived_state_length;\n    PyObject *py_metadata = Py_None;\n    char *metadata = NULL;\n    Py_ssize_t metadata_length = 0;\n    static char *kwlist[] = { \"row_index\", \"site\", \"node\", \"derived_state\", \"parent\",\n        \"metadata\", \"time\", NULL };\n\n    if (MutationTable_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"O&O&O&s#|O&Od\", kwlist,\n            &tsk_id_converter, &row_index, &tsk_id_converter, &site, &tsk_id_converter,\n            &node, &derived_state, &derived_state_length, &tsk_id_converter, &parent,\n            &py_metadata, &time)) {\n        goto out;\n    }\n    if (py_metadata != Py_None) {\n        if (PyBytes_AsStringAndSize(py_metadata, &metadata, &metadata_length) < 0) {\n            goto out;\n        }\n    }\n    err = tsk_mutation_table_update_row(self->table, row_index, site, node, parent, time,\n        derived_state, (tsk_size_t) derived_state_length, metadata,\n        (tsk_size_t) metadata_length);\n    if (err < 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\nstatic PyGetSetDef MutationTable_getsetters[] = {\n    { .name = \"max_rows_increment\",\n        .get = (getter) MutationTable_get_max_rows_increment,\n        .doc = \"The size increment\" },\n    { .name = \"num_rows\",\n        .get = (getter) MutationTable_get_num_rows,\n        .doc = \"The number of rows in the table.\" },\n    { .name = \"max_rows\",\n        .get = (getter) MutationTable_get_max_rows,\n        .doc = \"The current maximum number of rows in the table.\" },\n    { .name = \"site\", .get = (getter) MutationTable_get_site, .doc = \"The site array\" },\n    { .name = \"node\", .get = (getter) MutationTable_get_node, .doc = \"The node array\" },\n    { .name = \"parent\",\n        .get 
= (getter) MutationTable_get_parent,\n        .doc = \"The parent array\" },\n    { .name = \"time\", .get = (getter) MutationTable_get_time, .doc = \"The time array\" },\n    { .name = \"derived_state\",\n        .get = (getter) MutationTable_get_derived_state,\n        .doc = \"The derived_state array\" },\n    { .name = \"derived_state_offset\",\n        .get = (getter) MutationTable_get_derived_state_offset,\n        .doc = \"The derived_state_offset array\" },\n    { .name = \"metadata\",\n        .get = (getter) MutationTable_get_metadata,\n        .doc = \"The metadata array\" },\n    { .name = \"metadata_offset\",\n        .get = (getter) MutationTable_get_metadata_offset,\n        .doc = \"The metadata_offset array\" },\n    { .name = \"metadata_schema\",\n        .get = (getter) MutationTable_get_metadata_schema,\n        .set = (setter) MutationTable_set_metadata_schema,\n        .doc = \"The metadata schema\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyMethodDef MutationTable_methods[] = {\n    { .ml_name = \"add_row\",\n        .ml_meth = (PyCFunction) MutationTable_add_row,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Adds a new row to this table.\" },\n    { .ml_name = \"update_row\",\n        .ml_meth = (PyCFunction) MutationTable_update_row,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Updates an existing row in this table.\" },\n    { .ml_name = \"equals\",\n        .ml_meth = (PyCFunction) MutationTable_equals,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Returns True if the specified MutationTable is equal to this one.\" },\n    { .ml_name = \"get_row\",\n        .ml_meth = (PyCFunction) MutationTable_get_row,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the kth row in this table.\" },\n    { .ml_name = \"set_columns\",\n        .ml_meth = (PyCFunction) MutationTable_set_columns,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        
.ml_doc = \"Copies the data in the specified arrays into the columns.\" },\n    { .ml_name = \"append_columns\",\n        .ml_meth = (PyCFunction) MutationTable_append_columns,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Appends the data in the specified arrays into the columns.\" },\n    { .ml_name = \"clear\",\n        .ml_meth = (PyCFunction) MutationTable_clear,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Clears this table.\" },\n    { .ml_name = \"truncate\",\n        .ml_meth = (PyCFunction) MutationTable_truncate,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Truncates this table to the specified number of rows.\" },\n    { .ml_name = \"extend\",\n        .ml_meth = (PyCFunction) MutationTable_extend,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Extend this table from another using specified row_indexes\" },\n    { .ml_name = \"keep_rows\",\n        .ml_meth = (PyCFunction) MutationTable_keep_rows,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Keep rows in this table according to boolean array\" },\n\n    { NULL } /* Sentinel */\n};\n\nstatic PyTypeObject MutationTableType = {\n    // clang-format off\n    PyVarObject_HEAD_INIT(NULL, 0)\n    .tp_name = \"_tskit.MutationTable\",\n    .tp_basicsize = sizeof(MutationTable),\n    .tp_dealloc = (destructor) MutationTable_dealloc,\n    .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,\n    .tp_doc = \"MutationTable objects\",\n    .tp_methods = MutationTable_methods,\n    .tp_getset = MutationTable_getsetters,\n    .tp_init = (initproc) MutationTable_init,\n    .tp_new = PyType_GenericNew,\n    // clang-format on\n};\n\n/*===================================================================\n * PopulationTable\n *===================================================================\n */\n\nstatic PyTypeObject PopulationTableType;\nDEFINE_TABLE_METHODS(Population, population, population, 
make_population(&population))\nDEFINE_TABLE_EQUALS(Population, population)\nDEFINE_TABLE_METADATA_SCHEMA_GETTER(Population)\nDEFINE_TABLE_METADATA_SCHEMA_SETTER(Population, population)\n\nDEFINE_TABLE_RAGGED_COLUMN_GETTER(Population, metadata, NPY_INT8, char, metadata_length)\n\nstatic PyObject *\nPopulationTable_add_row(PopulationTable *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    int err;\n    PyObject *py_metadata = Py_None;\n    char *metadata = NULL;\n    Py_ssize_t metadata_length = 0;\n    static char *kwlist[] = { \"metadata\", NULL };\n\n    if (PopulationTable_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"|O\", kwlist, &py_metadata)) {\n        goto out;\n    }\n\n    if (py_metadata != Py_None) {\n        if (PyBytes_AsStringAndSize(py_metadata, &metadata, &metadata_length) < 0) {\n            goto out;\n        }\n    }\n    err = tsk_population_table_add_row(\n        self->table, metadata, (tsk_size_t) metadata_length);\n    if (err < 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"i\", err);\nout:\n    return ret;\n}\n\nstatic PyObject *\nPopulationTable_update_row(PopulationTable *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    int err;\n    tsk_id_t row_index = -1;\n    PyObject *py_metadata = Py_None;\n    char *metadata = NULL;\n    Py_ssize_t metadata_length = 0;\n    static char *kwlist[] = { \"row_index\", \"metadata\", NULL };\n\n    if (PopulationTable_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(\n            args, kwds, \"O&|O\", kwlist, &tsk_id_converter, &row_index, &py_metadata)) {\n        goto out;\n    }\n\n    if (py_metadata != Py_None) {\n        if (PyBytes_AsStringAndSize(py_metadata, &metadata, &metadata_length) < 0) {\n            goto out;\n        }\n    }\n    err = tsk_population_table_update_row(\n        self->table, 
row_index, metadata, (tsk_size_t) metadata_length);\n    if (err < 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\nstatic PyGetSetDef PopulationTable_getsetters[] = {\n    { .name = \"max_rows_increment\",\n        .get = (getter) PopulationTable_get_max_rows_increment,\n        .doc = \"The size increment\" },\n    { .name = \"num_rows\",\n        .get = (getter) PopulationTable_get_num_rows,\n        .doc = \"The number of rows in the table.\" },\n    { .name = \"max_rows\",\n        .get = (getter) PopulationTable_get_max_rows,\n        .doc = \"The current maximum number of rows in the table.\" },\n    { .name = \"metadata\",\n        .get = (getter) PopulationTable_get_metadata,\n        .doc = \"The metadata array\" },\n    { .name = \"metadata_offset\",\n        .get = (getter) PopulationTable_get_metadata_offset,\n        .doc = \"The metadata offset array\" },\n    { .name = \"metadata_schema\",\n        .get = (getter) PopulationTable_get_metadata_schema,\n        .set = (setter) PopulationTable_set_metadata_schema,\n        .doc = \"The metadata schema\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyMethodDef PopulationTable_methods[] = {\n    { .ml_name = \"add_row\",\n        .ml_meth = (PyCFunction) PopulationTable_add_row,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Adds a new row to this table.\" },\n    { .ml_name = \"update_row\",\n        .ml_meth = (PyCFunction) PopulationTable_update_row,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Updates an existing row in this table.\" },\n    { .ml_name = \"equals\",\n        .ml_meth = (PyCFunction) PopulationTable_equals,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc\n        = \"Returns True if the specified PopulationTable is equal to this one.\" },\n    { .ml_name = \"get_row\",\n        .ml_meth = (PyCFunction) PopulationTable_get_row,\n        
.ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the kth row in this table.\" },\n    { .ml_name = \"append_columns\",\n        .ml_meth = (PyCFunction) PopulationTable_append_columns,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Appends the data in the specified arrays into the columns.\" },\n    { .ml_name = \"set_columns\",\n        .ml_meth = (PyCFunction) PopulationTable_set_columns,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Copies the data in the specified arrays into the columns.\" },\n    { .ml_name = \"clear\",\n        .ml_meth = (PyCFunction) PopulationTable_clear,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Clears this table.\" },\n    { .ml_name = \"truncate\",\n        .ml_meth = (PyCFunction) PopulationTable_truncate,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Truncates this table to the specified number of rows.\" },\n    { .ml_name = \"extend\",\n        .ml_meth = (PyCFunction) PopulationTable_extend,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Extend this table from another using specified row_indexes\" },\n    { .ml_name = \"keep_rows\",\n        .ml_meth = (PyCFunction) PopulationTable_keep_rows,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Keep rows in this table according to boolean array\" },\n\n    { NULL } /* Sentinel */\n};\n\nstatic PyTypeObject PopulationTableType = {\n    // clang-format off\n    PyVarObject_HEAD_INIT(NULL, 0)\n    .tp_name = \"_tskit.PopulationTable\",\n    .tp_basicsize = sizeof(PopulationTable),\n    .tp_dealloc = (destructor) PopulationTable_dealloc,\n    .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,\n    .tp_doc = \"PopulationTable objects\",\n    .tp_methods = PopulationTable_methods,\n    .tp_getset = PopulationTable_getsetters,\n    .tp_init = (initproc) PopulationTable_init,\n    .tp_new = PyType_GenericNew,\n    // clang-format 
on\n};\n\n/*===================================================================\n * ProvenanceTable\n *===================================================================\n */\n\nstatic PyTypeObject ProvenanceTableType;\nDEFINE_TABLE_METHODS(Provenance, provenance, provenance, make_provenance(&provenance))\n\nDEFINE_TABLE_RAGGED_COLUMN_GETTER(\n    Provenance, timestamp, NPY_INT8, char, timestamp_length)\nDEFINE_TABLE_RAGGED_COLUMN_GETTER(Provenance, record, NPY_INT8, char, record_length)\n\nstatic PyObject *\nProvenanceTable_equals(ProvenanceTable *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    ProvenanceTable *other = NULL;\n    tsk_flags_t options = 0;\n    int ignore_timestamps = false;\n    static char *kwlist[] = { \"other\", \"ignore_timestamps\", NULL };\n\n    if (ProvenanceTable_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"O!|i\", kwlist, &ProvenanceTableType,\n            &other, &ignore_timestamps)) {\n        goto out;\n    }\n    if (ProvenanceTable_check_state(other) != 0) {\n        goto out;\n    }\n    if (ignore_timestamps) {\n        options |= TSK_CMP_IGNORE_TIMESTAMPS;\n    }\n    ret = Py_BuildValue(\n        \"i\", tsk_provenance_table_equals(self->table, other->table, options));\nout:\n    return ret;\n}\n\nstatic PyObject *\nProvenanceTable_add_row(ProvenanceTable *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    int err;\n    char *timestamp = \"\";\n    Py_ssize_t timestamp_length = 0;\n    char *record = \"\";\n    Py_ssize_t record_length = 0;\n    static char *kwlist[] = { \"timestamp\", \"record\", NULL };\n\n    if (ProvenanceTable_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"s#s#\", kwlist, &timestamp,\n            &timestamp_length, &record, &record_length)) {\n        goto out;\n    }\n    err = tsk_provenance_table_add_row(self->table, timestamp,\n        
(tsk_size_t) timestamp_length, record, (tsk_size_t) record_length);\n    if (err < 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"i\", err);\nout:\n    return ret;\n}\n\nstatic PyObject *\nProvenanceTable_update_row(ProvenanceTable *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    int err;\n    tsk_id_t row_index = -1;\n    char *timestamp = \"\";\n    Py_ssize_t timestamp_length = 0;\n    char *record = \"\";\n    Py_ssize_t record_length = 0;\n    static char *kwlist[] = { \"row_index\", \"timestamp\", \"record\", NULL };\n\n    if (ProvenanceTable_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"O&s#s#\", kwlist, &tsk_id_converter,\n            &row_index, &timestamp, &timestamp_length, &record, &record_length)) {\n        goto out;\n    }\n    err = tsk_provenance_table_update_row(self->table, row_index, timestamp,\n        (tsk_size_t) timestamp_length, record, (tsk_size_t) record_length);\n    if (err < 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\nstatic PyGetSetDef ProvenanceTable_getsetters[] = {\n    { .name = \"max_rows_increment\",\n        .get = (getter) ProvenanceTable_get_max_rows_increment,\n        .doc = \"The size increment\" },\n    { .name = \"num_rows\",\n        .get = (getter) ProvenanceTable_get_num_rows,\n        .doc = \"The number of rows in the table.\" },\n    { .name = \"max_rows\",\n        .get = (getter) ProvenanceTable_get_max_rows,\n        .doc = \"The current maximum number of rows in the table.\" },\n    { .name = \"timestamp\",\n        .get = (getter) ProvenanceTable_get_timestamp,\n        .doc = \"The timestamp array\" },\n    { .name = \"timestamp_offset\",\n        .get = (getter) ProvenanceTable_get_timestamp_offset,\n        .doc = \"The timestamp offset array\" },\n    { .name = \"record\",\n        .get = 
(getter) ProvenanceTable_get_record,\n        .doc = \"The record array\" },\n    { .name = \"record_offset\",\n        .get = (getter) ProvenanceTable_get_record_offset,\n        .doc = \"The record offset array\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyMethodDef ProvenanceTable_methods[] = {\n    { .ml_name = \"add_row\",\n        .ml_meth = (PyCFunction) ProvenanceTable_add_row,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Adds a new row to this table.\" },\n    { .ml_name = \"update_row\",\n        .ml_meth = (PyCFunction) ProvenanceTable_update_row,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Updates an existing row in this table.\" },\n    { .ml_name = \"equals\",\n        .ml_meth = (PyCFunction) ProvenanceTable_equals,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc\n        = \"Returns True if the specified ProvenanceTable is equal to this one.\" },\n    { .ml_name = \"get_row\",\n        .ml_meth = (PyCFunction) ProvenanceTable_get_row,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the kth row in this table.\" },\n    { .ml_name = \"append_columns\",\n        .ml_meth = (PyCFunction) ProvenanceTable_append_columns,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Appends the data in the specified arrays into the columns.\" },\n    { .ml_name = \"set_columns\",\n        .ml_meth = (PyCFunction) ProvenanceTable_set_columns,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Copies the data in the specified arrays into the columns.\" },\n    { .ml_name = \"clear\",\n        .ml_meth = (PyCFunction) ProvenanceTable_clear,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Clears this table.\" },\n    { .ml_name = \"truncate\",\n        .ml_meth = (PyCFunction) ProvenanceTable_truncate,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Truncates this table to the specified number of rows.\" },\n    { 
.ml_name = \"extend\",\n        .ml_meth = (PyCFunction) ProvenanceTable_extend,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Extend this table from another using specified row_indexes\" },\n    { .ml_name = \"keep_rows\",\n        .ml_meth = (PyCFunction) ProvenanceTable_keep_rows,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Keep rows in this table according to boolean array\" },\n\n    { NULL } /* Sentinel */\n};\n\nstatic PyTypeObject ProvenanceTableType = {\n    // clang-format off\n    PyVarObject_HEAD_INIT(NULL, 0)\n    .tp_name = \"_tskit.ProvenanceTable\",\n    .tp_basicsize = sizeof(ProvenanceTable),\n    .tp_dealloc = (destructor) ProvenanceTable_dealloc,\n    .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,\n    .tp_doc = \"ProvenanceTable objects\",\n    .tp_methods = ProvenanceTable_methods,\n    .tp_getset = ProvenanceTable_getsetters,\n    .tp_init = (initproc) ProvenanceTable_init,\n    .tp_new = PyType_GenericNew,\n    // clang-format on\n};\n\n/*===================================================================\n * IdentitySegmentList\n *===================================================================\n */\n\nstatic int\nIdentitySegmentList_check_state(IdentitySegmentList *self)\n{\n    int ret = -1;\n    if (self->segment_list == NULL) {\n        PyErr_SetString(PyExc_SystemError, \"IdentitySegmentList not initialised\");\n        goto out;\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic int\nIdentitySegmentList_check_segments_stored(IdentitySegmentList *self)\n{\n    int ret = -1;\n    tsk_identity_segments_t *ibd_segs = self->identity_segments->identity_segments;\n    if (!ibd_segs->store_segments) {\n        handle_library_error(TSK_ERR_IBD_SEGMENTS_NOT_STORED);\n        goto out;\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic void\nIdentitySegmentList_dealloc(IdentitySegmentList *self)\n{\n    /* The segment list memory is handled by the parent IdentitySegments object */\n    
Py_XDECREF(self->identity_segments);\n    self->segment_list = NULL;\n    Py_TYPE(self)->tp_free((PyObject *) self);\n}\n\nstatic int\nIdentitySegmentList_init(IdentitySegmentList *self, PyObject *args, PyObject *kwds)\n{\n    /* This object cannot be initialised from client code, and can only\n     * be created from the IdentitySegments_get method below, which sets up the\n     * correct pointers and handles the refcounting */\n    self->segment_list = NULL;\n    self->identity_segments = NULL;\n    return 0;\n}\n\nstatic PyObject *\nIdentitySegmentList_get_num_segments(IdentitySegmentList *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (IdentitySegmentList_check_state(self) != 0) {\n        goto out;\n    }\n\n    ret = Py_BuildValue(\"K\", (unsigned long long) self->segment_list->num_segments);\nout:\n    return ret;\n}\n\nstatic PyObject *\nIdentitySegmentList_get_total_span(IdentitySegmentList *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (IdentitySegmentList_check_state(self) != 0) {\n        goto out;\n    }\n\n    ret = Py_BuildValue(\"d\", self->segment_list->total_span);\nout:\n    return ret;\n}\n\nstatic PyObject *\nIdentitySegmentList_get_left(IdentitySegmentList *self, void *closure)\n{\n    PyObject *ret = NULL;\n    PyArrayObject *left_array = NULL;\n    double *left;\n    tsk_size_t seg_index;\n    tsk_identity_segment_t *u;\n    npy_intp num_segments;\n\n    if (IdentitySegmentList_check_state(self) != 0) {\n        goto out;\n    }\n    if (IdentitySegmentList_check_segments_stored(self) != 0) {\n        goto out;\n    }\n\n    num_segments = (npy_intp) self->segment_list->num_segments;\n    left_array = (PyArrayObject *) PyArray_SimpleNew(1, &num_segments, NPY_FLOAT64);\n    if (left_array == NULL) {\n        goto out;\n    }\n    left = (double *) PyArray_DATA(left_array);\n    seg_index = 0;\n    for (u = self->segment_list->head; u != NULL; u = u->next) {\n        left[seg_index] = u->left;\n        
seg_index++;\n    }\n    ret = (PyObject *) left_array;\nout:\n    return ret;\n}\n\nstatic PyObject *\nIdentitySegmentList_get_right(IdentitySegmentList *self, void *closure)\n{\n    PyObject *ret = NULL;\n    PyArrayObject *right_array = NULL;\n    double *right;\n    tsk_size_t seg_index;\n    tsk_identity_segment_t *u;\n    npy_intp num_segments;\n\n    if (IdentitySegmentList_check_state(self) != 0) {\n        goto out;\n    }\n    if (IdentitySegmentList_check_segments_stored(self) != 0) {\n        goto out;\n    }\n\n    num_segments = (npy_intp) self->segment_list->num_segments;\n    right_array = (PyArrayObject *) PyArray_SimpleNew(1, &num_segments, NPY_FLOAT64);\n    if (right_array == NULL) {\n        goto out;\n    }\n    right = (double *) PyArray_DATA(right_array);\n    seg_index = 0;\n    for (u = self->segment_list->head; u != NULL; u = u->next) {\n        right[seg_index] = u->right;\n        seg_index++;\n    }\n    ret = (PyObject *) right_array;\nout:\n    return ret;\n}\n\nstatic PyObject *\nIdentitySegmentList_get_node(IdentitySegmentList *self, void *closure)\n{\n    PyObject *ret = NULL;\n    PyArrayObject *node_array = NULL;\n    int32_t *node;\n    tsk_size_t seg_index;\n    tsk_identity_segment_t *u;\n    npy_intp num_segments;\n\n    if (IdentitySegmentList_check_state(self) != 0) {\n        goto out;\n    }\n    if (IdentitySegmentList_check_segments_stored(self) != 0) {\n        goto out;\n    }\n\n    num_segments = (npy_intp) self->segment_list->num_segments;\n    node_array = (PyArrayObject *) PyArray_SimpleNew(1, &num_segments, NPY_INT32);\n    if (node_array == NULL) {\n        goto out;\n    }\n    node = (int32_t *) PyArray_DATA(node_array);\n    seg_index = 0;\n    for (u = self->segment_list->head; u != NULL; u = u->next) {\n        node[seg_index] = u->node;\n        seg_index++;\n    }\n    ret = (PyObject *) node_array;\nout:\n    return ret;\n}\n\nstatic PyMethodDef IdentitySegmentList_methods[] = {\n    { NULL } /* 
Sentinel */\n};\n\nstatic PyGetSetDef IdentitySegmentList_getsetters[] = {\n    { .name = \"num_segments\",\n        .get = (getter) IdentitySegmentList_get_num_segments,\n        .doc = \"The number of segments in this list\" },\n    { .name = \"total_span\",\n        .get = (getter) IdentitySegmentList_get_total_span,\n        .doc = \"The sequence length spanned by all segments\" },\n    { .name = \"left\",\n        .get = (getter) IdentitySegmentList_get_left,\n        .doc = \"A numpy array of the left coordinates of each segment.\" },\n    { .name = \"right\",\n        .get = (getter) IdentitySegmentList_get_right,\n        .doc = \"A numpy array of the right coordinates of each segment.\" },\n    { .name = \"node\",\n        .get = (getter) IdentitySegmentList_get_node,\n        .doc = \"A numpy array of the node of each segment.\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyTypeObject IdentitySegmentListType = {\n    // clang-format off\n    PyVarObject_HEAD_INIT(NULL, 0)\n    .tp_name = \"_tskit.IdentitySegmentList\",\n    .tp_basicsize = sizeof(IdentitySegmentList),\n    .tp_dealloc = (destructor) IdentitySegmentList_dealloc,\n    .tp_flags = Py_TPFLAGS_DEFAULT,\n    .tp_doc = \"A thin Python translation layer over the C tsk_segment_list_t struct\",\n    .tp_methods = IdentitySegmentList_methods,\n    .tp_getset = IdentitySegmentList_getsetters,\n    .tp_init = (initproc) IdentitySegmentList_init,\n    .tp_new = PyType_GenericNew,\n    // clang-format on\n};\n\n/*===================================================================\n * IdentitySegments\n *===================================================================\n */\n\nstatic int\nIdentitySegments_check_state(IdentitySegments *self)\n{\n    int ret = -1;\n    if (self->identity_segments == NULL) {\n        PyErr_SetString(PyExc_SystemError, \"IdentitySegments not initialised\");\n        goto out;\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic 
void\nIdentitySegments_dealloc(IdentitySegments *self)\n{\n    if (self->identity_segments != NULL) {\n        tsk_identity_segments_free(self->identity_segments);\n        PyMem_Free(self->identity_segments);\n        self->identity_segments = NULL;\n    }\n    Py_TYPE(self)->tp_free((PyObject *) self);\n}\n\nstatic int\nIdentitySegments_init(IdentitySegments *self, PyObject *args, PyObject *kwds)\n{\n    int ret = -1;\n\n    self->identity_segments = NULL;\n    self->identity_segments = PyMem_Calloc(1, sizeof(*self->identity_segments));\n    if (self->identity_segments == NULL) {\n        PyErr_NoMemory();\n        goto out;\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic PyObject *\nIdentitySegments_get(IdentitySegments *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    IdentitySegmentList *py_seglist = NULL;\n    int sample_a, sample_b;\n    tsk_identity_segment_list_t *seglist;\n    int err;\n\n    if (IdentitySegments_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"ii\", &sample_a, &sample_b)) {\n        goto out;\n    }\n    err = tsk_identity_segments_get(\n        self->identity_segments, (tsk_id_t) sample_a, (tsk_id_t) sample_b, &seglist);\n    if (err < 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    if (seglist == NULL) {\n        PyErr_SetString(PyExc_KeyError, \"Sample pair not found\");\n        goto out;\n    }\n    py_seglist = (IdentitySegmentList *) PyObject_CallObject(\n        (PyObject *) &IdentitySegmentListType, NULL);\n    if (py_seglist == NULL) {\n        goto out;\n    }\n    py_seglist->segment_list = seglist;\n    py_seglist->identity_segments = self;\n    /* The segment list uses a reference to this IdentitySegments to ensure its\n     * memory is valid, so increment our refcount here */\n    Py_INCREF(self);\n\n    ret = (PyObject *) py_seglist;\n    py_seglist = NULL;\nout:\n    Py_XDECREF(py_seglist);\n    return ret;\n}\n\nstatic PyObject 
*\nIdentitySegments_get_keys(IdentitySegments *self)\n{\n    PyObject *ret = NULL;\n    PyArrayObject *pairs_array = NULL;\n    npy_intp dims[2];\n    int err;\n\n    if (IdentitySegments_check_state(self) != 0) {\n        goto out;\n    }\n    dims[0] = tsk_identity_segments_get_num_pairs(self->identity_segments);\n    dims[1] = 2;\n    pairs_array = (PyArrayObject *) PyArray_SimpleNew(2, dims, NPY_INT32);\n    if (pairs_array == NULL) {\n        goto out;\n    }\n    err = tsk_identity_segments_get_keys(\n        self->identity_segments, (int32_t *) PyArray_DATA(pairs_array));\n    if (err < 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) pairs_array;\n    pairs_array = NULL;\nout:\n    Py_XDECREF(pairs_array);\n    return ret;\n}\n\nstatic PyObject *\nIdentitySegments_print_state(IdentitySegments *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    PyObject *fileobj;\n    FILE *file = NULL;\n\n    if (IdentitySegments_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"O\", &fileobj)) {\n        goto out;\n    }\n    file = make_file(fileobj, \"w\");\n    if (file == NULL) {\n        goto out;\n    }\n    tsk_identity_segments_print_state(self->identity_segments, file);\n    ret = Py_BuildValue(\"\");\nout:\n    if (file != NULL) {\n        (void) fclose(file);\n    }\n    return ret;\n}\n\nstatic PyObject *\nIdentitySegments_get_num_segments(IdentitySegments *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (IdentitySegments_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"K\", (unsigned long long) tsk_identity_segments_get_num_segments(\n                                 self->identity_segments));\nout:\n    return ret;\n}\n\nstatic PyObject *\nIdentitySegments_get_total_span(IdentitySegments *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (IdentitySegments_check_state(self) != 0) {\n        goto out;\n    }\n    ret = 
Py_BuildValue(\n        \"d\", tsk_identity_segments_get_total_span(self->identity_segments));\nout:\n    return ret;\n}\n\nstatic PyObject *\nIdentitySegments_get_num_pairs(IdentitySegments *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (IdentitySegments_check_state(self) != 0) {\n        goto out;\n    }\n    if (!self->identity_segments->store_pairs) {\n        handle_library_error(TSK_ERR_IBD_PAIRS_NOT_STORED);\n        goto out;\n    }\n    ret = Py_BuildValue(\"K\", (unsigned long long) tsk_identity_segments_get_num_pairs(\n                                 self->identity_segments));\nout:\n    return ret;\n}\n\nstatic PyMethodDef IdentitySegments_methods[] = {\n    { .ml_name = \"print_state\",\n        .ml_meth = (PyCFunction) IdentitySegments_print_state,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Debug method to print out the low-level state\" },\n    { .ml_name = \"get\",\n        .ml_meth = (PyCFunction) IdentitySegments_get,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Return a dictionary representing the IBD segments for a given pair\" },\n    { .ml_name = \"get_keys\",\n        .ml_meth = (PyCFunction) IdentitySegments_get_keys,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Return a (n, 2) dim numpy array of all the sample pairs.\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyGetSetDef IdentitySegments_getsetters[] = {\n    { .name = \"num_segments\",\n        .get = (getter) IdentitySegments_get_num_segments,\n        .doc = \"The total number of segments in this IBD Result\" },\n    { .name = \"total_span\",\n        .get = (getter) IdentitySegments_get_total_span,\n        .doc = \"The sum of (right - left) across all segments\" },\n    { .name = \"num_pairs\",\n        .get = (getter) IdentitySegments_get_num_pairs,\n        .doc = \"The number of node pairs stored in the result\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyTypeObject IdentitySegmentsType = {\n    // clang-format off\n    
PyVarObject_HEAD_INIT(NULL, 0)\n    .tp_name = \"_tskit.IdentitySegments\",\n    .tp_basicsize = sizeof(IdentitySegments),\n    .tp_dealloc = (destructor) IdentitySegments_dealloc,\n    .tp_flags = Py_TPFLAGS_DEFAULT,\n    .tp_doc = \"IdentitySegments objects\",\n    .tp_methods = IdentitySegments_methods,\n    .tp_getset = IdentitySegments_getsetters,\n    .tp_init = (initproc) IdentitySegments_init,\n    .tp_new = PyType_GenericNew,\n    // clang-format on\n};\n\n/*===================================================================\n * ReferenceSequence\n *===================================================================\n */\n\nstatic int\nReferenceSequence_check_read(ReferenceSequence *self)\n{\n    int ret = -1;\n    if (self->reference_sequence == NULL) {\n        PyErr_SetString(PyExc_SystemError, \"ReferenceSequence not initialised\");\n        goto out;\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic int\nReferenceSequence_check_write(ReferenceSequence *self)\n{\n    int ret = ReferenceSequence_check_read(self);\n\n    if (ret != 0) {\n        goto out;\n    }\n    if (self->read_only) {\n        PyErr_SetString(PyExc_AttributeError,\n            \"ReferenceSequence is read-only and can only be modified \"\n            \"in a TableCollection\");\n        ret = -1;\n        goto out;\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic void\nReferenceSequence_dealloc(ReferenceSequence *self)\n{\n    self->reference_sequence = NULL;\n    Py_XDECREF(self->owner);\n    Py_TYPE(self)->tp_free((PyObject *) self);\n}\n\nstatic int\nReferenceSequence_init(ReferenceSequence *self, PyObject *args, PyObject *kwds)\n{\n    self->reference_sequence = NULL;\n    self->owner = NULL;\n    self->read_only = true;\n    return 0;\n}\n\nstatic PyObject *\nReferenceSequence_get_data(ReferenceSequence *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (ReferenceSequence_check_read(self) != 0) {\n        goto out;\n    }\n    /* This isn't zero-copy, so 
we'll possibly want to return a\n     * numpy array wrapping this at some point */\n    ret = make_Py_Unicode_FromStringAndLength(\n        self->reference_sequence->data, self->reference_sequence->data_length);\nout:\n    return ret;\n}\n\ntypedef int(refseq_string_setter_func)(\n    tsk_reference_sequence_t *obj, const char *str, tsk_size_t len);\n\nstatic int\nReferenceSequence_set_string_attr(ReferenceSequence *self, PyObject *arg,\n    const char *attr_name, refseq_string_setter_func setter_func)\n{\n    int ret = -1;\n    int err;\n    const char *str;\n    Py_ssize_t length;\n\n    if (ReferenceSequence_check_write(self) != 0) {\n        goto out;\n    }\n    if (arg == NULL) {\n        PyErr_Format(\n            PyExc_AttributeError, \"Cannot del %s, set to None to clear.\", attr_name);\n        goto out;\n    }\n    if (!PyUnicode_Check(arg)) {\n        PyErr_Format(PyExc_TypeError, \"%s must be a string\", attr_name);\n        goto out;\n    }\n    str = PyUnicode_AsUTF8AndSize(arg, &length);\n    if (str == NULL) {\n        goto out;\n    }\n    err = setter_func(self->reference_sequence, str, (tsk_size_t) length);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic int\nReferenceSequence_set_data(ReferenceSequence *self, PyObject *arg, void *closure)\n{\n    return ReferenceSequence_set_string_attr(\n        self, arg, \"data\", tsk_reference_sequence_set_data);\n}\n\nstatic PyObject *\nReferenceSequence_get_url(ReferenceSequence *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (ReferenceSequence_check_read(self) != 0) {\n        goto out;\n    }\n    ret = make_Py_Unicode_FromStringAndLength(\n        self->reference_sequence->url, self->reference_sequence->url_length);\nout:\n    return ret;\n}\n\nstatic int\nReferenceSequence_set_url(ReferenceSequence *self, PyObject *arg, void *closure)\n{\n    return ReferenceSequence_set_string_attr(\n        self, arg, 
\"url\", tsk_reference_sequence_set_url);\n}\n\nstatic PyObject *\nReferenceSequence_get_metadata_schema(ReferenceSequence *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (ReferenceSequence_check_read(self) != 0) {\n        goto out;\n    }\n    ret = make_Py_Unicode_FromStringAndLength(self->reference_sequence->metadata_schema,\n        self->reference_sequence->metadata_schema_length);\nout:\n    return ret;\n}\n\nstatic int\nReferenceSequence_set_metadata_schema(\n    ReferenceSequence *self, PyObject *arg, void *closure)\n{\n    return ReferenceSequence_set_string_attr(\n        self, arg, \"metadata_schema\", tsk_reference_sequence_set_metadata_schema);\n}\n\nstatic PyObject *\nReferenceSequence_get_metadata(ReferenceSequence *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (ReferenceSequence_check_read(self) != 0) {\n        goto out;\n    }\n\n    ret = PyBytes_FromStringAndSize(\n        self->reference_sequence->metadata, self->reference_sequence->metadata_length);\nout:\n    return ret;\n}\n\nstatic int\nReferenceSequence_set_metadata(ReferenceSequence *self, PyObject *arg, void *closure)\n{\n    int ret = -1;\n    int err;\n    char *metadata;\n    Py_ssize_t metadata_length;\n\n    if (ReferenceSequence_check_write(self) != 0) {\n        goto out;\n    }\n    if (arg == NULL) {\n        PyErr_Format(PyExc_AttributeError,\n            \"Cannot del metadata, set to empty string (b\\\"\\\") to clear.\");\n        goto out;\n    }\n    err = PyBytes_AsStringAndSize(arg, &metadata, &metadata_length);\n    if (err != 0) {\n        goto out;\n    }\n    err = tsk_reference_sequence_set_metadata(\n        self->reference_sequence, metadata, metadata_length);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic PyObject *\nReferenceSequence_is_null(ReferenceSequence *self)\n{\n    PyObject *ret = NULL;\n\n    if (ReferenceSequence_check_read(self) != 0) {\n 
       goto out;\n    }\n    ret = Py_BuildValue(\n        \"i\", (int) tsk_reference_sequence_is_null(self->reference_sequence));\nout:\n    return ret;\n}\n\nstatic PyMethodDef ReferenceSequence_methods[] = {\n    { .ml_name = \"is_null\",\n        .ml_meth = (PyCFunction) ReferenceSequence_is_null,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns True if this is the null reference sequence .\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyGetSetDef ReferenceSequence_getsetters[] = {\n    { .name = \"data\",\n        .set = (setter) ReferenceSequence_set_data,\n        .get = (getter) ReferenceSequence_get_data,\n        .doc = \"The data string for this reference sequence. \" },\n    { .name = \"url\",\n        .set = (setter) ReferenceSequence_set_url,\n        .get = (getter) ReferenceSequence_get_url,\n        .doc = \"The url string for this reference sequence. \" },\n    { .name = \"metadata_schema\",\n        .set = (setter) ReferenceSequence_set_metadata_schema,\n        .get = (getter) ReferenceSequence_get_metadata_schema,\n        .doc = \"The metadata_schema string for this reference sequence. \" },\n    { .name = \"metadata\",\n        .set = (setter) ReferenceSequence_set_metadata,\n        .get = (getter) ReferenceSequence_get_metadata,\n        .doc = \"The metadata string for this reference sequence. 
\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyTypeObject ReferenceSequenceType = {\n    // clang-format off\n    PyVarObject_HEAD_INIT(NULL, 0)\n    .tp_name = \"_tskit.ReferenceSequence\",\n    .tp_basicsize = sizeof(ReferenceSequence),\n    .tp_dealloc = (destructor) ReferenceSequence_dealloc,\n    .tp_flags = Py_TPFLAGS_DEFAULT,\n    .tp_doc = \"A thin Python translation layer over the C tsk_reference_sequence_t struct\",\n    .tp_methods = ReferenceSequence_methods,\n    .tp_getset = ReferenceSequence_getsetters,\n    .tp_init = (initproc) ReferenceSequence_init,\n    .tp_new = PyType_GenericNew,\n    // clang-format on\n};\n\nstatic PyObject *\nReferenceSequence_get_new(\n    tsk_reference_sequence_t *refseq, PyObject *owner, bool read_only)\n{\n\n    PyObject *ret = NULL;\n    ReferenceSequence *py_refseq = NULL;\n\n    py_refseq = (ReferenceSequence *) PyObject_CallObject(\n        (PyObject *) &ReferenceSequenceType, NULL);\n    if (py_refseq == NULL) {\n        goto out;\n    }\n    py_refseq->reference_sequence = refseq;\n    py_refseq->owner = owner;\n    py_refseq->read_only = read_only;\n    /* We increment the reference on the owner */\n    Py_INCREF(owner);\n\n    ret = (PyObject *) py_refseq;\n    py_refseq = NULL;\nout:\n    Py_XDECREF(py_refseq);\n    return ret;\n}\n\n/*===================================================================\n * TableCollection\n *===================================================================\n */\n\nstatic int\nTableCollection_check_state(TableCollection *self)\n{\n    int ret = 0;\n    if (self->tables == NULL) {\n        PyErr_SetString(PyExc_SystemError, \"TableCollection not initialised\");\n        ret = -1;\n    }\n    return ret;\n}\n\nstatic int\nTableCollection_alloc(TableCollection *self)\n{\n    int ret = -1;\n\n    if (self->tables != NULL) {\n        tsk_table_collection_free(self->tables);\n        PyMem_Free(self->tables);\n    }\n    self->tables = 
PyMem_Malloc(sizeof(tsk_table_collection_t));\n    if (self->tables == NULL) {\n        PyErr_NoMemory();\n        goto out;\n    }\n    memset(self->tables, 0, sizeof(*self->tables));\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic void\nTableCollection_dealloc(TableCollection *self)\n{\n    if (self->tables != NULL) {\n        tsk_table_collection_free(self->tables);\n        PyMem_Free(self->tables);\n        self->tables = NULL;\n    }\n    Py_TYPE(self)->tp_free((PyObject *) self);\n}\n\nstatic int\nTableCollection_init(TableCollection *self, PyObject *args, PyObject *kwds)\n{\n    int ret = -1;\n    int err;\n    static char *kwlist[] = { \"sequence_length\", NULL };\n    double sequence_length = -1;\n\n    self->tables = NULL;\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"|d\", kwlist, &sequence_length)) {\n        goto out;\n    }\n\n    self->tables = PyMem_Malloc(sizeof(tsk_table_collection_t));\n    if (self->tables == NULL) {\n        PyErr_NoMemory();\n    }\n    err = tsk_table_collection_init(self->tables, 0);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    self->tables->sequence_length = sequence_length;\n    ret = 0;\nout:\n    return ret;\n}\n\n/* The getters for each of the tables return a new reference which we\n * set up here. These references use a pointer to the table stored in\n * the table collection, so to guard against this memory getting freed\n * the Python Table classes keep a reference to the TableCollection\n * and INCREF it. 
We don't keep permanent references to the Table classes\n * in the TableCollection as this creates circular references which would\n * require implementing support for cyclic garbage collection.\n */\n\nstatic PyObject *\nTableCollection_get_individuals(TableCollection *self, void *closure)\n{\n    IndividualTable *individuals = NULL;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    individuals = PyObject_New(IndividualTable, &IndividualTableType);\n    if (individuals == NULL) {\n        goto out;\n    }\n    individuals->table = &self->tables->individuals;\n    individuals->locked = false;\n    individuals->tables = self;\n    Py_INCREF(self);\nout:\n    return (PyObject *) individuals;\n}\n\nstatic PyObject *\nTableCollection_get_nodes(TableCollection *self, void *closure)\n{\n    NodeTable *nodes = NULL;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    nodes = PyObject_New(NodeTable, &NodeTableType);\n    if (nodes == NULL) {\n        goto out;\n    }\n    nodes->table = &self->tables->nodes;\n    nodes->locked = false;\n    nodes->tables = self;\n    Py_INCREF(self);\nout:\n    return (PyObject *) nodes;\n}\n\nstatic PyObject *\nTableCollection_get_edges(TableCollection *self, void *closure)\n{\n    EdgeTable *edges = NULL;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    edges = PyObject_New(EdgeTable, &EdgeTableType);\n    if (edges == NULL) {\n        goto out;\n    }\n    edges->table = &self->tables->edges;\n    edges->locked = false;\n    edges->tables = self;\n    Py_INCREF(self);\nout:\n    return (PyObject *) edges;\n}\n\nstatic PyObject *\nTableCollection_get_migrations(TableCollection *self, void *closure)\n{\n    MigrationTable *migrations = NULL;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    migrations = PyObject_New(MigrationTable, &MigrationTableType);\n    if (migrations == NULL) {\n        goto out;\n   
 }\n    migrations->table = &self->tables->migrations;\n    migrations->locked = false;\n    migrations->tables = self;\n    Py_INCREF(self);\nout:\n    return (PyObject *) migrations;\n}\n\nstatic PyObject *\nTableCollection_get_sites(TableCollection *self, void *closure)\n{\n    SiteTable *sites = NULL;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    sites = PyObject_New(SiteTable, &SiteTableType);\n    if (sites == NULL) {\n        goto out;\n    }\n    sites->table = &self->tables->sites;\n    sites->locked = false;\n    sites->tables = self;\n    Py_INCREF(self);\nout:\n    return (PyObject *) sites;\n}\n\nstatic PyObject *\nTableCollection_get_mutations(TableCollection *self, void *closure)\n{\n    MutationTable *mutations = NULL;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    mutations = PyObject_New(MutationTable, &MutationTableType);\n    if (mutations == NULL) {\n        goto out;\n    }\n    mutations->table = &self->tables->mutations;\n    mutations->locked = false;\n    mutations->tables = self;\n    Py_INCREF(self);\nout:\n    return (PyObject *) mutations;\n}\n\nstatic PyObject *\nTableCollection_get_populations(TableCollection *self, void *closure)\n{\n    PopulationTable *populations = NULL;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    populations = PyObject_New(PopulationTable, &PopulationTableType);\n    if (populations == NULL) {\n        goto out;\n    }\n    populations->table = &self->tables->populations;\n    populations->locked = false;\n    populations->tables = self;\n    Py_INCREF(self);\nout:\n    return (PyObject *) populations;\n}\n\nstatic PyObject *\nTableCollection_get_provenances(TableCollection *self, void *closure)\n{\n    ProvenanceTable *provenances = NULL;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    provenances = PyObject_New(ProvenanceTable, &ProvenanceTableType);\n    if 
(provenances == NULL) {\n        goto out;\n    }\n    provenances->table = &self->tables->provenances;\n    provenances->locked = false;\n    provenances->tables = self;\n    Py_INCREF(self);\nout:\n    return (PyObject *) provenances;\n}\n\nstatic PyObject *\nTableCollection_get_sequence_length(TableCollection *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"f\", self->tables->sequence_length);\nout:\n    return ret;\n}\n\nstatic int\nTableCollection_set_sequence_length(\n    TableCollection *self, PyObject *value, void *closure)\n{\n    int ret = -1;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    if (value == NULL) {\n        PyErr_SetString(PyExc_TypeError, \"Cannot delete the sequence_length attribute\");\n        goto out;\n    }\n    if (!PyNumber_Check(value)) {\n        PyErr_SetString(PyExc_TypeError, \"sequence_length must be a number\");\n        goto out;\n    }\n    self->tables->sequence_length = PyFloat_AsDouble(value);\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic PyObject *\nTableCollection_get_file_uuid(TableCollection *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"s\", self->tables->file_uuid);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTableCollection_get_time_units(TableCollection *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    ret = make_Py_Unicode_FromStringAndLength(\n        self->tables->time_units, self->tables->time_units_length);\nout:\n    return ret;\n}\n\nstatic int\nTableCollection_set_time_units(TableCollection *self, PyObject *arg, void *closure)\n{\n    int ret = -1;\n    int err;\n    const char *time_units;\n    Py_ssize_t time_units_length;\n\n    if 
(TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    time_units = parse_unicode_arg(arg, &time_units_length);\n    if (time_units == NULL) {\n        goto out;\n    }\n    err = tsk_table_collection_set_time_units(\n        self->tables, time_units, time_units_length);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic PyObject *\nTableCollection_get_metadata(TableCollection *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    ret = PyBytes_FromStringAndSize(\n        self->tables->metadata, self->tables->metadata_length);\nout:\n    return ret;\n}\n\nstatic int\nTableCollection_set_metadata(TableCollection *self, PyObject *arg, void *closure)\n{\n    int ret = -1;\n    int err;\n    char *metadata;\n    Py_ssize_t metadata_length;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    if (arg == NULL) {\n        PyErr_Format(PyExc_AttributeError,\n            \"Cannot del metadata, set to empty string (b\\\"\\\") to clear.\");\n        goto out;\n    }\n    err = PyBytes_AsStringAndSize(arg, &metadata, &metadata_length);\n    if (err != 0) {\n        goto out;\n    }\n    err = tsk_table_collection_set_metadata(self->tables, metadata, metadata_length);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic PyObject *\nTableCollection_get_metadata_schema(TableCollection *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    ret = make_Py_Unicode_FromStringAndLength(\n        self->tables->metadata_schema, self->tables->metadata_schema_length);\nout:\n    return ret;\n}\n\nstatic int\nTableCollection_set_metadata_schema(TableCollection *self, PyObject *arg, void *closure)\n{\n    int ret = -1;\n    
int err;\n    const char *metadata_schema;\n    Py_ssize_t metadata_schema_length;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    metadata_schema = parse_unicode_arg(arg, &metadata_schema_length);\n    if (metadata_schema == NULL) {\n        goto out;\n    }\n    err = tsk_table_collection_set_metadata_schema(\n        self->tables, metadata_schema, metadata_schema_length);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic PyObject *\nTableCollection_get_reference_sequence(TableCollection *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    ret = ReferenceSequence_get_new(\n        &self->tables->reference_sequence, (PyObject *) self, false);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTableCollection_simplify(TableCollection *self, PyObject *args, PyObject *kwds)\n{\n    int err;\n    PyObject *ret = NULL;\n    PyObject *samples = NULL;\n    PyArrayObject *samples_array = NULL;\n    PyArrayObject *node_map_array = NULL;\n    npy_intp *shape, dims;\n    tsk_size_t num_samples;\n    tsk_flags_t options = 0;\n    int filter_sites = false;\n    int filter_individuals = false;\n    int filter_populations = false;\n    int filter_nodes = true;\n    int update_sample_flags = true;\n    int keep_unary = false;\n    int keep_unary_in_individuals = false;\n    int keep_input_roots = false;\n    int reduce_to_site_topology = false;\n    static char *kwlist[]\n        = { \"samples\", \"filter_sites\", \"filter_populations\", \"filter_individuals\",\n              \"filter_nodes\", \"update_sample_flags\", \"reduce_to_site_topology\",\n              \"keep_unary\", \"keep_unary_in_individuals\", \"keep_input_roots\", NULL };\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"O|iiiiiiiii\", 
kwlist, &samples,\n            &filter_sites, &filter_populations, &filter_individuals, &filter_nodes,\n            &update_sample_flags, &reduce_to_site_topology, &keep_unary,\n            &keep_unary_in_individuals, &keep_input_roots)) {\n        goto out;\n    }\n    samples_array = (PyArrayObject *) PyArray_FROMANY(\n        samples, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);\n    if (samples_array == NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(samples_array);\n    num_samples = (tsk_size_t) shape[0];\n    if (filter_sites) {\n        options |= TSK_SIMPLIFY_FILTER_SITES;\n    }\n    if (filter_individuals) {\n        options |= TSK_SIMPLIFY_FILTER_INDIVIDUALS;\n    }\n    if (filter_populations) {\n        options |= TSK_SIMPLIFY_FILTER_POPULATIONS;\n    }\n    if (!filter_nodes) {\n        options |= TSK_SIMPLIFY_NO_FILTER_NODES;\n    }\n    if (!update_sample_flags) {\n        options |= TSK_SIMPLIFY_NO_UPDATE_SAMPLE_FLAGS;\n    }\n    if (reduce_to_site_topology) {\n        options |= TSK_SIMPLIFY_REDUCE_TO_SITE_TOPOLOGY;\n    }\n    if (keep_unary) {\n        options |= TSK_SIMPLIFY_KEEP_UNARY;\n    }\n    if (keep_unary_in_individuals) {\n        options |= TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS;\n    }\n    if (keep_input_roots) {\n        options |= TSK_SIMPLIFY_KEEP_INPUT_ROOTS;\n    }\n\n    /* Allocate a new array to hold the node map. 
*/\n    dims = self->tables->nodes.num_rows;\n    node_map_array = (PyArrayObject *) PyArray_SimpleNew(1, &dims, NPY_INT32);\n    if (node_map_array == NULL) {\n        goto out;\n    }\n    err = tsk_table_collection_simplify(self->tables, PyArray_DATA(samples_array),\n        num_samples, options, PyArray_DATA(node_map_array));\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) node_map_array;\n    node_map_array = NULL;\nout:\n    Py_XDECREF(samples_array);\n    Py_XDECREF(node_map_array);\n    return ret;\n}\n\nstatic PyObject *\nTableCollection_link_ancestors(TableCollection *self, PyObject *args, PyObject *kwds)\n{\n    int err;\n    PyObject *ret = NULL;\n    PyObject *samples = NULL;\n    PyObject *ancestors = NULL;\n    PyArrayObject *samples_array = NULL;\n    PyArrayObject *ancestors_array = NULL;\n    npy_intp *shape;\n    tsk_size_t num_samples, num_ancestors;\n    static char *kwlist[] = { \"samples\", \"ancestors\", NULL };\n    EdgeTable *result = NULL;\n    PyObject *result_args = NULL;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"OO\", kwlist, &samples, &ancestors)) {\n        goto out;\n    }\n\n    samples_array = (PyArrayObject *) PyArray_FROMANY(\n        samples, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);\n    if (samples_array == NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(samples_array);\n    num_samples = (tsk_size_t) shape[0];\n\n    ancestors_array = (PyArrayObject *) PyArray_FROMANY(\n        ancestors, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);\n    if (ancestors_array == NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(ancestors_array);\n    num_ancestors = (tsk_size_t) shape[0];\n\n    result_args = PyTuple_New(0);\n    if (result_args == NULL) {\n        goto out;\n    }\n    result = (EdgeTable *) PyObject_CallObject((PyObject *) &EdgeTableType, result_args);\n    if (result 
== NULL) {\n        goto out;\n    }\n    err = tsk_table_collection_link_ancestors(self->tables, PyArray_DATA(samples_array),\n        num_samples, PyArray_DATA(ancestors_array), num_ancestors, 0, result->table);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) result;\n    result = NULL;\nout:\n    Py_XDECREF(samples_array);\n    Py_XDECREF(ancestors_array);\n    Py_XDECREF(result);\n    Py_XDECREF(result_args);\n    return ret;\n}\n\nstatic PyObject *\nTableCollection_subset(TableCollection *self, PyObject *args, PyObject *kwds)\n{\n    int err;\n    PyObject *ret = NULL;\n    PyObject *nodes = NULL;\n    PyArrayObject *nodes_array = NULL;\n    npy_intp *shape;\n    tsk_flags_t options = 0;\n    int reorder_populations = true;\n    int remove_unreferenced = true;\n    tsk_size_t num_nodes;\n    static char *kwlist[]\n        = { \"nodes\", \"reorder_populations\", \"remove_unreferenced\", NULL };\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"O|ii\", kwlist, &nodes,\n            &reorder_populations, &remove_unreferenced)) {\n        goto out;\n    }\n    nodes_array\n        = (PyArrayObject *) PyArray_FROMANY(nodes, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);\n    if (nodes_array == NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(nodes_array);\n    num_nodes = (tsk_size_t) shape[0];\n    if (!reorder_populations) {\n        options |= TSK_SUBSET_NO_CHANGE_POPULATIONS;\n    }\n    if (!remove_unreferenced) {\n        options |= TSK_SUBSET_KEEP_UNREFERENCED;\n    }\n\n    err = tsk_table_collection_subset(\n        self->tables, PyArray_DATA(nodes_array), num_nodes, options);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    Py_XDECREF(nodes_array);\n    return ret;\n}\n\n/* Forward declaration */\nstatic PyTypeObject 
TableCollectionType;\n\nstatic PyObject *\nTableCollection_union(TableCollection *self, PyObject *args, PyObject *kwds)\n{\n    int err;\n    TableCollection *other = NULL;\n    PyObject *ret = NULL;\n    PyObject *other_node_mapping = NULL;\n    PyArrayObject *nmap_array = NULL;\n    npy_intp *shape;\n    tsk_flags_t options = 0;\n    int check_shared = true;\n    int all_edges = false;\n    int all_mutations = false;\n    int add_populations = true;\n    static char *kwlist[] = { \"other\", \"other_node_mapping\", \"check_shared_equality\",\n        \"add_populations\", \"all_edges\", \"all_mutations\", NULL };\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"O!O|iiii\", kwlist,\n            &TableCollectionType, &other, &other_node_mapping, &check_shared,\n            &add_populations, &all_edges, &all_mutations)) {\n        goto out;\n    }\n    nmap_array = (PyArrayObject *) PyArray_FROMANY(\n        other_node_mapping, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);\n    if (nmap_array == NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(nmap_array);\n    if (other->tables->nodes.num_rows != (tsk_size_t) shape[0]) {\n        PyErr_SetString(PyExc_ValueError,\n            \"The length of the node mapping array should be equal to the\"\n            \" number of nodes in the other tree sequence.\");\n        goto out;\n    }\n    if (all_edges) {\n        options |= TSK_UNION_ALL_EDGES;\n    }\n    if (all_mutations) {\n        options |= TSK_UNION_ALL_MUTATIONS;\n    }\n    if (!check_shared) {\n        options |= TSK_UNION_NO_CHECK_SHARED;\n    }\n    if (!add_populations) {\n        options |= TSK_UNION_NO_ADD_POP;\n    }\n    err = tsk_table_collection_union(\n        self->tables, other->tables, PyArray_DATA(nmap_array), options);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    
Py_XDECREF(nmap_array);\n    return ret;\n}\n\nstatic PyObject *\nTableCollection_ibd_segments_within(\n    TableCollection *self, PyObject *args, PyObject *kwds)\n{\n    int err;\n    PyObject *ret = NULL;\n    PyObject *py_samples = Py_None;\n    IdentitySegments *result = NULL;\n    PyArrayObject *samples_array = NULL;\n    int32_t *samples = NULL;\n    tsk_size_t num_samples = 0;\n    double min_span = 0;\n    double max_time = DBL_MAX;\n    int store_pairs = 0;\n    int store_segments = 0;\n    npy_intp *shape;\n    static char *kwlist[]\n        = { \"samples\", \"min_span\", \"max_time\", \"store_pairs\", \"store_segments\", NULL };\n    tsk_flags_t options = 0;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"|Oddii\", kwlist, &py_samples,\n            &min_span, &max_time, &store_pairs, &store_segments)) {\n        goto out;\n    }\n    if (py_samples != Py_None) {\n        samples_array = (PyArrayObject *) PyArray_FROMANY(\n            py_samples, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);\n        if (samples_array == NULL) {\n            goto out;\n        }\n        shape = PyArray_DIMS(samples_array);\n        samples = PyArray_DATA(samples_array);\n        num_samples = (tsk_size_t) shape[0];\n    }\n    result = (IdentitySegments *) PyObject_CallObject(\n        (PyObject *) &IdentitySegmentsType, NULL);\n    if (result == NULL) {\n        goto out;\n    }\n    options = 0;\n    if (store_pairs) {\n        options |= TSK_IBD_STORE_PAIRS;\n    }\n    if (store_segments) {\n        options |= TSK_IBD_STORE_SEGMENTS;\n    }\n\n    err = tsk_table_collection_ibd_within(self->tables, result->identity_segments,\n        samples, num_samples, min_span, max_time, options);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) result;\n    result = NULL;\nout:\n    Py_XDECREF(samples_array);\n    Py_XDECREF(result);\n    return 
ret;\n}\n\nstatic PyObject *\nTableCollection_ibd_segments_between(\n    TableCollection *self, PyObject *args, PyObject *kwds)\n{\n    int err;\n    PyObject *ret = NULL;\n    PyObject *sample_sets = NULL;\n    PyObject *sample_set_sizes = NULL;\n    PyArrayObject *sample_sets_array = NULL;\n    PyArrayObject *sample_set_sizes_array = NULL;\n    IdentitySegments *result = NULL;\n    tsk_size_t num_sample_sets;\n    double min_span = 0;\n    double max_time = DBL_MAX;\n    int store_pairs = 0;\n    int store_segments = 0;\n    static char *kwlist[] = { \"sample_set_sizes\", \"sample_sets\", \"min_span\", \"max_time\",\n        \"store_pairs\", \"store_segments\", NULL };\n    tsk_flags_t options = 0;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"OO|ddii\", kwlist, &sample_set_sizes,\n            &sample_sets, &min_span, &max_time, &store_pairs, &store_segments)) {\n        goto out;\n    }\n    if (parse_sample_sets(sample_set_sizes, &sample_set_sizes_array, sample_sets,\n            &sample_sets_array, &num_sample_sets)\n        != 0) {\n        goto out;\n    }\n    result = (IdentitySegments *) PyObject_CallObject(\n        (PyObject *) &IdentitySegmentsType, NULL);\n    if (result == NULL) {\n        goto out;\n    }\n    options = 0;\n    if (store_pairs) {\n        options |= TSK_IBD_STORE_PAIRS;\n    }\n    if (store_segments) {\n        options |= TSK_IBD_STORE_SEGMENTS;\n    }\n\n    err = tsk_table_collection_ibd_between(self->tables, result->identity_segments,\n        num_sample_sets, (tsk_size_t *) PyArray_DATA(sample_set_sizes_array),\n        (tsk_id_t *) PyArray_DATA(sample_sets_array), min_span, max_time, options);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) result;\n    result = NULL;\nout:\n    Py_XDECREF(sample_set_sizes_array);\n    Py_XDECREF(sample_sets_array);\n    Py_XDECREF(result);\n    return 
ret;\n}\n\nstatic PyObject *\nTableCollection_sort(TableCollection *self, PyObject *args, PyObject *kwds)\n{\n    int err;\n    PyObject *ret = NULL;\n    Py_ssize_t edge_start = 0;\n    Py_ssize_t site_start = 0;\n    Py_ssize_t mutation_start = 0;\n    tsk_bookmark_t start;\n    static char *kwlist[] = { \"edge_start\", \"site_start\", \"mutation_start\", NULL };\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(\n            args, kwds, \"|nnn\", kwlist, &edge_start, &site_start, &mutation_start)) {\n        goto out;\n    }\n    memset(&start, 0, sizeof(start));\n    start.edges = (tsk_size_t) edge_start;\n    start.sites = (tsk_size_t) site_start;\n    start.mutations = (tsk_size_t) mutation_start;\n    err = tsk_table_collection_sort(self->tables, &start, 0);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\nstatic PyObject *\nTableCollection_sort_individuals(TableCollection *self, PyObject *args, PyObject *kwds)\n{\n    int err;\n    PyObject *ret = NULL;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n\n    err = tsk_table_collection_individual_topological_sort(self->tables, 0);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\nstatic PyObject *\nTableCollection_canonicalise(TableCollection *self, PyObject *args, PyObject *kwds)\n{\n    int err;\n    PyObject *ret = NULL;\n    tsk_flags_t options = 0;\n    int remove_unreferenced = true;\n    static char *kwlist[] = { \"remove_unreferenced\", NULL };\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"|i\", kwlist, &remove_unreferenced)) {\n        goto out;\n    }\n    if (!remove_unreferenced) {\n        options |= TSK_SUBSET_KEEP_UNREFERENCED;\n 
   }\n\n    err = tsk_table_collection_canonicalise(self->tables, options);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\nstatic PyObject *\nTableCollection_delete_older(TableCollection *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    int err;\n    double time;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"d\", &time)) {\n        goto out;\n    }\n    err = tsk_table_collection_delete_older(self->tables, time, 0);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\nstatic PyObject *\nTableCollection_compute_mutation_parents(TableCollection *self)\n{\n    int err;\n    PyObject *ret = NULL;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    err = tsk_table_collection_compute_mutation_parents(self->tables, 0);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\nstatic PyObject *\nTableCollection_compute_mutation_times(TableCollection *self)\n{\n    int err;\n    PyObject *ret = NULL;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    err = tsk_table_collection_compute_mutation_times(self->tables, NULL, 0);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\nstatic PyObject *\nTableCollection_deduplicate_sites(TableCollection *self)\n{\n    int err;\n    PyObject *ret = NULL;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    err = tsk_table_collection_deduplicate_sites(self->tables, 0);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return 
ret;\n}\n\nstatic PyObject *\nTableCollection_build_index(TableCollection *self)\n{\n    int err;\n    PyObject *ret = NULL;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    err = tsk_table_collection_build_index(self->tables, 0);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\nstatic PyObject *\nTableCollection_drop_index(TableCollection *self)\n{\n    int err;\n    PyObject *ret = NULL;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    err = tsk_table_collection_drop_index(self->tables, 0);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\nstatic PyObject *\nTableCollection_get_indexes(TableCollection *self, void *closure)\n{\n    PyObject *ret = NULL;\n    PyObject *indexes_dict = NULL;\n    PyObject *insertion = NULL;\n    PyObject *removal = NULL;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n\n    indexes_dict = PyDict_New();\n    if (indexes_dict == NULL) {\n        goto out;\n    }\n\n    if (tsk_table_collection_has_index(self->tables, 0)) {\n        insertion = table_get_column_array(self->tables->indexes.num_edges,\n            self->tables->indexes.edge_insertion_order, NPY_INT32, sizeof(tsk_id_t));\n        if (insertion == NULL) {\n            goto out;\n        }\n        removal = table_get_column_array(self->tables->indexes.num_edges,\n            self->tables->indexes.edge_removal_order, NPY_INT32, sizeof(tsk_id_t));\n        if (removal == NULL) {\n            goto out;\n        }\n\n        if (PyDict_SetItemString(indexes_dict, \"edge_insertion_order\", insertion) != 0) {\n            goto out;\n        }\n        if (PyDict_SetItemString(indexes_dict, \"edge_removal_order\", removal) != 0) {\n            goto out;\n        }\n    }\n\n    ret = 
indexes_dict;\n    indexes_dict = NULL;\nout:\n    Py_XDECREF(indexes_dict);\n    Py_XDECREF(insertion);\n    Py_XDECREF(removal);\n    return ret;\n}\n\nstatic int\nTableCollection_set_indexes(TableCollection *self, PyObject *arg, void *closure)\n{\n    int err;\n    int ret = -1;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n\n    err = parse_indexes_dict(self->tables, arg);\n    if (err != 0) {\n        goto out;\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic PyObject *\nTableCollection_has_reference_sequence(TableCollection *self)\n{\n    PyObject *ret = NULL;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\n        \"i\", (int) tsk_table_collection_has_reference_sequence(self->tables));\nout:\n    return ret;\n}\n\nstatic PyObject *\nTableCollection_has_index(TableCollection *self)\n{\n    PyObject *ret = NULL;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    bool has_index = tsk_table_collection_has_index(self->tables, 0);\n    ret = Py_BuildValue(\"i\", (int) has_index);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTableCollection_equals(TableCollection *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    TableCollection *other = NULL;\n    tsk_flags_t options = 0;\n    int ignore_metadata = false;\n    int ignore_ts_metadata = false;\n    int ignore_provenance = false;\n    int ignore_timestamps = true;\n    int ignore_tables = false;\n    int ignore_reference_sequence = false;\n    static char *kwlist[]\n        = { \"other\", \"ignore_metadata\", \"ignore_ts_metadata\", \"ignore_provenance\",\n              \"ignore_timestamps\", \"ignore_tables\", \"ignore_reference_sequence\", NULL };\n\n    if (TableCollection_check_state(self)) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"O!|iiiiii\", kwlist,\n            &TableCollectionType, &other, &ignore_metadata, 
&ignore_ts_metadata,\n            &ignore_provenance, &ignore_timestamps, &ignore_tables,\n            &ignore_reference_sequence)) {\n        goto out;\n    }\n    if (ignore_metadata) {\n        options |= TSK_CMP_IGNORE_METADATA;\n    }\n    if (ignore_ts_metadata) {\n        options |= TSK_CMP_IGNORE_TS_METADATA;\n    }\n    if (ignore_provenance) {\n        options |= TSK_CMP_IGNORE_PROVENANCE;\n    }\n    if (ignore_timestamps) {\n        options |= TSK_CMP_IGNORE_TIMESTAMPS;\n    }\n    if (ignore_tables) {\n        options |= TSK_CMP_IGNORE_TABLES;\n    }\n    if (ignore_reference_sequence) {\n        options |= TSK_CMP_IGNORE_REFERENCE_SEQUENCE;\n    }\n    if (TableCollection_check_state(other) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\n        \"i\", tsk_table_collection_equals(self->tables, other->tables, options));\nout:\n    return ret;\n}\n\nstatic PyObject *\nTableCollection_clear(TableCollection *self, PyObject *args, PyObject *kwds)\n{\n    int err;\n    PyObject *ret = NULL;\n    tsk_flags_t options = 0;\n    int clear_provenance = false;\n    int clear_metadata_schemas = false;\n    int clear_ts_metadata = false;\n    static char *kwlist[] = { \"clear_provenance\", \"clear_metadata_schemas\",\n        \"clear_ts_metadata_and_schema\", NULL };\n\n    if (TableCollection_check_state(self)) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"|iii\", kwlist, &clear_provenance,\n            &clear_metadata_schemas, &clear_ts_metadata)) {\n        goto out;\n    }\n    if (clear_provenance) {\n        options |= TSK_CLEAR_PROVENANCE;\n    }\n    if (clear_metadata_schemas) {\n        options |= TSK_CLEAR_METADATA_SCHEMAS;\n    }\n    if (clear_ts_metadata) {\n        options |= TSK_CLEAR_TS_METADATA_AND_SCHEMA;\n    }\n\n    err = tsk_table_collection_clear(self->tables, options);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    
return ret;\n}\n\nstatic PyObject *\nTableCollection_dump(TableCollection *self, PyObject *args, PyObject *kwds)\n{\n    int err;\n    FILE *file = NULL;\n    PyObject *py_file = NULL;\n    PyObject *ret = NULL;\n    static char *kwlist[] = { \"file\", NULL };\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"O\", kwlist, &py_file)) {\n        goto out;\n    }\n\n    file = make_file(py_file, \"wb\");\n    if (file == NULL) {\n        goto out;\n    }\n\n    err = tsk_table_collection_dumpf(self->tables, file, 0);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    if (file != NULL) {\n        (void) fclose(file);\n    }\n    return ret;\n}\n\nstatic PyObject *\nTableCollection_load(TableCollection *self, PyObject *args, PyObject *kwds)\n{\n    int err;\n    PyObject *ret = NULL;\n    PyObject *py_file;\n    FILE *file = NULL;\n    tsk_flags_t options = 0;\n    int skip_tables = false;\n    int skip_reference_sequence = false;\n    static char *kwlist[] = { \"file\", \"skip_tables\", \"skip_reference_sequence\", NULL };\n\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"O|ii\", kwlist, &py_file, &skip_tables,\n            &skip_reference_sequence)) {\n        goto out;\n    }\n    if (skip_tables) {\n        options |= TSK_LOAD_SKIP_TABLES;\n    }\n    if (skip_reference_sequence) {\n        options |= TSK_LOAD_SKIP_REFERENCE_SEQUENCE;\n    }\n    file = make_file(py_file, \"rb\");\n    if (file == NULL) {\n        goto out;\n    }\n    /* Set unbuffered mode to ensure no more bytes are read than requested.\n     * Buffered reads could read beyond the end of the current store in a\n     * multi-store file or stream. This data would be discarded when we\n     * fclose() the file below, such that attempts to load the next store\n     * will fail. 
*/\n    if (setvbuf(file, NULL, _IONBF, 0) != 0) {\n        PyErr_SetFromErrno(PyExc_OSError);\n        goto out;\n    }\n    err = TableCollection_alloc(self);\n    if (err != 0) {\n        goto out;\n    }\n    err = tsk_table_collection_loadf(self->tables, file, options);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    if (file != NULL) {\n        (void) fclose(file);\n    }\n    return ret;\n}\n\nstatic PyObject *\nTableCollection_asdict(TableCollection *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    int force_offset_64 = 0;\n    static char *kwlist[] = { \"force_offset_64\", NULL };\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"|i\", kwlist, &force_offset_64)) {\n        goto out;\n    }\n    /* Use the LWT tables code */\n    ret = dump_tables_dict(self->tables, force_offset_64);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTableCollection_fromdict(TableCollection *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    PyObject *dict = NULL;\n\n    if (TableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"O!\", &PyDict_Type, &dict)) {\n        goto out;\n    }\n    /* Use the LWT tables code */\n    if (parse_table_collection_dict(self->tables, dict) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\nstatic PyGetSetDef TableCollection_getsetters[] = {\n    { .name = \"individuals\",\n        .get = (getter) TableCollection_get_individuals,\n        .doc = \"The individual table.\" },\n    { .name = \"nodes\",\n        .get = (getter) TableCollection_get_nodes,\n        .doc = \"The node table.\" },\n    { .name = \"edges\",\n        .get = (getter) TableCollection_get_edges,\n        .doc = \"The edge table.\" },\n    { .name = \"migrations\",\n        .get = (getter) 
TableCollection_get_migrations,\n        .doc = \"The migration table.\" },\n    { .name = \"sites\",\n        .get = (getter) TableCollection_get_sites,\n        .doc = \"The site table.\" },\n    { .name = \"mutations\",\n        .get = (getter) TableCollection_get_mutations,\n        .doc = \"The mutation table.\" },\n    { .name = \"populations\",\n        .get = (getter) TableCollection_get_populations,\n        .doc = \"The population table.\" },\n    { .name = \"provenances\",\n        .get = (getter) TableCollection_get_provenances,\n        .doc = \"The provenance table.\" },\n    { .name = \"indexes\",\n        .get = (getter) TableCollection_get_indexes,\n        .set = (setter) TableCollection_set_indexes,\n        .doc = \"The indexes.\" },\n    { .name = \"sequence_length\",\n        .get = (getter) TableCollection_get_sequence_length,\n        .set = (setter) TableCollection_set_sequence_length,\n        .doc = \"The sequence length.\" },\n    { .name = \"file_uuid\",\n        .get = (getter) TableCollection_get_file_uuid,\n        .doc = \"The UUID of the corresponding file.\" },\n    { .name = \"time_units\",\n        .get = (getter) TableCollection_get_time_units,\n        .set = (setter) TableCollection_set_time_units,\n        .doc = \"The time_units.\" },\n    { .name = \"metadata\",\n        .get = (getter) TableCollection_get_metadata,\n        .set = (setter) TableCollection_set_metadata,\n        .doc = \"The metadata.\" },\n    { .name = \"metadata_schema\",\n        .get = (getter) TableCollection_get_metadata_schema,\n        .set = (setter) TableCollection_set_metadata_schema,\n        .doc = \"The metadata schema.\" },\n    { .name = \"reference_sequence\",\n        .get = (getter) TableCollection_get_reference_sequence,\n        .doc = \"The reference sequence.\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyMethodDef TableCollection_methods[] = {\n    { .ml_name = \"simplify\",\n        .ml_meth = (PyCFunction) 
TableCollection_simplify,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Simplifies for a given sample subset.\" },\n    { .ml_name = \"link_ancestors\",\n        .ml_meth = (PyCFunction) TableCollection_link_ancestors,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc\n        = \"Returns an edge table linking samples to a set of specified ancestors.\" },\n    { .ml_name = \"subset\",\n        .ml_meth = (PyCFunction) TableCollection_subset,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Subsets the table collection to a set of nodes.\" },\n    { .ml_name = \"union\",\n        .ml_meth = (PyCFunction) TableCollection_union,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc\n        = \"Adds to this table collection the portions of another table collection \"\n          \"that are not shared with this one.\" },\n    { .ml_name = \"ibd_segments_within\",\n        .ml_meth = (PyCFunction) TableCollection_ibd_segments_within,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Returns IBD segments within the specified set of samples.\" },\n    { .ml_name = \"ibd_segments_between\",\n        .ml_meth = (PyCFunction) TableCollection_ibd_segments_between,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Returns IBD segments between pairs in the specified sets.\" },\n    { .ml_name = \"sort\",\n        .ml_meth = (PyCFunction) TableCollection_sort,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Sorts the tables to satisfy tree sequence requirements.\" },\n    { .ml_name = \"sort_individuals\",\n        .ml_meth = (PyCFunction) TableCollection_sort_individuals,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Sorts the individual table topologically\" },\n    { .ml_name = \"canonicalise\",\n        .ml_meth = (PyCFunction) TableCollection_canonicalise,\n        .ml_flags = METH_VARARGS | 
METH_KEYWORDS,\n        .ml_doc = \"Puts the tables in canonical form.\" },\n    { .ml_name = \"equals\",\n        .ml_meth = (PyCFunction) TableCollection_equals,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc\n        = \"Returns True if the parameter table collection is equal to this one.\" },\n    { .ml_name = \"delete_older\",\n        .ml_meth = (PyCFunction) TableCollection_delete_older,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Delete edges, mutations and migrations older than this time\" },\n    { .ml_name = \"compute_mutation_parents\",\n        .ml_meth = (PyCFunction) TableCollection_compute_mutation_parents,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Computes the mutation parents for the tables.\" },\n    { .ml_name = \"compute_mutation_times\",\n        .ml_meth = (PyCFunction) TableCollection_compute_mutation_times,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Computes the mutation times for the tables.\" },\n    { .ml_name = \"deduplicate_sites\",\n        .ml_meth = (PyCFunction) TableCollection_deduplicate_sites,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Removes sites with duplicate positions.\" },\n    { .ml_name = \"build_index\",\n        .ml_meth = (PyCFunction) TableCollection_build_index,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Builds an index on the table collection.\" },\n    { .ml_name = \"drop_index\",\n        .ml_meth = (PyCFunction) TableCollection_drop_index,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Drops indexes.\" },\n    { .ml_name = \"has_reference_sequence\",\n        .ml_meth = (PyCFunction) TableCollection_has_reference_sequence,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns True if the TableCollection has a reference sequence.\" },\n    { .ml_name = \"has_index\",\n        .ml_meth = (PyCFunction) TableCollection_has_index,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns True if the 
TableCollection is indexed.\" },\n    { .ml_name = \"clear\",\n        .ml_meth = (PyCFunction) TableCollection_clear,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Clears table contents, and optionally provenances and metadata\" },\n    { .ml_name = \"dump\",\n        .ml_meth = (PyCFunction) TableCollection_dump,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Writes the table collection out to the specified file.\" },\n    { .ml_name = \"load\",\n        .ml_meth = (PyCFunction) TableCollection_load,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Loads the table collection from the specified file.\" },\n    { .ml_name = \"asdict\",\n        .ml_meth = (PyCFunction) TableCollection_asdict,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Returns the table collection in dictionary encoding. \" },\n    { .ml_name = \"fromdict\",\n        .ml_meth = (PyCFunction) TableCollection_fromdict,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Sets the state of this table collection from the specified dict\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyTypeObject TableCollectionType = {\n    // clang-format off\n    PyVarObject_HEAD_INIT(NULL, 0)\n    .tp_name = \"_tskit.TableCollection\",\n    .tp_basicsize = sizeof(TableCollection),\n    .tp_dealloc = (destructor) TableCollection_dealloc,\n    .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,\n    .tp_doc = \"TableCollection objects\",\n    .tp_methods = TableCollection_methods,\n    .tp_getset = TableCollection_getsetters,\n    .tp_init = (initproc) TableCollection_init,\n    .tp_new = PyType_GenericNew,\n    // clang-format on\n};\n\n/*===================================================================\n * TreeSequence\n *===================================================================\n */\n\nstatic int\nTreeSequence_check_state(TreeSequence *self)\n{\n    int ret = 0;\n    if (self->tree_sequence == 
NULL) {\n        PyErr_SetString(PyExc_ValueError, \"tree_sequence not initialised\");\n        ret = -1;\n    }\n    return ret;\n}\n\nstatic void\nTreeSequence_dealloc(TreeSequence *self)\n{\n    if (self->tree_sequence != NULL) {\n        tsk_treeseq_free(self->tree_sequence);\n        PyMem_Free(self->tree_sequence);\n        self->tree_sequence = NULL;\n    }\n    Py_TYPE(self)->tp_free((PyObject *) self);\n}\n\nstatic int\nTreeSequence_alloc(TreeSequence *self)\n{\n    int ret = -1;\n\n    if (self->tree_sequence != NULL) {\n        tsk_treeseq_free(self->tree_sequence);\n        PyMem_Free(self->tree_sequence);\n    }\n    self->tree_sequence = PyMem_Malloc(sizeof(tsk_treeseq_t));\n    if (self->tree_sequence == NULL) {\n        PyErr_NoMemory();\n        goto out;\n    }\n    memset(self->tree_sequence, 0, sizeof(*self->tree_sequence));\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic int\nTreeSequence_init(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    self->tree_sequence = NULL;\n    return 0;\n}\n\nstatic PyObject *\nTreeSequence_dump(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    int err;\n    FILE *file = NULL;\n    PyObject *py_file = NULL;\n    PyObject *ret = NULL;\n    static char *kwlist[] = { \"file\", NULL };\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"O\", kwlist, &py_file)) {\n        goto out;\n    }\n\n    file = make_file(py_file, \"wb\");\n    if (file == NULL) {\n        goto out;\n    }\n\n    err = tsk_treeseq_dumpf(self->tree_sequence, file, 0);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    if (file != NULL) {\n        (void) fclose(file);\n    }\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_load_tables(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    int err;\n    PyObject *ret = NULL;\n    TableCollection *tables = NULL;\n    
static char *kwlist[] = { \"tables\", \"build_indexes\", NULL };\n    int build_indexes = false;\n    tsk_flags_t options = 0;\n\n    if (!PyArg_ParseTupleAndKeywords(\n            args, kwds, \"O!|i\", kwlist, &TableCollectionType, &tables, &build_indexes)) {\n        goto out;\n    }\n    err = TreeSequence_alloc(self);\n    if (err != 0) {\n        goto out;\n    }\n    if (build_indexes) {\n        options |= TSK_TS_INIT_BUILD_INDEXES;\n    }\n    err = tsk_treeseq_init(self->tree_sequence, tables->tables, options);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_dump_tables(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    int err;\n    PyObject *ret = NULL;\n    TableCollection *tables = NULL;\n    static char *kwlist[] = { \"tables\", NULL };\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(\n            args, kwds, \"O!\", kwlist, &TableCollectionType, &tables)) {\n        goto out;\n    }\n    err = tsk_treeseq_copy_tables(self->tree_sequence, tables->tables, TSK_NO_INIT);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_link_ancestors(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    int err;\n    PyObject *ret = NULL;\n    PyObject *samples = NULL;\n    PyObject *ancestors = NULL;\n    PyArrayObject *samples_array = NULL;\n    PyArrayObject *ancestors_array = NULL;\n    npy_intp *shape;\n    tsk_size_t num_samples, num_ancestors;\n    EdgeTable *result = NULL;\n    PyObject *result_args = NULL;\n    static char *kwlist[] = { \"samples\", \"ancestors\", NULL };\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"OO\", kwlist, &samples, 
&ancestors)) {\n        goto out;\n    }\n\n    samples_array = (PyArrayObject *) PyArray_FROMANY(\n        samples, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);\n    if (samples_array == NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(samples_array);\n    num_samples = (tsk_size_t) shape[0];\n\n    ancestors_array = (PyArrayObject *) PyArray_FROMANY(\n        ancestors, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);\n    if (ancestors_array == NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(ancestors_array);\n    num_ancestors = (tsk_size_t) shape[0];\n\n    result_args = PyTuple_New(0);\n    if (result_args == NULL) {\n        goto out;\n    }\n    result = (EdgeTable *) PyObject_CallObject((PyObject *) &EdgeTableType, result_args);\n    if (result == NULL) {\n        goto out;\n    }\n    err = tsk_table_collection_link_ancestors(self->tree_sequence->tables,\n        PyArray_DATA(samples_array), num_samples, PyArray_DATA(ancestors_array),\n        num_ancestors, 0, result->table);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) result;\n    result = NULL;\nout:\n    Py_XDECREF(samples_array);\n    Py_XDECREF(ancestors_array);\n    Py_XDECREF(result);\n    Py_XDECREF(result_args);\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_load(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    int err;\n    PyObject *ret = NULL;\n    PyObject *py_file;\n    FILE *file = NULL;\n    tsk_flags_t options = 0;\n    int skip_tables = false;\n    int skip_reference_sequence = false;\n    static char *kwlist[] = { \"file\", \"skip_tables\", \"skip_reference_sequence\", NULL };\n\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"O|ii\", kwlist, &py_file, &skip_tables,\n            &skip_reference_sequence)) {\n        goto out;\n    }\n    if (skip_tables) {\n        options |= TSK_LOAD_SKIP_TABLES;\n    }\n    if (skip_reference_sequence) {\n        options |= TSK_LOAD_SKIP_REFERENCE_SEQUENCE;\n    
}\n    file = make_file(py_file, \"rb\");\n    if (file == NULL) {\n        goto out;\n    }\n    /* Set unbuffered mode to ensure no more bytes are read than requested.\n     * Buffered reads could read beyond the end of the current store in a\n     * multi-store file or stream. This data would be discarded when we\n     * fclose() the file below, such that attempts to load the next store\n     * will fail. */\n    if (setvbuf(file, NULL, _IONBF, 0) != 0) {\n        PyErr_SetFromErrno(PyExc_OSError);\n        goto out;\n    }\n    err = TreeSequence_alloc(self);\n    if (err != 0) {\n        goto out;\n    }\n    err = tsk_treeseq_loadf(self->tree_sequence, file, options);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    if (file != NULL) {\n        (void) fclose(file);\n    }\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_node(TreeSequence *self, PyObject *args)\n{\n    int err;\n    PyObject *ret = NULL;\n    Py_ssize_t record_index, num_records;\n    tsk_node_t record;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"n\", &record_index)) {\n        goto out;\n    }\n    num_records = (Py_ssize_t) tsk_treeseq_get_num_nodes(self->tree_sequence);\n    if (record_index < 0 || record_index >= num_records) {\n        PyErr_SetString(PyExc_IndexError, \"record index out of bounds\");\n        goto out;\n    }\n    err = tsk_treeseq_get_node(self->tree_sequence, (tsk_id_t) record_index, &record);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = make_node(&record);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_edge(TreeSequence *self, PyObject *args)\n{\n    int err;\n    PyObject *ret = NULL;\n    Py_ssize_t record_index, num_records;\n    tsk_edge_t record;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if 
(!PyArg_ParseTuple(args, \"n\", &record_index)) {\n        goto out;\n    }\n    num_records = (Py_ssize_t) tsk_treeseq_get_num_edges(self->tree_sequence);\n    if (record_index < 0 || record_index >= num_records) {\n        PyErr_SetString(PyExc_IndexError, \"record index out of bounds\");\n        goto out;\n    }\n    err = tsk_treeseq_get_edge(self->tree_sequence, (tsk_id_t) record_index, &record);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = make_edge(&record, false);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_migration(TreeSequence *self, PyObject *args)\n{\n    int err;\n    PyObject *ret = NULL;\n    Py_ssize_t record_index, num_records;\n    tsk_migration_t record;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"n\", &record_index)) {\n        goto out;\n    }\n    num_records = (Py_ssize_t) tsk_treeseq_get_num_migrations(self->tree_sequence);\n    if (record_index < 0 || record_index >= num_records) {\n        PyErr_SetString(PyExc_IndexError, \"record index out of bounds\");\n        goto out;\n    }\n    err = tsk_treeseq_get_migration(\n        self->tree_sequence, (tsk_id_t) record_index, &record);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = make_migration(&record);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_site(TreeSequence *self, PyObject *args)\n{\n    int err;\n    PyObject *ret = NULL;\n    Py_ssize_t record_index, num_records;\n    tsk_site_t record;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"n\", &record_index)) {\n        goto out;\n    }\n    num_records = (Py_ssize_t) tsk_treeseq_get_num_sites(self->tree_sequence);\n    if (record_index < 0 || record_index >= num_records) {\n        PyErr_SetString(PyExc_IndexError, \"record index out of bounds\");\n        goto out;\n    
}\n    err = tsk_treeseq_get_site(self->tree_sequence, (tsk_id_t) record_index, &record);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = make_site_object(&record);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_metadata(TreeSequence *self)\n{\n    PyObject *ret = NULL;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    ret = PyBytes_FromStringAndSize(self->tree_sequence->tables->metadata,\n        self->tree_sequence->tables->metadata_length);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_metadata_schema(TreeSequence *self)\n{\n    PyObject *ret = NULL;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    ret = make_Py_Unicode_FromStringAndLength(\n        self->tree_sequence->tables->metadata_schema,\n        self->tree_sequence->tables->metadata_schema_length);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_time_units(TreeSequence *self)\n{\n    PyObject *ret = NULL;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    ret = make_Py_Unicode_FromStringAndLength(self->tree_sequence->tables->time_units,\n        self->tree_sequence->tables->time_units_length);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_table_metadata_schemas(TreeSequence *self)\n{\n    PyObject *ret = NULL;\n    PyObject *value = NULL;\n    PyObject *schema = NULL;\n    tsk_size_t j;\n    tsk_table_collection_t *tables;\n    struct schema_pair {\n        const char *schema;\n        tsk_size_t length;\n    };\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    tables = self->tree_sequence->tables;\n    struct schema_pair schema_pairs[] = {\n        { tables->nodes.metadata_schema, tables->nodes.metadata_schema_length },\n        { tables->edges.metadata_schema, tables->edges.metadata_schema_length },\n        { tables->sites.metadata_schema, 
tables->sites.metadata_schema_length },\n        { tables->mutations.metadata_schema, tables->mutations.metadata_schema_length },\n        { tables->migrations.metadata_schema,\n            tables->migrations.metadata_schema_length },\n        { tables->individuals.metadata_schema,\n            tables->individuals.metadata_schema_length },\n        { tables->populations.metadata_schema,\n            tables->populations.metadata_schema_length },\n    };\n    value = PyStructSequence_New(&MetadataSchemas);\n    if (value == NULL) {\n        goto out;\n    }\n    for (j = 0; j < sizeof(schema_pairs) / sizeof(*schema_pairs); j++) {\n        schema = make_Py_Unicode_FromStringAndLength(\n            schema_pairs[j].schema, schema_pairs[j].length);\n        if (schema == NULL) {\n            goto out;\n        }\n        PyStructSequence_SetItem(value, j, schema);\n    }\n    ret = value;\n    value = NULL;\nout:\n    Py_XDECREF(value);\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_mutation(TreeSequence *self, PyObject *args)\n{\n    int err;\n    PyObject *ret = NULL;\n    Py_ssize_t record_index, num_records;\n    tsk_mutation_t record;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"n\", &record_index)) {\n        goto out;\n    }\n    num_records = (Py_ssize_t) tsk_treeseq_get_num_mutations(self->tree_sequence);\n    if (record_index < 0 || record_index >= num_records) {\n        PyErr_SetString(PyExc_IndexError, \"record index out of bounds\");\n        goto out;\n    }\n    err = tsk_treeseq_get_mutation(\n        self->tree_sequence, (tsk_id_t) record_index, &record);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = make_mutation_object(&record);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_individual(TreeSequence *self, PyObject *args)\n{\n    int err;\n    PyObject *ret = NULL;\n    Py_ssize_t record_index, num_records;\n   
 tsk_individual_t record;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"n\", &record_index)) {\n        goto out;\n    }\n    num_records = (Py_ssize_t) tsk_treeseq_get_num_individuals(self->tree_sequence);\n    if (record_index < 0 || record_index >= num_records) {\n        PyErr_SetString(PyExc_IndexError, \"record index out of bounds\");\n        goto out;\n    }\n    err = tsk_treeseq_get_individual(\n        self->tree_sequence, (tsk_id_t) record_index, &record);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = make_individual_object(&record);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_population(TreeSequence *self, PyObject *args)\n{\n    int err;\n    PyObject *ret = NULL;\n    Py_ssize_t record_index, num_records;\n    tsk_population_t record;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"n\", &record_index)) {\n        goto out;\n    }\n    num_records = (Py_ssize_t) tsk_treeseq_get_num_populations(self->tree_sequence);\n    if (record_index < 0 || record_index >= num_records) {\n        PyErr_SetString(PyExc_IndexError, \"record index out of bounds\");\n        goto out;\n    }\n    err = tsk_treeseq_get_population(\n        self->tree_sequence, (tsk_id_t) record_index, &record);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = make_population(&record);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_provenance(TreeSequence *self, PyObject *args)\n{\n    int err;\n    PyObject *ret = NULL;\n    Py_ssize_t record_index, num_records;\n    tsk_provenance_t record;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"n\", &record_index)) {\n        goto out;\n    }\n    num_records = (Py_ssize_t) 
tsk_treeseq_get_num_provenances(self->tree_sequence);\n    if (record_index < 0 || record_index >= num_records) {\n        PyErr_SetString(PyExc_IndexError, \"record index out of bounds\");\n        goto out;\n    }\n    err = tsk_treeseq_get_provenance(\n        self->tree_sequence, (tsk_id_t) record_index, &record);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = make_provenance(&record);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_num_edges(TreeSequence *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    tsk_size_t num_records;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    num_records = tsk_treeseq_get_num_edges(self->tree_sequence);\n    ret = Py_BuildValue(\"n\", (Py_ssize_t) num_records);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_num_migrations(TreeSequence *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    tsk_size_t num_records;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    num_records = tsk_treeseq_get_num_migrations(self->tree_sequence);\n    ret = Py_BuildValue(\"n\", (Py_ssize_t) num_records);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_num_individuals(TreeSequence *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    tsk_size_t num_records;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    num_records = tsk_treeseq_get_num_individuals(self->tree_sequence);\n    ret = Py_BuildValue(\"n\", (Py_ssize_t) num_records);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_num_populations(TreeSequence *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    tsk_size_t num_records;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    num_records = tsk_treeseq_get_num_populations(self->tree_sequence);\n    ret = Py_BuildValue(\"n\", (Py_ssize_t) num_records);\nout:\n    return ret;\n}\n\nstatic 
PyObject *\nTreeSequence_get_num_trees(TreeSequence *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    tsk_size_t num_trees;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    num_trees = tsk_treeseq_get_num_trees(self->tree_sequence);\n    ret = Py_BuildValue(\"n\", (Py_ssize_t) num_trees);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_sequence_length(TreeSequence *self)\n{\n    PyObject *ret = NULL;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"d\", tsk_treeseq_get_sequence_length(self->tree_sequence));\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_discrete_genome(TreeSequence *self)\n{\n    PyObject *ret = NULL;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"i\", tsk_treeseq_get_discrete_genome(self->tree_sequence));\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_discrete_time(TreeSequence *self)\n{\n    PyObject *ret = NULL;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"i\", tsk_treeseq_get_discrete_time(self->tree_sequence));\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_min_time(TreeSequence *self)\n{\n    PyObject *ret = NULL;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"d\", tsk_treeseq_get_min_time(self->tree_sequence));\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_max_time(TreeSequence *self)\n{\n    PyObject *ret = NULL;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"d\", tsk_treeseq_get_max_time(self->tree_sequence));\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_breakpoints(TreeSequence *self)\n{\n    PyObject *ret = NULL;\n    const double *breakpoints;\n    PyArrayObject *array = NULL;\n    npy_intp dims;\n\n    if 
(TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    breakpoints = tsk_treeseq_get_breakpoints(self->tree_sequence);\n    dims = tsk_treeseq_get_num_trees(self->tree_sequence) + 1;\n    array = (PyArrayObject *) PyArray_SimpleNew(1, &dims, NPY_FLOAT64);\n    if (array == NULL) {\n        goto out;\n    }\n    memcpy(PyArray_DATA(array), breakpoints, dims * sizeof(*breakpoints));\n    ret = (PyObject *) array;\n    array = NULL;\nout:\n    Py_XDECREF(array);\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_file_uuid(TreeSequence *self)\n{\n    PyObject *ret = NULL;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"s\", tsk_treeseq_get_file_uuid(self->tree_sequence));\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_num_samples(TreeSequence *self)\n{\n    PyObject *ret = NULL;\n    tsk_size_t num_samples;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    num_samples = tsk_treeseq_get_num_samples(self->tree_sequence);\n    ret = Py_BuildValue(\"n\", (Py_ssize_t) num_samples);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_num_nodes(TreeSequence *self)\n{\n    PyObject *ret = NULL;\n    tsk_size_t num_nodes;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    num_nodes = tsk_treeseq_get_num_nodes(self->tree_sequence);\n    ret = Py_BuildValue(\"n\", (Py_ssize_t) num_nodes);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_samples(TreeSequence *self)\n{\n    PyObject *ret = NULL;\n    const tsk_id_t *samples;\n    PyArrayObject *samples_array = NULL;\n    npy_intp dim;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    dim = tsk_treeseq_get_num_samples(self->tree_sequence);\n    samples = tsk_treeseq_get_samples(self->tree_sequence);\n\n    /* TODO it would be nice to return a read-only array that points to the\n     * tree sequence's memory and to 
INCREF ts to ensure the pointer stays\n     * alive. The details are tricky though. */\n    samples_array = (PyArrayObject *) PyArray_SimpleNew(1, &dim, NPY_INT32);\n    if (samples_array == NULL) {\n        goto out;\n    }\n    memcpy(PyArray_DATA(samples_array), samples, dim * sizeof(*samples));\n    ret = (PyObject *) samples_array;\n    samples_array = NULL;\nout:\n    Py_XDECREF(samples_array);\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_individuals_population(TreeSequence *self)\n{\n    PyObject *ret = NULL;\n    PyArrayObject *ret_array = NULL;\n    npy_intp dim;\n    int err;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n\n    dim = tsk_treeseq_get_num_individuals(self->tree_sequence);\n    ret_array = (PyArrayObject *) PyArray_SimpleNew(1, &dim, NPY_INT32);\n    if (ret_array == NULL) {\n        goto out;\n    }\n\n    err = tsk_treeseq_get_individuals_population(\n        self->tree_sequence, PyArray_DATA(ret_array));\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n\n    ret = (PyObject *) ret_array;\n    ret_array = NULL;\nout:\n    Py_XDECREF(ret_array);\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_individuals_time(TreeSequence *self)\n{\n    PyObject *ret = NULL;\n    PyArrayObject *ret_array = NULL;\n    npy_intp dim;\n    int err;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n\n    dim = tsk_treeseq_get_num_individuals(self->tree_sequence);\n    ret_array = (PyArrayObject *) PyArray_SimpleNew(1, &dim, NPY_FLOAT64);\n    if (ret_array == NULL) {\n        goto out;\n    }\n\n    err = tsk_treeseq_get_individuals_time(self->tree_sequence, PyArray_DATA(ret_array));\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n\n    ret = (PyObject *) ret_array;\n    ret_array = NULL;\nout:\n    Py_XDECREF(ret_array);\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_individuals_nodes(TreeSequence 
*self)\n{\n    PyObject *ret = NULL;\n    PyArrayObject *ret_array = NULL;\n    npy_intp dims[2];\n    tsk_size_t ploidy;\n    tsk_size_t max_ploidy = 0;\n    tsk_id_t *node_mem;\n    tsk_size_t j;\n    tsk_size_t num_individuals;\n    tsk_id_t *const *individual_nodes;\n    const tsk_size_t *individual_nodes_length;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n\n    num_individuals = tsk_treeseq_get_num_individuals(self->tree_sequence);\n    individual_nodes = self->tree_sequence->individual_nodes;\n    individual_nodes_length = self->tree_sequence->individual_nodes_length;\n\n    for (tsk_id_t i = 0; i < (tsk_id_t) num_individuals; i++) {\n        ploidy = individual_nodes_length[i];\n        if (ploidy > max_ploidy) {\n            max_ploidy = ploidy;\n        }\n    }\n\n    dims[0] = (npy_intp) num_individuals;\n    dims[1] = (npy_intp) max_ploidy;\n    ret_array = (PyArrayObject *) PyArray_SimpleNew(2, dims, NPY_INT32);\n    if (ret_array == NULL) {\n        goto out;\n    }\n\n    /* Fill with -1 (TSK_NULL) */\n    node_mem = (tsk_id_t *) PyArray_DATA(ret_array);\n    memset(node_mem, 0xFF, PyArray_NBYTES(ret_array));\n\n    for (tsk_id_t i = 0; i < (tsk_id_t) num_individuals; i++) {\n        ploidy = individual_nodes_length[i];\n        for (j = 0; j < ploidy; j++) {\n            node_mem[i * max_ploidy + j] = individual_nodes[i][j];\n        }\n    }\n\n    ret = (PyObject *) ret_array;\n    ret_array = NULL;\n\nout:\n    Py_XDECREF(ret_array);\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_decode_alignments(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    int err;\n    PyObject *ret = NULL;\n    PyObject *py_ref, *py_nodes, *py_missing;\n    PyArrayObject *nodes_array = NULL;\n    const char *ref_seq;\n    Py_ssize_t ref_len, missing_len;\n    tsk_id_t *nodes;\n    tsk_size_t num_nodes;\n    double left, right;\n    char missing_char;\n    const char *missing_utf8;\n    int isolated_as_missing = 1;\n    
tsk_flags_t options = 0;\n    PyObject *buf_obj = NULL;\n    char *buf = NULL;\n\n    static char *kwlist[] = { \"reference_sequence\", \"nodes\", \"left\", \"right\",\n        \"missing_data_character\", \"isolated_as_missing\", NULL };\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"OOddOp\", kwlist, &py_ref, &py_nodes,\n            &left, &right, &py_missing, &isolated_as_missing)) {\n        goto out;\n    }\n\n    if (!PyBytes_Check(py_ref)) {\n        PyErr_SetString(PyExc_TypeError, \"reference_sequence must be bytes\");\n        goto out;\n    }\n    if (PyBytes_AsStringAndSize(py_ref, (char **) &ref_seq, &ref_len) < 0) {\n        goto out;\n    }\n\n    if (!PyUnicode_Check(py_missing)) {\n        PyErr_SetString(\n            PyExc_TypeError, \"missing_data_character must be a (length 1) string\");\n        goto out;\n    }\n    missing_utf8 = PyUnicode_AsUTF8AndSize(py_missing, &missing_len);\n    if (missing_utf8 == NULL) {\n        goto out;\n    }\n    if (missing_len != 1) {\n        PyErr_SetString(\n            PyExc_TypeError, \"missing_data_character must be a single character\");\n        goto out;\n    }\n    missing_char = missing_utf8[0];\n\n    if (!isolated_as_missing) {\n        options |= TSK_ISOLATED_NOT_MISSING;\n    }\n\n    nodes_array = (PyArrayObject *) PyArray_FROMANY(\n        py_nodes, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);\n    if (nodes_array == NULL) {\n        goto out;\n    }\n    num_nodes = (tsk_size_t) PyArray_DIM(nodes_array, 0);\n    nodes = PyArray_DATA(nodes_array);\n\n    buf_obj = PyBytes_FromStringAndSize(\n        NULL, (Py_ssize_t) (num_nodes * (tsk_size_t) (right - left)));\n    if (buf_obj == NULL) {\n        goto out;\n    }\n    buf = PyBytes_AS_STRING(buf_obj);\n\n    Py_BEGIN_ALLOW_THREADS\n    err = tsk_treeseq_decode_alignments(self->tree_sequence, ref_seq,\n        (tsk_size_t) ref_len, nodes, num_nodes, left, right, 
missing_char, buf, options);\n    Py_END_ALLOW_THREADS\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n\n    ret = buf_obj;\n    buf_obj = NULL;\n\nout:\n    Py_XDECREF(nodes_array);\n    Py_XDECREF(buf_obj);\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_mutations_edge(TreeSequence *self)\n{\n    PyObject *ret = NULL;\n    PyArrayObject *array = NULL;\n    npy_intp num_mutations;\n    tsk_size_t j;\n    tsk_id_t *data;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n\n    num_mutations = (npy_intp) tsk_treeseq_get_num_mutations(self->tree_sequence);\n    array = (PyArrayObject *) PyArray_SimpleNew(1, &num_mutations, NPY_INT32);\n    if (array == NULL) {\n        goto out;\n    }\n\n    data = (tsk_id_t *) PyArray_DATA(array);\n    for (j = 0; j < (tsk_size_t) num_mutations; j++) {\n        data[j] = self->tree_sequence->site_mutations_mem[j].edge;\n    }\n    ret = (PyObject *) array;\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_genealogical_nearest_neighbours(\n    TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    static char *kwlist[] = { \"focal\", \"reference_sets\", NULL };\n    const tsk_id_t **reference_sets = NULL;\n    tsk_size_t *reference_set_size = NULL;\n    PyObject *focal = NULL;\n    PyObject *reference_sets_list = NULL;\n    PyArrayObject *focal_array = NULL;\n    PyArrayObject **reference_set_arrays = NULL;\n    PyArrayObject *ret_array = NULL;\n    npy_intp *shape, dims[2];\n    tsk_size_t num_focal = 0;\n    tsk_size_t num_reference_sets = 0;\n    tsk_size_t j;\n    int err;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(\n            args, kwds, \"OO!\", kwlist, &focal, &PyList_Type, &reference_sets_list)) {\n        goto out;\n    }\n\n    /* We're releasing the GIL here so we need to make sure that the memory we\n     * pass to the low-level code 
doesn't change while it's in use. This is\n     * why we take copies of the input arrays. */\n    focal_array = (PyArrayObject *) PyArray_FROMANY(\n        focal, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY | NPY_ARRAY_ENSURECOPY);\n    if (focal_array == NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(focal_array);\n    num_focal = shape[0];\n    num_reference_sets = PyList_Size(reference_sets_list);\n    if (num_reference_sets == 0) {\n        PyErr_SetString(PyExc_ValueError, \"Must have at least one sample set\");\n        goto out;\n    }\n    reference_set_size = PyMem_Malloc(num_reference_sets * sizeof(*reference_set_size));\n    reference_sets = PyMem_Malloc(num_reference_sets * sizeof(*reference_sets));\n    reference_set_arrays\n        = PyMem_Malloc(num_reference_sets * sizeof(*reference_set_arrays));\n    if (reference_sets == NULL || reference_set_size == NULL\n        || reference_set_arrays == NULL) {\n        goto out;\n    }\n    memset(reference_set_arrays, 0, num_reference_sets * sizeof(*reference_set_arrays));\n    for (j = 0; j < num_reference_sets; j++) {\n        reference_set_arrays[j]\n            = (PyArrayObject *) PyArray_FROMANY(PyList_GetItem(reference_sets_list, j),\n                NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY | NPY_ARRAY_ENSURECOPY);\n        if (reference_set_arrays[j] == NULL) {\n            goto out;\n        }\n        reference_sets[j] = PyArray_DATA(reference_set_arrays[j]);\n        shape = PyArray_DIMS(reference_set_arrays[j]);\n        reference_set_size[j] = shape[0];\n    }\n\n    /* Allocate the return array */\n    dims[0] = num_focal;\n    dims[1] = num_reference_sets;\n    ret_array = (PyArrayObject *) PyArray_SimpleNew(2, dims, NPY_FLOAT64);\n    if (ret_array == NULL) {\n        goto out;\n    }\n\n    Py_BEGIN_ALLOW_THREADS\n    err = tsk_treeseq_genealogical_nearest_neighbours(self->tree_sequence,\n        PyArray_DATA(focal_array), num_focal, reference_sets, reference_set_size,\n        
num_reference_sets, 0, PyArray_DATA(ret_array));\n    Py_END_ALLOW_THREADS\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n\n    ret = (PyObject *) ret_array;\n    ret_array = NULL;\nout:\n    if (reference_sets != NULL) {\n        PyMem_Free(reference_sets);\n    }\n    if (reference_set_size != NULL) {\n        PyMem_Free(reference_set_size);\n    }\n    if (reference_set_arrays != NULL) {\n        for (j = 0; j < num_reference_sets; j++) {\n            Py_XDECREF(reference_set_arrays[j]);\n        }\n        PyMem_Free(reference_set_arrays);\n    }\n    Py_XDECREF(focal_array);\n    Py_XDECREF(ret_array);\n    return ret;\n}\n\n/* Forward Declaration */\nstatic PyTypeObject TreeSequenceType;\n\nstatic PyObject *\nTreeSequence_get_kc_distance(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    TreeSequence *other = NULL;\n    static char *kwlist[] = { \"other\", \"lambda_\", NULL };\n    double lambda = 0;\n    double result = 0;\n    int err;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(\n            args, kwds, \"O!d\", kwlist, &TreeSequenceType, &other, &lambda)) {\n        goto out;\n    }\n    err = tsk_treeseq_kc_distance(\n        self->tree_sequence, other->tree_sequence, lambda, &result);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"d\", result);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_mean_descendants(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    static char *kwlist[] = { \"reference_sets\", NULL };\n    const tsk_id_t **reference_sets = NULL;\n    tsk_size_t *reference_set_size = NULL;\n    PyObject *reference_sets_list = NULL;\n    PyArrayObject **reference_set_arrays = NULL;\n    PyArrayObject *ret_array = NULL;\n    npy_intp *shape, dims[2];\n    tsk_size_t num_reference_sets = 0;\n    
tsk_size_t j;\n    int err;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(\n            args, kwds, \"O!\", kwlist, &PyList_Type, &reference_sets_list)) {\n        goto out;\n    }\n\n    num_reference_sets = PyList_Size(reference_sets_list);\n    if (num_reference_sets == 0) {\n        PyErr_SetString(PyExc_ValueError, \"Must have at least one sample set\");\n        goto out;\n    }\n    reference_set_size = PyMem_Malloc(num_reference_sets * sizeof(*reference_set_size));\n    reference_sets = PyMem_Malloc(num_reference_sets * sizeof(*reference_sets));\n    reference_set_arrays\n        = PyMem_Malloc(num_reference_sets * sizeof(*reference_set_arrays));\n    if (reference_sets == NULL || reference_set_size == NULL\n        || reference_set_arrays == NULL) {\n        goto out;\n    }\n    memset(reference_set_arrays, 0, num_reference_sets * sizeof(*reference_set_arrays));\n    for (j = 0; j < num_reference_sets; j++) {\n        /* We're releasing the GIL here so we need to make sure that the memory we\n         * pass to the low-level code doesn't change while it's in use. This is\n         * why we take copies of the input arrays. 
*/\n        reference_set_arrays[j]\n            = (PyArrayObject *) PyArray_FROMANY(PyList_GetItem(reference_sets_list, j),\n                NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY | NPY_ARRAY_ENSURECOPY);\n        if (reference_set_arrays[j] == NULL) {\n            goto out;\n        }\n        reference_sets[j] = PyArray_DATA(reference_set_arrays[j]);\n        shape = PyArray_DIMS(reference_set_arrays[j]);\n        reference_set_size[j] = shape[0];\n    }\n\n    /* Allocate the return array */\n    dims[0] = tsk_treeseq_get_num_nodes(self->tree_sequence);\n    dims[1] = num_reference_sets;\n    ret_array = (PyArrayObject *) PyArray_SimpleNew(2, dims, NPY_FLOAT64);\n    if (ret_array == NULL) {\n        goto out;\n    }\n\n    Py_BEGIN_ALLOW_THREADS\n    err = tsk_treeseq_mean_descendants(self->tree_sequence, reference_sets,\n        reference_set_size, num_reference_sets, 0, PyArray_DATA(ret_array));\n    Py_END_ALLOW_THREADS\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n\n    ret = (PyObject *) ret_array;\n    ret_array = NULL;\nout:\n    if (reference_sets != NULL) {\n        PyMem_Free(reference_sets);\n    }\n    if (reference_set_size != NULL) {\n        PyMem_Free(reference_set_size);\n    }\n    if (reference_set_arrays != NULL) {\n        for (j = 0; j < num_reference_sets; j++) {\n            Py_XDECREF(reference_set_arrays[j]);\n        }\n        PyMem_Free(reference_set_arrays);\n    }\n    Py_XDECREF(ret_array);\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_extend_haplotypes(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    int err;\n    PyObject *ret = NULL;\n    int max_iter;\n    tsk_flags_t options = 0;\n    static char *kwlist[] = { \"max_iter\", NULL };\n    TreeSequence *output = NULL;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"i\", kwlist, &max_iter)) {\n        goto out;\n    }\n\n    output = 
(TreeSequence *) _PyObject_New((PyTypeObject *) &TreeSequenceType);\n    if (output == NULL) {\n        goto out;\n    }\n    output->tree_sequence = PyMem_Malloc(sizeof(*output->tree_sequence));\n    if (output->tree_sequence == NULL) {\n        PyErr_NoMemory();\n        goto out;\n    }\n\n    err = tsk_treeseq_extend_haplotypes(\n        self->tree_sequence, max_iter, options, output->tree_sequence);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) output;\n    output = NULL;\nout:\n    Py_XDECREF(output);\n    return ret;\n}\n\n/* Error value returned from summary_func callback if an error occurred.\n * This is chosen so that it is not a valid tskit error code and so can\n * never be mistaken for a different error */\n#define TSK_PYTHON_CALLBACK_ERROR (-100000)\n\n/* Run the Python callable that takes X as parameter and must return a\n * 1D array of length M that we copy in to the Y array */\nstatic int\ngeneral_stat_func(tsk_size_t K, const double *X, tsk_size_t M, double *Y, void *params)\n{\n    int ret = TSK_PYTHON_CALLBACK_ERROR;\n    PyObject *callable = (PyObject *) params;\n    PyObject *arglist = NULL;\n    PyObject *result = NULL;\n    PyArrayObject *X_array = NULL;\n    PyArrayObject *Y_array = NULL;\n    npy_intp X_dims = (npy_intp) K;\n    npy_intp *Y_dims;\n\n    X_array = (PyArrayObject *) PyArray_SimpleNew(1, &X_dims, NPY_FLOAT64);\n    if (X_array == NULL) {\n        goto out;\n    }\n    memcpy(PyArray_DATA(X_array), X, K * sizeof(*X));\n    arglist = Py_BuildValue(\"(O)\", X_array);\n    if (arglist == NULL) {\n        goto out;\n    }\n    result = PyObject_CallObject(callable, arglist);\n    if (result == NULL) {\n        goto out;\n    }\n    Y_array = (PyArrayObject *) PyArray_FromAny(\n        result, PyArray_DescrFromType(NPY_FLOAT64), 0, 0, NPY_ARRAY_IN_ARRAY, NULL);\n    if (Y_array == NULL) {\n        goto out;\n    }\n    if (PyArray_NDIM(Y_array) != 1) {\n        
PyErr_Format(PyExc_ValueError,\n            \"Array returned by general_stat callback is %d dimensional; \"\n            \"must be 1D\",\n            (int) PyArray_NDIM(Y_array));\n        goto out;\n    }\n    Y_dims = PyArray_DIMS(Y_array);\n    if (Y_dims[0] != (npy_intp) M) {\n        PyErr_Format(PyExc_ValueError,\n            \"Array returned by general_stat callback is of length %d; \"\n            \"must be %d\",\n            Y_dims[0], M);\n        goto out;\n    }\n    /* Copy the contents of the return Y array into Y */\n    memcpy(Y, PyArray_DATA(Y_array), M * sizeof(*Y));\n    ret = 0;\nout:\n    Py_XDECREF(X_array);\n    Py_XDECREF(arglist);\n    Py_XDECREF(result);\n    Py_XDECREF(Y_array);\n    return ret;\n}\n\nstatic int\nparse_stats_mode(char *mode, tsk_flags_t *ret)\n{\n    tsk_flags_t value = 0;\n\n    if (mode == NULL) {\n        value = TSK_STAT_SITE; /* defaults to site mode */\n    } else if (strcmp(mode, \"site\") == 0) {\n        value = TSK_STAT_SITE;\n    } else if (strcmp(mode, \"branch\") == 0) {\n        value = TSK_STAT_BRANCH;\n    } else if (strcmp(mode, \"node\") == 0) {\n        value = TSK_STAT_NODE;\n    } else {\n        PyErr_SetString(PyExc_ValueError, \"Unrecognised stats mode\");\n        return -1;\n    }\n    *ret = value;\n    return 0;\n}\n\nstatic int\nparse_windows(\n    PyObject *windows, PyArrayObject **ret_windows_array, tsk_size_t *ret_num_windows)\n{\n    int ret = -1;\n    tsk_size_t num_windows = 0;\n    PyArrayObject *windows_array = NULL;\n    npy_intp *shape;\n\n    windows_array = (PyArrayObject *) PyArray_FROMANY(\n        windows, NPY_FLOAT64, 1, 1, NPY_ARRAY_IN_ARRAY);\n    if (windows_array == NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(windows_array);\n    if (shape[0] < 2) {\n        PyErr_SetString(\n            PyExc_ValueError, \"Windows arrays must have at least 2 elements\");\n        goto out;\n    }\n    num_windows = shape[0] - 1;\n\n    ret = 0;\nout:\n    *ret_num_windows = 
num_windows;\n    *ret_windows_array = windows_array;\n    return ret;\n}\n\nstatic PyArrayObject *\nTreeSequence_allocate_results_array(\n    TreeSequence *self, tsk_flags_t mode, tsk_size_t num_windows, tsk_size_t output_dim)\n{\n    PyArrayObject *result_array = NULL;\n    npy_intp result_shape[3];\n\n    if (mode & TSK_STAT_NODE) {\n        result_shape[0] = num_windows;\n        result_shape[1] = tsk_treeseq_get_num_nodes(self->tree_sequence);\n        result_shape[2] = output_dim;\n        result_array = (PyArrayObject *) PyArray_SimpleNew(3, result_shape, NPY_FLOAT64);\n        if (result_array == NULL) {\n            goto out;\n        }\n    } else {\n        result_shape[0] = num_windows;\n        result_shape[1] = output_dim;\n        result_array = (PyArrayObject *) PyArray_SimpleNew(2, result_shape, NPY_FLOAT64);\n        if (result_array == NULL) {\n            goto out;\n        }\n    }\nout:\n    return result_array;\n}\n\nstatic PyObject *\nTreeSequence_general_stat(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    static char *kwlist[] = { \"weights\", \"summary_func\", \"output_dim\", \"windows\", \"mode\",\n        \"polarised\", \"span_normalise\", NULL };\n    PyObject *weights = NULL;\n    PyObject *summary_func = NULL;\n    PyObject *windows = NULL;\n    PyArrayObject *weights_array = NULL;\n    PyArrayObject *windows_array = NULL;\n    PyArrayObject *result_array = NULL;\n    char *mode = NULL;\n    int polarised = 0;\n    int span_normalise = 0;\n    tsk_size_t num_windows;\n    unsigned int output_dim;\n    npy_intp *w_shape;\n    tsk_flags_t options = 0;\n    int err;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"OOIO|sii\", kwlist, &weights,\n            &summary_func, &output_dim, &windows, &mode, &polarised, &span_normalise)) {\n        Py_XINCREF(summary_func);\n        goto out;\n    }\n    
Py_INCREF(summary_func);\n    if (!PyCallable_Check(summary_func)) {\n        PyErr_SetString(PyExc_TypeError, \"summary_func must be callable\");\n        goto out;\n    }\n    if (parse_stats_mode(mode, &options) != 0) {\n        goto out;\n    }\n    if (polarised) {\n        options |= TSK_STAT_POLARISED;\n    }\n    if (span_normalise) {\n        options |= TSK_STAT_SPAN_NORMALISE;\n    }\n    if (parse_windows(windows, &windows_array, &num_windows) != 0) {\n        goto out;\n    }\n\n    weights_array = (PyArrayObject *) PyArray_FROMANY(\n        weights, NPY_FLOAT64, 2, 2, NPY_ARRAY_IN_ARRAY);\n    if (weights_array == NULL) {\n        goto out;\n    }\n    w_shape = PyArray_DIMS(weights_array);\n    if ((tsk_size_t) w_shape[0] != tsk_treeseq_get_num_samples(self->tree_sequence)) {\n        PyErr_SetString(PyExc_ValueError, \"First dimension must be num_samples\");\n        goto out;\n    }\n    result_array\n        = TreeSequence_allocate_results_array(self, options, num_windows, output_dim);\n    if (result_array == NULL) {\n        goto out;\n    }\n\n    err = tsk_treeseq_general_stat(self->tree_sequence, w_shape[1],\n        PyArray_DATA(weights_array), output_dim, general_stat_func, summary_func,\n        num_windows, PyArray_DATA(windows_array), options, PyArray_DATA(result_array));\n    if (err == TSK_PYTHON_CALLBACK_ERROR) {\n        goto out;\n    } else if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) result_array;\n    result_array = NULL;\nout:\n    Py_XDECREF(summary_func);\n    Py_XDECREF(weights_array);\n    Py_XDECREF(windows_array);\n    Py_XDECREF(result_array);\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_one_way_weighted_method(\n    TreeSequence *self, PyObject *args, PyObject *kwds, one_way_weighted_method *method)\n{\n    PyObject *ret = NULL;\n    static char *kwlist[]\n        = { \"weights\", \"windows\", \"mode\", \"polarised\", \"span_normalise\", NULL };\n    
PyObject *weights = NULL;\n    PyObject *windows = NULL;\n    PyArrayObject *weights_array = NULL;\n    PyArrayObject *windows_array = NULL;\n    PyArrayObject *result_array = NULL;\n    char *mode = NULL;\n    int polarised = 0;\n    int span_normalise = 0;\n    tsk_size_t num_windows;\n    npy_intp *w_shape;\n    tsk_flags_t options = 0;\n    int err;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"OO|sii\", kwlist, &weights, &windows,\n            &mode, &polarised, &span_normalise)) {\n        goto out;\n    }\n    if (parse_stats_mode(mode, &options) != 0) {\n        goto out;\n    }\n    if (polarised) {\n        options |= TSK_STAT_POLARISED;\n    }\n    if (span_normalise) {\n        options |= TSK_STAT_SPAN_NORMALISE;\n    }\n    if (parse_windows(windows, &windows_array, &num_windows) != 0) {\n        goto out;\n    }\n\n    weights_array = (PyArrayObject *) PyArray_FROMANY(\n        weights, NPY_FLOAT64, 2, 2, NPY_ARRAY_IN_ARRAY);\n    if (weights_array == NULL) {\n        goto out;\n    }\n    w_shape = PyArray_DIMS(weights_array);\n    if ((tsk_size_t) w_shape[0] != tsk_treeseq_get_num_samples(self->tree_sequence)) {\n        PyErr_SetString(PyExc_ValueError, \"First dimension must be num_samples\");\n        goto out;\n    }\n    result_array\n        = TreeSequence_allocate_results_array(self, options, num_windows, w_shape[1]);\n    if (result_array == NULL) {\n        goto out;\n    }\n\n    err = method(self->tree_sequence, w_shape[1], PyArray_DATA(weights_array),\n        num_windows, PyArray_DATA(windows_array), options, PyArray_DATA(result_array));\n    if (err == TSK_PYTHON_CALLBACK_ERROR) {\n        goto out;\n    } else if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) result_array;\n    result_array = NULL;\nout:\n    Py_XDECREF(weights_array);\n    Py_XDECREF(windows_array);\n    Py_XDECREF(result_array);\n    
return ret;\n}\n\nstatic PyObject *\nTreeSequence_one_way_covariates_method(TreeSequence *self, PyObject *args,\n    PyObject *kwds, one_way_covariates_method *method)\n{\n    PyObject *ret = NULL;\n    static char *kwlist[] = { \"weights\", \"covariates\", \"windows\", \"mode\", \"polarised\",\n        \"span_normalise\", NULL };\n    PyObject *weights = NULL;\n    PyObject *covariates = NULL;\n    PyObject *windows = NULL;\n    PyArrayObject *weights_array = NULL;\n    PyArrayObject *covariates_array = NULL;\n    PyArrayObject *windows_array = NULL;\n    PyArrayObject *result_array = NULL;\n    char *mode = NULL;\n    int polarised = 0;\n    int span_normalise = 0;\n    tsk_size_t num_windows;\n    npy_intp *w_shape, *z_shape;\n    tsk_flags_t options = 0;\n    int err;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"OOO|sii\", kwlist, &weights,\n            &covariates, &windows, &mode, &polarised, &span_normalise)) {\n        goto out;\n    }\n    if (parse_stats_mode(mode, &options) != 0) {\n        goto out;\n    }\n    if (polarised) {\n        options |= TSK_STAT_POLARISED;\n    }\n    if (span_normalise) {\n        options |= TSK_STAT_SPAN_NORMALISE;\n    }\n    if (parse_windows(windows, &windows_array, &num_windows) != 0) {\n        goto out;\n    }\n\n    weights_array = (PyArrayObject *) PyArray_FROMANY(\n        weights, NPY_FLOAT64, 2, 2, NPY_ARRAY_IN_ARRAY);\n    if (weights_array == NULL) {\n        goto out;\n    }\n    w_shape = PyArray_DIMS(weights_array);\n    if ((tsk_size_t) w_shape[0] != tsk_treeseq_get_num_samples(self->tree_sequence)) {\n        PyErr_SetString(\n            PyExc_ValueError, \"First dimension of weights must be num_samples\");\n        goto out;\n    }\n    covariates_array = (PyArrayObject *) PyArray_FROMANY(\n        covariates, NPY_FLOAT64, 2, 2, NPY_ARRAY_IN_ARRAY);\n    if (covariates_array == NULL) {\n        goto out;\n    }\n    
z_shape = PyArray_DIMS(covariates_array);\n    if ((tsk_size_t) z_shape[0] != tsk_treeseq_get_num_samples(self->tree_sequence)) {\n        PyErr_SetString(\n            PyExc_ValueError, \"First dimension of covariates must be num_samples\");\n        goto out;\n    }\n    result_array\n        = TreeSequence_allocate_results_array(self, options, num_windows, w_shape[1]);\n    if (result_array == NULL) {\n        goto out;\n    }\n\n    err = method(self->tree_sequence, w_shape[1], PyArray_DATA(weights_array),\n        z_shape[1], PyArray_DATA(covariates_array), num_windows,\n        PyArray_DATA(windows_array), options, PyArray_DATA(result_array));\n    if (err == TSK_PYTHON_CALLBACK_ERROR) {\n        goto out;\n    } else if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) result_array;\n    result_array = NULL;\nout:\n    Py_XDECREF(weights_array);\n    Py_XDECREF(covariates_array);\n    Py_XDECREF(windows_array);\n    Py_XDECREF(result_array);\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_one_way_stat_method(TreeSequence *self, PyObject *args, PyObject *kwds,\n    one_way_sample_stat_method *method)\n{\n    PyObject *ret = NULL;\n    static char *kwlist[] = { \"sample_set_sizes\", \"sample_sets\", \"windows\", \"mode\",\n        \"span_normalise\", \"polarised\", NULL };\n    PyObject *sample_set_sizes = NULL;\n    PyObject *sample_sets = NULL;\n    PyObject *windows = NULL;\n    char *mode = NULL;\n    PyArrayObject *sample_set_sizes_array = NULL;\n    PyArrayObject *sample_sets_array = NULL;\n    PyArrayObject *windows_array = NULL;\n    PyArrayObject *result_array = NULL;\n    tsk_size_t num_windows, num_sample_sets;\n    tsk_flags_t options = 0;\n    int span_normalise = 1;\n    int polarised = 0;\n    int err;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"OOO|sii\", kwlist, &sample_set_sizes,\n            
&sample_sets, &windows, &mode, &span_normalise, &polarised)) {\n        goto out;\n    }\n    if (parse_stats_mode(mode, &options) != 0) {\n        goto out;\n    }\n    if (span_normalise) {\n        options |= TSK_STAT_SPAN_NORMALISE;\n    }\n    if (polarised) {\n        options |= TSK_STAT_POLARISED;\n    }\n    if (parse_sample_sets(sample_set_sizes, &sample_set_sizes_array, sample_sets,\n            &sample_sets_array, &num_sample_sets)\n        != 0) {\n        goto out;\n    }\n    if (parse_windows(windows, &windows_array, &num_windows) != 0) {\n        goto out;\n    }\n\n    result_array = TreeSequence_allocate_results_array(\n        self, options, num_windows, num_sample_sets);\n    if (result_array == NULL) {\n        goto out;\n    }\n    err = method(self->tree_sequence, num_sample_sets,\n        PyArray_DATA(sample_set_sizes_array), PyArray_DATA(sample_sets_array),\n        num_windows, PyArray_DATA(windows_array), options, PyArray_DATA(result_array));\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) result_array;\n    result_array = NULL;\nout:\n    Py_XDECREF(sample_set_sizes_array);\n    Py_XDECREF(sample_sets_array);\n    Py_XDECREF(windows_array);\n    Py_XDECREF(result_array);\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_allele_frequency_spectrum(\n    TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    static char *kwlist[] = { \"sample_set_sizes\", \"sample_sets\", \"windows\",\n        \"time_windows\", \"mode\", \"span_normalise\", \"polarised\", NULL };\n    PyObject *sample_set_sizes = NULL;\n    PyObject *sample_sets = NULL;\n    PyObject *windows = NULL;\n    PyObject *time_windows = NULL;\n    char *mode = NULL;\n    PyArrayObject *sample_set_sizes_array = NULL;\n    PyArrayObject *sample_sets_array = NULL;\n    PyArrayObject *windows_array = NULL;\n    PyArrayObject *time_windows_array = NULL;\n    PyArrayObject *result_array = 
NULL;\n    tsk_size_t *sizes;\n    npy_intp *shape = NULL;\n    tsk_size_t k, num_windows, num_time_windows, num_sample_sets;\n    tsk_flags_t options = 0;\n    int polarised = 0;\n    int span_normalise = 1;\n    int err;\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"OOOO|sii\", kwlist, &sample_set_sizes,\n            &sample_sets, &windows, &time_windows, &mode, &span_normalise, &polarised)) {\n        goto out;\n    }\n    if (parse_stats_mode(mode, &options) != 0) {\n        goto out;\n    }\n    if (span_normalise) {\n        options |= TSK_STAT_SPAN_NORMALISE;\n    }\n    if (polarised) {\n        options |= TSK_STAT_POLARISED;\n    }\n    if (parse_sample_sets(sample_set_sizes, &sample_set_sizes_array, sample_sets,\n            &sample_sets_array, &num_sample_sets)\n        != 0) {\n        goto out;\n    }\n    if (parse_windows(windows, &windows_array, &num_windows) != 0) {\n        goto out;\n    }\n    if (parse_windows(time_windows, &time_windows_array, &num_time_windows) != 0) {\n        goto out;\n    }\n    // dimensions are: genome windows, time windows, one for each sample set\n    shape = PyMem_Malloc((2 + num_sample_sets) * sizeof(*shape));\n    if (shape == NULL) {\n        goto out;\n    }\n    sizes = PyArray_DATA(sample_set_sizes_array);\n    shape[0] = num_windows;\n    shape[1] = num_time_windows;\n    for (k = 0; k < num_sample_sets; k++) {\n        shape[2 + k] = 1 + sizes[k];\n    }\n    result_array\n        = (PyArrayObject *) PyArray_SimpleNew(2 + num_sample_sets, shape, NPY_FLOAT64);\n    if (result_array == NULL) {\n        goto out;\n    }\n    err = tsk_treeseq_allele_frequency_spectrum(self->tree_sequence, num_sample_sets,\n        PyArray_DATA(sample_set_sizes_array), PyArray_DATA(sample_sets_array),\n        num_windows, PyArray_DATA(windows_array), num_time_windows,\n        PyArray_DATA(time_windows_array), options, PyArray_DATA(result_array));\n  
  if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) result_array;\n    result_array = NULL;\nout:\n    PyMem_Free(shape);\n    Py_XDECREF(sample_set_sizes_array);\n    Py_XDECREF(sample_sets_array);\n    Py_XDECREF(windows_array);\n    Py_XDECREF(time_windows_array);\n    Py_XDECREF(result_array);\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_diversity(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_one_way_stat_method(self, args, kwds, tsk_treeseq_diversity);\n}\n\nstatic PyObject *\nTreeSequence_trait_covariance(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_one_way_weighted_method(\n        self, args, kwds, tsk_treeseq_trait_covariance);\n}\n\nstatic PyObject *\nTreeSequence_trait_correlation(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_one_way_weighted_method(\n        self, args, kwds, tsk_treeseq_trait_correlation);\n}\n\nstatic PyObject *\nTreeSequence_trait_linear_model(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_one_way_covariates_method(\n        self, args, kwds, tsk_treeseq_trait_linear_model);\n}\n\nstatic PyObject *\nTreeSequence_segregating_sites(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_one_way_stat_method(\n        self, args, kwds, tsk_treeseq_segregating_sites);\n}\n\nstatic PyObject *\nTreeSequence_Y1(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_one_way_stat_method(self, args, kwds, tsk_treeseq_Y1);\n}\n\nstatic PyObject *\nTreeSequence_k_way_stat_method(TreeSequence *self, PyObject *args, PyObject *kwds,\n    npy_intp tuple_size, general_sample_stat_method *method)\n{\n    PyObject *ret = NULL;\n    static char *kwlist[] = { \"sample_set_sizes\", \"sample_sets\", \"indexes\", \"windows\",\n        \"mode\", \"span_normalise\", \"polarised\", \"centre\", NULL };\n    PyObject 
*sample_set_sizes = NULL;\n    PyObject *sample_sets = NULL;\n    PyObject *indexes = NULL;\n    PyObject *windows = NULL;\n    PyArrayObject *sample_set_sizes_array = NULL;\n    PyArrayObject *sample_sets_array = NULL;\n    PyArrayObject *indexes_array = NULL;\n    PyArrayObject *windows_array = NULL;\n    PyArrayObject *result_array = NULL;\n    tsk_size_t num_windows, num_sample_sets, num_set_index_tuples;\n    npy_intp *shape;\n    tsk_flags_t options = 0;\n    char *mode = NULL;\n    int span_normalise = true;\n    int polarised = false;\n    int centre = true;\n    int err;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"OOOO|siii\", kwlist, &sample_set_sizes,\n            &sample_sets, &indexes, &windows, &mode, &span_normalise, &polarised,\n            &centre)) {\n        goto out;\n    }\n    if (parse_stats_mode(mode, &options) != 0) {\n        goto out;\n    }\n    if (span_normalise) {\n        options |= TSK_STAT_SPAN_NORMALISE;\n    }\n    if (polarised) {\n        options |= TSK_STAT_POLARISED;\n    }\n    if (!centre) {\n        // only currently used by genetic_relatedness\n        options |= TSK_STAT_NONCENTRED;\n    }\n    if (parse_sample_sets(sample_set_sizes, &sample_set_sizes_array, sample_sets,\n            &sample_sets_array, &num_sample_sets)\n        != 0) {\n        goto out;\n    }\n    if (parse_windows(windows, &windows_array, &num_windows) != 0) {\n        goto out;\n    }\n\n    indexes_array = (PyArrayObject *) PyArray_FROMANY(\n        indexes, NPY_INT32, 2, 2, NPY_ARRAY_IN_ARRAY);\n    if (indexes_array == NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(indexes_array);\n    if (shape[0] < 1 || shape[1] != tuple_size) {\n        PyErr_Format(\n            PyExc_ValueError, \"indexes must be a k x %d array.\", (int) tuple_size);\n        goto out;\n    }\n    num_set_index_tuples = shape[0];\n\n    result_array = 
TreeSequence_allocate_results_array(\n        self, options, num_windows, num_set_index_tuples);\n    if (result_array == NULL) {\n        goto out;\n    }\n    err = method(self->tree_sequence, num_sample_sets,\n        PyArray_DATA(sample_set_sizes_array), PyArray_DATA(sample_sets_array),\n        num_set_index_tuples, PyArray_DATA(indexes_array), num_windows,\n        PyArray_DATA(windows_array), options, PyArray_DATA(result_array));\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) result_array;\n    result_array = NULL;\nout:\n    Py_XDECREF(sample_set_sizes_array);\n    Py_XDECREF(sample_sets_array);\n    Py_XDECREF(indexes_array);\n    Py_XDECREF(windows_array);\n    Py_XDECREF(result_array);\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_weighted_stat_vector_method(\n    TreeSequence *self, PyObject *args, PyObject *kwds, weighted_vector_method *method)\n{\n    PyObject *ret = NULL;\n    static char *kwlist[]\n        = { \"weights\", \"windows\", \"mode\", \"span_normalise\", \"centre\", \"nodes\", NULL };\n    PyObject *weights = NULL;\n    PyObject *windows = NULL;\n    PyObject *focal_nodes = NULL;\n    PyArrayObject *weights_array = NULL;\n    PyArrayObject *windows_array = NULL;\n    PyArrayObject *result_array = NULL;\n    PyArrayObject *focal_nodes_array = NULL;\n    tsk_size_t num_windows;\n    tsk_size_t num_focal_nodes;\n    npy_intp *focal_nodes_shape, *w_shape, result_shape[3];\n    tsk_flags_t options = 0;\n    tsk_size_t num_samples;\n    char *mode = NULL;\n    int span_normalise = true;\n    int centre = true;\n    int err;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"OO|siiO\", kwlist, &weights, &windows,\n            &mode, &span_normalise, &centre, &focal_nodes)) {\n        goto out;\n    }\n    if (parse_stats_mode(mode, &options) != 0) {\n        goto out;\n    }\n    if (span_normalise) 
{\n        options |= TSK_STAT_SPAN_NORMALISE;\n    }\n    if (!centre) {\n        options |= TSK_STAT_NONCENTRED;\n    }\n    if (parse_windows(windows, &windows_array, &num_windows) != 0) {\n        goto out;\n    }\n    num_samples = tsk_treeseq_get_num_samples(self->tree_sequence);\n    weights_array = (PyArrayObject *) PyArray_FROMANY(\n        weights, NPY_FLOAT64, 2, 2, NPY_ARRAY_IN_ARRAY);\n    if (weights_array == NULL) {\n        goto out;\n    }\n    w_shape = PyArray_DIMS(weights_array);\n    if (w_shape[0] != (npy_intp) num_samples) {\n        PyErr_SetString(PyExc_ValueError, \"First dimension must be num_samples\");\n        goto out;\n    }\n    focal_nodes_array = (PyArrayObject *) PyArray_FROMANY(\n        focal_nodes, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);\n    if (focal_nodes_array == NULL) {\n        goto out;\n    }\n    focal_nodes_shape = PyArray_DIMS(focal_nodes_array);\n    num_focal_nodes = focal_nodes_shape[0];\n\n    result_shape[0] = num_windows;\n    result_shape[1] = num_focal_nodes;\n    result_shape[2] = w_shape[1];\n    result_array = (PyArrayObject *) PyArray_SimpleNew(3, result_shape, NPY_FLOAT64);\n    if (result_array == NULL) {\n        goto out;\n    }\n    Py_BEGIN_ALLOW_THREADS\n    err = method(self->tree_sequence, w_shape[1], PyArray_DATA(weights_array),\n        num_windows, PyArray_DATA(windows_array), num_focal_nodes,\n        PyArray_DATA(focal_nodes_array), PyArray_DATA(result_array), options);\n    Py_END_ALLOW_THREADS\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) result_array;\n    result_array = NULL;\nout:\n    Py_XDECREF(weights_array);\n    Py_XDECREF(windows_array);\n    Py_XDECREF(focal_nodes_array);\n    Py_XDECREF(result_array);\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_k_way_weighted_stat_method(TreeSequence *self, PyObject *args,\n    PyObject *kwds, npy_intp tuple_size, two_way_weighted_method *method)\n{\n    PyObject *ret = 
NULL;\n    static char *kwlist[] = { \"weights\", \"indexes\", \"windows\", \"mode\", \"span_normalise\",\n        \"polarised\", \"centre\", NULL };\n    PyObject *weights = NULL;\n    PyObject *indexes = NULL;\n    PyObject *windows = NULL;\n    PyArrayObject *weights_array = NULL;\n    PyArrayObject *indexes_array = NULL;\n    PyArrayObject *windows_array = NULL;\n    PyArrayObject *result_array = NULL;\n    tsk_size_t num_windows, num_index_tuples;\n    npy_intp *w_shape, *shape;\n    tsk_flags_t options = 0;\n    char *mode = NULL;\n    int span_normalise = true;\n    int polarised = false;\n    int centre = true;\n    int err;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"OOO|siii\", kwlist, &weights, &indexes,\n            &windows, &mode, &span_normalise, &polarised, &centre)) {\n        goto out;\n    }\n    if (parse_stats_mode(mode, &options) != 0) {\n        goto out;\n    }\n    if (span_normalise) {\n        options |= TSK_STAT_SPAN_NORMALISE;\n    }\n    if (polarised) {\n        options |= TSK_STAT_POLARISED;\n    }\n    if (!centre) {\n        // only currently used by genetic_relatedness_weighted\n        options |= TSK_STAT_NONCENTRED;\n    }\n    if (parse_windows(windows, &windows_array, &num_windows) != 0) {\n        goto out;\n    }\n    weights_array = (PyArrayObject *) PyArray_FROMANY(\n        weights, NPY_FLOAT64, 2, 2, NPY_ARRAY_IN_ARRAY);\n    if (weights_array == NULL) {\n        goto out;\n    }\n    w_shape = PyArray_DIMS(weights_array);\n    if (w_shape[0] != (npy_intp) tsk_treeseq_get_num_samples(self->tree_sequence)) {\n        PyErr_SetString(PyExc_ValueError, \"First dimension must be num_samples\");\n        goto out;\n    }\n\n    indexes_array = (PyArrayObject *) PyArray_FROMANY(\n        indexes, NPY_INT32, 2, 2, NPY_ARRAY_IN_ARRAY);\n    if (indexes_array == NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(indexes_array);\n    if 
(shape[0] < 1 || shape[1] != tuple_size) {\n        PyErr_Format(\n            PyExc_ValueError, \"indexes must be a k x %d array.\", (int) tuple_size);\n        goto out;\n    }\n    num_index_tuples = shape[0];\n\n    result_array = TreeSequence_allocate_results_array(\n        self, options, num_windows, num_index_tuples);\n    if (result_array == NULL) {\n        goto out;\n    }\n    err = method(self->tree_sequence, w_shape[1], PyArray_DATA(weights_array),\n        num_index_tuples, PyArray_DATA(indexes_array), num_windows,\n        PyArray_DATA(windows_array), PyArray_DATA(result_array), options);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) result_array;\n    result_array = NULL;\nout:\n    Py_XDECREF(weights_array);\n    Py_XDECREF(indexes_array);\n    Py_XDECREF(windows_array);\n    Py_XDECREF(result_array);\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_divergence(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_k_way_stat_method(self, args, kwds, 2, tsk_treeseq_divergence);\n}\n\nstatic PyObject *\nTreeSequence_genetic_relatedness(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_k_way_stat_method(\n        self, args, kwds, 2, tsk_treeseq_genetic_relatedness);\n}\n\nstatic PyObject *\nTreeSequence_genetic_relatedness_weighted(\n    TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_k_way_weighted_stat_method(\n        self, args, kwds, 2, tsk_treeseq_genetic_relatedness_weighted);\n}\n\nstatic PyObject *\nTreeSequence_genetic_relatedness_vector(\n    TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_weighted_stat_vector_method(\n        self, args, kwds, tsk_treeseq_genetic_relatedness_vector);\n}\n\nstatic PyObject *\nTreeSequence_Y2(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_k_way_stat_method(self, args, kwds, 2, 
tsk_treeseq_Y2);\n}\n\nstatic PyObject *\nTreeSequence_f2(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_k_way_stat_method(self, args, kwds, 2, tsk_treeseq_f2);\n}\n\nstatic PyObject *\nTreeSequence_Y3(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_k_way_stat_method(self, args, kwds, 3, tsk_treeseq_Y3);\n}\n\nstatic PyObject *\nTreeSequence_f3(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_k_way_stat_method(self, args, kwds, 3, tsk_treeseq_f3);\n}\n\nstatic PyObject *\nTreeSequence_f4(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_k_way_stat_method(self, args, kwds, 4, tsk_treeseq_f4);\n}\n\nstatic PyObject *\nTreeSequence_divergence_matrix(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n\n    static char *kwlist[] = { \"windows\", \"sample_set_sizes\", \"sample_sets\", \"mode\",\n        \"span_normalise\", NULL };\n    char *mode = NULL;\n    PyArrayObject *result_array = NULL;\n    PyObject *py_sample_set_sizes = Py_None;\n    PyObject *py_sample_sets = Py_None;\n    PyObject *py_windows = Py_None;\n    PyArrayObject *windows_array = NULL;\n    PyArrayObject *sample_set_sizes_array = NULL;\n    PyArrayObject *sample_sets_array = NULL;\n    tsk_flags_t options = 0;\n    npy_intp dims[3];\n    tsk_size_t num_sample_sets = 0;\n    tsk_size_t num_windows = 0;\n    tsk_id_t *sample_sets = NULL;\n    tsk_size_t *sample_set_sizes = NULL;\n    int span_normalise = 0;\n    int err;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"OOO|si\", kwlist, &py_windows,\n            &py_sample_set_sizes, &py_sample_sets, &mode, &span_normalise)) {\n        goto out;\n    }\n\n    if (parse_sample_sets(py_sample_set_sizes, &sample_set_sizes_array, py_sample_sets,\n            &sample_sets_array, &num_sample_sets)\n        != 0) {\n        
goto out;\n    }\n    sample_set_sizes = PyArray_DATA(sample_set_sizes_array);\n    sample_sets = PyArray_DATA(sample_sets_array);\n    if (parse_windows(py_windows, &windows_array, &num_windows) != 0) {\n        goto out;\n    }\n    dims[0] = num_windows;\n    dims[1] = num_sample_sets;\n    dims[2] = num_sample_sets;\n    result_array = (PyArrayObject *) PyArray_SimpleNew(3, dims, NPY_FLOAT64);\n    if (result_array == NULL) {\n        goto out;\n    }\n\n    if (parse_stats_mode(mode, &options) != 0) {\n        goto out;\n    }\n    if (span_normalise) {\n        options |= TSK_STAT_SPAN_NORMALISE;\n    }\n\n    Py_BEGIN_ALLOW_THREADS\n    err = tsk_treeseq_divergence_matrix(self->tree_sequence, num_sample_sets,\n        sample_set_sizes, sample_sets, num_windows, PyArray_DATA(windows_array), options,\n        PyArray_DATA(result_array));\n    Py_END_ALLOW_THREADS\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) result_array;\n    result_array = NULL;\nout:\n    Py_XDECREF(sample_set_sizes_array);\n    Py_XDECREF(sample_sets_array);\n    Py_XDECREF(windows_array);\n    Py_XDECREF(result_array);\n    return ret;\n}\n\nstatic int\nparse_node_bin_map(PyObject *node_bin_map, PyArrayObject **ret_array,\n    tsk_size_t *ret_num_bins, tsk_size_t num_nodes)\n{\n    int ret = -1;\n    npy_int32 num_bins = 0;\n    PyArrayObject *node_bin_map_array = NULL;\n    npy_intp *shape;\n    npy_int32 *data;\n    npy_int32 max_index;\n    tsk_size_t i;\n\n    node_bin_map_array = (PyArrayObject *) PyArray_FROMANY(\n        node_bin_map, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);\n    if (node_bin_map_array == NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(node_bin_map_array);\n    if ((tsk_size_t) shape[0] != num_nodes) {\n        PyErr_SetString(PyExc_ValueError, \"Node-to-bin map must have a value per node\");\n        goto out;\n    }\n\n    max_index = TSK_NULL;\n    data = PyArray_DATA(node_bin_map_array);\n    
for (i = 0; i < num_nodes; i++) {\n        if (data[i] > max_index) {\n            max_index = data[i];\n        }\n    }\n    if (max_index == TSK_NULL) {\n        PyErr_SetString(\n            PyExc_ValueError, \"Node-to-bin map has null values for all nodes\");\n        goto out;\n    }\n    num_bins = 1 + max_index;\n    ret = 0;\nout:\n    *ret_num_bins = (tsk_size_t) num_bins;\n    *ret_array = node_bin_map_array;\n    return ret;\n}\n\nstatic int\nparse_set_indexes(PyObject *indexes, PyArrayObject **ret_array,\n    tsk_size_t *ret_num_indexes, npy_intp tuple_size)\n{\n    int ret = -1;\n    tsk_size_t num_indexes = 0;\n    PyArrayObject *indexes_array = NULL;\n    npy_intp *shape;\n\n    indexes_array = (PyArrayObject *) PyArray_FROMANY(\n        indexes, NPY_INT32, 2, 2, NPY_ARRAY_IN_ARRAY);\n    if (indexes_array == NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(indexes_array);\n    if (shape[0] < 1 || shape[1] != tuple_size) {\n        PyErr_Format(\n            PyExc_ValueError, \"indexes must be a k x %d array.\", (int) tuple_size);\n        goto out;\n    }\n    num_indexes = shape[0];\n    ret = 0;\nout:\n    *ret_num_indexes = num_indexes;\n    *ret_array = indexes_array;\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_pair_coalescence_counts(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n\n    static char *kwlist[] = { \"windows\", \"sample_set_sizes\", \"sample_sets\", \"indexes\",\n        \"node_bin_map\", \"span_normalise\", \"pair_normalise\", NULL };\n    PyObject *py_sample_set_sizes = Py_None;\n    PyObject *py_sample_sets = Py_None;\n    PyObject *py_windows = Py_None;\n    PyObject *py_node_bin_map = Py_None;\n    PyObject *py_indexes = Py_None;\n    PyArrayObject *result_array = NULL;\n    PyArrayObject *windows_array = NULL;\n    PyArrayObject *node_bin_map_array = NULL;\n    PyArrayObject *indexes_array = NULL;\n    PyArrayObject *sample_set_sizes_array = NULL;\n    PyArrayObject 
*sample_sets_array = NULL;\n    npy_intp dims[3];\n    tsk_flags_t options = 0;\n    tsk_size_t num_indexes = 0;\n    tsk_size_t num_sample_sets = 0;\n    tsk_size_t num_windows = 0;\n    tsk_size_t num_bins = 0;\n    int span_normalise = 0;\n    int pair_normalise = 0;\n    int err;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"OOOOO|ii\", kwlist, &py_windows,\n            &py_sample_set_sizes, &py_sample_sets, &py_indexes, &py_node_bin_map,\n            &span_normalise, &pair_normalise)) {\n        goto out;\n    }\n    if (parse_sample_sets(py_sample_set_sizes, &sample_set_sizes_array, py_sample_sets,\n            &sample_sets_array, &num_sample_sets)\n        != 0) {\n        goto out;\n    }\n    if (parse_windows(py_windows, &windows_array, &num_windows) != 0) {\n        goto out;\n    }\n    if (parse_set_indexes(py_indexes, &indexes_array, &num_indexes, 2) != 0) {\n        goto out;\n    }\n    if (parse_node_bin_map(py_node_bin_map, &node_bin_map_array, &num_bins,\n            tsk_treeseq_get_num_nodes(self->tree_sequence))\n        != 0) {\n        goto out;\n    }\n    if (span_normalise) {\n        options |= TSK_STAT_SPAN_NORMALISE;\n    }\n    if (pair_normalise) {\n        options |= TSK_STAT_PAIR_NORMALISE;\n    }\n\n    dims[0] = (npy_intp) num_windows;\n    dims[1] = (npy_intp) num_indexes;\n    dims[2] = (npy_intp) num_bins;\n    result_array = (PyArrayObject *) PyArray_SimpleNew(3, dims, NPY_FLOAT64);\n    if (result_array == NULL) {\n        goto out;\n    }\n\n    err = tsk_treeseq_pair_coalescence_counts(self->tree_sequence, num_sample_sets,\n        PyArray_DATA(sample_set_sizes_array), PyArray_DATA(sample_sets_array),\n        num_indexes, PyArray_DATA(indexes_array), num_windows,\n        PyArray_DATA(windows_array), num_bins, PyArray_DATA(node_bin_map_array), options,\n        PyArray_DATA(result_array));\n    if (err != 0) {\n        
handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) result_array;\n    result_array = NULL;\nout:\n    Py_XDECREF(sample_set_sizes_array);\n    Py_XDECREF(sample_sets_array);\n    Py_XDECREF(windows_array);\n    Py_XDECREF(indexes_array);\n    Py_XDECREF(node_bin_map_array);\n    Py_XDECREF(result_array);\n    return ret;\n}\n\nstatic int\nparse_quantiles(\n    PyObject *quantiles, PyArrayObject **ret_array, tsk_size_t *ret_num_quantiles)\n{\n    int ret = -1;\n    tsk_size_t num_quantiles = 0;\n    PyArrayObject *quantiles_array = NULL;\n    npy_intp *shape;\n\n    quantiles_array = (PyArrayObject *) PyArray_FROMANY(\n        quantiles, NPY_FLOAT64, 1, 1, NPY_ARRAY_IN_ARRAY);\n    if (quantiles_array == NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(quantiles_array);\n    if (shape[0] < 1) {\n        PyErr_SetString(PyExc_ValueError, \"Must supply at least one quantile.\");\n        goto out;\n    }\n    num_quantiles = (tsk_size_t) shape[0];\n    ret = 0;\nout:\n    *ret_num_quantiles = num_quantiles;\n    *ret_array = quantiles_array;\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_pair_coalescence_quantiles(\n    TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n\n    static char *kwlist[] = { \"windows\", \"sample_set_sizes\", \"sample_sets\", \"indexes\",\n        \"node_bin_map\", \"quantiles\", NULL };\n    PyObject *py_sample_set_sizes = Py_None;\n    PyObject *py_sample_sets = Py_None;\n    PyObject *py_windows = Py_None;\n    PyObject *py_node_bin_map = Py_None;\n    PyObject *py_indexes = Py_None;\n    PyObject *py_quantiles = Py_None;\n    PyArrayObject *result_array = NULL;\n    PyArrayObject *windows_array = NULL;\n    PyArrayObject *node_bin_map_array = NULL;\n    PyArrayObject *indexes_array = NULL;\n    PyArrayObject *quantiles_array = NULL;\n    PyArrayObject *sample_set_sizes_array = NULL;\n    PyArrayObject *sample_sets_array = NULL;\n    npy_intp dims[3];\n    
tsk_flags_t options = 0;\n    tsk_size_t num_indexes = 0;\n    tsk_size_t num_sample_sets = 0;\n    tsk_size_t num_windows = 0;\n    tsk_size_t num_bins = 0;\n    tsk_size_t num_quantiles = 0;\n    int err;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"OOOOOO\", kwlist, &py_windows,\n            &py_sample_set_sizes, &py_sample_sets, &py_indexes, &py_node_bin_map,\n            &py_quantiles)) {\n        goto out;\n    }\n    if (parse_sample_sets(py_sample_set_sizes, &sample_set_sizes_array, py_sample_sets,\n            &sample_sets_array, &num_sample_sets)\n        != 0) {\n        goto out;\n    }\n    if (parse_windows(py_windows, &windows_array, &num_windows) != 0) {\n        goto out;\n    }\n    if (parse_set_indexes(py_indexes, &indexes_array, &num_indexes, 2) != 0) {\n        goto out;\n    }\n    if (parse_node_bin_map(py_node_bin_map, &node_bin_map_array, &num_bins,\n            tsk_treeseq_get_num_nodes(self->tree_sequence))\n        != 0) {\n        goto out;\n    }\n    if (parse_quantiles(py_quantiles, &quantiles_array, &num_quantiles) != 0) {\n        goto out;\n    }\n\n    dims[0] = (npy_intp) num_windows;\n    dims[1] = (npy_intp) num_indexes;\n    dims[2] = (npy_intp) num_quantiles;\n    result_array = (PyArrayObject *) PyArray_SimpleNew(3, dims, NPY_FLOAT64);\n    if (result_array == NULL) {\n        goto out;\n    }\n\n    err = tsk_treeseq_pair_coalescence_quantiles(self->tree_sequence, num_sample_sets,\n        PyArray_DATA(sample_set_sizes_array), PyArray_DATA(sample_sets_array),\n        num_indexes, PyArray_DATA(indexes_array), num_windows,\n        PyArray_DATA(windows_array), num_bins, PyArray_DATA(node_bin_map_array),\n        num_quantiles, PyArray_DATA(quantiles_array), options,\n        PyArray_DATA(result_array));\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) result_array;\n    result_array = 
NULL;\nout:\n    Py_XDECREF(sample_set_sizes_array);\n    Py_XDECREF(sample_sets_array);\n    Py_XDECREF(windows_array);\n    Py_XDECREF(indexes_array);\n    Py_XDECREF(node_bin_map_array);\n    Py_XDECREF(quantiles_array);\n    Py_XDECREF(result_array);\n    return ret;\n}\n\nstatic int\nparse_time_windows(\n    PyObject *time_windows, PyArrayObject **ret_array, tsk_size_t *ret_num_time_windows)\n{\n    int ret = -1;\n    tsk_size_t num_time_windows = 0;\n    PyArrayObject *time_windows_array = NULL;\n    npy_intp *shape;\n\n    time_windows_array = (PyArrayObject *) PyArray_FROMANY(\n        time_windows, NPY_FLOAT64, 1, 1, NPY_ARRAY_IN_ARRAY);\n    if (time_windows_array == NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(time_windows_array);\n    if (shape[0] < 2) {\n        PyErr_SetString(PyExc_ValueError, \"Must supply at least two breakpoints.\");\n        goto out;\n    }\n    num_time_windows = (tsk_size_t) shape[0] - 1;\n    ret = 0;\nout:\n    *ret_num_time_windows = num_time_windows;\n    *ret_array = time_windows_array;\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_pair_coalescence_rates(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n\n    static char *kwlist[] = { \"windows\", \"sample_set_sizes\", \"sample_sets\", \"indexes\",\n        \"node_bin_map\", \"time_windows\", NULL };\n    PyObject *py_sample_set_sizes = Py_None;\n    PyObject *py_sample_sets = Py_None;\n    PyObject *py_windows = Py_None;\n    PyObject *py_node_bin_map = Py_None;\n    PyObject *py_indexes = Py_None;\n    PyObject *py_time_windows = Py_None;\n    PyArrayObject *result_array = NULL;\n    PyArrayObject *windows_array = NULL;\n    PyArrayObject *node_bin_map_array = NULL;\n    PyArrayObject *indexes_array = NULL;\n    PyArrayObject *time_windows_array = NULL;\n    PyArrayObject *sample_set_sizes_array = NULL;\n    PyArrayObject *sample_sets_array = NULL;\n    npy_intp dims[3];\n    tsk_flags_t options = 0;\n    
tsk_size_t num_indexes = 0;\n    tsk_size_t num_sample_sets = 0;\n    tsk_size_t num_windows = 0;\n    tsk_size_t num_bins = 0;\n    tsk_size_t num_time_windows = 0;\n    int err;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"OOOOOO\", kwlist, &py_windows,\n            &py_sample_set_sizes, &py_sample_sets, &py_indexes, &py_node_bin_map,\n            &py_time_windows)) {\n        goto out;\n    }\n    if (parse_sample_sets(py_sample_set_sizes, &sample_set_sizes_array, py_sample_sets,\n            &sample_sets_array, &num_sample_sets)\n        != 0) {\n        goto out;\n    }\n    if (parse_windows(py_windows, &windows_array, &num_windows) != 0) {\n        goto out;\n    }\n    if (parse_set_indexes(py_indexes, &indexes_array, &num_indexes, 2) != 0) {\n        goto out;\n    }\n    if (parse_node_bin_map(py_node_bin_map, &node_bin_map_array, &num_bins,\n            tsk_treeseq_get_num_nodes(self->tree_sequence))\n        != 0) {\n        goto out;\n    }\n    if (parse_time_windows(py_time_windows, &time_windows_array, &num_time_windows)\n        != 0) {\n        goto out;\n    }\n\n    dims[0] = (npy_intp) num_windows;\n    dims[1] = (npy_intp) num_indexes;\n    dims[2] = (npy_intp) num_time_windows;\n    result_array = (PyArrayObject *) PyArray_SimpleNew(3, dims, NPY_FLOAT64);\n    if (result_array == NULL) {\n        goto out;\n    }\n\n    err = tsk_treeseq_pair_coalescence_rates(self->tree_sequence, num_sample_sets,\n        PyArray_DATA(sample_set_sizes_array), PyArray_DATA(sample_sets_array),\n        num_indexes, PyArray_DATA(indexes_array), num_windows,\n        PyArray_DATA(windows_array), num_time_windows, PyArray_DATA(node_bin_map_array),\n        PyArray_DATA(time_windows_array), options, PyArray_DATA(result_array));\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) result_array;\n    result_array = NULL;\nout:\n    
Py_XDECREF(sample_set_sizes_array);\n    Py_XDECREF(sample_sets_array);\n    Py_XDECREF(windows_array);\n    Py_XDECREF(indexes_array);\n    Py_XDECREF(node_bin_map_array);\n    Py_XDECREF(time_windows_array);\n    Py_XDECREF(result_array);\n    return ret;\n}\n\nstatic PyArrayObject *\nparse_sites(TreeSequence *self, PyObject *sites, npy_intp *out_dim)\n{\n    PyArrayObject *array;\n    tsk_size_t num_sites = tsk_treeseq_get_num_sites(self->tree_sequence);\n\n    if (sites == Py_None) {\n        array = (PyArrayObject *) PyArray_Arange(0, num_sites, 1, NPY_INT32);\n        if (array == NULL) {\n            goto out;\n        }\n        *out_dim = PyArray_DIM(array, 0);\n    } else {\n        array = (PyArrayObject *) PyArray_FROMANY(\n            sites, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);\n        if (array == NULL) {\n            goto out;\n        }\n        *out_dim = PyArray_DIM(array, 0);\n    }\n\nout:\n    return array;\n}\n\nstatic PyArrayObject *\nparse_positions(TreeSequence *self, PyObject *positions, npy_intp *out_dim)\n{\n    PyArrayObject *array;\n\n    if (positions == Py_None) {\n        array = (PyArrayObject *) TreeSequence_get_breakpoints(self);\n        if (array == NULL) {\n            goto out;\n        }\n        *out_dim = PyArray_DIM(array, 0) - 1; // NB the last element must be truncated\n    } else {\n        array = (PyArrayObject *) PyArray_FROMANY(\n            positions, NPY_FLOAT64, 1, 1, NPY_ARRAY_IN_ARRAY);\n        if (array == NULL) {\n            goto out;\n        }\n        *out_dim = PyArray_DIM(array, 0);\n    }\nout:\n    return array;\n}\n\nstatic PyObject *\nTreeSequence_ld_matrix(TreeSequence *self, PyObject *args, PyObject *kwds,\n    two_locus_count_stat_method *method)\n{\n    PyObject *ret = NULL;\n    static char *kwlist[] = { \"sample_set_sizes\", \"sample_sets\", \"row_sites\",\n        \"col_sites\", \"row_positions\", \"column_positions\", \"mode\", NULL };\n    PyObject *sample_set_sizes = NULL;\n    
PyObject *sample_sets = NULL;\n    PyObject *row_sites = NULL;\n    PyObject *col_sites = NULL;\n    PyObject *row_positions = NULL;\n    PyObject *col_positions = NULL;\n    char *mode = NULL;\n    PyArrayObject *sample_set_sizes_array = NULL;\n    PyArrayObject *sample_sets_array = NULL;\n    PyArrayObject *row_sites_array = NULL;\n    PyArrayObject *col_sites_array = NULL;\n    PyArrayObject *row_positions_array = NULL;\n    PyArrayObject *col_positions_array = NULL;\n    PyArrayObject *result_matrix = NULL;\n    tsk_id_t *row_sites_parsed = NULL;\n    tsk_id_t *col_sites_parsed = NULL;\n    double *row_positions_parsed = NULL;\n    double *col_positions_parsed = NULL;\n    npy_intp result_dim[3] = { 0, 0, 0 };\n    tsk_size_t num_sample_sets;\n    tsk_flags_t options = 0;\n    int err;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"OOOOOO|s\", kwlist, &sample_set_sizes,\n            &sample_sets, &row_sites, &col_sites, &row_positions, &col_positions,\n            &mode)) {\n        goto out;\n    }\n    if (parse_stats_mode(mode, &options) != 0) {\n        goto out;\n    }\n    if (parse_sample_sets(sample_set_sizes, &sample_set_sizes_array, sample_sets,\n            &sample_sets_array, &num_sample_sets)\n        != 0) {\n        goto out;\n    }\n\n    if (options & TSK_STAT_SITE) {\n        if (row_positions != Py_None || col_positions != Py_None) {\n            PyErr_SetString(PyExc_ValueError, \"Cannot specify positions in site mode\");\n            goto out;\n        }\n        row_sites_array = parse_sites(self, row_sites, &(result_dim[0]));\n        col_sites_array = parse_sites(self, col_sites, &(result_dim[1]));\n        if (row_sites_array == NULL || col_sites_array == NULL) {\n            goto out;\n        }\n        row_sites_parsed = PyArray_DATA(row_sites_array);\n        col_sites_parsed = PyArray_DATA(col_sites_array);\n    } else if (options & TSK_STAT_BRANCH) 
{\n        if (row_sites != Py_None || col_sites != Py_None) {\n            PyErr_SetString(PyExc_ValueError, \"Cannot specify sites in branch mode\");\n            goto out;\n        }\n        row_positions_array = parse_positions(self, row_positions, &(result_dim[0]));\n        col_positions_array = parse_positions(self, col_positions, &(result_dim[1]));\n        if (col_positions_array == NULL || row_positions_array == NULL) {\n            goto out;\n        }\n        row_positions_parsed = PyArray_DATA(row_positions_array);\n        col_positions_parsed = PyArray_DATA(col_positions_array);\n    }\n\n    result_dim[2] = num_sample_sets;\n    result_matrix = (PyArrayObject *) PyArray_ZEROS(3, result_dim, NPY_FLOAT64, 0);\n    if (result_matrix == NULL) {\n        PyErr_NoMemory();\n        goto out;\n    }\n\n    Py_BEGIN_ALLOW_THREADS\n    err = method(self->tree_sequence, num_sample_sets,\n        PyArray_DATA(sample_set_sizes_array), PyArray_DATA(sample_sets_array),\n        result_dim[0], row_sites_parsed, row_positions_parsed, result_dim[1],\n        col_sites_parsed, col_positions_parsed, options, PyArray_DATA(result_matrix));\n    Py_END_ALLOW_THREADS\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) result_matrix;\n    result_matrix = NULL;\nout:\n    Py_XDECREF(row_sites_array);\n    Py_XDECREF(col_sites_array);\n    Py_XDECREF(row_positions_array);\n    Py_XDECREF(col_positions_array);\n    Py_XDECREF(sample_sets_array);\n    Py_XDECREF(sample_set_sizes_array);\n    Py_XDECREF(result_matrix);\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_D_matrix(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_ld_matrix(self, args, kwds, tsk_treeseq_D);\n}\n\nstatic PyObject *\nTreeSequence_D2_matrix(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_ld_matrix(self, args, kwds, tsk_treeseq_D2);\n}\n\nstatic PyObject 
*\nTreeSequence_r2_matrix(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_ld_matrix(self, args, kwds, tsk_treeseq_r2);\n}\n\nstatic PyObject *\nTreeSequence_D_prime_matrix(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_ld_matrix(self, args, kwds, tsk_treeseq_D_prime);\n}\n\nstatic PyObject *\nTreeSequence_r_matrix(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_ld_matrix(self, args, kwds, tsk_treeseq_r);\n}\n\nstatic PyObject *\nTreeSequence_Dz_matrix(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_ld_matrix(self, args, kwds, tsk_treeseq_Dz);\n}\n\nstatic PyObject *\nTreeSequence_pi2_matrix(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_ld_matrix(self, args, kwds, tsk_treeseq_pi2);\n}\n\nstatic PyObject *\nTreeSequence_pi2_unbiased_matrix(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_ld_matrix(self, args, kwds, tsk_treeseq_pi2_unbiased);\n}\n\nstatic PyObject *\nTreeSequence_D2_unbiased_matrix(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_ld_matrix(self, args, kwds, tsk_treeseq_D2_unbiased);\n}\n\nstatic PyObject *\nTreeSequence_Dz_unbiased_matrix(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_ld_matrix(self, args, kwds, tsk_treeseq_Dz_unbiased);\n}\n\nstatic PyObject *\nTreeSequence_k_way_ld_matrix(TreeSequence *self, PyObject *args, PyObject *kwds,\n    npy_intp tuple_size, k_way_two_locus_count_stat_method *method)\n{\n    PyObject *ret = NULL;\n    static char *kwlist[] = { \"sample_set_sizes\", \"sample_sets\", \"indexes\", \"row_sites\",\n        \"col_sites\", \"row_positions\", \"column_positions\", \"mode\", NULL };\n    PyObject *sample_set_sizes = NULL;\n    PyObject *sample_sets = NULL;\n    PyObject *indexes = NULL;\n    PyObject *row_sites = NULL;\n    PyObject *col_sites = NULL;\n    PyObject 
*row_positions = NULL;\n    PyObject *col_positions = NULL;\n    char *mode = NULL;\n    PyArrayObject *sample_set_sizes_array = NULL;\n    PyArrayObject *sample_sets_array = NULL;\n    PyArrayObject *indexes_array = NULL;\n    PyArrayObject *row_sites_array = NULL;\n    PyArrayObject *col_sites_array = NULL;\n    PyArrayObject *row_positions_array = NULL;\n    PyArrayObject *col_positions_array = NULL;\n    PyArrayObject *result_matrix = NULL;\n    tsk_id_t *row_sites_parsed = NULL;\n    tsk_id_t *col_sites_parsed = NULL;\n    double *row_positions_parsed = NULL;\n    double *col_positions_parsed = NULL;\n    tsk_size_t num_sample_sets;\n    tsk_size_t num_set_index_tuples;\n    npy_intp *shape, result_dim[3] = { 0, 0, 0 };\n    tsk_flags_t options = 0;\n    int err;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"OOO|OOOOs\", kwlist, &sample_set_sizes,\n            &sample_sets, &indexes, &row_sites, &col_sites, &row_positions,\n            &col_positions, &mode)) {\n        goto out;\n    }\n    if (parse_stats_mode(mode, &options) != 0) {\n        goto out;\n    }\n    if (parse_sample_sets(sample_set_sizes, &sample_set_sizes_array, sample_sets,\n            &sample_sets_array, &num_sample_sets)\n        != 0) {\n        goto out;\n    }\n\n    if (options & TSK_STAT_SITE) {\n        if (row_positions != Py_None || col_positions != Py_None) {\n            PyErr_SetString(PyExc_ValueError, \"Cannot specify positions in site mode\");\n            goto out;\n        }\n        row_sites_array = parse_sites(self, row_sites, &(result_dim[0]));\n        col_sites_array = parse_sites(self, col_sites, &(result_dim[1]));\n        if (row_sites_array == NULL || col_sites_array == NULL) {\n            goto out;\n        }\n        row_sites_parsed = PyArray_DATA(row_sites_array);\n        col_sites_parsed = PyArray_DATA(col_sites_array);\n    } else if (options & TSK_STAT_BRANCH) {\n        if 
(row_sites != Py_None || col_sites != Py_None) {\n            PyErr_SetString(PyExc_ValueError, \"Cannot specify sites in branch mode\");\n            goto out;\n        }\n        row_positions_array = parse_positions(self, row_positions, &(result_dim[0]));\n        col_positions_array = parse_positions(self, col_positions, &(result_dim[1]));\n        if (col_positions_array == NULL || row_positions_array == NULL) {\n            goto out;\n        }\n        row_positions_parsed = PyArray_DATA(row_positions_array);\n        col_positions_parsed = PyArray_DATA(col_positions_array);\n    }\n\n    indexes_array = (PyArrayObject *) PyArray_FROMANY(\n        indexes, NPY_INT32, 2, 2, NPY_ARRAY_IN_ARRAY);\n    if (indexes_array == NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(indexes_array);\n    if (shape[0] < 1 || shape[1] != tuple_size) {\n        PyErr_Format(\n            PyExc_ValueError, \"indexes must be a k x %d array.\", (int) tuple_size);\n        goto out;\n    }\n    num_set_index_tuples = shape[0];\n\n    result_dim[2] = num_set_index_tuples;\n    result_matrix = (PyArrayObject *) PyArray_ZEROS(3, result_dim, NPY_FLOAT64, 0);\n    if (result_matrix == NULL) {\n        PyErr_NoMemory();\n        goto out;\n    }\n\n    Py_BEGIN_ALLOW_THREADS\n    err = method(self->tree_sequence, num_sample_sets,\n        PyArray_DATA(sample_set_sizes_array), PyArray_DATA(sample_sets_array),\n        num_set_index_tuples, PyArray_DATA(indexes_array), result_dim[0],\n        row_sites_parsed, row_positions_parsed, result_dim[1], col_sites_parsed,\n        col_positions_parsed, options, PyArray_DATA(result_matrix));\n    Py_END_ALLOW_THREADS\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) result_matrix;\n    result_matrix = NULL;\nout:\n    Py_XDECREF(row_sites_array);\n    Py_XDECREF(col_sites_array);\n    Py_XDECREF(row_positions_array);\n    Py_XDECREF(col_positions_array);\n    
Py_XDECREF(sample_sets_array);\n    Py_XDECREF(sample_set_sizes_array);\n    Py_XDECREF(indexes_array);\n    Py_XDECREF(result_matrix);\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_D2_ij_matrix(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_k_way_ld_matrix(self, args, kwds, 2, tsk_treeseq_D2_ij);\n}\n\nstatic PyObject *\nTreeSequence_D2_ij_unbiased_matrix(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_k_way_ld_matrix(self, args, kwds, 2, tsk_treeseq_D2_ij_unbiased);\n}\n\nstatic PyObject *\nTreeSequence_r2_ij_matrix(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    return TreeSequence_k_way_ld_matrix(self, args, kwds, 2, tsk_treeseq_r2_ij);\n}\n\nstatic PyObject *\nTreeSequence_get_num_mutations(TreeSequence *self)\n{\n    PyObject *ret = NULL;\n    tsk_size_t num_mutations;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    num_mutations = tsk_treeseq_get_num_mutations(self->tree_sequence);\n    ret = Py_BuildValue(\"n\", (Py_ssize_t) num_mutations);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_num_sites(TreeSequence *self)\n{\n    PyObject *ret = NULL;\n    tsk_size_t num_sites;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    num_sites = tsk_treeseq_get_num_sites(self->tree_sequence);\n    ret = Py_BuildValue(\"n\", (Py_ssize_t) num_sites);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_num_provenances(TreeSequence *self)\n{\n    PyObject *ret = NULL;\n    tsk_size_t num_provenances;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    num_provenances = tsk_treeseq_get_num_provenances(self->tree_sequence);\n    ret = Py_BuildValue(\"n\", (Py_ssize_t) num_provenances);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_split_edges(TreeSequence *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    static char *kwlist[] = { 
\"time\", \"flags\", \"population\", \"metadata\", NULL };\n    double time;\n    tsk_flags_t flags;\n    tsk_id_t population;\n    PyObject *py_metadata = Py_None;\n    char *metadata;\n    Py_ssize_t metadata_length;\n    int err;\n    TreeSequence *output = NULL;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"dO&O&O\", kwlist, &time,\n            &uint32_converter, &flags, &tsk_id_converter, &population, &py_metadata)) {\n        goto out;\n    }\n\n    if (PyBytes_AsStringAndSize(py_metadata, &metadata, &metadata_length) < 0) {\n        goto out;\n    }\n\n    output = (TreeSequence *) _PyObject_New((PyTypeObject *) &TreeSequenceType);\n    if (output == NULL) {\n        goto out;\n    }\n    output->tree_sequence = PyMem_Malloc(sizeof(*output->tree_sequence));\n    if (output->tree_sequence == NULL) {\n        PyErr_NoMemory();\n        goto out;\n    }\n    err = tsk_treeseq_split_edges(self->tree_sequence, time, flags, population, metadata,\n        metadata_length, 0, output->tree_sequence);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) output;\n    output = NULL;\nout:\n    Py_XDECREF(output);\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_has_reference_sequence(TreeSequence *self)\n{\n    PyObject *ret = NULL;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\n        \"i\", (int) tsk_treeseq_has_reference_sequence(self->tree_sequence));\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_reference_sequence(TreeSequence *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    ret = ReferenceSequence_get_new(\n        &self->tree_sequence->tables->reference_sequence, (PyObject *) self, true);\nout:\n    return ret;\n}\n\n/* Make a new array that is owned by the specified 
object. */\nstatic PyObject *\nmake_owned_array(PyObject *self, tsk_size_t size, int dtype, void *data)\n{\n    PyObject *ret = NULL;\n    PyArrayObject *array = NULL;\n    npy_intp dims = (npy_intp) size;\n\n    array = (PyArrayObject *) PyArray_SimpleNewFromData(1, &dims, dtype, data);\n    if (array == NULL) {\n        goto out;\n    }\n    PyArray_CLEARFLAGS(array, NPY_ARRAY_WRITEABLE);\n    if (PyArray_SetBaseObject(array, (PyObject *) self) != 0) {\n        goto out;\n    }\n    /* PyArray_SetBaseObject steals a reference, so we have to incref this\n     * object. This makes sure that the instance will stay alive if there\n     * are any arrays that refer to its memory. */\n    Py_INCREF(self);\n    ret = (PyObject *) array;\n    array = NULL;\nout:\n    Py_XDECREF(array);\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_make_array(TreeSequence *self, tsk_size_t size, int dtype, void *data)\n{\n    return make_owned_array((PyObject *) self, size, dtype, data);\n}\n\n#if HAVE_NUMPY_2\nPyObject *\nTreeSequence_decode_ragged_string_column(\n    TreeSequence *self, tsk_size_t num_rows, const char *data, const tsk_size_t *offset)\n{\n    PyObject *ret = NULL;\n    PyObject *array = NULL;\n    char *array_data = NULL;\n    npy_intp dims[1];\n    tsk_size_t i;\n    int pack_result;\n    npy_string_allocator *allocator = NULL;\n    PyArray_StringDTypeObject *string_dtype\n        = (PyArray_StringDTypeObject *) PyArray_DescrFromType(NPY_VSTRING);\n    /* This can only fail if an invalid dtype is passed */\n    assert(string_dtype != NULL);\n\n    dims[0] = (npy_intp) num_rows;\n    array = PyArray_Zeros(1, dims, (PyArray_Descr *) string_dtype, 0);\n    if (array == NULL) {\n        goto out;\n    }\n    array_data = (char *) PyArray_DATA((PyArrayObject *) array);\n    allocator = NpyString_acquire_allocator(string_dtype);\n    for (i = 0; i < num_rows; i++) {\n        pack_result = NpyString_pack(allocator,\n            (npy_packed_static_string\n               
     *) (array_data + (i * ((PyArray_Descr *) string_dtype)->elsize)),\n            data + offset[i], offset[i + 1] - offset[i]);\n        if (pack_result < 0) {\n            PyErr_SetString(PyExc_MemoryError, \"could not pack string.\");\n            goto out;\n        }\n    }\n    /* Release the allocator before we call any other Python C API functions\n     * which may require the GIL.\n     */\n    NpyString_release_allocator(allocator);\n    allocator = NULL;\n\n    /* Clear the writeable flag to match other arrays semantics */\n    PyArray_CLEARFLAGS((PyArrayObject *) array, NPY_ARRAY_WRITEABLE);\n\n    ret = array;\n    array = NULL;\nout:\n    if (allocator != NULL) {\n        NpyString_release_allocator(allocator);\n    }\n    Py_XDECREF(array);\n    return ret;\n}\n#endif\n\n#if HAVE_NUMPY_2\n\n#define DEFINE_RAGGED_STRING_ACCESSOR(table_name, field_name, table_type)               \\\n    static PyObject *TreeSequence_get_##table_name##_##field_name##_string(             \\\n        TreeSequence *self, void *closure)                                              \\\n    {                                                                                   \\\n        PyObject *ret = NULL;                                                           \\\n        tsk_##table_type##_table_t table;                                               \\\n                                                                                        \\\n        if (TreeSequence_check_state(self) != 0) {                                      \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        table = self->tree_sequence->tables->table_name;                                \\\n        ret = TreeSequence_decode_ragged_string_column(                                 \\\n            self, table.num_rows, table.field_name, 
table.field_name##_offset);         \\\n    out:                                                                                \\\n        return ret;                                                                     \\\n    }\n\nDEFINE_RAGGED_STRING_ACCESSOR(sites, ancestral_state, site)\nDEFINE_RAGGED_STRING_ACCESSOR(provenances, timestamp, provenance)\nDEFINE_RAGGED_STRING_ACCESSOR(provenances, record, provenance)\nDEFINE_RAGGED_STRING_ACCESSOR(mutations, derived_state, mutation)\n\nstatic PyObject *\nTreeSequence_get_mutations_inherited_state_string(TreeSequence *self, void *closure)\n{\n    PyObject *ret = NULL;\n    tsk_treeseq_t *ts;\n    tsk_size_t num_mutations;\n    char *inherited_state_data = NULL;\n    tsk_size_t *inherited_state_offsets = NULL;\n    tsk_size_t total_length = 0;\n    tsk_size_t j, offset;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n\n    ts = self->tree_sequence;\n    num_mutations = ts->tables->mutations.num_rows;\n\n    /* Calculate total length needed for inherited state data */\n    for (j = 0; j < num_mutations; j++) {\n        total_length += ts->site_mutations_mem[j].inherited_state_length;\n    }\n\n    /* Allocate memory for the ragged array */\n    inherited_state_data = PyMem_Malloc(total_length * sizeof(char));\n    inherited_state_offsets = PyMem_Malloc((num_mutations + 1) * sizeof(tsk_size_t));\n    if (inherited_state_data == NULL || inherited_state_offsets == NULL) {\n        PyErr_NoMemory();\n        goto out;\n    }\n\n    /* Populate the ragged array data */\n    offset = 0;\n    for (j = 0; j < num_mutations; j++) {\n        inherited_state_offsets[j] = offset;\n        memcpy(inherited_state_data + offset, ts->site_mutations_mem[j].inherited_state,\n            ts->site_mutations_mem[j].inherited_state_length);\n        offset += ts->site_mutations_mem[j].inherited_state_length;\n    }\n    inherited_state_offsets[num_mutations] = offset;\n\n    ret = 
TreeSequence_decode_ragged_string_column(\n        self, num_mutations, inherited_state_data, inherited_state_offsets);\n\nout:\n    if (inherited_state_data != NULL) {\n        PyMem_Free(inherited_state_data);\n    }\n    if (inherited_state_offsets != NULL) {\n        PyMem_Free(inherited_state_offsets);\n    }\n    return ret;\n}\n#endif\n\n/* Universal macro for all array accessors */\n#define DEFINE_ARRAY_ACCESSOR(                                                          \\\n    table_name, field_name, table_type, numpy_type, count_expr)                         \\\n    static PyObject *TreeSequence_get_##table_name##_##field_name(                      \\\n        TreeSequence *self, void *closure)                                              \\\n    {                                                                                   \\\n        PyObject *ret = NULL;                                                           \\\n        tsk_##table_type##_table_t table;                                               \\\n                                                                                        \\\n        if (TreeSequence_check_state(self) != 0) {                                      \\\n            goto out;                                                                   \\\n        }                                                                               \\\n        table = self->tree_sequence->tables->table_name;                                \\\n        ret = TreeSequence_make_array(self, count_expr, numpy_type, table.field_name);  \\\n    out:                                                                                \\\n        return ret;                                                                     \\\n    }\n\nDEFINE_ARRAY_ACCESSOR(\n    sites, ancestral_state, site, NPY_INT8, table.ancestral_state_length)\nDEFINE_ARRAY_ACCESSOR(\n    sites, ancestral_state_offset, site, NPY_UINT64, table.num_rows + 
1)\nDEFINE_ARRAY_ACCESSOR(\n    mutations, derived_state, mutation, NPY_INT8, table.derived_state_length)\nDEFINE_ARRAY_ACCESSOR(\n    mutations, derived_state_offset, mutation, NPY_UINT64, table.num_rows + 1)\nDEFINE_ARRAY_ACCESSOR(provenances, record, provenance, NPY_INT8, table.record_length)\nDEFINE_ARRAY_ACCESSOR(\n    provenances, record_offset, provenance, NPY_UINT64, table.num_rows + 1)\nDEFINE_ARRAY_ACCESSOR(\n    provenances, timestamp, provenance, NPY_INT8, table.timestamp_length)\nDEFINE_ARRAY_ACCESSOR(\n    provenances, timestamp_offset, provenance, NPY_UINT64, table.num_rows + 1)\nDEFINE_ARRAY_ACCESSOR(nodes, time, node, NPY_FLOAT64, table.num_rows)\nDEFINE_ARRAY_ACCESSOR(nodes, flags, node, NPY_UINT32, table.num_rows)\nDEFINE_ARRAY_ACCESSOR(nodes, population, node, NPY_INT32, table.num_rows)\nDEFINE_ARRAY_ACCESSOR(nodes, individual, node, NPY_INT32, table.num_rows)\nDEFINE_ARRAY_ACCESSOR(nodes, metadata, node, NPY_UINT8, table.metadata_length)\nDEFINE_ARRAY_ACCESSOR(nodes, metadata_offset, node, NPY_UINT64, table.num_rows + 1)\n\nDEFINE_ARRAY_ACCESSOR(edges, left, edge, NPY_FLOAT64, table.num_rows)\nDEFINE_ARRAY_ACCESSOR(edges, right, edge, NPY_FLOAT64, table.num_rows)\nDEFINE_ARRAY_ACCESSOR(edges, parent, edge, NPY_INT32, table.num_rows)\nDEFINE_ARRAY_ACCESSOR(edges, child, edge, NPY_INT32, table.num_rows)\nDEFINE_ARRAY_ACCESSOR(edges, metadata, edge, NPY_UINT8, table.metadata_length)\nDEFINE_ARRAY_ACCESSOR(edges, metadata_offset, edge, NPY_UINT64, table.num_rows + 1)\n\nDEFINE_ARRAY_ACCESSOR(sites, position, site, NPY_FLOAT64, table.num_rows)\nDEFINE_ARRAY_ACCESSOR(sites, metadata, site, NPY_UINT8, table.metadata_length)\nDEFINE_ARRAY_ACCESSOR(sites, metadata_offset, site, NPY_UINT64, table.num_rows + 1)\n\nDEFINE_ARRAY_ACCESSOR(mutations, site, mutation, NPY_INT32, table.num_rows)\nDEFINE_ARRAY_ACCESSOR(mutations, node, mutation, NPY_INT32, table.num_rows)\nDEFINE_ARRAY_ACCESSOR(mutations, parent, mutation, NPY_INT32, 
table.num_rows)\nDEFINE_ARRAY_ACCESSOR(mutations, time, mutation, NPY_FLOAT64, table.num_rows)\nDEFINE_ARRAY_ACCESSOR(mutations, metadata, mutation, NPY_UINT8, table.metadata_length)\nDEFINE_ARRAY_ACCESSOR(\n    mutations, metadata_offset, mutation, NPY_UINT64, table.num_rows + 1)\n\nDEFINE_ARRAY_ACCESSOR(migrations, left, migration, NPY_FLOAT64, table.num_rows)\nDEFINE_ARRAY_ACCESSOR(migrations, right, migration, NPY_FLOAT64, table.num_rows)\nDEFINE_ARRAY_ACCESSOR(migrations, node, migration, NPY_INT32, table.num_rows)\nDEFINE_ARRAY_ACCESSOR(migrations, source, migration, NPY_INT32, table.num_rows)\nDEFINE_ARRAY_ACCESSOR(migrations, dest, migration, NPY_INT32, table.num_rows)\nDEFINE_ARRAY_ACCESSOR(migrations, time, migration, NPY_FLOAT64, table.num_rows)\nDEFINE_ARRAY_ACCESSOR(migrations, metadata, migration, NPY_UINT8, table.metadata_length)\nDEFINE_ARRAY_ACCESSOR(\n    migrations, metadata_offset, migration, NPY_UINT64, table.num_rows + 1)\n\nDEFINE_ARRAY_ACCESSOR(\n    populations, metadata, population, NPY_UINT8, table.metadata_length)\nDEFINE_ARRAY_ACCESSOR(\n    populations, metadata_offset, population, NPY_UINT64, table.num_rows + 1)\n\nDEFINE_ARRAY_ACCESSOR(individuals, flags, individual, NPY_UINT32, table.num_rows)\nDEFINE_ARRAY_ACCESSOR(\n    individuals, location, individual, NPY_FLOAT64, table.location_length)\nDEFINE_ARRAY_ACCESSOR(\n    individuals, location_offset, individual, NPY_UINT64, table.num_rows + 1)\nDEFINE_ARRAY_ACCESSOR(individuals, parents, individual, NPY_INT32, table.parents_length)\nDEFINE_ARRAY_ACCESSOR(\n    individuals, parents_offset, individual, NPY_UINT64, table.num_rows + 1)\nDEFINE_ARRAY_ACCESSOR(\n    individuals, metadata, individual, NPY_UINT8, table.metadata_length)\nDEFINE_ARRAY_ACCESSOR(\n    individuals, metadata_offset, individual, NPY_UINT64, table.num_rows + 1)\n\nstatic PyObject *\nTreeSequence_get_indexes_edge_insertion_order(TreeSequence *self, void *closure)\n{\n    PyObject *ret = NULL;\n    
tsk_table_collection_t *tables;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    tables = self->tree_sequence->tables;\n    ret = TreeSequence_make_array(\n        self, tables->edges.num_rows, NPY_INT32, tables->indexes.edge_insertion_order);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTreeSequence_get_indexes_edge_removal_order(TreeSequence *self, void *closure)\n{\n    PyObject *ret = NULL;\n    tsk_table_collection_t *tables;\n\n    if (TreeSequence_check_state(self) != 0) {\n        goto out;\n    }\n    tables = self->tree_sequence->tables;\n    ret = TreeSequence_make_array(\n        self, tables->edges.num_rows, NPY_INT32, tables->indexes.edge_removal_order);\nout:\n    return ret;\n}\n\nstatic PyMethodDef TreeSequence_methods[] = {\n    { .ml_name = \"dump\",\n        .ml_meth = (PyCFunction) TreeSequence_dump,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Writes the tree sequence out to the specified file.\" },\n    { .ml_name = \"load\",\n        .ml_meth = (PyCFunction) TreeSequence_load,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Loads a tree sequence from the specified file.\" },\n    { .ml_name = \"load_tables\",\n        .ml_meth = (PyCFunction) TreeSequence_load_tables,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Loads a tree sequence from the specified set of tables\" },\n    { .ml_name = \"dump_tables\",\n        .ml_meth = (PyCFunction) TreeSequence_dump_tables,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Dumps the tree sequence to the specified set of tables\" },\n    { .ml_name = \"link_ancestors\",\n        .ml_meth = (PyCFunction) TreeSequence_link_ancestors,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Returns an EdgeTable linking the specified samples and ancestors.\" },\n    { .ml_name = \"get_node\",\n        .ml_meth = (PyCFunction) TreeSequence_get_node,\n        
.ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the node record at the specified index.\" },\n    { .ml_name = \"get_edge\",\n        .ml_meth = (PyCFunction) TreeSequence_get_edge,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the edge record at the specified index.\" },\n    { .ml_name = \"get_migration\",\n        .ml_meth = (PyCFunction) TreeSequence_get_migration,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the migration record at the specified index.\" },\n    { .ml_name = \"get_site\",\n        .ml_meth = (PyCFunction) TreeSequence_get_site,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the mutation type record at the specified index.\" },\n    { .ml_name = \"get_mutation\",\n        .ml_meth = (PyCFunction) TreeSequence_get_mutation,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the mutation record at the specified index.\" },\n    { .ml_name = \"get_individual\",\n        .ml_meth = (PyCFunction) TreeSequence_get_individual,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the individual record at the specified index.\" },\n    { .ml_name = \"get_population\",\n        .ml_meth = (PyCFunction) TreeSequence_get_population,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the population record at the specified index.\" },\n    { .ml_name = \"get_provenance\",\n        .ml_meth = (PyCFunction) TreeSequence_get_provenance,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the provenance record at the specified index.\" },\n    { .ml_name = \"get_num_edges\",\n        .ml_meth = (PyCFunction) TreeSequence_get_num_edges,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the number of coalescence records.\" },\n    { .ml_name = \"get_num_migrations\",\n        .ml_meth = (PyCFunction) TreeSequence_get_num_migrations,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the number of migration records.\" },\n    { 
.ml_name = \"get_num_populations\",\n        .ml_meth = (PyCFunction) TreeSequence_get_num_populations,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the number of population records.\" },\n    { .ml_name = \"get_num_individuals\",\n        .ml_meth = (PyCFunction) TreeSequence_get_num_individuals,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the number of individual records.\" },\n    { .ml_name = \"get_num_trees\",\n        .ml_meth = (PyCFunction) TreeSequence_get_num_trees,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the number of trees in the tree sequence.\" },\n    { .ml_name = \"get_sequence_length\",\n        .ml_meth = (PyCFunction) TreeSequence_get_sequence_length,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the sequence length in bases.\" },\n    { .ml_name = \"get_discrete_genome\",\n        .ml_meth = (PyCFunction) TreeSequence_get_discrete_genome,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns True if this TreeSequence has discrete coordinates\" },\n    { .ml_name = \"get_discrete_time\",\n        .ml_meth = (PyCFunction) TreeSequence_get_discrete_time,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns True if this TreeSequence has discrete times\" },\n    { .ml_name = \"get_min_time\",\n        .ml_meth = (PyCFunction) TreeSequence_get_min_time,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the min time.\" },\n    { .ml_name = \"get_max_time\",\n        .ml_meth = (PyCFunction) TreeSequence_get_max_time,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the max time.\" },\n    { .ml_name = \"get_breakpoints\",\n        .ml_meth = (PyCFunction) TreeSequence_get_breakpoints,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the tree breakpoints as a numpy array.\" },\n    { .ml_name = \"get_file_uuid\",\n        .ml_meth = (PyCFunction) TreeSequence_get_file_uuid,\n        .ml_flags = METH_NOARGS,\n        
.ml_doc = \"Returns the UUID of the underlying file, if present.\" },\n    { .ml_name = \"get_metadata\",\n        .ml_meth = (PyCFunction) TreeSequence_get_metadata,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the metadata for the tree sequence\" },\n    { .ml_name = \"get_metadata_schema\",\n        .ml_meth = (PyCFunction) TreeSequence_get_metadata_schema,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the metadata schema for the tree sequence metadata\" },\n    { .ml_name = \"get_time_units\",\n        .ml_meth = (PyCFunction) TreeSequence_get_time_units,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the description of the units of the time dimension\" },\n    { .ml_name = \"get_num_sites\",\n        .ml_meth = (PyCFunction) TreeSequence_get_num_sites,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the number of sites\" },\n    { .ml_name = \"get_num_mutations\",\n        .ml_meth = (PyCFunction) TreeSequence_get_num_mutations,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the number of mutations\" },\n    { .ml_name = \"get_num_provenances\",\n        .ml_meth = (PyCFunction) TreeSequence_get_num_provenances,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the number of provenances\" },\n    { .ml_name = \"get_num_nodes\",\n        .ml_meth = (PyCFunction) TreeSequence_get_num_nodes,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the number of unique nodes in the tree sequence.\" },\n    { .ml_name = \"get_num_samples\",\n        .ml_meth = (PyCFunction) TreeSequence_get_num_samples,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the sample size\" },\n    { .ml_name = \"get_table_metadata_schemas\",\n        .ml_meth = (PyCFunction) TreeSequence_get_table_metadata_schemas,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the metadata schemas for the tree sequence tables\" },\n    { .ml_name = \"get_samples\",\n       
 .ml_meth = (PyCFunction) TreeSequence_get_samples,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the samples.\" },\n    { .ml_name = \"get_individuals_population\",\n        .ml_meth = (PyCFunction) TreeSequence_get_individuals_population,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the vector of per-individual populations.\" },\n    { .ml_name = \"get_individuals_time\",\n        .ml_meth = (PyCFunction) TreeSequence_get_individuals_time,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the vector of per-individual times.\" },\n    { .ml_name = \"get_individuals_nodes\",\n        .ml_meth = (PyCFunction) TreeSequence_get_individuals_nodes,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns an array of the node ids for each individual\" },\n    { .ml_name = \"decode_alignments\",\n        .ml_meth = (PyCFunction) TreeSequence_decode_alignments,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Decode full alignments for given nodes and interval.\" },\n    { .ml_name = \"get_mutations_edge\",\n        .ml_meth = (PyCFunction) TreeSequence_get_mutations_edge,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns an array of the edge ids of each mutation's edge\" },\n    { .ml_name = \"genealogical_nearest_neighbours\",\n        .ml_meth = (PyCFunction) TreeSequence_genealogical_nearest_neighbours,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Returns the genealogical nearest neighbours statistic.\" },\n    { .ml_name = \"get_kc_distance\",\n        .ml_meth = (PyCFunction) TreeSequence_get_kc_distance,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Returns the KC distance between this tree sequence and another.\" },\n    { .ml_name = \"mean_descendants\",\n        .ml_meth = (PyCFunction) TreeSequence_mean_descendants,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Returns the mean number of nodes 
descending from each node.\" },\n    { .ml_name = \"general_stat\",\n        .ml_meth = (PyCFunction) TreeSequence_general_stat,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Runs the general stats algorithm for a given summary function.\" },\n    { .ml_name = \"diversity\",\n        .ml_meth = (PyCFunction) TreeSequence_diversity,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes diversity within sample sets.\" },\n    { .ml_name = \"allele_frequency_spectrum\",\n        .ml_meth = (PyCFunction) TreeSequence_allele_frequency_spectrum,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes the K-dimensional joint AFS.\" },\n    { .ml_name = \"trait_covariance\",\n        .ml_meth = (PyCFunction) TreeSequence_trait_covariance,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes covariance with traits.\" },\n    { .ml_name = \"trait_correlation\",\n        .ml_meth = (PyCFunction) TreeSequence_trait_correlation,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes correlation with traits.\" },\n    { .ml_name = \"trait_linear_model\",\n        .ml_meth = (PyCFunction) TreeSequence_trait_linear_model,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes coefficients of a linear model for each trait.\" },\n    { .ml_name = \"segregating_sites\",\n        .ml_meth = (PyCFunction) TreeSequence_segregating_sites,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes density of segregating sites within sample sets.\" },\n    { .ml_name = \"divergence\",\n        .ml_meth = (PyCFunction) TreeSequence_divergence,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes divergence between sample sets.\" },\n    { .ml_name = \"genetic_relatedness\",\n        .ml_meth = (PyCFunction) TreeSequence_genetic_relatedness,\n        .ml_flags = METH_VARARGS | 
METH_KEYWORDS,\n        .ml_doc = \"Computes genetic relatedness between sample sets.\" },\n    { .ml_name = \"genetic_relatedness_weighted\",\n        .ml_meth = (PyCFunction) TreeSequence_genetic_relatedness_weighted,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes genetic relatedness between weighted sums of samples.\" },\n    { .ml_name = \"genetic_relatedness_vector\",\n        .ml_meth = (PyCFunction) TreeSequence_genetic_relatedness_vector,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes genetic relatedness matrix-vector products.\" },\n    { .ml_name = \"Y1\",\n        .ml_meth = (PyCFunction) TreeSequence_Y1,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes the Y1 statistic.\" },\n    { .ml_name = \"Y2\",\n        .ml_meth = (PyCFunction) TreeSequence_Y2,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes the Y2 statistic.\" },\n    { .ml_name = \"f2\",\n        .ml_meth = (PyCFunction) TreeSequence_f2,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes the f2 statistic.\" },\n    { .ml_name = \"Y3\",\n        .ml_meth = (PyCFunction) TreeSequence_Y3,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes the Y3 statistic.\" },\n    { .ml_name = \"f3\",\n        .ml_meth = (PyCFunction) TreeSequence_f3,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes the f3 statistic.\" },\n    { .ml_name = \"f4\",\n        .ml_meth = (PyCFunction) TreeSequence_f4,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes the f4 statistic.\" },\n    { .ml_name = \"divergence_matrix\",\n        .ml_meth = (PyCFunction) TreeSequence_divergence_matrix,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes the pairwise divergence matrix.\" },\n    { .ml_name = \"pair_coalescence_counts\",\n        .ml_meth = 
(PyCFunction) TreeSequence_pair_coalescence_counts,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes the number of coalescing pairs per node.\" },\n    { .ml_name = \"pair_coalescence_quantiles\",\n        .ml_meth = (PyCFunction) TreeSequence_pair_coalescence_quantiles,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Estimates quantiles of pair coalescence times.\" },\n    { .ml_name = \"pair_coalescence_rates\",\n        .ml_meth = (PyCFunction) TreeSequence_pair_coalescence_rates,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Estimates rate of pair coalescence within time intervals.\" },\n    { .ml_name = \"split_edges\",\n        .ml_meth = (PyCFunction) TreeSequence_split_edges,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Returns a copy of this tree sequence edges split at time t\" },\n    { .ml_name = \"extend_haplotypes\",\n        .ml_meth = (PyCFunction) TreeSequence_extend_haplotypes,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Extends ancestral haplotypes, creating unary nodes.\" },\n    { .ml_name = \"has_reference_sequence\",\n        .ml_meth = (PyCFunction) TreeSequence_has_reference_sequence,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns True if the TreeSequence has a reference sequence.\" },\n    { .ml_name = \"D_matrix\",\n        .ml_meth = (PyCFunction) TreeSequence_D_matrix,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes the D matrix.\" },\n    { .ml_name = \"D2_matrix\",\n        .ml_meth = (PyCFunction) TreeSequence_D2_matrix,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes the D2 matrix.\" },\n    { .ml_name = \"r2_matrix\",\n        .ml_meth = (PyCFunction) TreeSequence_r2_matrix,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes the r2 matrix.\" },\n    { .ml_name = \"D_prime_matrix\",\n     
   .ml_meth = (PyCFunction) TreeSequence_D_prime_matrix,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes the D_prime matrix.\" },\n    { .ml_name = \"r_matrix\",\n        .ml_meth = (PyCFunction) TreeSequence_r_matrix,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes the r matrix.\" },\n    { .ml_name = \"Dz_matrix\",\n        .ml_meth = (PyCFunction) TreeSequence_Dz_matrix,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes the Dz matrix.\" },\n    { .ml_name = \"pi2_matrix\",\n        .ml_meth = (PyCFunction) TreeSequence_pi2_matrix,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes the pi2 matrix.\" },\n    { .ml_name = \"D2_unbiased_matrix\",\n        .ml_meth = (PyCFunction) TreeSequence_D2_unbiased_matrix,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes the unbiased D2 matrix.\" },\n    { .ml_name = \"Dz_unbiased_matrix\",\n        .ml_meth = (PyCFunction) TreeSequence_Dz_unbiased_matrix,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes the unbiased Dz matrix.\" },\n    { .ml_name = \"pi2_unbiased_matrix\",\n        .ml_meth = (PyCFunction) TreeSequence_pi2_unbiased_matrix,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes the unbiased pi2 matrix.\" },\n    { .ml_name = \"D2_ij_matrix\",\n        .ml_meth = (PyCFunction) TreeSequence_D2_ij_matrix,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes the two-way D^2 matrix.\" },\n    { .ml_name = \"D2_ij_unbiased_matrix\",\n        .ml_meth = (PyCFunction) TreeSequence_D2_ij_unbiased_matrix,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Computes the two-way unbiased D^2 matrix.\" },\n    { .ml_name = \"r2_ij_matrix\",\n        .ml_meth = (PyCFunction) TreeSequence_r2_ij_matrix,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        
.ml_doc = \"Computes the two-way r^2 matrix.\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyGetSetDef TreeSequence_getsetters[] = {\n    { .name = \"reference_sequence\",\n        .get = (getter) TreeSequence_get_reference_sequence,\n        .doc = \"The reference sequence.\" },\n    { .name = \"individuals_flags\",\n        .get = (getter) TreeSequence_get_individuals_flags,\n        .doc = \"The individual flags array\" },\n    { .name = \"individuals_metadata\",\n        .get = (getter) TreeSequence_get_individuals_metadata,\n        .doc = \"The individual metadata array\" },\n    { .name = \"individuals_metadata_offset\",\n        .get = (getter) TreeSequence_get_individuals_metadata_offset,\n        .doc = \"The individual metadata offset array\" },\n    { .name = \"individuals_location\",\n        .get = (getter) TreeSequence_get_individuals_location,\n        .doc = \"The individual location array\" },\n    { .name = \"individuals_location_offset\",\n        .get = (getter) TreeSequence_get_individuals_location_offset,\n        .doc = \"The individual location offset array\" },\n    { .name = \"individuals_parents\",\n        .get = (getter) TreeSequence_get_individuals_parents,\n        .doc = \"The individual parents array\" },\n    { .name = \"individuals_parents_offset\",\n        .get = (getter) TreeSequence_get_individuals_parents_offset,\n        .doc = \"The individual parents offset array\" },\n    { .name = \"nodes_time\",\n        .get = (getter) TreeSequence_get_nodes_time,\n        .doc = \"The node time array\" },\n    { .name = \"nodes_flags\",\n        .get = (getter) TreeSequence_get_nodes_flags,\n        .doc = \"The node flags array\" },\n    { .name = \"nodes_population\",\n        .get = (getter) TreeSequence_get_nodes_population,\n        .doc = \"The node population array\" },\n    { .name = \"nodes_individual\",\n        .get = (getter) TreeSequence_get_nodes_individual,\n        .doc = \"The node individual array\" },\n    { 
.name = \"nodes_metadata\",\n        .get = (getter) TreeSequence_get_nodes_metadata,\n        .doc = \"The node metadata array\" },\n    { .name = \"nodes_metadata_offset\",\n        .get = (getter) TreeSequence_get_nodes_metadata_offset,\n        .doc = \"The node metadata offset array\" },\n    { .name = \"edges_left\",\n        .get = (getter) TreeSequence_get_edges_left,\n        .doc = \"The edge left array\" },\n    { .name = \"edges_right\",\n        .get = (getter) TreeSequence_get_edges_right,\n        .doc = \"The edge right array\" },\n    { .name = \"edges_parent\",\n        .get = (getter) TreeSequence_get_edges_parent,\n        .doc = \"The edge parent array\" },\n    { .name = \"edges_child\",\n        .get = (getter) TreeSequence_get_edges_child,\n        .doc = \"The edge child array\" },\n    { .name = \"edges_metadata\",\n        .get = (getter) TreeSequence_get_edges_metadata,\n        .doc = \"The edge metadata array\" },\n    { .name = \"edges_metadata_offset\",\n        .get = (getter) TreeSequence_get_edges_metadata_offset,\n        .doc = \"The edge metadata offset array\" },\n    { .name = \"sites_position\",\n        .get = (getter) TreeSequence_get_sites_position,\n        .doc = \"The site position array\" },\n#if HAVE_NUMPY_2\n    { .name = \"sites_ancestral_state_string\",\n        .get = (getter) TreeSequence_get_sites_ancestral_state_string,\n        .doc = \"The site ancestral state array - StringDType\" },\n#endif\n    { .name = \"sites_ancestral_state\",\n        .get = (getter) TreeSequence_get_sites_ancestral_state,\n        .doc = \"The site ancestral state data array\" },\n    { .name = \"sites_ancestral_state_offset\",\n        .get = (getter) TreeSequence_get_sites_ancestral_state_offset,\n        .doc = \"The site ancestral state offset array\" },\n    { .name = \"sites_metadata\",\n        .get = (getter) TreeSequence_get_sites_metadata,\n        .doc = \"The site metadata array\" },\n    { .name = 
\"sites_metadata_offset\",\n        .get = (getter) TreeSequence_get_sites_metadata_offset,\n        .doc = \"The site metadata offset array\" },\n    { .name = \"mutations_site\",\n        .get = (getter) TreeSequence_get_mutations_site,\n        .doc = \"The mutation site array\" },\n    { .name = \"mutations_node\",\n        .get = (getter) TreeSequence_get_mutations_node,\n        .doc = \"The mutation node array\" },\n    { .name = \"mutations_parent\",\n        .get = (getter) TreeSequence_get_mutations_parent,\n        .doc = \"The mutation parent array\" },\n    { .name = \"mutations_time\",\n        .get = (getter) TreeSequence_get_mutations_time,\n        .doc = \"The mutation time array\" },\n#if HAVE_NUMPY_2\n    { .name = \"mutations_derived_state_string\",\n        .get = (getter) TreeSequence_get_mutations_derived_state_string,\n        .doc = \"The mutation derived state array - StringDType\" },\n    { .name = \"mutations_inherited_state_string\",\n        .get = (getter) TreeSequence_get_mutations_inherited_state_string,\n        .doc = \"The mutation inherited state array - StringDType\" },\n#endif\n    { .name = \"mutations_derived_state\",\n        .get = (getter) TreeSequence_get_mutations_derived_state,\n        .doc = \"The mutation derived state data array\" },\n    { .name = \"mutations_derived_state_offset\",\n        .get = (getter) TreeSequence_get_mutations_derived_state_offset,\n        .doc = \"The mutation derived state offset array\" },\n    { .name = \"mutations_metadata\",\n        .get = (getter) TreeSequence_get_mutations_metadata,\n        .doc = \"The mutation metadata array\" },\n    { .name = \"mutations_metadata_offset\",\n        .get = (getter) TreeSequence_get_mutations_metadata_offset,\n        .doc = \"The mutation metadata offset array\" },\n    { .name = \"migrations_left\",\n        .get = (getter) TreeSequence_get_migrations_left,\n        .doc = \"The migration left array\" },\n    { .name = 
\"migrations_right\",\n        .get = (getter) TreeSequence_get_migrations_right,\n        .doc = \"The migration right array\" },\n    { .name = \"migrations_node\",\n        .get = (getter) TreeSequence_get_migrations_node,\n        .doc = \"The migration node array\" },\n    { .name = \"migrations_source\",\n        .get = (getter) TreeSequence_get_migrations_source,\n        .doc = \"The migration source array\" },\n    { .name = \"migrations_dest\",\n        .get = (getter) TreeSequence_get_migrations_dest,\n        .doc = \"The migration dest array\" },\n    { .name = \"migrations_time\",\n        .get = (getter) TreeSequence_get_migrations_time,\n        .doc = \"The migration time array\" },\n    { .name = \"migrations_metadata\",\n        .get = (getter) TreeSequence_get_migrations_metadata,\n        .doc = \"The migration metadata array\" },\n    { .name = \"migrations_metadata_offset\",\n        .get = (getter) TreeSequence_get_migrations_metadata_offset,\n        .doc = \"The migration metadata offset array\" },\n    { .name = \"populations_metadata\",\n        .get = (getter) TreeSequence_get_populations_metadata,\n        .doc = \"The populations metadata array\" },\n    { .name = \"populations_metadata_offset\",\n        .get = (getter) TreeSequence_get_populations_metadata_offset,\n        .doc = \"The populations metadata offset array\" },\n    { .name = \"indexes_edge_insertion_order\",\n        .get = (getter) TreeSequence_get_indexes_edge_insertion_order,\n        .doc = \"The edge insertion order array\" },\n    { .name = \"indexes_edge_removal_order\",\n        .get = (getter) TreeSequence_get_indexes_edge_removal_order,\n        .doc = \"The edge removal order array\" },\n#if HAVE_NUMPY_2\n    { .name = \"provenances_timestamp_string\",\n        .get = (getter) TreeSequence_get_provenances_timestamp_string,\n        .doc = \"The provenance timestamp array - StringDType\" },\n    { .name = \"provenances_record_string\",\n        .get = 
(getter) TreeSequence_get_provenances_record_string,\n        .doc = \"The provenance record array - StringDType\" },\n#endif\n    { .name = \"provenances_record\",\n        .get = (getter) TreeSequence_get_provenances_record,\n        .doc = \"The provenance record data array\" },\n    { .name = \"provenances_record_offset\",\n        .get = (getter) TreeSequence_get_provenances_record_offset,\n        .doc = \"The provenance record offset array\" },\n    { .name = \"provenances_timestamp\",\n        .get = (getter) TreeSequence_get_provenances_timestamp,\n        .doc = \"The provenance timestamp data array)\" },\n    { .name = \"provenances_timestamp_offset\",\n        .get = (getter) TreeSequence_get_provenances_timestamp_offset,\n        .doc = \"The provenance timestamp offset array\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyTypeObject TreeSequenceType = {\n    // clang-format off\n    PyVarObject_HEAD_INIT(NULL, 0)\n    .tp_name = \"_tskit.TreeSequence\",\n    .tp_basicsize = sizeof(TreeSequence),\n    .tp_dealloc = (destructor) TreeSequence_dealloc,\n    .tp_flags = Py_TPFLAGS_DEFAULT,\n    .tp_doc = \"TreeSequence objects\",\n    .tp_methods = TreeSequence_methods,\n    .tp_getset = TreeSequence_getsetters,\n    .tp_init = (initproc) TreeSequence_init,\n    .tp_new = PyType_GenericNew,\n    // clang-format on\n};\n\n/*===================================================================\n * Tree\n *===================================================================\n */\n\nstatic int\nTree_check_state(Tree *self)\n{\n    int ret = 0;\n    if (self->tree == NULL) {\n        PyErr_SetString(PyExc_SystemError, \"tree not initialised\");\n        ret = -1;\n    }\n    return ret;\n}\n\nstatic int\nTree_check_bounds(Tree *self, int node)\n{\n    int ret = 0;\n    if (node < 0 || node > (int) self->tree->num_nodes) {\n        PyErr_SetString(PyExc_ValueError, \"Node index out of bounds\");\n        ret = -1;\n    }\n    return ret;\n}\n\nstatic 
void\nTree_dealloc(Tree *self)\n{\n    if (self->tree != NULL) {\n        tsk_tree_free(self->tree);\n        PyMem_Free(self->tree);\n        self->tree = NULL;\n    }\n    Py_XDECREF(self->tree_sequence);\n    Py_TYPE(self)->tp_free((PyObject *) self);\n}\n\nstatic int\nTree_init(Tree *self, PyObject *args, PyObject *kwds)\n{\n    int ret = -1;\n    int err;\n    static char *kwlist[] = { \"tree_sequence\", \"options\", \"tracked_samples\", NULL };\n    PyObject *py_tracked_samples = NULL;\n    TreeSequence *tree_sequence = NULL;\n    tsk_id_t *tracked_samples = NULL;\n    unsigned int options = 0;\n    tsk_size_t j, num_tracked_samples, num_nodes;\n    PyObject *item;\n\n    self->tree = NULL;\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"O!|IO!\", kwlist, &TreeSequenceType,\n            &tree_sequence, &options, &PyList_Type, &py_tracked_samples)) {\n        goto out;\n    }\n    self->tree_sequence = tree_sequence;\n    Py_INCREF(self->tree_sequence);\n    if (TreeSequence_check_state(tree_sequence) != 0) {\n        goto out;\n    }\n    num_nodes = tsk_treeseq_get_num_nodes(tree_sequence->tree_sequence);\n    num_tracked_samples = 0;\n    if (py_tracked_samples != NULL) {\n        if ((options & TSK_NO_SAMPLE_COUNTS)) {\n            PyErr_SetString(PyExc_ValueError,\n                \"Cannot specified tracked_samples without count_samples flag\");\n            goto out;\n        }\n        num_tracked_samples = PyList_Size(py_tracked_samples);\n    }\n    tracked_samples = PyMem_Malloc(num_tracked_samples * sizeof(tsk_id_t));\n    if (tracked_samples == NULL) {\n        PyErr_NoMemory();\n        goto out;\n    }\n    for (j = 0; j < num_tracked_samples; j++) {\n        item = PyList_GetItem(py_tracked_samples, j);\n        if (!PyNumber_Check(item)) {\n            PyErr_SetString(PyExc_TypeError, \"sample must be a number\");\n            goto out;\n        }\n        tracked_samples[j] = (tsk_id_t) PyLong_AsLong(item);\n        if (tracked_samples[j] 
< 0 || tracked_samples[j] >= (tsk_id_t) num_nodes) {\n            PyErr_SetString(PyExc_ValueError, \"samples must be valid nodes\");\n            goto out;\n        }\n    }\n    self->tree = PyMem_Malloc(sizeof(tsk_tree_t));\n    if (self->tree == NULL) {\n        PyErr_NoMemory();\n        goto out;\n    }\n    err = tsk_tree_init(self->tree, tree_sequence->tree_sequence, (tsk_flags_t) options);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    if (!(options & TSK_NO_SAMPLE_COUNTS)) {\n        err = tsk_tree_set_tracked_samples(\n            self->tree, num_tracked_samples, tracked_samples);\n        if (err != 0) {\n            handle_library_error(err);\n            goto out;\n        }\n    }\n    ret = 0;\nout:\n    if (tracked_samples != NULL) {\n        PyMem_Free(tracked_samples);\n    }\n    return ret;\n}\n\nstatic PyObject *\nTree_first(Tree *self)\n{\n    PyObject *ret = NULL;\n    int err;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    err = tsk_tree_first(self->tree);\n    if (err < 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_last(Tree *self)\n{\n    PyObject *ret = NULL;\n    int err;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    err = tsk_tree_last(self->tree);\n    if (err < 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_next(Tree *self)\n{\n    PyObject *ret = NULL;\n    int err;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    err = tsk_tree_next(self->tree);\n    if (err < 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"i\", err == 1);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_prev(Tree *self)\n{\n    PyObject *ret = NULL;\n    int err;\n\n    if 
(Tree_check_state(self) != 0) {\n        goto out;\n    }\n    err = tsk_tree_prev(self->tree);\n    if (err < 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"i\", err == 1);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_seek(Tree *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    tsk_flags_t options = 0;\n    int skip = false;\n    double position;\n    int err;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"d|i\", &position, &skip)) {\n        goto out;\n    }\n    if (skip) {\n        options |= TSK_SEEK_SKIP;\n    }\n    err = tsk_tree_seek(self->tree, position, options);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_seek_index(Tree *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    tsk_id_t index = 0;\n    tsk_flags_t options = 0;\n    int skip = false;\n    int err;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"O&|i\", tsk_id_converter, &index, &skip)) {\n        goto out;\n    }\n    if (skip) {\n        options |= TSK_SEEK_SKIP;\n    }\n    err = tsk_tree_seek_index(self->tree, index, options);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_clear(Tree *self)\n{\n    PyObject *ret = NULL;\n    int err;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    err = tsk_tree_clear(self->tree);\n    if (err < 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_sample_size(Tree *self)\n{\n    PyObject *ret = NULL;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"n\", (Py_ssize_t) 
self->tree->tree_sequence->num_samples);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_num_roots(Tree *self)\n{\n    PyObject *ret = NULL;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"n\", (Py_ssize_t) tsk_tree_get_num_roots(self->tree));\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_virtual_root(Tree *self)\n{\n    PyObject *ret = NULL;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"n\", (Py_ssize_t) self->tree->virtual_root);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_num_edges(Tree *self)\n{\n    PyObject *ret = NULL;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"n\", (Py_ssize_t) self->tree->num_edges);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_total_branch_length(Tree *self)\n{\n    PyObject *ret = NULL;\n    double length;\n    int err;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    err = tsk_tree_get_total_branch_length(self->tree, TSK_NULL, &length);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"d\", length);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_index(Tree *self)\n{\n    PyObject *ret = NULL;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"n\", (Py_ssize_t) self->tree->index);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_left(Tree *self)\n{\n    PyObject *ret = NULL;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"d\", self->tree->interval.left);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_right(Tree *self)\n{\n    PyObject *ret = NULL;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"d\", self->tree->interval.right);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_options(Tree 
*self)\n{\n    PyObject *ret = NULL;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"i\", self->tree->options);\nout:\n    return ret;\n}\n\nstatic int\nTree_get_node_argument(Tree *self, PyObject *args, int *node)\n{\n    int ret = -1;\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"I\", node)) {\n        goto out;\n    }\n    if (Tree_check_bounds(self, *node)) {\n        goto out;\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_is_sample(Tree *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    int node;\n\n    if (Tree_get_node_argument(self, args, &node) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"i\", tsk_tree_is_sample(self->tree, (tsk_id_t) node));\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_is_descendant(Tree *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    int u, v;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"II\", &u, &v)) {\n        goto out;\n    }\n    if (Tree_check_bounds(self, (tsk_id_t) u)) {\n        goto out;\n    }\n    if (Tree_check_bounds(self, (tsk_id_t) v)) {\n        goto out;\n    }\n    ret = Py_BuildValue(\n        \"i\", tsk_tree_is_descendant(self->tree, (tsk_id_t) u, (tsk_id_t) v));\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_parent(Tree *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    tsk_id_t parent;\n    int node;\n\n    if (Tree_get_node_argument(self, args, &node) != 0) {\n        goto out;\n    }\n    parent = self->tree->parent[node];\n    ret = Py_BuildValue(\"i\", (int) parent);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_population(Tree *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    tsk_node_t node;\n    int node_id, err;\n\n    if (Tree_get_node_argument(self, args, &node_id) != 0) {\n        goto out;\n    }\n    err = 
tsk_treeseq_get_node(self->tree->tree_sequence, node_id, &node);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"i\", (int) node.population);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_time(Tree *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    double time;\n    int node_id, err;\n\n    if (Tree_get_node_argument(self, args, &node_id) != 0) {\n        goto out;\n    }\n    err = tsk_tree_get_time(self->tree, node_id, &time);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"d\", time);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_left_child(Tree *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    tsk_id_t child;\n    int node;\n\n    if (Tree_get_node_argument(self, args, &node) != 0) {\n        goto out;\n    }\n    child = self->tree->left_child[node];\n    ret = Py_BuildValue(\"i\", (int) child);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_right_child(Tree *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    tsk_id_t child;\n    int node;\n\n    if (Tree_get_node_argument(self, args, &node) != 0) {\n        goto out;\n    }\n    child = self->tree->right_child[node];\n    ret = Py_BuildValue(\"i\", (int) child);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_left_sib(Tree *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    tsk_id_t sib;\n    int node;\n\n    if (Tree_get_node_argument(self, args, &node) != 0) {\n        goto out;\n    }\n    sib = self->tree->left_sib[node];\n    ret = Py_BuildValue(\"i\", (int) sib);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_right_sib(Tree *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    tsk_id_t sib;\n    int node;\n\n    if (Tree_get_node_argument(self, args, &node) != 0) {\n        goto out;\n    }\n    sib = self->tree->right_sib[node];\n    ret = Py_BuildValue(\"i\", (int) sib);\nout:\n    return 
ret;\n}\n\nstatic PyObject *\nTree_get_edge(Tree *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    tsk_id_t edge_id;\n    int node;\n\n    if (Tree_get_node_argument(self, args, &node) != 0) {\n        goto out;\n    }\n    edge_id = self->tree->edge[node];\n    ret = Py_BuildValue(\"i\", (int) edge_id);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_children(Tree *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    int node;\n    tsk_id_t u;\n    tsk_size_t j, num_children;\n    tsk_id_t *children = NULL;\n\n    if (Tree_get_node_argument(self, args, &node) != 0) {\n        goto out;\n    }\n    num_children = 0;\n    for (u = self->tree->left_child[node]; u != TSK_NULL; u = self->tree->right_sib[u]) {\n        num_children++;\n    }\n    children = PyMem_Malloc(num_children * sizeof(tsk_id_t));\n    if (children == NULL) {\n        PyErr_NoMemory();\n        goto out;\n    }\n    j = 0;\n    for (u = self->tree->left_child[node]; u != TSK_NULL; u = self->tree->right_sib[u]) {\n        children[j] = u;\n        j++;\n    }\n    ret = convert_node_id_list(children, num_children);\nout:\n    if (children != NULL) {\n        PyMem_Free(children);\n    }\n    return ret;\n}\n\n/* Returns the depth of the specified node as a Python int, or NULL with an\n * exception set if the arguments are invalid or the library call fails. */\nstatic PyObject *\nTree_depth(Tree *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    int depth;\n    int node, err;\n\n    if (Tree_get_node_argument(self, args, &node) != 0) {\n        goto out;\n    }\n    err = tsk_tree_get_depth(self->tree, node, &depth);\n    /* Check the library status code; depth is only valid when err == 0. */\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"i\", depth);\nout:\n    return ret;\n}\n\nstatic bool\nTree_check_sample_list(Tree *self)\n{\n    bool ret = tsk_tree_has_sample_lists(self->tree);\n    if (!ret) {\n        PyErr_SetString(PyExc_ValueError,\n            \"Sample lists not supported. 
Please set sample_lists=True.\");\n    }\n    return ret;\n}\n\nstatic PyObject *\nTree_get_right_sample(Tree *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    tsk_id_t sample_index;\n    int node;\n\n    if (Tree_get_node_argument(self, args, &node) != 0) {\n        goto out;\n    }\n    if (!Tree_check_sample_list(self)) {\n        goto out;\n    }\n    sample_index = self->tree->right_sample[node];\n    ret = Py_BuildValue(\"i\", (int) sample_index);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_left_sample(Tree *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    tsk_id_t sample_index;\n    int node;\n\n    if (Tree_get_node_argument(self, args, &node) != 0) {\n        goto out;\n    }\n    if (!Tree_check_sample_list(self)) {\n        goto out;\n    }\n    sample_index = self->tree->left_sample[node];\n    ret = Py_BuildValue(\"i\", (int) sample_index);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_next_sample(Tree *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    tsk_id_t out_index;\n    int in_index, num_samples;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"I\", &in_index)) {\n        goto out;\n    }\n    num_samples = (int) tsk_treeseq_get_num_samples(self->tree->tree_sequence);\n    if (in_index < 0 || in_index >= num_samples) {\n        PyErr_SetString(PyExc_ValueError, \"Sample index out of bounds\");\n        goto out;\n    }\n    if (!Tree_check_sample_list(self)) {\n        goto out;\n    }\n    out_index = self->tree->next_sample[in_index];\n    ret = Py_BuildValue(\"i\", (int) out_index);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_mrca(Tree *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    int err;\n    tsk_id_t mrca;\n    int u, v;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"ii\", &u, &v)) {\n        goto out;\n    }\n    if (Tree_check_bounds(self, u)) {\n        goto 
out;\n    }\n    if (Tree_check_bounds(self, v)) {\n        goto out;\n    }\n    err = tsk_tree_get_mrca(self->tree, (tsk_id_t) u, (tsk_id_t) v, &mrca);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"i\", (int) mrca);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_num_children(Tree *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    tsk_size_t num_children;\n    int node;\n\n    if (Tree_get_node_argument(self, args, &node) != 0) {\n        goto out;\n    }\n    num_children = self->tree->num_children[node];\n    ret = Py_BuildValue(\"i\", (int) num_children);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_num_samples(Tree *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    tsk_size_t num_samples;\n    int err, node;\n\n    if (Tree_get_node_argument(self, args, &node) != 0) {\n        goto out;\n    }\n    err = tsk_tree_get_num_samples(self->tree, (tsk_id_t) node, &num_samples);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"I\", (unsigned int) num_samples);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_num_tracked_samples(Tree *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    tsk_size_t num_tracked_samples;\n    int err, node;\n\n    if (Tree_get_node_argument(self, args, &node) != 0) {\n        goto out;\n    }\n    err = tsk_tree_get_num_tracked_samples(\n        self->tree, (tsk_id_t) node, &num_tracked_samples);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"I\", (unsigned int) num_tracked_samples);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_sites(Tree *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    ret = convert_sites(self->tree->sites, self->tree->sites_length);\nout:\n    return ret;\n}\n\nstatic PyObject 
*\nTree_get_num_sites(Tree *self)\n{\n    PyObject *ret = NULL;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"n\", (Py_ssize_t) self->tree->sites_length);\nout:\n    return ret;\n}\n\n/* Returns the newick string for the subtree rooted at root, written into a\n * temporary buffer of buffer_size bytes. Returns NULL with an exception set\n * on invalid arguments, allocation failure or a library error. */\nstatic PyObject *\nTree_get_newick(Tree *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    static char *kwlist[]\n        = { \"root\", \"precision\", \"buffer_size\", \"legacy_ms_labels\", NULL };\n    int precision = 14;\n    /* We have a default bufsize for convenience, but the high-level code\n     * should set this by computing an upper bound. */\n    Py_ssize_t buffer_size = 1024;\n    int root, err;\n    char *buffer = NULL;\n    int legacy_ms_labels = false;\n    tsk_flags_t options = 0;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"i|ini\", kwlist, &root, &precision,\n            &buffer_size, &legacy_ms_labels)) {\n        goto out;\n    }\n    if (precision < 0 || precision > 17) {\n        PyErr_SetString(\n            PyExc_ValueError, \"Precision must be between 0 and 17, inclusive\");\n        goto out;\n    }\n    if (buffer_size <= 0) {\n        PyErr_SetString(PyExc_ValueError, \"Buffer size must be > 0\");\n        goto out;\n    }\n    buffer = PyMem_Malloc(buffer_size);\n    if (buffer == NULL) {\n        PyErr_NoMemory();\n        /* Never hand a NULL buffer to the library. */\n        goto out;\n    }\n    if (legacy_ms_labels) {\n        options |= TSK_NEWICK_LEGACY_MS_LABELS;\n    }\n    err = tsk_convert_newick(\n        self->tree, (tsk_id_t) root, precision, options, (size_t) buffer_size, buffer);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = PyUnicode_FromString(buffer);\nout:\n    if (buffer != NULL) {\n        PyMem_Free(buffer);\n    }\n    return ret;\n}\n\nstatic PyObject *\nTree_map_mutations(Tree *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    PyObject *genotypes = NULL;\n    PyObject *py_transitions = 
NULL;\n    PyObject *py_ancestral_state = Py_None;\n    PyArrayObject *genotypes_array = NULL;\n    static char *kwlist[] = { \"genotypes\", \"ancestral_state\", NULL };\n    int32_t ancestral_state;\n    tsk_state_transition_t *transitions = NULL;\n    tsk_size_t num_transitions;\n    npy_intp *shape;\n    tsk_flags_t options = 0;\n    int err;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(\n            args, kwds, \"O|O\", kwlist, &genotypes, &py_ancestral_state)) {\n        goto out;\n    }\n    genotypes_array = (PyArrayObject *) PyArray_FROMANY(\n        genotypes, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);\n    if (genotypes_array == NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(genotypes_array);\n    if ((tsk_size_t) shape[0]\n        != tsk_treeseq_get_num_samples(self->tree->tree_sequence)) {\n        PyErr_SetString(\n            PyExc_ValueError, \"Genotypes array must have 1D (num_samples,) array\");\n        goto out;\n    }\n    if (py_ancestral_state != Py_None) {\n        options = TSK_MM_FIXED_ANCESTRAL_STATE;\n        if (!PyNumber_Check(py_ancestral_state)) {\n            PyErr_SetString(PyExc_TypeError, \"ancestral_state must be a number\");\n            goto out;\n        }\n        /* Note this does allow large numbers to overflow, but higher levels\n         * should be checking for these errors anyway. 
*/\n        ancestral_state = (int32_t) PyLong_AsLong(py_ancestral_state);\n    }\n\n    err = tsk_tree_map_mutations(self->tree, (int32_t *) PyArray_DATA(genotypes_array),\n        NULL, options, &ancestral_state, &num_transitions, &transitions);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    py_transitions = convert_transitions(transitions, num_transitions);\n    if (py_transitions == NULL) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"iO\", ancestral_state, py_transitions);\nout:\n    if (transitions != NULL) {\n        free(transitions);\n    }\n    Py_XDECREF(genotypes_array);\n    Py_XDECREF(py_transitions);\n    return ret;\n}\n\n/* Forward declaration */\nstatic PyTypeObject TreeType;\n\nstatic PyObject *\nTree_equals(Tree *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    Tree *other = NULL;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"O!\", &TreeType, &other)) {\n        goto out;\n    }\n    if (Tree_check_state(other) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"i\", tsk_tree_equals(self->tree, other->tree));\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_copy(Tree *self)\n{\n    int err;\n    PyObject *ret = NULL;\n    PyObject *args = NULL;\n    Tree *copy = NULL;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    args = Py_BuildValue(\"(O,i)\", self->tree_sequence, self->tree->options);\n    if (args == NULL) {\n        goto out;\n    }\n    copy = (Tree *) PyObject_CallObject((PyObject *) &TreeType, args);\n    if (copy == NULL) {\n        goto out;\n    }\n    err = tsk_tree_copy(self->tree, copy->tree, TSK_NO_INIT);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) copy;\n    copy = NULL;\nout:\n    Py_XDECREF(args);\n    Py_XDECREF(copy);\n    return ret;\n}\n\nstatic PyObject *\nTree_get_kc_distance(Tree *self, PyObject *args, 
PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    Tree *other = NULL;\n    static char *kwlist[] = { \"other\", \"lambda_\", NULL };\n    double lambda = 0;\n    double result;\n    int err;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(\n            args, kwds, \"O!d\", kwlist, &TreeType, &other, &lambda)) {\n        goto out;\n    }\n    /* other passed the type check but may still be uninitialised. */\n    if (Tree_check_state(other) != 0) {\n        goto out;\n    }\n    err = tsk_tree_kc_distance(self->tree, other->tree, lambda, &result);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"d\", result);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_sackin_index(Tree *self)\n{\n    PyObject *ret = NULL;\n    int err;\n    tsk_size_t result;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n\n    err = tsk_tree_sackin_index(self->tree, &result);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"K\", (unsigned long long) result);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_colless_index(Tree *self)\n{\n    PyObject *ret = NULL;\n    int err;\n    tsk_size_t result;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n\n    err = tsk_tree_colless_index(self->tree, &result);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"K\", (unsigned long long) result);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_b1_index(Tree *self)\n{\n    PyObject *ret = NULL;\n    int err;\n    double result;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n\n    err = tsk_tree_b1_index(self->tree, &result);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"d\", result);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_b2_index(Tree *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    int err;\n    double base;\n    double 
result;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"d\", &base)) {\n        goto out;\n    }\n    err = tsk_tree_b2_index(self->tree, base, &result);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"d\", result);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_num_lineages(Tree *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    int err;\n    double t;\n    tsk_size_t result;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"d\", &t)) {\n        goto out;\n    }\n    err = tsk_tree_num_lineages(self->tree, t, &result);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"K\", (unsigned long long) result);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_root_threshold(Tree *self)\n{\n    PyObject *ret = NULL;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"I\", (unsigned int) tsk_tree_get_root_threshold(self->tree));\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_set_root_threshold(Tree *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    int err;\n    unsigned int threshold = 0;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"I\", &threshold)) {\n        goto out;\n    }\n\n    err = tsk_tree_set_root_threshold(self->tree, threshold);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\ntypedef int tsk_traversal_func(\n    const tsk_tree_t *self, tsk_id_t root, tsk_id_t *nodes, tsk_size_t *num_nodes);\n\nstatic PyObject *\nTree_get_traversal_array(Tree *self, PyObject *args, tsk_traversal_func *func)\n{\n    PyObject *ret = NULL;\n    PyArrayObject *array = NULL;\n    int32_t *data = NULL;\n    int root = 
TSK_NULL;\n    npy_intp dims;\n    tsk_size_t length;\n    int err;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"i\", &root)) {\n        goto out;\n    }\n    data = PyDataMem_NEW(tsk_tree_get_size_bound(self->tree) * sizeof(*data));\n    if (data == NULL) {\n        ret = PyErr_NoMemory();\n        goto out;\n    }\n    err = func(self->tree, (tsk_id_t) root, data, &length);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    dims = (npy_intp) length;\n    array = (PyArrayObject *) PyArray_SimpleNewFromData(1, &dims, NPY_INT32, data);\n    if (array == NULL) {\n        goto out;\n    }\n    /* Set the OWNDATA flag so that the data will be freed with the array */\n    PyArray_ENABLEFLAGS(array, NPY_ARRAY_OWNDATA);\n    /* Not strictly necessary since we're creating a new array, but let's\n     * keep the door open to future optimisations. */\n    PyArray_CLEARFLAGS(array, NPY_ARRAY_WRITEABLE);\n\n    ret = (PyObject *) array;\n    data = NULL;\n    array = NULL;\nout:\n    Py_XDECREF(array);\n    if (data != NULL) {\n        PyDataMem_FREE(data);\n    }\n    return ret;\n}\n\nstatic PyObject *\nTree_get_preorder(Tree *self, PyObject *args)\n{\n    return Tree_get_traversal_array(self, args, tsk_tree_preorder_from);\n}\n\nstatic PyObject *\nTree_get_postorder(Tree *self, PyObject *args)\n{\n    return Tree_get_traversal_array(self, args, tsk_tree_postorder_from);\n}\n\n/* The x_array properties are the high-performance zero-copy interface to the\n * corresponding arrays in the tsk_tree object. 
We use properties and\n * return a new array each time rather than trying to create a single array\n * at Tree initialisation time to avoid a circular reference counting loop,\n * which (it seems) even cyclic garbage collection support can't resolve.\n */\nstatic PyObject *\nTree_make_array(Tree *self, int dtype, void *data)\n{\n    return make_owned_array((PyObject *) self, self->tree->num_nodes + 1, dtype, data);\n}\n\nstatic PyObject *\nTree_get_parent_array(Tree *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Tree_make_array(self, NPY_INT32, self->tree->parent);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_left_child_array(Tree *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Tree_make_array(self, NPY_INT32, self->tree->left_child);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_right_child_array(Tree *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Tree_make_array(self, NPY_INT32, self->tree->right_child);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_left_sib_array(Tree *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Tree_make_array(self, NPY_INT32, self->tree->left_sib);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_right_sib_array(Tree *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Tree_make_array(self, NPY_INT32, self->tree->right_sib);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_num_children_array(Tree *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Tree_make_array(self, NPY_INT32, 
self->tree->num_children);\nout:\n    return ret;\n}\n\nstatic PyObject *\nTree_get_edge_array(Tree *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (Tree_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Tree_make_array(self, NPY_INT32, self->tree->edge);\nout:\n    return ret;\n}\n\nstatic PyGetSetDef Tree_getsetters[]\n    = { { .name = \"parent_array\",\n            .get = (getter) Tree_get_parent_array,\n            .doc = \"The parent array in the quintuply linked tree.\" },\n          { .name = \"left_child_array\",\n              .get = (getter) Tree_get_left_child_array,\n              .doc = \"The left_child array in the quintuply linked tree.\" },\n          { .name = \"right_child_array\",\n              .get = (getter) Tree_get_right_child_array,\n              .doc = \"The right_child array in the quintuply linked tree.\" },\n          { .name = \"left_sib_array\",\n              .get = (getter) Tree_get_left_sib_array,\n              .doc = \"The left_sib array in the quintuply linked tree.\" },\n          { .name = \"right_sib_array\",\n              .get = (getter) Tree_get_right_sib_array,\n              .doc = \"The right_sib array in the quintuply linked tree.\" },\n          { .name = \"num_children_array\",\n              .get = (getter) Tree_get_num_children_array,\n              .doc = \"The num_children array in the quintuply linked tree.\" },\n          { .name = \"edge_array\",\n              .get = (getter) Tree_get_edge_array,\n              .doc = \"The edge array in the quintuply linked tree.\" },\n          { NULL } };\n\nstatic PyMethodDef Tree_methods[] = {\n    { .ml_name = \"first\",\n        .ml_meth = (PyCFunction) Tree_first,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Sets this tree to the first in the sequence.\" },\n    { .ml_name = \"last\",\n        .ml_meth = (PyCFunction) Tree_last,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Sets this tree to the last in the sequence.\" 
},\n    { .ml_name = \"prev\",\n        .ml_meth = (PyCFunction) Tree_prev,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Sets this tree to the previous one in the sequence.\" },\n    { .ml_name = \"next\",\n        .ml_meth = (PyCFunction) Tree_next,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Sets this tree to the next one in the sequence.\" },\n    { .ml_name = \"seek\",\n        .ml_meth = (PyCFunction) Tree_seek,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Seeks to the tree at the specified position\" },\n    { .ml_name = \"seek_index\",\n        .ml_meth = (PyCFunction) Tree_seek_index,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Seeks to the tree at the specified index\" },\n    { .ml_name = \"clear\",\n        .ml_meth = (PyCFunction) Tree_clear,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Resets this tree back to the cleared null state.\" },\n    { .ml_name = \"get_sample_size\",\n        .ml_meth = (PyCFunction) Tree_get_sample_size,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the number of samples in this tree.\" },\n    { .ml_name = \"get_num_roots\",\n        .ml_meth = (PyCFunction) Tree_get_num_roots,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the number of roots in this tree.\" },\n    { .ml_name = \"get_index\",\n        .ml_meth = (PyCFunction) Tree_get_index,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the index this tree occupies within the tree sequence.\" },\n    { .ml_name = \"get_virtual_root\",\n        .ml_meth = (PyCFunction) Tree_get_virtual_root,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the virtual root of the tree.\" },\n    { .ml_name = \"get_num_edges\",\n        .ml_meth = (PyCFunction) Tree_get_num_edges,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the number of branches in this tree.\" },\n    { .ml_name = \"get_total_branch_length\",\n        .ml_meth = (PyCFunction) 
Tree_get_total_branch_length,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the sum of the branch lengths reachable from roots\" },\n    { .ml_name = \"get_left\",\n        .ml_meth = (PyCFunction) Tree_get_left,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the left-most coordinate (inclusive).\" },\n    { .ml_name = \"get_right\",\n        .ml_meth = (PyCFunction) Tree_get_right,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the right-most coordinate (exclusive).\" },\n    { .ml_name = \"get_sites\",\n        .ml_meth = (PyCFunction) Tree_get_sites,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the list of sites on this tree.\" },\n    { .ml_name = \"get_options\",\n        .ml_meth = (PyCFunction) Tree_get_options,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the value of the options variable.\" },\n    { .ml_name = \"get_num_sites\",\n        .ml_meth = (PyCFunction) Tree_get_num_sites,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the number of sites on this tree.\" },\n    { .ml_name = \"is_sample\",\n        .ml_meth = (PyCFunction) Tree_is_sample,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns True if the specified node is a sample.\" },\n    { .ml_name = \"is_descendant\",\n        .ml_meth = (PyCFunction) Tree_is_descendant,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns True if u is a descendant of v.\" },\n    { .ml_name = \"depth\",\n        .ml_meth = (PyCFunction) Tree_depth,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the depth of node u\" },\n    { .ml_name = \"get_parent\",\n        .ml_meth = (PyCFunction) Tree_get_parent,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the parent of node u\" },\n    { .ml_name = \"get_time\",\n        .ml_meth = (PyCFunction) Tree_get_time,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the time of node u\" },\n    { .ml_name 
= \"get_population\",\n        .ml_meth = (PyCFunction) Tree_get_population,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the population of node u\" },\n    { .ml_name = \"get_left_child\",\n        .ml_meth = (PyCFunction) Tree_get_left_child,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the left-most child of node u\" },\n    { .ml_name = \"get_right_child\",\n        .ml_meth = (PyCFunction) Tree_get_right_child,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the right-most child of node u\" },\n    { .ml_name = \"get_left_sib\",\n        .ml_meth = (PyCFunction) Tree_get_left_sib,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the left-most sib of node u\" },\n    { .ml_name = \"get_right_sib\",\n        .ml_meth = (PyCFunction) Tree_get_right_sib,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the right-most sib of node u\" },\n    { .ml_name = \"get_edge\",\n        .ml_meth = (PyCFunction) Tree_get_edge,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the edge id connecting node u to its parent\" },\n    { .ml_name = \"get_children\",\n        .ml_meth = (PyCFunction) Tree_get_children,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the children of u in left-right order.\" },\n    { .ml_name = \"get_left_sample\",\n        .ml_meth = (PyCFunction) Tree_get_left_sample,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the index of the left-most sample descending from u.\" },\n    { .ml_name = \"get_right_sample\",\n        .ml_meth = (PyCFunction) Tree_get_right_sample,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the index of the right-most sample descending from u.\" },\n    { .ml_name = \"get_next_sample\",\n        .ml_meth = (PyCFunction) Tree_get_next_sample,\n        .ml_flags = METH_VARARGS,\n        .ml_doc\n        = \"Returns the index of the next sample after the specified sample index.\" },\n  
  { .ml_name = \"get_mrca\",\n        .ml_meth = (PyCFunction) Tree_get_mrca,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the MRCA of nodes u and v\" },\n    { .ml_name = \"get_num_children\",\n        .ml_meth = (PyCFunction) Tree_get_num_children,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the number of children of node u.\" },\n    { .ml_name = \"get_num_samples\",\n        .ml_meth = (PyCFunction) Tree_get_num_samples,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the number of samples below node u.\" },\n    { .ml_name = \"get_num_tracked_samples\",\n        .ml_meth = (PyCFunction) Tree_get_num_tracked_samples,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the number of tracked samples below node u.\" },\n    { .ml_name = \"get_newick\",\n        .ml_meth = (PyCFunction) Tree_get_newick,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Returns the newick representation of this tree.\" },\n    { .ml_name = \"map_mutations\",\n        .ml_meth = (PyCFunction) Tree_map_mutations,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc\n        = \"Returns a parsimonious state reconstruction for the specified genotypes.\" },\n    { .ml_name = \"equals\",\n        .ml_meth = (PyCFunction) Tree_equals,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns True if this tree is equal to the parameter tree.\" },\n    { .ml_name = \"copy\",\n        .ml_meth = (PyCFunction) Tree_copy,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns a copy of this tree.\" },\n    { .ml_name = \"get_kc_distance\",\n        .ml_meth = (PyCFunction) Tree_get_kc_distance,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Returns the KC distance between this tree and another.\" },\n    { .ml_name = \"set_root_threshold\",\n        .ml_meth = (PyCFunction) Tree_set_root_threshold,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = 
\"Sets the root threshold to the specified value.\" },\n    { .ml_name = \"get_root_threshold\",\n        .ml_meth = (PyCFunction) Tree_get_root_threshold,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the root threshold for this tree.\" },\n    { .ml_name = \"get_preorder\",\n        .ml_meth = (PyCFunction) Tree_get_preorder,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the nodes in this tree in preorder.\" },\n    { .ml_name = \"get_postorder\",\n        .ml_meth = (PyCFunction) Tree_get_postorder,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the nodes in this tree in postorder.\" },\n    { .ml_name = \"get_sackin_index\",\n        .ml_meth = (PyCFunction) Tree_get_sackin_index,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the Sackin index for this tree.\" },\n    { .ml_name = \"get_colless_index\",\n        .ml_meth = (PyCFunction) Tree_get_colless_index,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the Colless index for this tree.\" },\n    { .ml_name = \"get_b1_index\",\n        .ml_meth = (PyCFunction) Tree_get_b1_index,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the B1 index for this tree.\" },\n    { .ml_name = \"get_b2_index\",\n        .ml_meth = (PyCFunction) Tree_get_b2_index,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the B2 index for this tree.\" },\n    { .ml_name = \"get_num_lineages\",\n        .ml_meth = (PyCFunction) Tree_get_num_lineages,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns number of lineages at time t.\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyTypeObject TreeType = {\n    // clang-format off\n    PyVarObject_HEAD_INIT(NULL, 0)\n    .tp_name = \"_tskit.Tree\",\n    .tp_basicsize = sizeof(Tree),\n    .tp_dealloc = (destructor) Tree_dealloc,\n    .tp_flags = Py_TPFLAGS_DEFAULT,\n    .tp_doc = \"Tree objects\",\n    .tp_methods = Tree_methods,\n    .tp_getset = Tree_getsetters,\n    
.tp_init = (initproc) Tree_init,\n    .tp_new = PyType_GenericNew,\n    // clang-format on\n};\n\n/*===================================================================\n * Variant\n *===================================================================\n */\n\n/* Forward declaration */\nstatic PyTypeObject VariantType;\n\nstatic int\nVariant_check_state(Variant *self)\n{\n    int ret = 0;\n    if (self->variant == NULL) {\n        PyErr_SetString(PyExc_SystemError, \"variant not initialised\");\n        ret = -1;\n    }\n    return ret;\n}\n\nstatic void\nVariant_dealloc(Variant *self)\n{\n    if (self->variant != NULL) {\n        tsk_variant_free(self->variant);\n        PyMem_Free(self->variant);\n        self->variant = NULL;\n    }\n    Py_XDECREF(self->tree_sequence);\n    Py_TYPE(self)->tp_free((PyObject *) self);\n}\n\nstatic int\nVariant_init(Variant *self, PyObject *args, PyObject *kwds)\n{\n    int ret = -1;\n    int err;\n    static char *kwlist[]\n        = { \"tree_sequence\", \"samples\", \"isolated_as_missing\", \"alleles\", NULL };\n    TreeSequence *tree_sequence = NULL;\n    PyObject *samples_input = Py_None;\n    PyObject *py_alleles = Py_None;\n    PyArrayObject *samples_array = NULL;\n    tsk_id_t *samples = NULL;\n    tsk_size_t num_samples = 0;\n    int isolated_as_missing = 1;\n    const char **alleles = NULL;\n    npy_intp *shape;\n    tsk_flags_t options = 0;\n\n    self->variant = NULL;\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"O!|OiO\", kwlist, &TreeSequenceType,\n            &tree_sequence, &samples_input, &isolated_as_missing, &py_alleles)) {\n        goto out;\n    }\n    if (!isolated_as_missing) {\n        options |= TSK_ISOLATED_NOT_MISSING;\n    }\n    /* tsk_variant_t holds a reference to the tree sequence so we must too*/\n    self->tree_sequence = tree_sequence;\n    Py_INCREF(self->tree_sequence);\n    if (TreeSequence_check_state(self->tree_sequence) != 0) {\n        goto out;\n    }\n    if (samples_input != Py_None) 
{\n        samples_array = (PyArrayObject *) PyArray_FROMANY(\n            samples_input, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);\n        if (samples_array == NULL) {\n            goto out;\n        }\n        shape = PyArray_DIMS(samples_array);\n        num_samples = (tsk_size_t) shape[0];\n        samples = PyArray_DATA(samples_array);\n    }\n    if (py_alleles != Py_None) {\n        alleles = parse_allele_list(py_alleles);\n        if (alleles == NULL) {\n            goto out;\n        }\n    }\n    self->variant = PyMem_Malloc(sizeof(tsk_variant_t));\n    if (self->variant == NULL) {\n        PyErr_NoMemory();\n        goto out;\n    }\n    /* Note: the variant currently takes a copy of the samples list. If we wanted\n     * to avoid this we would INCREF the samples array above and keep a reference\n     * to it in the object struct */\n    err = tsk_variant_init(self->variant, self->tree_sequence->tree_sequence, samples,\n        num_samples, alleles, options);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = 0;\nout:\n    PyMem_Free(alleles);\n    Py_XDECREF(samples_array);\n    return ret;\n}\n\nstatic PyObject *\nVariant_decode(Variant *self, PyObject *args)\n{\n    int err;\n    PyObject *ret = NULL;\n    tsk_id_t site_id;\n\n    if (Variant_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"O&\", &tsk_id_converter, &site_id)) {\n        goto out;\n    }\n    err = tsk_variant_decode(self->variant, site_id, 0);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\nstatic PyObject *\nVariant_restricted_copy(Variant *self)\n{\n    int err;\n    PyObject *ret = NULL;\n    Variant *copy = NULL;\n\n    if (Variant_check_state(self) != 0) {\n        goto out;\n    }\n    copy = (Variant *) _PyObject_New((PyTypeObject *) &VariantType);\n    if (copy == NULL) {\n        goto out;\n    }\n    /* 
Copies have no ts as a way of indicating they shouldn't be decoded\n       This is safe as the copy has no reference to the mutation state strings */\n    copy->tree_sequence = NULL;\n    copy->variant = PyMem_Malloc(sizeof(tsk_variant_t));\n    if (copy->variant == NULL) {\n        PyErr_NoMemory();\n        goto out;\n    }\n    err = tsk_variant_restricted_copy(self->variant, copy->variant);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) copy;\n    copy = NULL;\nout:\n    Py_XDECREF(copy);\n    return ret;\n}\n\nstatic PyObject *\nVariant_get_site_id(Variant *self, void *closure)\n{\n    PyObject *ret = NULL;\n    if (Variant_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"n\", (Py_ssize_t) self->variant->site.id);\nout:\n    return ret;\n}\n\nstatic PyObject *\nVariant_get_alleles(Variant *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (Variant_check_state(self) != 0) {\n        goto out;\n    }\n    ret = make_alleles(self->variant);\nout:\n    return ret;\n}\n\nstatic PyObject *\nVariant_get_samples(Variant *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (Variant_check_state(self) != 0) {\n        goto out;\n    }\n    ret = make_samples(self->variant);\nout:\n    return ret;\n}\n\nstatic PyObject *\nVariant_get_isolated_as_missing(Variant *self, void *closure)\n{\n    bool isolated_as_missing;\n    PyObject *ret = NULL;\n\n    if (Variant_check_state(self) != 0) {\n        goto out;\n    }\n    isolated_as_missing = !(self->variant->options & TSK_ISOLATED_NOT_MISSING);\n    ret = Py_BuildValue(\"i\", (int) isolated_as_missing);\nout:\n    return ret;\n}\n\nstatic PyObject *\nVariant_get_genotypes(Variant *self, void *closure)\n{\n    PyObject *ret = NULL;\n    PyArrayObject *array = NULL;\n    npy_intp dims;\n\n    if (Variant_check_state(self) != 0) {\n        goto out;\n    }\n\n    dims = self->variant->num_samples;\n    array = 
(PyArrayObject *) PyArray_SimpleNewFromData(\n        1, &dims, NPY_INT32, self->variant->genotypes);\n    if (array == NULL) {\n        goto out;\n    }\n    PyArray_CLEARFLAGS(array, NPY_ARRAY_WRITEABLE);\n    if (PyArray_SetBaseObject(array, (PyObject *) self) != 0) {\n        goto out;\n    }\n    /* PyArray_SetBaseObject steals a reference, so we have to incref the variant\n     * object. This makes sure that the Variant instance will stay alive if there\n     * are any arrays that refer to its memory. */\n    Py_INCREF(self);\n    ret = (PyObject *) array;\n    array = NULL;\nout:\n    Py_XDECREF(array);\n    return ret;\n}\n\nstatic PyGetSetDef Variant_getsetters[]\n    = { { .name = \"site_id\",\n            .get = (getter) Variant_get_site_id,\n            .doc = \"The site id that the Variant is decoded at\" },\n          { .name = \"alleles\",\n              .get = (getter) Variant_get_alleles,\n              .doc = \"The alleles of the Variant\" },\n          { .name = \"samples\",\n              .get = (getter) Variant_get_samples,\n              .doc = \"The samples of the Variant\" },\n          { .name = \"isolated_as_missing\",\n              .get = (getter) Variant_get_isolated_as_missing,\n              .doc = \"The isolated_as_missing setting of the Variant\" },\n          { .name = \"genotypes\",\n              .get = (getter) Variant_get_genotypes,\n              .doc = \"The genotypes of the Variant\" },\n          { NULL } };\n\nstatic PyMethodDef Variant_methods[] = {\n    { .ml_name = \"decode\",\n        .ml_meth = (PyCFunction) Variant_decode,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Sets the variant's genotypes to those of a given tree and site\" },\n    { .ml_name = \"restricted_copy\",\n        .ml_meth = (PyCFunction) Variant_restricted_copy,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Copies the variant\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyTypeObject VariantType = {\n    // clang-format off\n    
PyVarObject_HEAD_INIT(NULL, 0)\n    .tp_name = \"_tskit.Variant\",\n    .tp_basicsize = sizeof(Variant),\n    .tp_dealloc = (destructor) Variant_dealloc,\n    .tp_flags = Py_TPFLAGS_DEFAULT,\n    .tp_doc = \"Variant objects\",\n    .tp_methods = Variant_methods,\n    .tp_getset = Variant_getsetters,\n    .tp_init = (initproc) Variant_init,\n    .tp_new = PyType_GenericNew,\n    // clang-format on\n};\n\n/*===================================================================\n * LdCalculator\n *===================================================================\n */\n\nstatic int\nLdCalculator_check_state(LdCalculator *self)\n{\n    int ret = 0;\n    if (self->ld_calc == NULL) {\n        PyErr_SetString(PyExc_SystemError, \"converter not initialised\");\n        ret = -1;\n    }\n    return ret;\n}\n\nstatic void\nLdCalculator_dealloc(LdCalculator *self)\n{\n    if (self->ld_calc != NULL) {\n        tsk_ld_calc_free(self->ld_calc);\n        PyMem_Free(self->ld_calc);\n        self->ld_calc = NULL;\n    }\n    Py_XDECREF(self->tree_sequence);\n    Py_TYPE(self)->tp_free((PyObject *) self);\n}\n\nstatic int\nLdCalculator_init(LdCalculator *self, PyObject *args, PyObject *kwds)\n{\n    int ret = -1;\n    int err;\n    static char *kwlist[] = { \"tree_sequence\", NULL };\n    TreeSequence *tree_sequence;\n\n    self->ld_calc = NULL;\n    self->tree_sequence = NULL;\n    if (!PyArg_ParseTupleAndKeywords(\n            args, kwds, \"O!\", kwlist, &TreeSequenceType, &tree_sequence)) {\n        goto out;\n    }\n    self->tree_sequence = tree_sequence;\n    Py_INCREF(self->tree_sequence);\n    if (TreeSequence_check_state(self->tree_sequence) != 0) {\n        goto out;\n    }\n    self->ld_calc = PyMem_Malloc(sizeof(tsk_ld_calc_t));\n    if (self->ld_calc == NULL) {\n        PyErr_NoMemory();\n        goto out;\n    }\n    memset(self->ld_calc, 0, sizeof(tsk_ld_calc_t));\n    err = tsk_ld_calc_init(self->ld_calc, self->tree_sequence->tree_sequence);\n    if (err != 0) {\n      
  handle_library_error(err);\n        goto out;\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic PyObject *\nLdCalculator_get_r2(LdCalculator *self, PyObject *args)\n{\n    int err;\n    PyObject *ret = NULL;\n    Py_ssize_t a, b;\n    double r2;\n\n    if (LdCalculator_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"nn\", &a, &b)) {\n        goto out;\n    }\n    err = tsk_ld_calc_get_r2(self->ld_calc, (tsk_id_t) a, (tsk_id_t) b, &r2);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"d\", r2);\nout:\n    return ret;\n}\n\nstatic PyObject *\nLdCalculator_get_r2_array(LdCalculator *self, PyObject *args, PyObject *kwds)\n{\n    int err;\n    PyObject *ret = NULL;\n    PyArrayObject *array = NULL;\n    static char *kwlist[]\n        = { \"source_index\", \"direction\", \"max_sites\", \"max_distance\", NULL };\n    Py_ssize_t source_index;\n    Py_ssize_t max_sites = -1;\n    double max_distance = DBL_MAX;\n    int direction = TSK_DIR_FORWARD;\n    double *data = NULL;\n    tsk_size_t num_r2_values = 0;\n    npy_intp dims;\n\n    if (LdCalculator_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"n|ind\", kwlist, &source_index,\n            &direction, &max_sites, &max_distance)) {\n        goto out;\n    }\n    if (direction != TSK_DIR_FORWARD && direction != TSK_DIR_REVERSE) {\n        PyErr_SetString(PyExc_ValueError, \"direction must be FORWARD or REVERSE\");\n        goto out;\n    }\n    if (max_distance < 0) {\n        PyErr_SetString(PyExc_ValueError, \"max_distance must be >= 0\");\n        goto out;\n    }\n    if (max_sites == -1) {\n        max_sites = tsk_treeseq_get_num_sites(self->ld_calc->tree_sequence);\n    } else if (max_sites < 0) {\n        PyErr_SetString(PyExc_ValueError, \"max_sites cannot be negative\");\n        goto out;\n    }\n\n    data = PyDataMem_NEW(max_sites * sizeof(*data));\n 
   if (data == NULL) {\n        ret = PyErr_NoMemory();\n        goto out;\n    }\n    err = tsk_ld_calc_get_r2_array(self->ld_calc, (tsk_id_t) source_index, direction,\n        (tsk_size_t) max_sites, max_distance, data, &num_r2_values);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    dims = (npy_intp) num_r2_values;\n    array = (PyArrayObject *) PyArray_SimpleNewFromData(1, &dims, NPY_FLOAT64, data);\n    if (array == NULL) {\n        goto out;\n    }\n    /* Set the OWNDATA flag so that the data will be freed with the array */\n    PyArray_ENABLEFLAGS(array, NPY_ARRAY_OWNDATA);\n    /* Not strictly necessary since we're creating a new array, but let's\n     * keep the door open to future optimisations. */\n    PyArray_CLEARFLAGS(array, NPY_ARRAY_WRITEABLE);\n\n    ret = (PyObject *) array;\n    data = NULL;\n    array = NULL;\nout:\n    Py_XDECREF(array);\n    if (data != NULL) {\n        PyDataMem_FREE(data);\n    }\n\n    return ret;\n}\n\nstatic PyMethodDef LdCalculator_methods[] = {\n    { .ml_name = \"get_r2\",\n        .ml_meth = (PyCFunction) LdCalculator_get_r2,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the value of the r2 statistic between the specified pair of \"\n                  \"mutation indexes\" },\n    { .ml_name = \"get_r2_array\",\n        .ml_meth = (PyCFunction) LdCalculator_get_r2_array,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Returns r2 statistic for a given mutation over specified range\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyTypeObject LdCalculatorType = {\n    // clang-format off\n    PyVarObject_HEAD_INIT(NULL, 0)\n    .tp_name = \"_tskit.LdCalculator\",\n    .tp_basicsize = sizeof(LdCalculator),\n    .tp_dealloc = (destructor) LdCalculator_dealloc,\n    .tp_flags = Py_TPFLAGS_DEFAULT,\n    .tp_doc = \"LdCalculator objects\",\n    .tp_methods = LdCalculator_methods,\n    .tp_init = (initproc) LdCalculator_init,\n    .tp_new = 
PyType_GenericNew,\n    // clang-format on\n};\n\n/*===================================================================\n * CompressedMatrix\n *===================================================================\n */\n\nstatic int\nCompressedMatrix_check_state(CompressedMatrix *self)\n{\n    int ret = -1;\n    if (self->compressed_matrix == NULL) {\n        PyErr_SetString(PyExc_SystemError, \"CompressedMatrix not initialised\");\n        goto out;\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic void\nCompressedMatrix_dealloc(CompressedMatrix *self)\n{\n    if (self->compressed_matrix != NULL) {\n        tsk_compressed_matrix_free(self->compressed_matrix);\n        PyMem_Free(self->compressed_matrix);\n        self->compressed_matrix = NULL;\n    }\n    Py_XDECREF(self->tree_sequence);\n    Py_TYPE(self)->tp_free((PyObject *) self);\n}\n\nstatic int\nCompressedMatrix_init(CompressedMatrix *self, PyObject *args, PyObject *kwds)\n{\n    int ret = -1;\n    int err;\n    static char *kwlist[] = { \"tree_sequence\", \"block_size\", NULL };\n    TreeSequence *tree_sequence = NULL;\n    Py_ssize_t block_size = 0;\n\n    self->compressed_matrix = NULL;\n    self->tree_sequence = NULL;\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"O!|n\", kwlist, &TreeSequenceType,\n            &tree_sequence, &block_size)) {\n        goto out;\n    }\n    self->tree_sequence = tree_sequence;\n    Py_INCREF(self->tree_sequence);\n    if (TreeSequence_check_state(self->tree_sequence) != 0) {\n        goto out;\n    }\n    self->compressed_matrix = PyMem_Malloc(sizeof(tsk_compressed_matrix_t));\n    if (self->compressed_matrix == NULL) {\n        PyErr_NoMemory();\n        goto out;\n    }\n    memset(self->compressed_matrix, 0, sizeof(tsk_compressed_matrix_t));\n\n    err = tsk_compressed_matrix_init(\n        self->compressed_matrix, self->tree_sequence->tree_sequence, block_size, 0);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret 
= 0;\nout:\n    return ret;\n}\n\nstatic PyObject *\nCompressedMatrix_get_num_sites(CompressedMatrix *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (CompressedMatrix_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"n\", (Py_ssize_t) self->compressed_matrix->num_sites);\nout:\n    return ret;\n}\n\nstatic PyObject *\nCompressedMatrix_get_normalisation_factor(CompressedMatrix *self, void *closure)\n{\n    PyObject *ret = NULL;\n    PyArrayObject *array;\n    tsk_size_t num_sites;\n    npy_intp dims;\n\n    if (CompressedMatrix_check_state(self) != 0) {\n        goto out;\n    }\n    num_sites = self->compressed_matrix->num_sites;\n    dims = (npy_intp) num_sites;\n    array = (PyArrayObject *) PyArray_EMPTY(1, &dims, NPY_FLOAT64, 0);\n    if (array == NULL) {\n        goto out;\n    }\n    memcpy(PyArray_DATA(array), self->compressed_matrix->normalisation_factor,\n        num_sites * sizeof(*self->compressed_matrix->normalisation_factor));\n    ret = (PyObject *) array;\nout:\n    return ret;\n}\n\nstatic PyObject *\nCompressedMatrix_get_num_transitions(CompressedMatrix *self, void *closure)\n{\n    PyObject *ret = NULL;\n    PyArrayObject *array;\n    tsk_size_t num_sites;\n    npy_intp dims;\n\n    if (CompressedMatrix_check_state(self) != 0) {\n        goto out;\n    }\n    num_sites = self->compressed_matrix->num_sites;\n    dims = (npy_intp) num_sites;\n    array = (PyArrayObject *) PyArray_EMPTY(1, &dims, NPY_UINT64, 0);\n    if (array == NULL) {\n        goto out;\n    }\n    memcpy(PyArray_DATA(array), self->compressed_matrix->num_transitions,\n        num_sites * sizeof(*self->compressed_matrix->num_transitions));\n    ret = (PyObject *) array;\nout:\n    return ret;\n}\n\nstatic PyObject *\nCompressedMatrix_get_site(CompressedMatrix *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    unsigned int site;\n\n    if (CompressedMatrix_check_state(self) != 0) {\n        goto out;\n    }\n    if 
(!PyArg_ParseTuple(args, \"I\", &site)) {\n        goto out;\n    }\n    ret = convert_compressed_matrix_site(self->compressed_matrix, site);\nout:\n    return ret;\n}\n\nstatic PyObject *\nCompressedMatrix_decode(CompressedMatrix *self)\n{\n    PyObject *ret = NULL;\n    if (CompressedMatrix_check_state(self) != 0) {\n        goto out;\n    }\n    ret = decode_compressed_matrix(self->compressed_matrix);\nout:\n    return ret;\n}\n\nstatic PyGetSetDef CompressedMatrix_getsetters[] = {\n    { .name = \"num_sites\",\n        .get = (getter) CompressedMatrix_get_num_sites,\n        .doc = \"The number of sites.\" },\n    { .name = \"normalisation_factor\",\n        .get = (getter) CompressedMatrix_get_normalisation_factor,\n        .doc = \"The per-site normalisation factor.\" },\n    { .name = \"num_transitions\",\n        .get = (getter) CompressedMatrix_get_num_transitions,\n        .doc = \"The per-site number of transitions in the compressed matrix.\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyMethodDef CompressedMatrix_methods[] = {\n    { .ml_name = \"get_site\",\n        .ml_meth = (PyCFunction) CompressedMatrix_get_site,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the list of (node, value) tuples for the specified site.\" },\n    { .ml_name = \"decode\",\n        .ml_meth = (PyCFunction) CompressedMatrix_decode,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the full decoded forward matrix.\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyTypeObject CompressedMatrixType = {\n    // clang-format off\n    PyVarObject_HEAD_INIT(NULL, 0)\n    .tp_name = \"_tskit.CompressedMatrix\",\n    .tp_basicsize = sizeof(CompressedMatrix),\n    .tp_dealloc = (destructor) CompressedMatrix_dealloc,\n    .tp_flags = Py_TPFLAGS_DEFAULT,\n    .tp_doc = \"CompressedMatrix objects\",\n    .tp_methods = CompressedMatrix_methods,\n    .tp_getset = CompressedMatrix_getsetters,\n    .tp_init = (initproc) CompressedMatrix_init,\n    .tp_new = 
PyType_GenericNew,\n    // clang-format on\n};\n\n/*===================================================================\n * ViterbiMatrix\n *===================================================================\n */\n\nstatic int\nViterbiMatrix_check_state(ViterbiMatrix *self)\n{\n    int ret = -1;\n    if (self->viterbi_matrix == NULL) {\n        PyErr_SetString(PyExc_SystemError, \"ViterbiMatrix not initialised\");\n        goto out;\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic void\nViterbiMatrix_dealloc(ViterbiMatrix *self)\n{\n    if (self->viterbi_matrix != NULL) {\n        tsk_viterbi_matrix_free(self->viterbi_matrix);\n        PyMem_Free(self->viterbi_matrix);\n        self->viterbi_matrix = NULL;\n    }\n    Py_XDECREF(self->tree_sequence);\n    Py_TYPE(self)->tp_free((PyObject *) self);\n}\n\nstatic int\nViterbiMatrix_init(ViterbiMatrix *self, PyObject *args, PyObject *kwds)\n{\n    int ret = -1;\n    int err;\n    static char *kwlist[] = { \"tree_sequence\", \"num_records\", NULL };\n    TreeSequence *tree_sequence = NULL;\n    Py_ssize_t num_records = 0;\n\n    self->viterbi_matrix = NULL;\n    self->tree_sequence = NULL;\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"O!|n\", kwlist, &TreeSequenceType,\n            &tree_sequence, &num_records)) {\n        goto out;\n    }\n    self->tree_sequence = tree_sequence;\n    Py_INCREF(self->tree_sequence);\n    if (TreeSequence_check_state(self->tree_sequence) != 0) {\n        goto out;\n    }\n    self->viterbi_matrix = PyMem_Malloc(sizeof(tsk_viterbi_matrix_t));\n    if (self->viterbi_matrix == NULL) {\n        PyErr_NoMemory();\n        goto out;\n    }\n    memset(self->viterbi_matrix, 0, sizeof(tsk_viterbi_matrix_t));\n\n    err = tsk_viterbi_matrix_init(\n        self->viterbi_matrix, self->tree_sequence->tree_sequence, num_records, 0);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic PyObject 
*\nViterbiMatrix_traceback(ViterbiMatrix *self)\n{\n    PyObject *ret = NULL;\n    PyArrayObject *path = NULL;\n    npy_intp dims;\n    int err;\n\n    if (ViterbiMatrix_check_state(self) != 0) {\n        goto out;\n    }\n    dims = self->viterbi_matrix->matrix.num_sites;\n    path = (PyArrayObject *) PyArray_SimpleNew(1, &dims, NPY_INT32);\n    if (path == NULL) {\n        goto out;\n    }\n\n    err = tsk_viterbi_matrix_traceback(self->viterbi_matrix, PyArray_DATA(path), 0);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = (PyObject *) path;\n    path = NULL;\nout:\n    Py_XDECREF(path);\n    return ret;\n}\n\nstatic PyObject *\nViterbiMatrix_get_num_sites(ViterbiMatrix *self, void *closure)\n{\n    PyObject *ret = NULL;\n\n    if (ViterbiMatrix_check_state(self) != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"n\", (Py_ssize_t) self->viterbi_matrix->matrix.num_sites);\nout:\n    return ret;\n}\n\n/* NOTE: We're doing something pretty ugly here in that we're duplicating the\n * methods from the CompressedMatrix class to provide access to the\n * viterbi_matrix struct's embedded compressed_matrix. 
It would be more\n * elegant if the ViterbiMatrix class had a CompressedMatrix member,\n * but the memory management is tricky, so it doesn't seem worth the\n * hassle.\n */\n\nstatic PyObject *\nViterbiMatrix_get_normalisation_factor(ViterbiMatrix *self, void *closure)\n{\n    PyObject *ret = NULL;\n    PyArrayObject *array;\n    tsk_size_t num_sites;\n    npy_intp dims;\n\n    if (ViterbiMatrix_check_state(self) != 0) {\n        goto out;\n    }\n    num_sites = self->viterbi_matrix->matrix.num_sites;\n    dims = (npy_intp) num_sites;\n    array = (PyArrayObject *) PyArray_EMPTY(1, &dims, NPY_FLOAT64, 0);\n    if (array == NULL) {\n        goto out;\n    }\n    memcpy(PyArray_DATA(array), self->viterbi_matrix->matrix.normalisation_factor,\n        num_sites * sizeof(*self->viterbi_matrix->matrix.normalisation_factor));\n    ret = (PyObject *) array;\nout:\n    return ret;\n}\n\nstatic PyObject *\nViterbiMatrix_get_num_transitions(ViterbiMatrix *self, void *closure)\n{\n    PyObject *ret = NULL;\n    PyArrayObject *array;\n    tsk_size_t num_sites;\n    npy_intp dims;\n\n    if (ViterbiMatrix_check_state(self) != 0) {\n        goto out;\n    }\n    num_sites = self->viterbi_matrix->matrix.num_sites;\n    dims = (npy_intp) num_sites;\n\n    array = (PyArrayObject *) PyArray_EMPTY(1, &dims, NPY_UINT64, 0);\n    if (array == NULL) {\n        goto out;\n    }\n    memcpy(PyArray_DATA(array), self->viterbi_matrix->matrix.num_transitions,\n        num_sites * sizeof(*self->viterbi_matrix->matrix.num_transitions));\n    ret = (PyObject *) array;\nout:\n    return ret;\n}\n\nstatic PyObject *\nViterbiMatrix_get_site(ViterbiMatrix *self, PyObject *args)\n{\n    PyObject *ret = NULL;\n    unsigned int site;\n\n    if (ViterbiMatrix_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"I\", &site)) {\n        goto out;\n    }\n    ret = convert_compressed_matrix_site(&self->viterbi_matrix->matrix, site);\nout:\n    return ret;\n}\n\nstatic 
PyObject *\nViterbiMatrix_decode(ViterbiMatrix *self)\n{\n    PyObject *ret = NULL;\n    if (ViterbiMatrix_check_state(self) != 0) {\n        goto out;\n    }\n    ret = decode_compressed_matrix(&self->viterbi_matrix->matrix);\nout:\n    return ret;\n}\n\nstatic PyGetSetDef ViterbiMatrix_getsetters[] = {\n    { .name = \"num_sites\",\n        .get = (getter) ViterbiMatrix_get_num_sites,\n        .doc = \"The number of sites.\" },\n    { .name = \"normalisation_factor\",\n        .get = (getter) ViterbiMatrix_get_normalisation_factor,\n        .doc = \"The per-site normalisation factor.\" },\n    { .name = \"num_transitions\",\n        .get = (getter) ViterbiMatrix_get_num_transitions,\n        .doc = \"The per-site number of transitions in the compressed matrix.\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyMethodDef ViterbiMatrix_methods[] = {\n    { .ml_name = \"traceback\",\n        .ml_meth = (PyCFunction) ViterbiMatrix_traceback,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns a path for a given haplotype.\" },\n    { .ml_name = \"get_site\",\n        .ml_meth = (PyCFunction) ViterbiMatrix_get_site,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the list of (node, value) tuples for the specified site.\" },\n    { .ml_name = \"decode\",\n        .ml_meth = (PyCFunction) ViterbiMatrix_decode,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the full decoded forward matrix.\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyTypeObject ViterbiMatrixType = {\n    // clang-format off\n    PyVarObject_HEAD_INIT(NULL, 0)\n    .tp_name = \"_tskit.ViterbiMatrix\",\n    .tp_basicsize = sizeof(ViterbiMatrix),\n    .tp_dealloc = (destructor) ViterbiMatrix_dealloc,\n    .tp_flags = Py_TPFLAGS_DEFAULT,\n    .tp_doc = \"ViterbiMatrix objects\",\n    .tp_methods = ViterbiMatrix_methods,\n    .tp_getset = ViterbiMatrix_getsetters,\n    .tp_init = (initproc) ViterbiMatrix_init,\n    .tp_new = PyType_GenericNew,\n    // 
clang-format on\n};\n\n/*===================================================================\n * LsHmm\n *===================================================================\n */\n\nstatic int\nLsHmm_check_state(LsHmm *self)\n{\n    int ret = -1;\n    if (self->ls_hmm == NULL) {\n        PyErr_SetString(PyExc_SystemError, \"LsHmm not initialised\");\n        goto out;\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic void\nLsHmm_dealloc(LsHmm *self)\n{\n    if (self->ls_hmm != NULL) {\n        tsk_ls_hmm_free(self->ls_hmm);\n        PyMem_Free(self->ls_hmm);\n        self->ls_hmm = NULL;\n    }\n    Py_XDECREF(self->tree_sequence);\n    Py_TYPE(self)->tp_free((PyObject *) self);\n}\n\nstatic int\nLsHmm_init(LsHmm *self, PyObject *args, PyObject *kwds)\n{\n    int ret = -1;\n    int err;\n    static char *kwlist[] = { \"tree_sequence\", \"recombination_rate\", \"mutation_rate\",\n        \"precision\", \"acgt_alleles\", NULL };\n    PyObject *recombination_rate = NULL;\n    PyArrayObject *recombination_rate_array = NULL;\n    PyObject *mutation_rate = NULL;\n    PyArrayObject *mutation_rate_array = NULL;\n    TreeSequence *tree_sequence = NULL;\n    unsigned int precision = 23;\n    int acgt_alleles = 0;\n    tsk_flags_t options = 0;\n    npy_intp *shape, num_sites;\n\n    self->ls_hmm = NULL;\n    self->tree_sequence = NULL;\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"O!OO|Ii\", kwlist, &TreeSequenceType,\n            &tree_sequence, &recombination_rate, &mutation_rate, &precision,\n            &acgt_alleles)) {\n        goto out;\n    }\n    self->tree_sequence = tree_sequence;\n    Py_INCREF(self->tree_sequence);\n    if (TreeSequence_check_state(self->tree_sequence) != 0) {\n        goto out;\n    }\n    self->ls_hmm = PyMem_Malloc(sizeof(tsk_ls_hmm_t));\n    if (self->ls_hmm == NULL) {\n        PyErr_NoMemory();\n        goto out;\n    }\n    memset(self->ls_hmm, 0, sizeof(tsk_ls_hmm_t));\n\n    num_sites = (npy_intp) 
tsk_treeseq_get_num_sites(self->tree_sequence->tree_sequence);\n    recombination_rate_array = (PyArrayObject *) PyArray_FROMANY(\n        recombination_rate, NPY_FLOAT64, 1, 1, NPY_ARRAY_IN_ARRAY);\n    if (recombination_rate_array == NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(recombination_rate_array);\n    if (shape[0] != num_sites) {\n        PyErr_SetString(PyExc_ValueError,\n            \"recombination_rate array must have dimension (num_sites,)\");\n        goto out;\n    }\n    mutation_rate_array = (PyArrayObject *) PyArray_FROMANY(\n        mutation_rate, NPY_FLOAT64, 1, 1, NPY_ARRAY_IN_ARRAY);\n    if (mutation_rate_array == NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(mutation_rate_array);\n    if (shape[0] != num_sites) {\n        PyErr_SetString(\n            PyExc_ValueError, \"mutation_rate array must have dimension (num_sites,)\");\n        goto out;\n    }\n    if (acgt_alleles) {\n        options |= TSK_ALLELES_ACGT;\n    }\n\n    err = tsk_ls_hmm_init(self->ls_hmm, self->tree_sequence->tree_sequence,\n        PyArray_DATA(recombination_rate_array), PyArray_DATA(mutation_rate_array),\n        options);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    tsk_ls_hmm_set_precision(self->ls_hmm, precision);\n    ret = 0;\nout:\n    Py_XDECREF(recombination_rate_array);\n    Py_XDECREF(mutation_rate_array);\n    return ret;\n}\n\nstatic PyObject *\nLsHmm_forward_matrix(LsHmm *self, PyObject *args)\n{\n    int err;\n    PyObject *ret = NULL;\n    PyObject *haplotype = NULL;\n    CompressedMatrix *compressed_matrix = NULL;\n    PyArrayObject *haplotype_array = NULL;\n    npy_intp *shape, num_sites;\n\n    if (LsHmm_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(\n            args, \"OO!\", &haplotype, &CompressedMatrixType, &compressed_matrix)) {\n        goto out;\n    }\n    num_sites = (npy_intp) 
tsk_treeseq_get_num_sites(self->tree_sequence->tree_sequence);\n    haplotype_array = (PyArrayObject *) PyArray_FROMANY(\n        haplotype, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);\n    if (haplotype_array == NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(haplotype_array);\n    if (shape[0] != num_sites) {\n        PyErr_SetString(\n            PyExc_ValueError, \"haplotype array must have dimension (num_sites,)\");\n        goto out;\n    }\n    err = tsk_ls_hmm_forward(self->ls_hmm, PyArray_DATA(haplotype_array),\n        compressed_matrix->compressed_matrix, TSK_NO_INIT);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    Py_XDECREF(haplotype_array);\n    return ret;\n}\n\nstatic PyObject *\nLsHmm_backward_matrix(LsHmm *self, PyObject *args)\n{\n    int err;\n    PyObject *ret = NULL;\n    PyObject *haplotype = NULL;\n    PyObject *forward_norm = NULL;\n    CompressedMatrix *compressed_matrix = NULL;\n    PyArrayObject *haplotype_array = NULL;\n    PyArrayObject *forward_norm_array = NULL;\n    npy_intp *shape, num_sites;\n\n    if (LsHmm_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"OOO!\", &haplotype, &forward_norm, &CompressedMatrixType,\n            &compressed_matrix)) {\n        goto out;\n    }\n    num_sites = (npy_intp) tsk_treeseq_get_num_sites(self->tree_sequence->tree_sequence);\n\n    haplotype_array = (PyArrayObject *) PyArray_FROMANY(\n        haplotype, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);\n    if (haplotype_array == NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(haplotype_array);\n    if (shape[0] != num_sites) {\n        PyErr_SetString(\n            PyExc_ValueError, \"haplotype array must have dimension (num_sites,)\");\n        goto out;\n    }\n\n    forward_norm_array = (PyArrayObject *) PyArray_FROMANY(\n        forward_norm, NPY_FLOAT64, 1, 1, NPY_ARRAY_IN_ARRAY);\n    if (forward_norm_array == 
NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(forward_norm_array);\n    if (shape[0] != num_sites) {\n        PyErr_SetString(\n            PyExc_ValueError, \"forward_norm array must have dimension (num_sites,)\");\n        goto out;\n    }\n    err = tsk_ls_hmm_backward(self->ls_hmm, PyArray_DATA(haplotype_array),\n        PyArray_DATA(forward_norm_array), compressed_matrix->compressed_matrix,\n        TSK_NO_INIT);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    Py_XDECREF(haplotype_array);\n    Py_XDECREF(forward_norm_array);\n    return ret;\n}\n\nstatic PyObject *\nLsHmm_viterbi_matrix(LsHmm *self, PyObject *args)\n{\n    int err;\n    PyObject *ret = NULL;\n    PyObject *haplotype = NULL;\n    ViterbiMatrix *viterbi_matrix = NULL;\n    PyArrayObject *haplotype_array = NULL;\n    npy_intp *shape, num_sites;\n\n    if (LsHmm_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(\n            args, \"OO!\", &haplotype, &ViterbiMatrixType, &viterbi_matrix)) {\n        goto out;\n    }\n    num_sites = (npy_intp) tsk_treeseq_get_num_sites(self->tree_sequence->tree_sequence);\n    haplotype_array = (PyArrayObject *) PyArray_FROMANY(\n        haplotype, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);\n    if (haplotype_array == NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(haplotype_array);\n    if (shape[0] != num_sites) {\n        PyErr_SetString(\n            PyExc_ValueError, \"haplotype array must have dimension (num_sites,)\");\n        goto out;\n    }\n    err = tsk_ls_hmm_viterbi(self->ls_hmm, PyArray_DATA(haplotype_array),\n        viterbi_matrix->viterbi_matrix, TSK_NO_INIT);\n    if (err != 0) {\n        handle_library_error(err);\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    Py_XDECREF(haplotype_array);\n    return ret;\n}\n\nstatic PyMethodDef LsHmm_methods[] = {\n    { .ml_name = \"forward_matrix\",\n        
.ml_meth = (PyCFunction) LsHmm_forward_matrix,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the tree encoded forward matrix for a given haplotype\" },\n    { .ml_name = \"backward_matrix\",\n        .ml_meth = (PyCFunction) LsHmm_backward_matrix,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the tree encoded backward matrix for a given haplotype\" },\n    { .ml_name = \"viterbi_matrix\",\n        .ml_meth = (PyCFunction) LsHmm_viterbi_matrix,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Returns the tree encoded Viterbi matrix for a given haplotype\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyTypeObject LsHmmType = {\n    // clang-format off\n    PyVarObject_HEAD_INIT(NULL, 0)\n    .tp_name = \"_tskit.LsHmm\",\n    .tp_basicsize = sizeof(LsHmm),\n    .tp_dealloc = (destructor) LsHmm_dealloc,\n    .tp_flags = Py_TPFLAGS_DEFAULT,\n    .tp_doc = \"LsHmm objects\",\n    .tp_methods = LsHmm_methods,\n    .tp_init = (initproc) LsHmm_init,\n    .tp_new = PyType_GenericNew,\n    // clang-format on\n};\n\n/*===================================================================\n * Module level functions\n *===================================================================\n */\n\nstatic PyObject *\ntskit_get_kastore_version(PyObject *self)\n{\n    return Py_BuildValue(\"iii\", KAS_VERSION_MAJOR, KAS_VERSION_MINOR, KAS_VERSION_PATCH);\n}\n\nstatic PyObject *\ntskit_get_tskit_version(PyObject *self)\n{\n    return Py_BuildValue(\"iii\", TSK_VERSION_MAJOR, TSK_VERSION_MINOR, TSK_VERSION_PATCH);\n}\n\nstatic PyMethodDef tskit_methods[] = {\n    { .ml_name = \"get_kastore_version\",\n        .ml_meth = (PyCFunction) tskit_get_kastore_version,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the version of kastore we have built in.\" },\n    { .ml_name = \"get_tskit_version\",\n        .ml_meth = (PyCFunction) tskit_get_tskit_version,\n        .ml_flags = METH_NOARGS,\n        .ml_doc = \"Returns the version of the 
tskit C API we have built in.\" },\n    { NULL } /* Sentinel */\n};\n\nstatic struct PyModuleDef tskitmodule = {\n    .m_base = PyModuleDef_HEAD_INIT,\n    .m_name = \"_tskit\",\n    .m_doc = \"Low level interface for tskit\",\n    .m_size = -1,\n    .m_methods = tskit_methods,\n};\n\nPyObject *\nPyInit__tskit(void)\n{\n    PyObject *module;\n\n#if HAVE_NUMPY_2\n    if (PyArray_ImportNumPyAPI() < 0) {\n        return NULL;\n    }\n#else\n    import_array();\n#endif\n\n    module = PyModule_Create(&tskitmodule);\n    if (module == NULL) {\n        return NULL;\n    }\n\n    if (PyModule_AddIntConstant(module, \"HAS_NUMPY_2\", HAVE_NUMPY_2)) {\n        return NULL;\n    }\n\n    if (register_lwt_class(module) != 0) {\n        return NULL;\n    }\n\n    /* IndividualTable type */\n    if (PyType_Ready(&IndividualTableType) < 0) {\n        return NULL;\n    }\n    Py_INCREF(&IndividualTableType);\n    PyModule_AddObject(module, \"IndividualTable\", (PyObject *) &IndividualTableType);\n\n    /* NodeTable type */\n    if (PyType_Ready(&NodeTableType) < 0) {\n        return NULL;\n    }\n    Py_INCREF(&NodeTableType);\n    PyModule_AddObject(module, \"NodeTable\", (PyObject *) &NodeTableType);\n\n    /* EdgeTable type */\n    if (PyType_Ready(&EdgeTableType) < 0) {\n        return NULL;\n    }\n    Py_INCREF(&EdgeTableType);\n    PyModule_AddObject(module, \"EdgeTable\", (PyObject *) &EdgeTableType);\n\n    /* MigrationTable type */\n    if (PyType_Ready(&MigrationTableType) < 0) {\n        return NULL;\n    }\n    Py_INCREF(&MigrationTableType);\n    PyModule_AddObject(module, \"MigrationTable\", (PyObject *) &MigrationTableType);\n\n    /* SiteTable type */\n    if (PyType_Ready(&SiteTableType) < 0) {\n        return NULL;\n    }\n    Py_INCREF(&SiteTableType);\n    PyModule_AddObject(module, \"SiteTable\", (PyObject *) &SiteTableType);\n\n    /* MutationTable type */\n    if (PyType_Ready(&MutationTableType) < 0) {\n        return NULL;\n    }\n    
Py_INCREF(&MutationTableType);\n    PyModule_AddObject(module, \"MutationTable\", (PyObject *) &MutationTableType);\n\n    /* PopulationTable type */\n    if (PyType_Ready(&PopulationTableType) < 0) {\n        return NULL;\n    }\n    Py_INCREF(&PopulationTableType);\n    PyModule_AddObject(module, \"PopulationTable\", (PyObject *) &PopulationTableType);\n\n    /* ProvenanceTable type */\n    if (PyType_Ready(&ProvenanceTableType) < 0) {\n        return NULL;\n    }\n    Py_INCREF(&ProvenanceTableType);\n    PyModule_AddObject(module, \"ProvenanceTable\", (PyObject *) &ProvenanceTableType);\n\n    /* TableCollectionTable type */\n    if (PyType_Ready(&TableCollectionType) < 0) {\n        return NULL;\n    }\n    Py_INCREF(&TableCollectionType);\n    PyModule_AddObject(module, \"TableCollection\", (PyObject *) &TableCollectionType);\n\n    /* TreeSequence type */\n    if (PyType_Ready(&TreeSequenceType) < 0) {\n        return NULL;\n    }\n    Py_INCREF(&TreeSequenceType);\n    PyModule_AddObject(module, \"TreeSequence\", (PyObject *) &TreeSequenceType);\n\n    /* Tree type */\n    if (PyType_Ready(&TreeType) < 0) {\n        return NULL;\n    }\n    Py_INCREF(&TreeType);\n    PyModule_AddObject(module, \"Tree\", (PyObject *) &TreeType);\n\n    /* Variant type */\n    if (PyType_Ready(&VariantType) < 0) {\n        return NULL;\n    }\n    Py_INCREF(&VariantType);\n    PyModule_AddObject(module, \"Variant\", (PyObject *) &VariantType);\n\n    /* LdCalculator type */\n    if (PyType_Ready(&LdCalculatorType) < 0) {\n        return NULL;\n    }\n    Py_INCREF(&LdCalculatorType);\n    PyModule_AddObject(module, \"LdCalculator\", (PyObject *) &LdCalculatorType);\n\n    /* CompressedMatrix type */\n    if (PyType_Ready(&CompressedMatrixType) < 0) {\n        return NULL;\n    }\n    Py_INCREF(&CompressedMatrixType);\n    PyModule_AddObject(module, \"CompressedMatrix\", (PyObject *) &CompressedMatrixType);\n\n    /* ViterbiMatrix type */\n    if 
(PyType_Ready(&ViterbiMatrixType) < 0) {\n        return NULL;\n    }\n    Py_INCREF(&ViterbiMatrixType);\n    PyModule_AddObject(module, \"ViterbiMatrix\", (PyObject *) &ViterbiMatrixType);\n\n    /* LsHmm type */\n    if (PyType_Ready(&LsHmmType) < 0) {\n        return NULL;\n    }\n    Py_INCREF(&LsHmmType);\n    PyModule_AddObject(module, \"LsHmm\", (PyObject *) &LsHmmType);\n\n    /* IdentitySegments type */\n    if (PyType_Ready(&IdentitySegmentsType) < 0) {\n        return NULL;\n    }\n    Py_INCREF(&IdentitySegmentsType);\n    PyModule_AddObject(module, \"IdentitySegments\", (PyObject *) &IdentitySegmentsType);\n\n    /* IdentitySegmentList type */\n    if (PyType_Ready(&IdentitySegmentListType) < 0) {\n        return NULL;\n    }\n    Py_INCREF(&IdentitySegmentListType);\n    PyModule_AddObject(\n        module, \"IdentitySegmentList\", (PyObject *) &IdentitySegmentListType);\n\n    /* ReferenceSequence type */\n    if (PyType_Ready(&ReferenceSequenceType) < 0) {\n        return NULL;\n    }\n    Py_INCREF(&ReferenceSequenceType);\n    PyModule_AddObject(module, \"ReferenceSequence\", (PyObject *) &ReferenceSequenceType);\n\n    /* Metadata schemas namedtuple type*/\n    if (PyStructSequence_InitType2(&MetadataSchemas, &metadata_schemas_desc) < 0) {\n        return NULL;\n    };\n    Py_INCREF(&MetadataSchemas);\n    PyModule_AddObject(module, \"MetadataSchemas\", (PyObject *) &MetadataSchemas);\n\n    /* Errors and constants */\n    TskitException = PyErr_NewException(\"_tskit.TskitException\", NULL, NULL);\n    Py_INCREF(TskitException);\n    PyModule_AddObject(module, \"TskitException\", TskitException);\n    TskitLibraryError = PyErr_NewException(\"_tskit.LibraryError\", TskitException, NULL);\n    Py_INCREF(TskitLibraryError);\n    PyModule_AddObject(module, \"LibraryError\", TskitLibraryError);\n    TskitFileFormatError = PyErr_NewException(\"_tskit.FileFormatError\", NULL, NULL);\n    Py_INCREF(TskitFileFormatError);\n    PyModule_AddObject(module, 
\"FileFormatError\", TskitFileFormatError);\n    TskitVersionTooNewError\n        = PyErr_NewException(\"_tskit.VersionTooNewError\", TskitException, NULL);\n    Py_INCREF(TskitVersionTooNewError);\n    PyModule_AddObject(module, \"VersionTooNewError\", TskitVersionTooNewError);\n    TskitVersionTooOldError\n        = PyErr_NewException(\"_tskit.VersionTooOldError\", TskitException, NULL);\n    Py_INCREF(TskitVersionTooOldError);\n    PyModule_AddObject(module, \"VersionTooOldError\", TskitVersionTooOldError);\n    TskitIdentityPairsNotStoredError\n        = PyErr_NewException(\"_tskit.IdentityPairsNotStoredError\", TskitException, NULL);\n    Py_INCREF(TskitIdentityPairsNotStoredError);\n    PyModule_AddObject(\n        module, \"IdentityPairsNotStoredError\", TskitIdentityPairsNotStoredError);\n    TskitIdentitySegmentsNotStoredError = PyErr_NewException(\n        \"_tskit.IdentitySegmentsNotStoredError\", TskitException, NULL);\n    Py_INCREF(TskitIdentitySegmentsNotStoredError);\n    PyModule_AddObject(\n        module, \"IdentitySegmentsNotStoredError\", TskitIdentitySegmentsNotStoredError);\n    TskitNoSampleListsError\n        = PyErr_NewException(\"_tskit.NoSampleListsError\", TskitException, NULL);\n    Py_INCREF(TskitNoSampleListsError);\n    PyModule_AddObject(module, \"NoSampleListsError\", TskitNoSampleListsError);\n\n    PyModule_AddIntConstant(module, \"NULL\", TSK_NULL);\n    PyModule_AddIntConstant(module, \"MISSING_DATA\", TSK_MISSING_DATA);\n\n    PyObject *unknown_time = PyFloat_FromDouble(TSK_UNKNOWN_TIME);\n    PyModule_AddObject(module, \"UNKNOWN_TIME\", unknown_time);\n\n    /* Node flags */\n    PyModule_AddIntConstant(module, \"NODE_IS_SAMPLE\", TSK_NODE_IS_SAMPLE);\n    /* Tree flags */\n    PyModule_AddIntConstant(module, \"NO_SAMPLE_COUNTS\", TSK_NO_SAMPLE_COUNTS);\n    PyModule_AddIntConstant(module, \"SAMPLE_LISTS\", TSK_SAMPLE_LISTS);\n    /* Directions */\n    PyModule_AddIntConstant(module, \"FORWARD\", TSK_DIR_FORWARD);\n    
PyModule_AddIntConstant(module, \"REVERSE\", TSK_DIR_REVERSE);\n\n    PyModule_AddStringConstant(module, \"TIME_UNITS_UNKNOWN\", TSK_TIME_UNITS_UNKNOWN);\n    PyModule_AddStringConstant(\n        module, \"TIME_UNITS_UNCALIBRATED\", TSK_TIME_UNITS_UNCALIBRATED);\n\n    return module;\n}\n"
  },
  {
    "path": "python/benchmark/config.yaml",
    "content": "setup: |\n  import tskit\n\nbenchmarks:\n  - code: ts = tskit.load(\"{filename}\")\n    parameters:\n      filename: &files\n        - \"tiny.trees\"\n        - \"bench.trees\"\n\n  - code: ts.dump(\"/dev/null\");\"{filename}\"\n    setup: |\n      ts = tskit.load(\"{filename}\")\n    parameters:\n      filename: *files\n\n  - code: ts.write_vcf(null)\n    #, site_mask=site_mask, sample_mask=sample_mask)\n    setup: |\n      import numpy\n      ts = tskit.load(\"bench.trees\")\n      tables = ts.tables\n      tables.migrations.clear()\n      ts = tables.tree_sequence()\n      ts = ts.simplify(samples=list(range(1000)))\n      null = open(\"/dev/null\", \"w\")\n\n  - code: tree = ts.first();\"{filename}\"\n    setup: ts = tskit.load(\"{filename}\")\n    parameters:\n      filename: *files\n\n  - name: tree.seek()\n# We can't just repeatedly seek to the same position as this will be a noop,\n# so we go back and forth.\n    code: |\n      tree.seek(pos)\n      pos = 0 if pos == 500_000 else 500_000\n    setup: |\n      ts = tskit.load(\"bench.trees\")\n      tree = ts.first()\n      pos = 500_000\n\n  - code: \"for _ in ts.trees(): pass;'{filename}'\"\n    setup: ts = tskit.load(\"{filename}\")\n    parameters:\n      filename: *files\n\n  - code: tree.{array}\n    setup: |\n      ts = tskit.load(\"bench.trees\")\n      tree = ts.first()\n    parameters:\n      array: &tree_arrays\n        - parent_array\n        - left_child_array\n        - right_child_array\n        - left_sib_array\n        - right_sib_array\n        - num_children_array\n        - edge_array\n\n  - code: tree.{array}(42);\n    setup: |\n      ts = tskit.load(\"bench.trees\")\n      tree = ts.first()\n    parameters:\n      array:\n        - parent\n        - left_child\n        - right_child\n        - left_sib\n        - right_sib\n        - num_children\n        - edge\n\n  - code: tree.{traversal_order}()\n    setup: |\n      ts = tskit.load(\"bench.trees\")\n      tree = 
ts.first()\n    parameters:\n      traversal_order: &traversal_orders\n        - postorder\n        - preorder\n        - timeasc\n        - timedesc\n\n  - code: \"for v in ts.variants(): pass;'{filename}'\"\n    setup: ts = tskit.load(\"{filename}\")\n    parameters:\n      filename: *files\n\n  - code: \"ts.genotype_matrix();'{filename}'\"\n    setup: |\n      ts = tskit.load(\"{filename}\")\n      if ts.num_samples > 10_000:\n          tables = ts.tables\n          tables.migrations.clear()\n          ts = tables.tree_sequence()\n          ts = ts.simplify(samples=list(range(1000)))\n    parameters:\n      filename: *files\n\n  - code: \"for row in ts.{table}(): pass\"\n    setup: ts = tskit.load(\"bench.trees\")\n    parameters:\n      table: &tables\n        - nodes\n        - edges\n        - sites\n        - mutations\n        - populations\n        - individuals\n        - migrations\n        - provenances\n\n  - code: \"for row in ts.populations(): {decode_metadata}\"\n    setup : |\n      tc = tskit.TableCollection(1)\n      tc.populations.metadata_schema = tskit.MetadataSchema({{'codec':'json'}})\n      for i in range(1000):\n        tc.populations.add_row(metadata={{'a': i}})\n      ts = tc.tree_sequence()\n    parameters:\n      decode_metadata:\n        - \"pass\"\n        - \"row.metadata\"\n\n  - code: ts.{table}(1)\n    setup: |\n      ts = tskit.load(\"bench.trees\")\n    parameters:\n      table:\n        - node\n        - edge\n        - site\n        - mutation\n        - population\n        - individual\n        - migration\n        - provenance\n\n  - code: ts.tables\n    setup: ts = tskit.load(\"bench.trees\")\n\n  - code: tables.{table}\n    setup: |\n      ts = tskit.load(\"bench.trees\")\n      tables = ts.tables\n    parameters:\n      table: *tables\n\n  - code: x = {table}.{column}\n    setup: |\n      ts = tskit.load(\"bench.trees\")\n      tables = ts.tables\n      {table} = tables.{table}\n    parameters: &table_columns\n      
table:\n        nodes:\n          column:\n            - flags\n            - time\n            - population\n            - individual\n            - metadata\n            - metadata_offset\n        individuals:\n          column:\n            - flags\n            - location\n            - location_offset\n            - parents\n            - metadata\n        edges:\n          column:\n            - left\n            - right\n            - parent\n            - child\n            - metadata\n            - metadata_offset\n        sites:\n          column:\n            - position\n            - ancestral_state\n            - ancestral_state_offset\n            - metadata\n            - metadata_offset\n        mutations:\n          column:\n            - site\n            - node\n            - parent\n            - time\n            - derived_state\n            - derived_state_offset\n            - metadata\n            - metadata_offset\n        migrations:\n          column:\n            - left\n            - right\n            - node\n            - source\n            - dest\n            - time\n            - metadata\n            - metadata_offset\n        populations:\n          column:\n            - metadata\n            - metadata_offset\n        provenances:\n          column:\n            - timestamp\n            - timestamp_offset\n            - record\n            - record_offset\n"
  },
  {
    "path": "python/benchmark/run-for-all-releases.py",
    "content": "import json\nimport subprocess\nfrom distutils.version import StrictVersion\nfrom urllib.request import urlopen\n\nimport tqdm\n\n\ndef versions(package_name):\n    url = f\"https://pypi.org/pypi/{package_name}/json\"\n    data = json.load(urlopen(url))\n    return sorted(data[\"releases\"].keys(), key=StrictVersion)\n\n\ndef sh(command):\n    subprocess.run(command, check=True, shell=True)\n\n\nif __name__ == \"__main__\":\n    try:\n        sh(\"python -m venv _bench-temp-venv\")\n        sh(\"_bench-temp-venv/bin/pip install -r ../requirements/development.txt\")\n        versions = [\n            v\n            for v in versions(\"tskit\")\n            # We don't want alphas, betas or two broken versions:\n            if \"a\" not in v and \"b\" not in v and v not in (\"0.0.0\", \"0.1.0\")\n        ]\n        for v in tqdm.tqdm(versions):\n            sh(f\"_bench-temp-venv/bin/pip install tskit=={v}\")\n            sh(\"_bench-temp-venv/bin/python run.py\")\n    finally:\n        sh(\"rm -rf _bench-temp-venv\")\n"
  },
  {
    "path": "python/benchmark/run.py",
    "content": "import json\nimport os.path\nimport platform\nimport sys\nimport timeit\nfrom pathlib import Path\n\nimport click\nimport psutil\nimport tqdm\nimport yaml\nfrom matplotlib.colors import LinearSegmentedColormap\nfrom si_prefix import si_format\n\ntskit_dir = Path(__file__).parent.parent\nsys.path.append(str(tskit_dir))\nimport msprime  # noqa: E402\n\nimport tskit  # noqa: E402\n\nwith open(\"config.yaml\") as f:\n    config = yaml.load(f, Loader=yaml.FullLoader)\n\n\ndef system_info():\n    ret = {}\n    uname = platform.uname()\n    for attr in [\"system\", \"node\", \"release\", \"version\", \"machine\", \"processor\"]:\n        ret[attr] = getattr(uname, attr)\n    ret[\"python_version\"] = sys.version\n    cpufreq = psutil.cpu_freq()\n    ret[\"physical_cores\"] = psutil.cpu_count(logical=False)\n    ret[\"total_cores\"] = psutil.cpu_count(logical=True)\n    ret[\"max_frequency\"] = cpufreq.max\n    ret[\"min_frequency\"] = cpufreq.min\n    ret[\"current_frequency\"] = cpufreq.current\n    ret[\"cpu_usage_per_core\"] = [\n        percentage for percentage in psutil.cpu_percent(percpu=True, interval=1)\n    ]\n    ret[\"total_cpu_usage\"] = psutil.cpu_percent()\n    return ret\n\n\ndef make_file():\n    benchmark_trees = tskit_dir / \"benchmark\" / \"bench.trees\"\n    if not os.path.exists(benchmark_trees):\n        print(\"Generating benchmark trees...\")\n        demography = msprime.Demography()\n        demography.add_population(name=\"A\", initial_size=10_000)\n        demography.add_population(name=\"B\", initial_size=5_000)\n        demography.add_population(name=\"C\", initial_size=1_000)\n        demography.add_population_split(time=1000, derived=[\"A\", \"B\"], ancestral=\"C\")\n        ts = msprime.sim_ancestry(\n            samples={\"A\": 25000, \"B\": 25000},\n            demography=demography,\n            sequence_length=1_000_000,\n            random_seed=42,\n            recombination_rate=0.0000001,\n            
record_migrations=True,\n            record_provenance=True,\n        )\n        ts = msprime.sim_mutations(ts, rate=0.000001, random_seed=42)\n        ts.dump(benchmark_trees)\n        ts = msprime.sim_ancestry(\n            samples={\"A\": 1, \"B\": 1},\n            demography=demography,\n            sequence_length=1,\n            random_seed=42,\n            recombination_rate=0,\n            record_migrations=True,\n            record_provenance=True,\n        )\n        ts = msprime.sim_mutations(ts, rate=0.001, random_seed=42)\n        ts.dump(tskit_dir / \"benchmark\" / \"tiny.trees\")\n\n\ndef autotime(setup, code):\n    t = timeit.Timer(setup=setup, stmt=code)\n    try:\n        one_run = t.timeit(number=1)\n    except Exception as e:\n        print(f\"{code}: Error running benchmark: {e}\")\n        return None\n    num_trials = int(max(1, 2 / one_run))\n    return one_run, num_trials, t.timeit(number=num_trials) / num_trials\n\n\ndef run_benchmarks(keyword_filter):\n    results = {}\n    for benchmark in tqdm.tqdm(config[\"benchmarks\"]):\n        bench_name = benchmark.get(\"name\", benchmark[\"code\"])\n        if keyword_filter not in bench_name:\n            continue\n        params = benchmark.get(\"parameters\", {\"noop\": [None]})\n\n        # Expand the parameters\n        def sub_expand(context, name, d):\n            if isinstance(d, dict):\n                ret = []\n                for k, v in d.items():\n                    new_context = {**{k: v for k, v in context.items()}, name: k}\n                    for k2, v2 in v.items():\n                        ret += sub_expand(new_context, k2, v2)\n                return ret\n            elif isinstance(d, list):\n                return [\n                    {**{k: v for k, v in context.items()}, name: value} for value in d\n                ]\n            else:\n                raise ValueError(f\"Invalid parameter type: {type(d)}-{d}\")\n\n        expanded_params = []\n        for k, v in 
params.items():\n            expanded_params += sub_expand({}, k, v)\n\n        for values in expanded_params:\n            setup = (\n                f\"import sys;sys.path.append('{tskit_dir}');\"\n                + config[\"setup\"].replace(\"\\n\", \"\\n\")\n                + benchmark.get(\"setup\", \"\").replace(\"\\n\", \"\\n\").format(**values)\n            )\n            code = benchmark[\"code\"].replace(\"\\n\", \"\\n\").format(**values)\n            result = autotime(setup, code)\n            if result is not None:\n                one_run, num_trials, avg = result\n                results.setdefault(bench_name, {})[code] = {\n                    \"one_run\": one_run,\n                    \"num_trials\": num_trials,\n                    \"avg\": avg,\n                }\n\n    return results\n\n\ndef generate_report(all_versions_results):\n    all_benchmarks = {}\n    for _version, results in all_versions_results.items():\n        for benchmark, values in results[\"tskit_benchmarks\"].items():\n            for code in values.keys():\n                all_benchmarks.setdefault(benchmark, set()).add(code)\n\n    all_versions = sorted(all_versions_results.keys())\n\n    cmap = LinearSegmentedColormap.from_list(\"rg\", [\"g\", \"w\", \"r\"], N=256)\n\n    with open(tskit_dir / \"benchmark\" / \"bench-results.html\", \"w\") as f:\n        f.write(\"<html><body>\\n\")\n        f.write(\"<h1>tskit benchmark results</h1>\\n\")\n        f.write(\"<table>\\n\")\n        f.write(\"<tr><th></th>\")\n        for version in all_versions:\n            f.write(f\"<th>{version}</th>\")\n        f.write(\"</tr>\\n\")\n        for benchmark in sorted(all_benchmarks.keys()):\n            values = all_benchmarks[benchmark]\n            indent = False\n            if len(values) > 1:\n                indent = True\n                f.write(\n                    f\"<tr>\"\n                    f\"  <td style='font-family: monospace'>\"\n                    f\"    {benchmark}\"\n  
                  f\"  </td>\"\n                    f\"</tr>\\n\"\n                )\n            for code in sorted(values):\n                f.write(\n                    f\"<tr><td style='font-family: monospace;\"\n                    f\"padding-left: {'10px' if indent else 'inherit'}'>{code}</td>\"\n                )\n                last_avg = None\n                for version in all_versions:\n                    try:\n                        avg = all_versions_results[version][\"tskit_benchmarks\"][\n                            benchmark\n                        ][code][\"avg\"]\n                        if last_avg is not None:\n                            percent_change = 100 * ((avg - last_avg) / last_avg)\n                            col = cmap(int(((percent_change / 100) * 128) + 128))\n                            f.write(\n                                f\"<td style='background-color: rgba({col[0] * 255},\"\n                                f\" {col[1] * 255}, {col[2] * 255}, 1)'>\"\n                            )\n\n                            f.write(f\"{si_format(avg)} ({percent_change:.1f}%)\")\n                        else:\n                            f.write(f\"<td>{si_format(avg)}</td>\")\n                        last_avg = avg\n                    except KeyError:\n                        f.write(\"<td>N/A</td>\")\n\n                f.write(\"</tr>\\n\")\n        f.write(\"</table>\\n\")\n\n\ndef print_result(results):\n    max_name_length = max(len(name) for name in results.keys()) + 1\n    for _bench, param_results in results.items():\n        for name, data in param_results.items():\n            print(name.ljust(max_name_length), si_format(data[\"avg\"]))\n\n\n@click.command()\n@click.option(\n    \"--keyword_filter\",\n    \"-k\",\n    type=str,\n    default=\"\",\n    help=\"Only benchmarks with a name containing this string will be run\",\n)\n@click.option(\"--print_results\", \"-p\", is_flag=True, help=\"Print results to STDOUT\")\ndef 
run_benchmark_and_save(keyword_filter, print_results):\n    print(\"Benchmarking tskit version:\", tskit._version.tskit_version)\n    make_file()\n    results = {}\n    results[\"system\"] = system_info()\n    results[\"tskit_benchmarks\"] = run_benchmarks(keyword_filter)\n\n    if print_results:\n        print_result(results[\"tskit_benchmarks\"])\n\n    all_versions_results = {}\n    results_json = tskit_dir / \"benchmark\" / \"bench-results.json\"\n    if os.path.exists(results_json):\n        with open(results_json) as f:\n            all_versions_results = json.load(f)\n\n    all_versions_results[tskit._version.tskit_version] = results\n    with open(results_json, \"w\") as f:\n        json.dump(all_versions_results, f, indent=2)\n    generate_report(all_versions_results)\n\n    sys.exit(0)\n\n\nif __name__ == \"__main__\":\n    run_benchmark_and_save()\n"
  },
  {
    "path": "python/lwt_interface/CHANGELOG.rst",
    "content": "--------------------\n[0.1.4] - 2021-09-02\n--------------------\n\n- Offset columns are now 64 bit in tskit. For compatibility, offset arrays that fit into\n  32bits will be a 32bit array in the dict encoding. Large arrays that require 64 bit\n  will fail to ``fromdict`` in previous versions with the error:\n  ``TypeError: Cannot cast array data from dtype('uint64') to dtype('uint32') according\n  to the rule 'safe'``\n  A ``force_offset_64`` option on ``asdict`` allows the easy creation of 64bit arrays for\n  testing.\n\n--------------------\n[0.1.3] - 2021-02-01\n--------------------\n\n- Added optional ``parents`` to individual table.\n\n--------------------\n[0.1.2] - 2020-10-22\n--------------------\n\n - Added optional top-level key ``indexes`` which has contains ``edge_insertion_order`` and\n   ``edge_removal_order``"
  },
  {
    "path": "python/lwt_interface/Makefile",
    "content": "\nall: cmodule\n\nallchecks: example_c_module.c\n\tCFLAGS=\"-std=c99 --coverage -Wall -Wextra -Werror -Wno-unused-parameter -Wno-cast-function-type\" \\\n\tuv run --project=../ python setup.py build_ext --inplace\n\ncmodule: example_c_module.c\n\tuv run --project=../ python setup.py build_ext --inplace\n\nclean:\n\trm -f *.so *.o tags\n\trm -fR build\n"
  },
  {
    "path": "python/lwt_interface/README.md",
    "content": "# LightweightTableCollection interface\n\nThe files in this directory define the LightweightTableCollection\ninterface used to safely interchange table collection data between \ndifferent compiled instances of the tskit C library. This is a \n*very* specialised use-case, and unless you are using the tskit\nC API in your own compiled Python module (either via Cython\nor the Python C API), you almost certainly don't need to use\nthis code.\n\n## Overview\n\nTo allow a tskit table collection to be transferred from one compiled Python\nextension module to another the table collection is converted to a `dict` of\nbasic python types and numpy arrays. This is then converted back in the receiving\nmodule. `tskit_lwt_interface.h` provides a function `register_lwt_class` that \ndefines a Python class `LightweightTableCollection` that performs these conversions\nwith methods `asdict` and `fromdict`. These methods mirror the `asdict` and `fromdict`\nmethods on `tskit.TableCollection`.\n\n## Usage\nAn example C module skeleton `example_c_module.c` is provided, which shows passing tables\nto the C module. See `test_example_c_module.py` for the python example usage\nof the example module.\n\nTo add the \n`LightweightTableCollection` type to your module you include `tskit_lwt_interface.h`\nand then call `register_lwt_class` on your C Python module object. You can then convert\nto and from the lightweight table collection in Python, for example to convert a tskit\n`TableCollection` to a `LightweightTableCollection`:\n```python\ntables = tskit.TableCollection(1)\nlwt = example_c_module.LightweightTableCollection()\nlwt.fromdict(tables.asdict())\n```\nand vice-versa:\n```python\ntc = tskit.TableCollection(lwt.asdict())\n```\nIn C you can access the tables in a `LightweightTableCollection` instance that is passed \nto your function, as shown in the `example_receiving` function in `example_c_module.c`. 
\nNote the requirement to check for errors from tskit functions and to call\n`handle_tskit_error` to set a Python error, returning `NULL` to Python to indicate error.\n\nTables can also be modified in the extension code as in `example_modifying`. We recommend\ncreating table collections in Python then passing them to C for modification rather than\ncreating them in C and returning them. This avoids complex managing of object lifecycles\nin C code.\n\n\n\n\n    \n\n\n\n"
  },
  {
    "path": "python/lwt_interface/cython_example/Makefile",
    "content": "all: compile run\n\ncompile:\n\tuv run --project=../../ --group=test-lwt setup.py build_ext --inplace\n\nrun:\n\tuv run --project=../../ --group=test-lwt python -c \"import example; example.main()\"\n\nclean:\n\trm -rf build/\n\trm -rf tskit_cython_example.egg-info/\n\trm -f example.c\n\trm -f *.so\n"
  },
  {
    "path": "python/lwt_interface/cython_example/_lwtc.c",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2020 Tskit Developers\n * Copyright (c) 2015-2018 University of Oxford\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n// Turn off clang-formatting for this file as turning off formatting\n// for specific bits will make it more confusing.\n// clang-format off\n\n#define PY_SSIZE_T_CLEAN\n#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION\n\n#include <Python.h>\n#include <structmember.h>\n#include <numpy/arrayobject.h>\n\n#include \"kastore.h\"\n#include \"tskit.h\"\n\n#include \"tskit_lwt_interface.h\"\n\nstatic PyMethodDef lwt_methods[] = {\n    { NULL, NULL, 0, NULL } /* sentinel */\n};\n\nstatic struct PyModuleDef lwt_module = {\n    .m_base = PyModuleDef_HEAD_INIT,\n    .m_name = \"_lwt\",\n    .m_doc = \"tskit LightweightTableCollection\",\n    .m_size = -1,\n    .m_methods = lwt_methods };\n\nPyMODINIT_FUNC\nPyInit__lwtc(void)\n{\n    PyObject *module = 
PyModule_Create(&lwt_module);\n    if (module == NULL) {\n        return NULL;\n    }\n    import_array();\n    if (register_lwt_class(module) != 0) {\n        return NULL;\n    }\n    return module;\n}\n"
  },
  {
    "path": "python/lwt_interface/cython_example/example.pyx",
    "content": "from libc.stdint cimport uint32_t\nimport _lwtc\nimport tskit\n\ncdef extern from \"tskit.h\" nogil:\n    ctypedef uint32_t tsk_flags_t\n    ctypedef struct tsk_table_collection_t:\n        pass\n    ctypedef struct tsk_treeseq_t:\n        pass\n    int tsk_treeseq_init(tsk_treeseq_t *self, const tsk_table_collection_t *tables, tsk_flags_t options)\n    int tsk_treeseq_free(tsk_treeseq_t *self)\n    int tsk_table_collection_build_index(tsk_table_collection_t *self, tsk_flags_t options)\n    ctypedef struct tsk_tree_t:\n        pass\n    int tsk_tree_init(tsk_tree_t *self, const tsk_treeseq_t *ts, tsk_flags_t options)\n    int tsk_tree_first(tsk_tree_t *self)\n    int tsk_tree_next(tsk_tree_t *self)\n    int tsk_tree_last(tsk_tree_t *self)\n    int tsk_tree_prev(tsk_tree_t *self)\n    int tsk_tree_get_num_roots(tsk_tree_t *self)\n    int tsk_tree_free(tsk_tree_t *self)\n    const char *tsk_strerror(int err)\n\ncdef extern:\n    ctypedef class _lwtc.LightweightTableCollection [object LightweightTableCollection]:\n        cdef tsk_table_collection_t *tables\n\ndef check_tsk_error(val):\n    if val < 0:\n        raise RuntimeError(tsk_strerror(val))\n\ndef iterate_trees(pyts: tskit.TreeSequence):\n    lwtc = LightweightTableCollection()\n    lwtc.fromdict(pyts.dump_tables().asdict())        \n    cdef tsk_treeseq_t ts\n    err = tsk_treeseq_init(&ts, lwtc.tables, 0)\n    check_tsk_error(err)\n    cdef tsk_tree_t tree\n    ret = tsk_tree_init(&tree, &ts, 0)\n    check_tsk_error(ret)\n\n    print(\"Iterate forwards\")\n    cdef int tree_iter = tsk_tree_first(&tree)\n    while tree_iter == 1:\n        print(\"\\ttree has %d roots\" % (tsk_tree_get_num_roots(&tree)))\n        tree_iter = tsk_tree_next(&tree)\n    check_tsk_error(tree_iter)\n\n    print(\"Iterate backwards\")\n    tree_iter = tsk_tree_last(&tree)\n    while tree_iter == 1:\n        print(\"\\ttree has %d roots\" % (tsk_tree_get_num_roots(&tree)))\n        tree_iter = tsk_tree_prev(&tree)\n   
 check_tsk_error(tree_iter)\n\n    tsk_tree_free(&tree)\n    tsk_treeseq_free(&ts)\n\ndef main():\n    import msprime as msp  # (msprime could be compiled against a different version of tskit)\n    ts = msp.simulate(sample_size=5, length=100, recombination_rate=.01)  \n    iterate_trees(ts)\n"
  },
  {
    "path": "python/lwt_interface/cython_example/pyproject.toml",
    "content": "[build-system]\nrequires = [\"setuptools>=64\", \"wheel\", \"Cython\", \"numpy\"]\nbuild-backend = \"setuptools.build_meta\"\n\n[project]\nname = \"tskit_cython_example\"\nversion = \"0.0.1\"\ndescription = \"Cython example for tskit\"\nauthors = [{name = \"tskit developers\"}]\ndependencies = [\"numpy\", \"Cython\"]\n\n[tool.setuptools]\npackages = []\n"
  },
  {
    "path": "python/lwt_interface/cython_example/setup.py",
    "content": "import glob\nimport os\n\nimport numpy as np\nfrom Cython.Build import cythonize\nfrom setuptools import setup\nfrom setuptools.extension import Extension\n\nTSKIT_BASE = os.path.join(os.path.dirname(__file__), \"..\", \"..\", \"..\")\nTSKIT_C_PATH = os.path.join(TSKIT_BASE, \"c\")\nTSKIT_PY_PATH = os.path.join(TSKIT_BASE, \"python/lwt_interface\")\nKASTORE_PATH = os.path.join(TSKIT_BASE, \"c\", \"subprojects\", \"kastore\")\ninclude_dirs = [TSKIT_C_PATH, TSKIT_PY_PATH, KASTORE_PATH, np.get_include()]\n\ntskit_sourcefiles = list(glob.glob(os.path.join(TSKIT_C_PATH, \"tskit\", \"*.c\"))) + [\n    os.path.join(KASTORE_PATH, \"kastore.c\")\n]\n\nextensions = [\n    Extension(\n        \"_lwtc\",\n        [\"_lwtc.c\"] + tskit_sourcefiles,\n        language=\"c\",\n        include_dirs=include_dirs,\n    ),\n    Extension(\n        \"example\",\n        [\"example.pyx\"] + tskit_sourcefiles,\n        language=\"c\",\n        include_dirs=include_dirs,\n    ),\n]\n\nextensions = cythonize(extensions, language_level=3)\n\nsetup(\n    name=\"tskit_cython_example\",\n    version=\"0.0.1\",\n    ext_modules=extensions,\n)\n"
  },
  {
    "path": "python/lwt_interface/dict_encoding_testlib.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2024 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTest definitions for the low-level LightweightTableCollection class\ndefined here. 
These tests are not intended to be executed directly,\nbut should be imported into another test module that imports a\ncompiled module exporting the LightweightTableCollection class.\nSee the test_example_c_module file for an example.\n\"\"\"\n\nimport copy\n\nimport kastore\nimport msprime\nimport numpy as np\nimport pytest\n\nimport tskit\nimport tskit.util as util\n\nlwt_module = None\n\nNON_UTF8_STRING = \"\\ud861\\udd37\"\n\n\n@pytest.fixture(scope=\"session\")\ndef full_ts():\n    \"\"\"\n    A tree sequence with data in all fields - duplicated from tskit's conftest.py\n    as other test suites using this file will not have that fixture defined.\n    \"\"\"\n    demography = msprime.Demography()\n    demography.add_population(initial_size=100, name=\"A\")\n    demography.add_population(initial_size=100, name=\"B\")\n    demography.add_population(initial_size=100, name=\"C\")\n    demography.add_population_split(time=10, ancestral=\"C\", derived=[\"A\", \"B\"])\n\n    ts = msprime.sim_ancestry(\n        {\"A\": 5, \"B\": 5},\n        demography=demography,\n        random_seed=1,\n        sequence_length=10,\n        record_migrations=True,\n    )\n    assert ts.num_migrations > 0\n    assert ts.num_individuals > 0\n    ts = msprime.sim_mutations(ts, rate=0.1, random_seed=2)\n    assert ts.num_mutations > 0\n    tables = ts.dump_tables()\n    tables.individuals.clear()\n\n    for ind in ts.individuals():\n        tables.individuals.add_row(flags=0, location=[ind.id, ind.id], parents=[-1, -1])\n\n    for name, table in tables.table_name_map.items():\n        if name != \"provenances\":\n            table.metadata_schema = tskit.MetadataSchema({\"codec\": \"json\"})\n            metadatas = [f\"n_{name}_{u}\" for u in range(len(table))]\n            metadata, metadata_offset = tskit.pack_strings(metadatas)\n            table.set_columns(\n                **{\n                    **table.asdict(),\n                    \"metadata\": metadata,\n                    
\"metadata_offset\": metadata_offset,\n                }\n            )\n    tables.metadata_schema = tskit.MetadataSchema({\"codec\": \"json\"})\n    tables.metadata = {\"A\": \"Test metadata\"}\n\n    tables.reference_sequence.data = \"A\" * int(tables.sequence_length)\n    tables.reference_sequence.url = \"https://example.com/sequence\"\n    tables.reference_sequence.metadata_schema = tskit.MetadataSchema.permissive_json()\n    tables.reference_sequence.metadata = {\"A\": \"Test metadata\"}\n\n    # Add some more provenance so we have enough rows for the offset deletion test.\n    for j in range(10):\n        tables.provenances.add_row(timestamp=\"x\" * j, record=\"y\" * j)\n    return tables.tree_sequence()\n\n\n# The ts above is used for the whole test session, but our tests need fresh tables to\n# modify\n@pytest.fixture\ndef tables(full_ts):\n    return full_ts.dump_tables()\n\n\ndef test_check_ts_full(tmp_path, full_ts):\n    \"\"\"\n    Check that the example ts has data in all fields\n    \"\"\"\n    full_ts.dump(tmp_path / \"tables\")\n    store = kastore.load(tmp_path / \"tables\")\n    for v in store.values():\n        assert v.nbytes > 0\n\n\nclass TestEncodingVersion:\n    def test_version(self):\n        lwt = lwt_module.LightweightTableCollection()\n        assert lwt.asdict()[\"encoding_version\"] == (1, 6)\n\n\nclass TestRoundTrip:\n    \"\"\"\n    Tests if we can do a simple round trip on simulated data.\n    \"\"\"\n\n    def verify(self, tables):\n        lwt = lwt_module.LightweightTableCollection()\n        lwt.fromdict(tables.asdict())\n        other_tables = tskit.TableCollection.fromdict(lwt.asdict())\n        tables.assert_equals(other_tables)\n\n    def test_simple(self):\n        ts = msprime.simulate(10, mutation_rate=1, random_seed=2)\n        self.verify(ts.dump_tables())\n\n    def test_empty(self):\n        tables = tskit.TableCollection(sequence_length=1)\n        self.verify(tables)\n\n    def test_individuals(self):\n        n 
= 10\n        ts = msprime.simulate(n, mutation_rate=1, random_seed=2)\n        tables = ts.dump_tables()\n        for j in range(n):\n            tables.individuals.add_row(\n                flags=j, location=(j, j), parents=(j, j), metadata=b\"x\" * j\n            )\n        self.verify(tables)\n\n    def test_sequence_length(self):\n        ts = msprime.simulate(\n            10, recombination_rate=0.1, mutation_rate=1, length=0.99, random_seed=2\n        )\n        self.verify(ts.dump_tables())\n\n    def test_migration(self):\n        pop_configs = [msprime.PopulationConfiguration(5) for _ in range(2)]\n        migration_matrix = [[0, 1], [1, 0]]\n        ts = msprime.simulate(\n            population_configurations=pop_configs,\n            migration_matrix=migration_matrix,\n            mutation_rate=1,\n            record_migrations=True,\n            random_seed=1,\n        )\n        self.verify(ts.dump_tables())\n\n    def test_example(self, tables):\n        tables.metadata_schema = tskit.MetadataSchema(\n            {\n                \"codec\": \"struct\",\n                \"type\": \"object\",\n                \"properties\": {\"top-level\": {\"type\": \"string\", \"binaryFormat\": \"50p\"}},\n            }\n        )\n        tables.metadata = {\"top-level\": \"top-level-metadata\"}\n        for table in tskit.TABLE_NAMES:\n            t = getattr(tables, table)\n            if hasattr(t, \"metadata_schema\"):\n                t.packset_metadata([f\"{table}-{i}\".encode() for i in range(t.num_rows)])\n                t.metadata_schema = tskit.MetadataSchema(\n                    {\n                        \"codec\": \"struct\",\n                        \"type\": \"object\",\n                        \"properties\": {table: {\"type\": \"string\", \"binaryFormat\": \"50p\"}},\n                    }\n                )\n\n        self.verify(tables)\n\n\nclass TestMissingData:\n    \"\"\"\n    Tests what happens when we have missing data in the encoded 
dict.\n    \"\"\"\n\n    def test_missing_sequence_length(self, tables):\n        d = tables.asdict()\n        del d[\"sequence_length\"]\n        lwt = lwt_module.LightweightTableCollection()\n        with pytest.raises(TypeError):\n            lwt.fromdict(d)\n\n    def test_missing_time_units(self, tables):\n        assert tables.time_units != \"\"\n        d = tables.asdict()\n        del d[\"time_units\"]\n        lwt = lwt_module.LightweightTableCollection()\n        lwt.fromdict(d)\n        tables = tskit.TableCollection.fromdict(lwt.asdict())\n        assert tables.time_units == tskit.TIME_UNITS_UNKNOWN\n\n    def test_missing_metadata(self, tables):\n        assert tables.metadata != b\"\"\n        d = tables.asdict()\n        del d[\"metadata\"]\n        lwt = lwt_module.LightweightTableCollection()\n        lwt.fromdict(d)\n        tables = tskit.TableCollection.fromdict(lwt.asdict())\n        # Empty byte field still gets interpreted by schema\n        assert tables.metadata == {}\n\n    def test_missing_metadata_schema(self, tables):\n        assert repr(tables.metadata_schema) != \"\"\n        d = tables.asdict()\n        del d[\"metadata_schema\"]\n        lwt = lwt_module.LightweightTableCollection()\n        lwt.fromdict(d)\n        tables = tskit.TableCollection.fromdict(lwt.asdict())\n        assert repr(tables.metadata_schema) == \"\"\n\n    def test_missing_tables(self, tables):\n        d = tables.asdict()\n        table_names = d.keys() - {\n            \"sequence_length\",\n            \"time_units\",\n            \"metadata\",\n            \"metadata_schema\",\n            \"encoding_version\",\n            \"indexes\",\n            \"reference_sequence\",\n        }\n        for table_name in table_names:\n            d = tables.asdict()\n            del d[table_name]\n            lwt = lwt_module.LightweightTableCollection()\n            with pytest.raises(TypeError):\n                lwt.fromdict(d)\n\n\nclass TestBadTypes:\n    \"\"\"\n 
   Tests for setting each column to a type that can't be converted to 1D numpy array.\n    \"\"\"\n\n    def verify_columns(self, value, tables):\n        d = tables.asdict()\n        table_names = set(d.keys()) - {\n            \"sequence_length\",\n            \"time_units\",\n            \"metadata\",\n            \"metadata_schema\",\n            \"encoding_version\",\n            \"indexes\",\n            \"reference_sequence\",\n        }\n        for table_name in table_names:\n            table_dict = d[table_name]\n            for colname in set(table_dict.keys()) - {\"metadata_schema\"}:\n                d_copy = dict(table_dict)\n                d_copy[colname] = value\n                lwt = lwt_module.LightweightTableCollection()\n                d = tables.asdict()\n                d[table_name] = d_copy\n                with pytest.raises(ValueError):\n                    lwt.fromdict(d)\n\n    def test_2d_array(self, tables):\n        self.verify_columns([[1, 2], [3, 4]], tables)\n\n    def test_str(self, tables):\n        self.verify_columns(\"aserg\", tables)\n\n    def test_bad_top_level_types(self, tables):\n        d = tables.asdict()\n        for key in set(d.keys()) - {\"encoding_version\", \"indexes\"}:\n            bad_type_dict = tables.asdict()\n            # A list should be a TypeError for both the tables and sequence_length\n            bad_type_dict[key] = [\"12345\"]\n            lwt = lwt_module.LightweightTableCollection()\n            with pytest.raises(TypeError):\n                lwt.fromdict(bad_type_dict)\n\n\nclass TestBadLengths:\n    \"\"\"\n    Tests for setting each column to a length incompatible with the table.\n    \"\"\"\n\n    def verify(self, num_rows, tables):\n        d = tables.asdict()\n        table_names = set(d.keys()) - {\n            \"sequence_length\",\n            \"time_units\",\n            \"metadata\",\n            \"metadata_schema\",\n            \"encoding_version\",\n            \"indexes\",\n    
        \"reference_sequence\",\n        }\n        for table_name in sorted(table_names):\n            table_dict = d[table_name]\n            for colname in set(table_dict.keys()) - {\"metadata_schema\"}:\n                d_copy = dict(table_dict)\n                d_copy[colname] = table_dict[colname][:num_rows].copy()\n                lwt = lwt_module.LightweightTableCollection()\n                d = tables.asdict()\n                d[table_name] = d_copy\n                with pytest.raises(ValueError):\n                    lwt.fromdict(d)\n\n    def test_two_rows(self, tables):\n        self.verify(2, tables)\n\n    def test_zero_rows(self, tables):\n        self.verify(0, tables)\n\n    def test_bad_index_length(self, tables):\n        for col in (\"insertion\", \"removal\"):\n            d = tables.asdict()\n            d[\"indexes\"][f\"edge_{col}_order\"] = d[\"indexes\"][f\"edge_{col}_order\"][:-1]\n            lwt = lwt_module.LightweightTableCollection()\n            with pytest.raises(\n                ValueError,\n                match=\"^edge_insertion_order and\"\n                \" edge_removal_order must be the same\"\n                \" length$\",\n            ):\n                lwt.fromdict(d)\n        d = tables.asdict()\n        for col in (\"insertion\", \"removal\"):\n            d[\"indexes\"][f\"edge_{col}_order\"] = d[\"indexes\"][f\"edge_{col}_order\"][:-1]\n        lwt = lwt_module.LightweightTableCollection()\n        with pytest.raises(\n            ValueError,\n            match=\"^edge_insertion_order and edge_removal_order must be\"\n            \" the same length as the number of edges$\",\n        ):\n            lwt.fromdict(d)\n\n\nclass TestParsingUtilities:\n    def test_missing_required(self, tables):\n        d = tables.asdict()\n        del d[\"sequence_length\"]\n        lwt = lwt_module.LightweightTableCollection()\n        with pytest.raises(TypeError, match=\"'sequence_length' is required\"):\n            
lwt.fromdict(d)\n\n    def test_string_bad_type(self, tables):\n        d = tables.asdict()\n        d[\"time_units\"] = b\"sdf\"\n        lwt = lwt_module.LightweightTableCollection()\n        with pytest.raises(TypeError, match=\"'time_units' is not a string\"):\n            lwt.fromdict(d)\n\n    def test_bytes_bad_type(self, tables):\n        d = tables.asdict()\n        d[\"metadata\"] = 1234\n        lwt = lwt_module.LightweightTableCollection()\n        with pytest.raises(TypeError, match=\"'metadata' is not bytes\"):\n            lwt.fromdict(d)\n\n    def test_dict_bad_type(self, tables):\n        d = tables.asdict()\n        d[\"nodes\"] = b\"sdf\"\n        lwt = lwt_module.LightweightTableCollection()\n        with pytest.raises(TypeError, match=\"'nodes' is not a dict\"):\n            lwt.fromdict(d)\n\n    def test_bad_strings(self, tables):\n        def verify_unicode_error(d):\n            lwt = lwt_module.LightweightTableCollection()\n            with pytest.raises(UnicodeEncodeError):\n                lwt.fromdict(d)\n\n        def verify_bad_string_type(d):\n            lwt = lwt_module.LightweightTableCollection()\n            with pytest.raises(TypeError):\n                lwt.fromdict(d)\n\n        d = tables.asdict()\n        for k, v in d.items():\n            if isinstance(v, str):\n                d_copy = copy.deepcopy(d)\n                d_copy[k] = NON_UTF8_STRING\n                verify_unicode_error(d_copy)\n                d_copy[k] = 12345\n                verify_bad_string_type(d_copy)\n            if isinstance(v, dict):\n                for kp, vp in v.items():\n                    if isinstance(vp, str):\n                        d_copy = copy.deepcopy(d)\n                        d_copy[k][kp] = NON_UTF8_STRING\n                        verify_unicode_error(d_copy)\n                        d_copy[k][kp] = 12345\n                        verify_bad_string_type(d_copy)\n\n\nclass TestRequiredAndOptionalColumns:\n    \"\"\"\n    Tests 
that specifying None for some columns will give the intended\n    outcome.\n    \"\"\"\n\n    def verify_required_columns(self, tables, table_name, required_cols):\n        d = tables.asdict()\n        table_dict = {col: None for col in d[table_name].keys()}\n        for col in required_cols:\n            table_dict[col] = d[table_name][col]\n        lwt = lwt_module.LightweightTableCollection()\n        d[table_name] = table_dict\n        lwt.fromdict(d)\n        other = lwt.asdict()\n        for col in required_cols:\n            assert np.array_equal(other[table_name][col], table_dict[col])\n\n        # Any one of these required columns as None gives an error.\n        for col in required_cols:\n            d = tables.asdict()\n            d_copy = copy.deepcopy(table_dict)\n            d_copy[col] = None\n            d[table_name] = d_copy\n            lwt = lwt_module.LightweightTableCollection()\n            with pytest.raises(TypeError):\n                lwt.fromdict(d)\n\n        # Removing any one of these required columns gives an error.\n        for col in required_cols:\n            d = tables.asdict()\n            d_copy = copy.deepcopy(table_dict)\n            del d_copy[col]\n            d[table_name] = d_copy\n            lwt = lwt_module.LightweightTableCollection()\n            with pytest.raises(TypeError):\n                lwt.fromdict(d)\n\n    def verify_optional_column(self, tables, table_len, table_name, col_name):\n        d = tables.asdict()\n        table_dict = d[table_name]\n        table_dict[col_name] = None\n        lwt = lwt_module.LightweightTableCollection()\n        lwt.fromdict(d)\n        out = lwt.asdict()\n        assert np.array_equal(\n            out[table_name][col_name], np.zeros(table_len, dtype=np.int32) - 1\n        )\n\n    def verify_offset_pair(\n        self, tables, table_len, table_name, col_name, required=False\n    ):\n        offset_col = col_name + \"_offset\"\n\n        if not required:\n            d = 
tables.asdict()\n            table_dict = d[table_name]\n            table_dict[col_name] = None\n            table_dict[offset_col] = None\n            lwt = lwt_module.LightweightTableCollection()\n            lwt.fromdict(d)\n            out = lwt.asdict()\n            assert out[table_name][col_name].shape == (0,)\n            assert np.array_equal(\n                out[table_name][offset_col],\n                np.zeros(table_len + 1, dtype=np.uint32),\n            )\n            d = tables.asdict()\n            table_dict = d[table_name]\n            del table_dict[col_name]\n            del table_dict[offset_col]\n            lwt = lwt_module.LightweightTableCollection()\n            lwt.fromdict(d)\n            out = lwt.asdict()\n            assert out[table_name][col_name].shape == (0,)\n            assert np.array_equal(\n                out[table_name][offset_col],\n                np.zeros(table_len + 1, dtype=np.uint32),\n            )\n\n        # Setting one or the other raises a TypeError\n        d = tables.asdict()\n        table_dict = d[table_name]\n        table_dict[col_name] = None\n        lwt = lwt_module.LightweightTableCollection()\n        with pytest.raises(TypeError):\n            lwt.fromdict(d)\n\n        d = tables.asdict()\n        table_dict = d[table_name]\n        del table_dict[col_name]\n        lwt = lwt_module.LightweightTableCollection()\n        with pytest.raises(TypeError):\n            lwt.fromdict(d)\n\n        d = tables.asdict()\n        table_dict = d[table_name]\n        table_dict[offset_col] = None\n        lwt = lwt_module.LightweightTableCollection()\n        with pytest.raises(TypeError):\n            lwt.fromdict(d)\n\n        d = tables.asdict()\n        table_dict = d[table_name]\n        del table_dict[offset_col]\n        lwt = lwt_module.LightweightTableCollection()\n        with pytest.raises(TypeError):\n            lwt.fromdict(d)\n\n        d = tables.asdict()\n        table_dict = d[table_name]\n    
    bad_offset = np.zeros_like(table_dict[offset_col])\n        bad_offset[:-1] = table_dict[offset_col][:-1][::-1]\n        bad_offset[-1] = table_dict[offset_col][-1]\n        table_dict[offset_col] = bad_offset\n        lwt = lwt_module.LightweightTableCollection()\n        with pytest.raises(ValueError):\n            lwt.fromdict(d)\n\n    def verify_metadata_schema(self, tables, table_name):\n        d = tables.asdict()\n        d[table_name][\"metadata_schema\"] = None\n        lwt = lwt_module.LightweightTableCollection()\n        lwt.fromdict(d)\n        out = lwt.asdict()\n        assert \"metadata_schema\" not in out[table_name]\n        tables = tskit.TableCollection.fromdict(out)\n        assert repr(getattr(tables, table_name).metadata_schema) == \"\"\n\n    def test_individuals(self, tables):\n        self.verify_required_columns(tables, \"individuals\", [\"flags\"])\n        self.verify_offset_pair(\n            tables, len(tables.individuals), \"individuals\", \"location\"\n        )\n        self.verify_offset_pair(\n            tables, len(tables.individuals), \"individuals\", \"parents\"\n        )\n        self.verify_offset_pair(\n            tables, len(tables.individuals), \"individuals\", \"metadata\"\n        )\n        self.verify_metadata_schema(tables, \"individuals\")\n        # Verify optional parents column\n        d = tables.asdict()\n        d[\"individuals\"][\"parents\"] = None\n        d[\"individuals\"][\"parents_offset\"] = None\n        lwt = lwt_module.LightweightTableCollection()\n        lwt.fromdict(d)\n        out = lwt.asdict()\n        assert all(val == [] for val in out[\"individuals\"][\"parents\"])\n\n    def test_nodes(self, tables):\n        self.verify_offset_pair(tables, len(tables.nodes), \"nodes\", \"metadata\")\n        self.verify_optional_column(tables, len(tables.nodes), \"nodes\", \"population\")\n        self.verify_optional_column(tables, len(tables.nodes), \"nodes\", \"individual\")\n        
self.verify_required_columns(tables, \"nodes\", [\"flags\", \"time\"])\n        self.verify_metadata_schema(tables, \"nodes\")\n\n    def test_edges(self, tables):\n        self.verify_required_columns(\n            tables, \"edges\", [\"left\", \"right\", \"parent\", \"child\"]\n        )\n        self.verify_offset_pair(tables, len(tables.edges), \"edges\", \"metadata\")\n        self.verify_metadata_schema(tables, \"edges\")\n\n    def test_migrations(self, tables):\n        self.verify_required_columns(\n            tables, \"migrations\", [\"left\", \"right\", \"node\", \"source\", \"dest\", \"time\"]\n        )\n        self.verify_offset_pair(tables, len(tables.migrations), \"migrations\", \"metadata\")\n        self.verify_optional_column(tables, len(tables.nodes), \"nodes\", \"individual\")\n        self.verify_metadata_schema(tables, \"migrations\")\n\n    def test_sites(self, tables):\n        self.verify_required_columns(\n            tables, \"sites\", [\"position\", \"ancestral_state\", \"ancestral_state_offset\"]\n        )\n        self.verify_offset_pair(tables, len(tables.sites), \"sites\", \"metadata\")\n        self.verify_metadata_schema(tables, \"sites\")\n\n    def test_mutations(self, tables):\n        self.verify_required_columns(\n            tables,\n            \"mutations\",\n            [\"site\", \"node\", \"derived_state\", \"derived_state_offset\"],\n        )\n        self.verify_offset_pair(tables, len(tables.mutations), \"mutations\", \"metadata\")\n        self.verify_metadata_schema(tables, \"mutations\")\n        # Verify optional time column\n        d = tables.asdict()\n        d[\"mutations\"][\"time\"] = None\n        lwt = lwt_module.LightweightTableCollection()\n        lwt.fromdict(d)\n        out = lwt.asdict()\n        assert all(util.is_unknown_time(val) for val in out[\"mutations\"][\"time\"])\n\n    def test_populations(self, tables):\n        self.verify_required_columns(\n            tables, \"populations\", 
[\"metadata\", \"metadata_offset\"]\n        )\n        self.verify_metadata_schema(tables, \"populations\")\n        self.verify_offset_pair(tables, len(tables.nodes), \"nodes\", \"metadata\", True)\n\n    def test_provenances(self, tables):\n        self.verify_required_columns(\n            tables,\n            \"provenances\",\n            [\"record\", \"record_offset\", \"timestamp\", \"timestamp_offset\"],\n        )\n\n    def test_index(self, tables):\n        d = tables.asdict()\n        lwt = lwt_module.LightweightTableCollection()\n        lwt.fromdict(d)\n        other = lwt.asdict()\n        assert np.array_equal(\n            d[\"indexes\"][\"edge_insertion_order\"],\n            other[\"indexes\"][\"edge_insertion_order\"],\n        )\n        assert np.array_equal(\n            d[\"indexes\"][\"edge_removal_order\"], other[\"indexes\"][\"edge_removal_order\"]\n        )\n\n        # index is optional\n        d = tables.asdict()\n        del d[\"indexes\"]\n        lwt = lwt_module.LightweightTableCollection()\n        lwt.fromdict(d)\n        # and a tc without indexes has empty dict\n        assert lwt.asdict()[\"indexes\"] == {}\n\n        # Both columns must be provided, if one is\n        for col in (\"insertion\", \"removal\"):\n            d = tables.asdict()\n            del d[\"indexes\"][f\"edge_{col}_order\"]\n            lwt = lwt_module.LightweightTableCollection()\n            with pytest.raises(\n                TypeError,\n                match=\"^edge_insertion_order and \"\n                \"edge_removal_order must be specified \"\n                \"together$\",\n            ):\n                lwt.fromdict(d)\n\n    def test_index_bad_type(self, tables):\n        d = tables.asdict()\n        lwt = lwt_module.LightweightTableCollection()\n        d[\"indexes\"] = \"asdf\"\n        with pytest.raises(TypeError):\n            lwt.fromdict(d)\n\n    def test_reference_sequence(self, tables):\n        
self.verify_metadata_schema(tables, \"reference_sequence\")\n\n        def get_refseq(d):\n            tables = tskit.TableCollection.fromdict(d)\n            return tables.reference_sequence\n\n        d = tables.asdict()\n        refseq_dict = d.pop(\"reference_sequence\")\n        assert get_refseq(d).is_null()\n\n        # All empty strings is the same thing\n        d[\"reference_sequence\"] = dict(data=\"\", url=\"\", metadata_schema=\"\", metadata=b\"\")\n        assert get_refseq(d).is_null()\n\n        del refseq_dict[\"metadata_schema\"]  # handled above\n        for key, value in refseq_dict.items():\n            d[\"reference_sequence\"] = {key: value}\n            refseq = get_refseq(d)\n            assert not refseq.is_null()\n            assert getattr(refseq, key) == value\n\n    def test_top_level_time_units(self, tables):\n        d = tables.asdict()\n        # None should give default value\n        d[\"time_units\"] = None\n        lwt = lwt_module.LightweightTableCollection()\n        lwt.fromdict(d)\n        out = lwt.asdict()\n        tables = tskit.TableCollection.fromdict(out)\n        assert tables.time_units == tskit.TIME_UNITS_UNKNOWN\n        # Missing is tested in TestMissingData above\n        d = tables.asdict()\n        d[\"time_units\"] = NON_UTF8_STRING\n        lwt = lwt_module.LightweightTableCollection()\n        with pytest.raises(UnicodeEncodeError):\n            lwt.fromdict(d)\n\n    def test_top_level_metadata(self, tables):\n        d = tables.asdict()\n        # None should give default value\n        d[\"metadata\"] = None\n        lwt = lwt_module.LightweightTableCollection()\n        lwt.fromdict(d)\n        out = lwt.asdict()\n        assert \"metadata\" not in out\n        tables = tskit.TableCollection.fromdict(out)\n        assert tables.metadata == {}\n        # Missing is tested in TestMissingData above\n\n    def test_top_level_metadata_schema(self, tables):\n        d = tables.asdict()\n        # None should 
give default value\n        d[\"metadata_schema\"] = None\n        lwt = lwt_module.LightweightTableCollection()\n        lwt.fromdict(d)\n        out = lwt.asdict()\n        assert \"metadata_schema\" not in out\n        tables = tskit.TableCollection.fromdict(out)\n        assert repr(tables.metadata_schema) == \"\"\n        # Missing is tested in TestMissingData above\n\n\nclass TestLifecycle:\n    def test_unassigned_empty(self):\n        lwt_dict = lwt_module.LightweightTableCollection().asdict()\n        assert tskit.TableCollection.fromdict(lwt_dict) == tskit.TableCollection(-1)\n\n    def test_del_empty(self):\n        lwt = lwt_module.LightweightTableCollection()\n        lwt_dict = lwt.asdict()\n        del lwt\n        assert tskit.TableCollection.fromdict(lwt_dict) == tskit.TableCollection(-1)\n\n    def test_del_full(self, tables):\n        lwt = lwt_module.LightweightTableCollection()\n        lwt.fromdict(tables.asdict())\n        lwt_dict = lwt.asdict()\n        del lwt\n        assert tskit.TableCollection.fromdict(lwt_dict) == tables\n\n    def test_del_lwt_and_tables(self, tables):\n        lwt = lwt_module.LightweightTableCollection()\n        lwt.fromdict(tables.asdict())\n        lwt_dict = lwt.asdict()\n        del lwt\n        tables2 = tables.copy()\n        del tables\n        assert tskit.TableCollection.fromdict(lwt_dict) == tables2\n\n\nclass TestForceOffset64:\n    def get_offset_columns(self, dict_encoding):\n        for table_name, table in dict_encoding.items():\n            if isinstance(table, dict):\n                for name, array in table.items():\n                    if name.endswith(\"_offset\"):\n                        yield f\"{table_name}/{name}\", array\n\n    def test_bad_args(self, tables):\n        lwt = lwt_module.LightweightTableCollection()\n        lwt.fromdict(tables.asdict())\n        for bad_type in [None, {}, \"sdf\"]:\n            with pytest.raises(TypeError):\n                lwt.asdict(bad_type)\n\n    def 
test_off_by_default(self, tables):\n        lwt = lwt_module.LightweightTableCollection()\n        lwt.fromdict(tables.asdict())\n        d = lwt.asdict()\n        for _, array in self.get_offset_columns(d):\n            assert array.dtype == np.uint32\n\n    def test_types_64(self, tables):\n        lwt = lwt_module.LightweightTableCollection()\n        lwt.fromdict(tables.asdict())\n        d = lwt.asdict(force_offset_64=True)\n        for _, array in self.get_offset_columns(d):\n            assert array.dtype == np.uint64\n\n    def test_types_32(self, tables):\n        lwt = lwt_module.LightweightTableCollection()\n        lwt.fromdict(tables.asdict())\n        d = lwt.asdict(force_offset_64=False)\n        for _, array in self.get_offset_columns(d):\n            assert array.dtype == np.uint32\n\n    def test_values_equal(self, tables):\n        lwt = lwt_module.LightweightTableCollection()\n        lwt.fromdict(tables.asdict())\n        d64 = lwt.asdict(force_offset_64=True)\n        d32 = lwt.asdict(force_offset_64=False)\n        offsets_64 = dict(self.get_offset_columns(d64))\n        offsets_32 = dict(self.get_offset_columns(d32))\n        for col_name, col_32 in offsets_32.items():\n            col_64 = offsets_64[col_name]\n            assert col_64.shape == col_32.shape\n            assert np.all(col_64 == col_32)\n\n\n@pytest.mark.parametrize(\"bad_type\", [None, \"\", []])\ndef test_fromdict_bad_type(bad_type):\n    lwt = lwt_module.LightweightTableCollection()\n    with pytest.raises(TypeError):\n        lwt.fromdict(bad_type)\n"
  },
  {
    "path": "python/lwt_interface/example_c_module.c",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2020 Tskit Developers\n * Copyright (c) 2015-2018 University of Oxford\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n// Turn off clang-formatting for this file as turning off formatting\n// for specific bits will make it more confusing.\n// clang-format off\n\n#define PY_SSIZE_T_CLEAN\n#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION\n\n#include <Python.h>\n#include <structmember.h>\n#include <numpy/arrayobject.h>\n\n#include \"kastore.h\"\n#include \"tskit.h\"\n\n#include \"tskit_lwt_interface.h\"\n\nstatic PyObject * \nexample_receiving(PyObject *self, PyObject *args) {\n    int err = -1;\n    PyObject* ret = NULL;\n    LightweightTableCollection *tables = NULL;\n    tsk_treeseq_t tree_seq;\n    tsk_tree_t tree;\n\n    memset(&tree, 0, sizeof(tsk_tree_t));\n    memset(&tree_seq, 0, sizeof(tsk_treeseq_t));\n    \n    /* Get the tables from the args */\n    if 
(!PyArg_ParseTuple(args, \"O!\", &LightweightTableCollectionType, &tables)) {\n        goto out;\n    }\n\n    /* Check that the tables are init'd to prevent seg faults */\n    if (LightweightTableCollection_check_state(tables) != 0) {\n        goto out;\n    }\n\n    /* Build a tree sequence from the tables */\n    err = tsk_treeseq_init(&tree_seq, tables->tables, 0);\n    if (err < 0) {\n        handle_tskit_error(err);\n        goto out;\n    }\n    \n    /* Get the first tree */\n    err = tsk_tree_init(&tree, &tree_seq, 0);\n    if (err < 0) {\n        handle_tskit_error(err);\n        goto out;\n    }\n    err = tsk_tree_first(&tree);\n    if (err < 0) {\n        handle_tskit_error(err);\n        goto out;\n    }\n    \n    /* Return true if the tree has more than one root */\n    ret = Py_BuildValue(\"O\", tsk_tree_get_num_roots(&tree) > 1 ? Py_True: Py_False);\n    \nout:\n    tsk_tree_free(&tree);\n    tsk_treeseq_free(&tree_seq);\n    return ret;\n}\n\nstatic PyObject * example_modifying(PyObject *self, PyObject *args) {\n    int err = -1;\n    PyObject* ret = NULL;\n    LightweightTableCollection *tables = NULL;\n\n    if (!PyArg_ParseTuple(args, \"O!\", &LightweightTableCollectionType, &tables)) {\n        goto out;\n    }\n\n    /* Check that the tables are init'd to prevent seg faults */\n    if (LightweightTableCollection_check_state(tables) != 0) {\n        goto out;\n    }\n\n    /* Modify the tables, note the need to check for error states and handle them */\n    err = tsk_table_collection_clear(tables->tables, 0);\n    if (err < 0) {\n        handle_tskit_error(err);\n        goto out;\n    }\n    err = tsk_node_table_add_row(&tables->tables->nodes, 0, 0, 0, 0, NULL, 0);\n    if (err < 0) {\n        handle_tskit_error(err);\n        goto out;\n    }\n    err = tsk_node_table_add_row(&tables->tables->nodes, 0, 0, 0, 0, NULL, 0);\n    if (err < 0) {\n        handle_tskit_error(err);\n        goto out;\n    }\n\n    /* Only set the return after no 
errors */\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\n\nstatic PyMethodDef example_c_module_methods[] = {\n    {\"example_receiving\", (PyCFunction) example_receiving, METH_VARARGS, \"Example of function receiving tables\"},\n    {\"example_modifying\", (PyCFunction) example_modifying, METH_VARARGS, \"Example of function modifying tables\"},\n    { NULL, NULL, 0, NULL } /* sentinel */\n};\n\nstatic struct PyModuleDef example_c_module = {\n    .m_base = PyModuleDef_HEAD_INIT,\n    .m_name = \"example_c_module\",\n    .m_doc = \"Example C module using the tskit LightweightTableCollection.\",\n    .m_size = -1,\n    .m_methods = example_c_module_methods };\n\nPyMODINIT_FUNC\nPyInit_example_c_module(void)\n{\n    PyObject *module = PyModule_Create(&example_c_module);\n    if (module == NULL) {\n        return NULL;\n    }\n    import_array();\n    if (register_lwt_class(module) != 0) {\n        return NULL;\n    }\n\n    /* Put your own functions/class definitions here, as usual */\n\n\n    return module;\n}\n"
  },
  {
    "path": "python/lwt_interface/setup.py",
    "content": "import os.path\nimport platform\n\nfrom setuptools import Extension, setup\nfrom setuptools.command.build_ext import build_ext\n\nIS_WINDOWS = platform.system() == \"Windows\"\n\n\n# Obscure magic required to allow numpy to be used as a 'setup_requires'.\n# Based on https://stackoverflow.com/questions/19919905\nclass local_build_ext(build_ext):  # noqa N801\n    def finalize_options(self):\n        build_ext.finalize_options(self)\n        import builtins\n\n        # Prevent numpy from thinking it is still in its setup process:\n        builtins.__NUMPY_SETUP__ = False\n        import numpy\n\n        self.include_dirs.append(numpy.get_include())\n\n\nlibdir = \"../lib\"\nkastore_dir = os.path.join(libdir, \"subprojects\", \"kastore\")\n# TODO pathlib glob this.\ntsk_source_files = [\n    \"core.c\",\n    \"tables.c\",\n    \"trees.c\",\n    \"genotypes.c\",\n    \"stats.c\",\n    \"convert.c\",\n    \"haplotype_matching.c\",\n]\nsources = (\n    [\"example_c_module.c\"]\n    + [os.path.join(libdir, \"tskit\", f) for f in tsk_source_files]\n    + [os.path.join(kastore_dir, \"kastore.c\")]\n)\n\ndefines = []\nlibraries = []\nif IS_WINDOWS:\n    # Needed for generating UUIDs in tskit\n    libraries.append(\"Advapi32\")\n    defines.append((\"WIN32\", None))\n\nextension_module = Extension(\n    \"example_c_module\",\n    sources=sources,\n    extra_compile_args=[\"-std=c99\"],\n    libraries=libraries,\n    define_macros=defines,\n    include_dirs=[libdir, kastore_dir],\n)\n\nnumpy_ver = \"numpy>=1.7\"\n\nsetup(\n    name=\"example_c_module\",\n    description=\"Example usage of the LightweightTableCollection tskit interface\",\n    ext_modules=[extension_module],\n    setup_requires=[numpy_ver],\n    cmdclass={\"build_ext\": local_build_ext},\n    license=\"MIT\",\n    platforms=[\"POSIX\", \"Windows\", \"MacOS X\"],\n)\n"
  },
  {
    "path": "python/lwt_interface/test_example_c_module.py",
    "content": "# flake8: noqa\nimport os\nimport sys\n\nimport pytest\n\n# Make sure we use the local tskit version.\n\nsys.path.insert(0, os.path.abspath(\"../\"))\n\n# An example of how to run the tests defined in the dict_encoding_testlib.py\n# file for a given compiled version of the code.\nimport dict_encoding_testlib\nimport example_c_module\nimport tskit\n\n# The test cases defined in dict_encoding_testlib all use the form\n# lwt_module.LightweightTableCollection() to create an instance\n# of LightweightTableCollection. So, by setting this variable in\n# the module here, we can control which definition of the\n# LightweightTableCollection gets used.\ndict_encoding_testlib.lwt_module = example_c_module\n\nfrom dict_encoding_testlib import *\n\n\ndef test_example_receiving():\n    # The example_receiving function returns true if the first tree\n    # has more than one root\n    lwt = example_c_module.LightweightTableCollection()\n    tables = tskit.TableCollection(1)\n    lwt.fromdict(tables.asdict())\n    # Our example function throws an error for an empty table collection\n    with pytest.raises(ValueError, match=\"Table collection must be indexed\"):\n        example_c_module.example_receiving(lwt)\n\n    # This tree sequence has one root so we get false\n    tables = msprime.simulate(10).dump_tables()\n    lwt.fromdict(tables.asdict())\n    assert not example_c_module.example_receiving(lwt)\n\n    # Add a root and we get true\n    tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE)\n    lwt.fromdict(tables.asdict())\n    assert example_c_module.example_receiving(lwt)\n\n\ndef test_example_modifying():\n    lwt = example_c_module.LightweightTableCollection()\n    # The example_modifying function clears out the table and adds two rows\n    tables = msprime.simulate(10, random_seed=42).tables\n    assert tables.edges.num_rows == 18\n    assert tables.nodes.num_rows == 19\n    lwt.fromdict(tables.asdict())\n    example_c_module.example_modifying(lwt)\n    
modified_tables = tskit.TableCollection.fromdict(lwt.asdict())\n    assert modified_tables.edges.num_rows == 0\n    assert modified_tables.nodes.num_rows == 2\n"
  },
  {
    "path": "python/lwt_interface/tskit_lwt_interface.h",
    "content": "/*\n * MIT License\n *\n * Copyright (c) 2019-2021 Tskit Developers\n * Copyright (c) 2015-2018 University of Oxford\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in all\n * copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n/* This file defines the LightweightTableCollection class using the\n * Python-C interface. 
It is intended to be #include-d and compiled\n * into third-party Python modules that use the tskit C interface.\n * See https://github.com/tskit-dev/tskit/tree/main/python/lwt_interface\n * for details and usage examples.\n */\n\ntypedef struct {\n    // clang-format off\n    PyObject_HEAD\n    tsk_table_collection_t *tables;\n    // clang-format on\n} LightweightTableCollection;\n\nstatic void\nhandle_tskit_error(int err)\n{\n    PyErr_SetString(PyExc_ValueError, tsk_strerror(err));\n}\n\nstatic PyObject *\nmake_Py_Unicode_FromStringAndLength(const char *str, size_t length)\n{\n    PyObject *ret = NULL;\n\n    /* Py_BuildValue returns Py_None for zero length, we would rather\n       return a zero-length string */\n    if (length == 0) {\n        ret = PyUnicode_FromString(\"\");\n    } else {\n        ret = Py_BuildValue(\"s#\", str, length);\n    }\n    return ret;\n}\n\n/*\n * Retrieves the PyObject* corresponding the specified key in the\n * specified dictionary. If required is true, raise a TypeError if the\n * value is None or absent.\n *\n * NB This returns a *borrowed reference*, so don't DECREF it!\n */\nstatic PyObject *\nget_dict_value(PyObject *dict, const char *key_str, bool required)\n{\n    PyObject *ret = NULL;\n\n    ret = PyDict_GetItemString(dict, key_str);\n    if (ret == NULL) {\n        ret = Py_None;\n    }\n    if (required && ret == Py_None) {\n        PyErr_Format(PyExc_TypeError, \"'%s' is required\", key_str);\n        ret = NULL;\n    }\n    return ret;\n}\n\n/* Specialised version of get_dict_value that checks if the\n * value is a dictionary. 
*/\nstatic PyObject *\nget_dict_value_dict(PyObject *dict, const char *key_str, bool required)\n{\n    PyObject *ret = NULL;\n    PyObject *value = get_dict_value(dict, key_str, required);\n\n    if (value == NULL) {\n        goto out;\n    }\n    if (value != Py_None && !PyDict_Check(value)) {\n        PyErr_Format(PyExc_TypeError, \"'%s' is not a dict\", key_str);\n        goto out;\n    }\n    ret = value;\nout:\n    return ret;\n}\n\nstatic PyObject *\nget_dict_value_string(PyObject *dict, const char *key_str, bool required)\n{\n    PyObject *ret = NULL;\n    PyObject *value = get_dict_value(dict, key_str, required);\n\n    if (value == NULL) {\n        goto out;\n    }\n    if (value != Py_None && !PyUnicode_Check(value)) {\n        PyErr_Format(PyExc_TypeError, \"'%s' is not a string\", key_str);\n        goto out;\n    }\n    ret = value;\nout:\n    return ret;\n}\n\nstatic PyObject *\nget_dict_value_bytes(PyObject *dict, const char *key_str, bool required)\n{\n    PyObject *ret = NULL;\n    PyObject *value = get_dict_value(dict, key_str, required);\n\n    if (value == NULL) {\n        goto out;\n    }\n    if (value != Py_None && !PyBytes_Check(value)) {\n        PyErr_Format(PyExc_TypeError, \"'%s' is not bytes\", key_str);\n        goto out;\n    }\n    ret = value;\nout:\n    return ret;\n}\n\nstatic PyArrayObject *\ntable_read_column_array(\n    PyObject *input, int npy_type, size_t *num_rows, bool check_num_rows)\n{\n    PyArrayObject *ret = NULL;\n    PyArrayObject *array = NULL;\n    npy_intp *shape;\n\n    array = (PyArrayObject *) PyArray_FROMANY(input, npy_type, 1, 1, NPY_ARRAY_IN_ARRAY);\n    if (array == NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(array);\n    if (check_num_rows) {\n        if (*num_rows != (size_t) shape[0]) {\n            PyErr_SetString(PyExc_ValueError, \"Input array dimensions must be equal.\");\n            goto out;\n        }\n    } else {\n        *num_rows = (size_t) shape[0];\n    }\n    ret = array;\n 
   array = NULL;\nout:\n    Py_XDECREF(array);\n    return ret;\n}\n\nstatic PyArrayObject *\ntable_read_offset_array(\n    PyObject *input, size_t *num_rows, size_t length, bool check_num_rows)\n{\n    PyArrayObject *ret = NULL;\n    PyArrayObject *array = NULL;\n    npy_intp *shape;\n    uint64_t *data;\n\n    array\n        = (PyArrayObject *) PyArray_FROMANY(input, NPY_UINT64, 1, 1, NPY_ARRAY_IN_ARRAY);\n    if (array == NULL) {\n        goto out;\n    }\n    shape = PyArray_DIMS(array);\n    if (!check_num_rows) {\n        *num_rows = shape[0];\n        if (*num_rows == 0) {\n            PyErr_SetString(\n                PyExc_ValueError, \"Offset arrays must have at least one element\");\n            goto out;\n        }\n        *num_rows -= 1;\n    }\n    if (shape[0] != (npy_intp) (*num_rows + 1)) {\n        PyErr_SetString(PyExc_ValueError, \"offset columns must have n + 1 rows.\");\n        goto out;\n    }\n    data = PyArray_DATA(array);\n    if (data[*num_rows] != (uint64_t) length) {\n        PyErr_SetString(PyExc_ValueError, \"Bad offset column encoding\");\n        goto out;\n    }\n    ret = array;\nout:\n    if (ret == NULL) {\n        Py_XDECREF(array);\n    }\n    return ret;\n}\n\nstatic const char *\nparse_unicode_arg(PyObject *arg, Py_ssize_t *metadata_schema_length)\n{\n    const char *ret = NULL;\n    if (arg == NULL) {\n        PyErr_Format(PyExc_AttributeError,\n            \"Cannot del attribute, set to empty string (\\\"\\\") to clear.\");\n        goto out;\n    }\n    ret = PyUnicode_AsUTF8AndSize(arg, metadata_schema_length);\n    if (ret == NULL) {\n        goto out;\n    }\nout:\n    return ret;\n}\n\nstatic int\nparse_individual_table_dict(\n    tsk_individual_table_t *table, PyObject *dict, bool clear_table)\n{\n    int err;\n    int ret = -1;\n    size_t num_rows, metadata_length, location_length, parents_length;\n    char *metadata_data = NULL;\n    double *location_data = NULL;\n    tsk_id_t *parents_data = NULL;\n    
uint64_t *metadata_offset_data = NULL;\n    uint64_t *location_offset_data = NULL;\n    uint64_t *parents_offset_data = NULL;\n    PyObject *flags_input = NULL;\n    PyArrayObject *flags_array = NULL;\n    PyObject *location_input = NULL;\n    PyArrayObject *location_array = NULL;\n    PyObject *location_offset_input = NULL;\n    PyArrayObject *location_offset_array = NULL;\n    PyObject *parents_input = NULL;\n    PyArrayObject *parents_array = NULL;\n    PyObject *parents_offset_input = NULL;\n    PyArrayObject *parents_offset_array = NULL;\n    PyObject *metadata_input = NULL;\n    PyArrayObject *metadata_array = NULL;\n    PyObject *metadata_offset_input = NULL;\n    PyArrayObject *metadata_offset_array = NULL;\n    PyObject *metadata_schema_input = NULL;\n    const char *metadata_schema = NULL;\n    Py_ssize_t metadata_schema_length = 0;\n\n    /* Get the input values */\n    flags_input = get_dict_value(dict, \"flags\", true);\n    if (flags_input == NULL) {\n        goto out;\n    }\n    location_input = get_dict_value(dict, \"location\", false);\n    if (location_input == NULL) {\n        goto out;\n    }\n    location_offset_input = get_dict_value(dict, \"location_offset\", false);\n    if (location_offset_input == NULL) {\n        goto out;\n    }\n    parents_input = get_dict_value(dict, \"parents\", false);\n    if (parents_input == NULL) {\n        goto out;\n    }\n    parents_offset_input = get_dict_value(dict, \"parents_offset\", false);\n    if (parents_offset_input == NULL) {\n        goto out;\n    }\n    metadata_input = get_dict_value(dict, \"metadata\", false);\n    if (metadata_input == NULL) {\n        goto out;\n    }\n    metadata_offset_input = get_dict_value(dict, \"metadata_offset\", false);\n    if (metadata_offset_input == NULL) {\n        goto out;\n    }\n    metadata_schema_input = get_dict_value(dict, \"metadata_schema\", false);\n    if (metadata_schema_input == NULL) {\n        goto out;\n    }\n\n    /* Pull out the arrays */\n 
   flags_array = table_read_column_array(flags_input, NPY_UINT32, &num_rows, false);\n    if (flags_array == NULL) {\n        goto out;\n    }\n    if ((location_input == Py_None) != (location_offset_input == Py_None)) {\n        PyErr_SetString(\n            PyExc_TypeError, \"location and location_offset must be specified together\");\n        goto out;\n    }\n    if (location_input != Py_None) {\n        location_array = table_read_column_array(\n            location_input, NPY_FLOAT64, &location_length, false);\n        if (location_array == NULL) {\n            goto out;\n        }\n        location_data = PyArray_DATA(location_array);\n        location_offset_array = table_read_offset_array(\n            location_offset_input, &num_rows, location_length, true);\n        if (location_offset_array == NULL) {\n            goto out;\n        }\n        location_offset_data = PyArray_DATA(location_offset_array);\n    }\n    if ((parents_input == Py_None) != (parents_offset_input == Py_None)) {\n        PyErr_SetString(\n            PyExc_TypeError, \"parents and parents_offset must be specified together\");\n        goto out;\n    }\n    if (parents_input != Py_None) {\n        parents_array\n            = table_read_column_array(parents_input, NPY_INT32, &parents_length, false);\n        if (parents_array == NULL) {\n            goto out;\n        }\n        parents_data = PyArray_DATA(parents_array);\n        parents_offset_array = table_read_offset_array(\n            parents_offset_input, &num_rows, parents_length, true);\n        if (parents_offset_array == NULL) {\n            goto out;\n        }\n        parents_offset_data = PyArray_DATA(parents_offset_array);\n    }\n    if ((metadata_input == Py_None) != (metadata_offset_input == Py_None)) {\n        PyErr_SetString(\n            PyExc_TypeError, \"metadata and metadata_offset must be specified together\");\n        goto out;\n    }\n    if (metadata_input != Py_None) {\n        metadata_array\n        
    = table_read_column_array(metadata_input, NPY_INT8, &metadata_length, false);\n        if (metadata_array == NULL) {\n            goto out;\n        }\n        metadata_data = PyArray_DATA(metadata_array);\n        metadata_offset_array = table_read_offset_array(\n            metadata_offset_input, &num_rows, metadata_length, true);\n        if (metadata_offset_array == NULL) {\n            goto out;\n        }\n        metadata_offset_data = PyArray_DATA(metadata_offset_array);\n    }\n\n    if (metadata_schema_input != Py_None) {\n        metadata_schema\n            = parse_unicode_arg(metadata_schema_input, &metadata_schema_length);\n        if (metadata_schema == NULL) {\n            goto out;\n        }\n        err = tsk_individual_table_set_metadata_schema(\n            table, metadata_schema, metadata_schema_length);\n        if (err != 0) {\n            handle_tskit_error(err);\n            goto out;\n        }\n    }\n\n    if (clear_table) {\n        err = tsk_individual_table_clear(table);\n        if (err != 0) {\n            handle_tskit_error(err);\n            goto out;\n        }\n    }\n    err = tsk_individual_table_append_columns(table, num_rows, PyArray_DATA(flags_array),\n        location_data, location_offset_data, parents_data, parents_offset_data,\n        metadata_data, metadata_offset_data);\n    if (err != 0) {\n        handle_tskit_error(err);\n        goto out;\n    }\n    ret = 0;\nout:\n    Py_XDECREF(flags_array);\n    Py_XDECREF(location_array);\n    Py_XDECREF(location_offset_array);\n    Py_XDECREF(parents_array);\n    Py_XDECREF(parents_offset_array);\n    Py_XDECREF(metadata_array);\n    Py_XDECREF(metadata_offset_array);\n    return ret;\n}\n\nstatic int\nparse_node_table_dict(tsk_node_table_t *table, PyObject *dict, bool clear_table)\n{\n    int err;\n    int ret = -1;\n    size_t num_rows, metadata_length;\n    char *metadata_data = NULL;\n    uint64_t *metadata_offset_data = NULL;\n    void *population_data = NULL;\n   
 void *individual_data = NULL;\n    PyObject *time_input = NULL;\n    PyArrayObject *time_array = NULL;\n    PyObject *flags_input = NULL;\n    PyArrayObject *flags_array = NULL;\n    PyObject *population_input = NULL;\n    PyArrayObject *population_array = NULL;\n    PyObject *individual_input = NULL;\n    PyArrayObject *individual_array = NULL;\n    PyObject *metadata_input = NULL;\n    PyArrayObject *metadata_array = NULL;\n    PyObject *metadata_offset_input = NULL;\n    PyArrayObject *metadata_offset_array = NULL;\n    PyObject *metadata_schema_input = NULL;\n    const char *metadata_schema = NULL;\n    Py_ssize_t metadata_schema_length = 0;\n\n    /* Get the input values */\n    flags_input = get_dict_value(dict, \"flags\", true);\n    if (flags_input == NULL) {\n        goto out;\n    }\n    time_input = get_dict_value(dict, \"time\", true);\n    if (time_input == NULL) {\n        goto out;\n    }\n    population_input = get_dict_value(dict, \"population\", false);\n    if (population_input == NULL) {\n        goto out;\n    }\n    individual_input = get_dict_value(dict, \"individual\", false);\n    if (individual_input == NULL) {\n        goto out;\n    }\n    metadata_input = get_dict_value(dict, \"metadata\", false);\n    if (metadata_input == NULL) {\n        goto out;\n    }\n    metadata_offset_input = get_dict_value(dict, \"metadata_offset\", false);\n    if (metadata_offset_input == NULL) {\n        goto out;\n    }\n    metadata_schema_input = get_dict_value(dict, \"metadata_schema\", false);\n    if (metadata_schema_input == NULL) {\n        goto out;\n    }\n\n    /* Create the arrays */\n    flags_array = table_read_column_array(flags_input, NPY_UINT32, &num_rows, false);\n    if (flags_array == NULL) {\n        goto out;\n    }\n    time_array = table_read_column_array(time_input, NPY_FLOAT64, &num_rows, true);\n    if (time_array == NULL) {\n        goto out;\n    }\n    if (population_input != Py_None) {\n        population_array\n            
= table_read_column_array(population_input, NPY_INT32, &num_rows, true);\n        if (population_array == NULL) {\n            goto out;\n        }\n        population_data = PyArray_DATA(population_array);\n    }\n    if (individual_input != Py_None) {\n        individual_array\n            = table_read_column_array(individual_input, NPY_INT32, &num_rows, true);\n        if (individual_array == NULL) {\n            goto out;\n        }\n        individual_data = PyArray_DATA(individual_array);\n    }\n    if ((metadata_input == Py_None) != (metadata_offset_input == Py_None)) {\n        PyErr_SetString(\n            PyExc_TypeError, \"metadata and metadata_offset must be specified together\");\n        goto out;\n    }\n    if (metadata_input != Py_None) {\n        metadata_array\n            = table_read_column_array(metadata_input, NPY_INT8, &metadata_length, false);\n        if (metadata_array == NULL) {\n            goto out;\n        }\n        metadata_data = PyArray_DATA(metadata_array);\n        metadata_offset_array = table_read_offset_array(\n            metadata_offset_input, &num_rows, metadata_length, true);\n        if (metadata_offset_array == NULL) {\n            goto out;\n        }\n        metadata_offset_data = PyArray_DATA(metadata_offset_array);\n    }\n    if (metadata_schema_input != Py_None) {\n        metadata_schema\n            = parse_unicode_arg(metadata_schema_input, &metadata_schema_length);\n        if (metadata_schema == NULL) {\n            goto out;\n        }\n        err = tsk_node_table_set_metadata_schema(\n            table, metadata_schema, metadata_schema_length);\n        if (err != 0) {\n            handle_tskit_error(err);\n            goto out;\n        }\n    }\n\n    if (clear_table) {\n        err = tsk_node_table_clear(table);\n        if (err != 0) {\n            handle_tskit_error(err);\n            goto out;\n        }\n    }\n    err = tsk_node_table_append_columns(table, num_rows, PyArray_DATA(flags_array),\n  
      PyArray_DATA(time_array), population_data, individual_data, metadata_data,\n        metadata_offset_data);\n    if (err != 0) {\n        handle_tskit_error(err);\n        goto out;\n    }\n    ret = 0;\nout:\n    Py_XDECREF(flags_array);\n    Py_XDECREF(time_array);\n    Py_XDECREF(population_array);\n    Py_XDECREF(individual_array);\n    Py_XDECREF(metadata_array);\n    Py_XDECREF(metadata_offset_array);\n    return ret;\n}\n\nstatic int\nparse_edge_table_dict(tsk_edge_table_t *table, PyObject *dict, bool clear_table)\n{\n    int ret = -1;\n    int err;\n    size_t num_rows = 0;\n    size_t metadata_length;\n    char *metadata_data = NULL;\n    uint64_t *metadata_offset_data = NULL;\n    PyObject *left_input = NULL;\n    PyArrayObject *left_array = NULL;\n    PyObject *right_input = NULL;\n    PyArrayObject *right_array = NULL;\n    PyObject *parent_input = NULL;\n    PyArrayObject *parent_array = NULL;\n    PyObject *child_input = NULL;\n    PyArrayObject *child_array = NULL;\n    PyObject *metadata_input = NULL;\n    PyArrayObject *metadata_array = NULL;\n    PyObject *metadata_offset_input = NULL;\n    PyArrayObject *metadata_offset_array = NULL;\n    PyObject *metadata_schema_input = NULL;\n    const char *metadata_schema = NULL;\n    Py_ssize_t metadata_schema_length = 0;\n\n    /* Get the input values */\n    left_input = get_dict_value(dict, \"left\", true);\n    if (left_input == NULL) {\n        goto out;\n    }\n    right_input = get_dict_value(dict, \"right\", true);\n    if (right_input == NULL) {\n        goto out;\n    }\n    parent_input = get_dict_value(dict, \"parent\", true);\n    if (parent_input == NULL) {\n        goto out;\n    }\n    child_input = get_dict_value(dict, \"child\", true);\n    if (child_input == NULL) {\n        goto out;\n    }\n    metadata_input = get_dict_value(dict, \"metadata\", false);\n    if (metadata_input == NULL) {\n        goto out;\n    }\n    metadata_offset_input = get_dict_value(dict, 
\"metadata_offset\", false);\n    if (metadata_offset_input == NULL) {\n        goto out;\n    }\n    metadata_schema_input = get_dict_value(dict, \"metadata_schema\", false);\n    if (metadata_schema_input == NULL) {\n        goto out;\n    }\n\n    /* Create the arrays */\n    left_array = table_read_column_array(left_input, NPY_FLOAT64, &num_rows, false);\n    if (left_array == NULL) {\n        goto out;\n    }\n    right_array = table_read_column_array(right_input, NPY_FLOAT64, &num_rows, true);\n    if (right_array == NULL) {\n        goto out;\n    }\n    parent_array = table_read_column_array(parent_input, NPY_INT32, &num_rows, true);\n    if (parent_array == NULL) {\n        goto out;\n    }\n    child_array = table_read_column_array(child_input, NPY_INT32, &num_rows, true);\n    if (child_array == NULL) {\n        goto out;\n    }\n    if ((metadata_input == Py_None) != (metadata_offset_input == Py_None)) {\n        PyErr_SetString(\n            PyExc_TypeError, \"metadata and metadata_offset must be specified together\");\n        goto out;\n    }\n    if (metadata_input != Py_None) {\n        metadata_array\n            = table_read_column_array(metadata_input, NPY_INT8, &metadata_length, false);\n        if (metadata_array == NULL) {\n            goto out;\n        }\n        metadata_data = PyArray_DATA(metadata_array);\n        metadata_offset_array = table_read_offset_array(\n            metadata_offset_input, &num_rows, metadata_length, true);\n        if (metadata_offset_array == NULL) {\n            goto out;\n        }\n        metadata_offset_data = PyArray_DATA(metadata_offset_array);\n    }\n    if (metadata_schema_input != Py_None) {\n        metadata_schema\n            = parse_unicode_arg(metadata_schema_input, &metadata_schema_length);\n        if (metadata_schema == NULL) {\n            goto out;\n        }\n        err = tsk_edge_table_set_metadata_schema(\n            table, metadata_schema, metadata_schema_length);\n        if (err != 
0) {\n            handle_tskit_error(err);\n            goto out;\n        }\n    }\n\n    if (clear_table) {\n        err = tsk_edge_table_clear(table);\n        if (err != 0) {\n            handle_tskit_error(err);\n            goto out;\n        }\n    }\n    err = tsk_edge_table_append_columns(table, num_rows, PyArray_DATA(left_array),\n        PyArray_DATA(right_array), PyArray_DATA(parent_array), PyArray_DATA(child_array),\n        metadata_data, metadata_offset_data);\n    if (err != 0) {\n        handle_tskit_error(err);\n        goto out;\n    }\n    ret = 0;\nout:\n    Py_XDECREF(left_array);\n    Py_XDECREF(right_array);\n    Py_XDECREF(parent_array);\n    Py_XDECREF(child_array);\n    Py_XDECREF(metadata_array);\n    Py_XDECREF(metadata_offset_array);\n    return ret;\n}\n\nstatic int\nparse_migration_table_dict(\n    tsk_migration_table_t *table, PyObject *dict, bool clear_table)\n{\n    int err;\n    int ret = -1;\n    size_t num_rows;\n    size_t metadata_length;\n    char *metadata_data = NULL;\n    uint64_t *metadata_offset_data = NULL;\n    PyObject *left_input = NULL;\n    PyArrayObject *left_array = NULL;\n    PyObject *right_input = NULL;\n    PyArrayObject *right_array = NULL;\n    PyObject *node_input = NULL;\n    PyArrayObject *node_array = NULL;\n    PyObject *source_input = NULL;\n    PyArrayObject *source_array = NULL;\n    PyObject *dest_input = NULL;\n    PyArrayObject *dest_array = NULL;\n    PyObject *time_input = NULL;\n    PyArrayObject *time_array = NULL;\n    PyObject *metadata_input = NULL;\n    PyArrayObject *metadata_array = NULL;\n    PyObject *metadata_offset_input = NULL;\n    PyArrayObject *metadata_offset_array = NULL;\n    PyObject *metadata_schema_input = NULL;\n    const char *metadata_schema = NULL;\n    Py_ssize_t metadata_schema_length = 0;\n\n    /* Get the input values */\n    left_input = get_dict_value(dict, \"left\", true);\n    if (left_input == NULL) {\n        goto out;\n    }\n    right_input = 
get_dict_value(dict, \"right\", true);\n    if (right_input == NULL) {\n        goto out;\n    }\n    node_input = get_dict_value(dict, \"node\", true);\n    if (node_input == NULL) {\n        goto out;\n    }\n    source_input = get_dict_value(dict, \"source\", true);\n    if (source_input == NULL) {\n        goto out;\n    }\n    dest_input = get_dict_value(dict, \"dest\", true);\n    if (dest_input == NULL) {\n        goto out;\n    }\n    time_input = get_dict_value(dict, \"time\", true);\n    if (time_input == NULL) {\n        goto out;\n    }\n    metadata_input = get_dict_value(dict, \"metadata\", false);\n    if (metadata_input == NULL) {\n        goto out;\n    }\n    metadata_offset_input = get_dict_value(dict, \"metadata_offset\", false);\n    if (metadata_offset_input == NULL) {\n        goto out;\n    }\n    metadata_schema_input = get_dict_value(dict, \"metadata_schema\", false);\n    if (metadata_schema_input == NULL) {\n        goto out;\n    }\n\n    /* Build the arrays */\n    left_array = table_read_column_array(left_input, NPY_FLOAT64, &num_rows, false);\n    if (left_array == NULL) {\n        goto out;\n    }\n    right_array = table_read_column_array(right_input, NPY_FLOAT64, &num_rows, true);\n    if (right_array == NULL) {\n        goto out;\n    }\n    node_array = table_read_column_array(node_input, NPY_INT32, &num_rows, true);\n    if (node_array == NULL) {\n        goto out;\n    }\n    source_array = table_read_column_array(source_input, NPY_INT32, &num_rows, true);\n    if (source_array == NULL) {\n        goto out;\n    }\n    dest_array = table_read_column_array(dest_input, NPY_INT32, &num_rows, true);\n    if (dest_array == NULL) {\n        goto out;\n    }\n    time_array = table_read_column_array(time_input, NPY_FLOAT64, &num_rows, true);\n    if (time_array == NULL) {\n        goto out;\n    }\n    if ((metadata_input == Py_None) != (metadata_offset_input == Py_None)) {\n        PyErr_SetString(\n            PyExc_TypeError, 
\"metadata and metadata_offset must be specified together\");\n        goto out;\n    }\n    if (metadata_input != Py_None) {\n        metadata_array\n            = table_read_column_array(metadata_input, NPY_INT8, &metadata_length, false);\n        if (metadata_array == NULL) {\n            goto out;\n        }\n        metadata_data = PyArray_DATA(metadata_array);\n        metadata_offset_array = table_read_offset_array(\n            metadata_offset_input, &num_rows, metadata_length, true);\n        if (metadata_offset_array == NULL) {\n            goto out;\n        }\n        metadata_offset_data = PyArray_DATA(metadata_offset_array);\n    }\n    if (metadata_schema_input != Py_None) {\n        metadata_schema\n            = parse_unicode_arg(metadata_schema_input, &metadata_schema_length);\n        if (metadata_schema == NULL) {\n            goto out;\n        }\n        err = tsk_migration_table_set_metadata_schema(\n            table, metadata_schema, metadata_schema_length);\n        if (err != 0) {\n            handle_tskit_error(err);\n            goto out;\n        }\n    }\n\n    if (clear_table) {\n        err = tsk_migration_table_clear(table);\n        if (err != 0) {\n            handle_tskit_error(err);\n            goto out;\n        }\n    }\n    err = tsk_migration_table_append_columns(table, num_rows, PyArray_DATA(left_array),\n        PyArray_DATA(right_array), PyArray_DATA(node_array), PyArray_DATA(source_array),\n        PyArray_DATA(dest_array), PyArray_DATA(time_array), metadata_data,\n        metadata_offset_data);\n    if (err != 0) {\n        handle_tskit_error(err);\n        goto out;\n    }\n    ret = 0;\nout:\n    Py_XDECREF(left_array);\n    Py_XDECREF(right_array);\n    Py_XDECREF(node_array);\n    Py_XDECREF(source_array);\n    Py_XDECREF(dest_array);\n    Py_XDECREF(time_array);\n    Py_XDECREF(metadata_array);\n    Py_XDECREF(metadata_offset_array);\n    return ret;\n}\n\nstatic int\nparse_site_table_dict(tsk_site_table_t 
*table, PyObject *dict, bool clear_table)\n{\n    int err;\n    int ret = -1;\n    size_t num_rows = 0;\n    size_t ancestral_state_length, metadata_length;\n    PyObject *position_input = NULL;\n    PyArrayObject *position_array = NULL;\n    PyObject *ancestral_state_input = NULL;\n    PyArrayObject *ancestral_state_array = NULL;\n    PyObject *ancestral_state_offset_input = NULL;\n    PyArrayObject *ancestral_state_offset_array = NULL;\n    PyObject *metadata_input = NULL;\n    PyArrayObject *metadata_array = NULL;\n    PyObject *metadata_offset_input = NULL;\n    PyArrayObject *metadata_offset_array = NULL;\n    char *metadata_data;\n    uint64_t *metadata_offset_data;\n    PyObject *metadata_schema_input = NULL;\n    const char *metadata_schema = NULL;\n    Py_ssize_t metadata_schema_length = 0;\n\n    /* Get the input values */\n    position_input = get_dict_value(dict, \"position\", true);\n    if (position_input == NULL) {\n        goto out;\n    }\n    ancestral_state_input = get_dict_value(dict, \"ancestral_state\", true);\n    if (ancestral_state_input == NULL) {\n        goto out;\n    }\n    ancestral_state_offset_input = get_dict_value(dict, \"ancestral_state_offset\", true);\n    if (ancestral_state_offset_input == NULL) {\n        goto out;\n    }\n    metadata_input = get_dict_value(dict, \"metadata\", false);\n    if (metadata_input == NULL) {\n        goto out;\n    }\n    metadata_offset_input = get_dict_value(dict, \"metadata_offset\", false);\n    if (metadata_offset_input == NULL) {\n        goto out;\n    }\n    metadata_schema_input = get_dict_value(dict, \"metadata_schema\", false);\n    if (metadata_schema_input == NULL) {\n        goto out;\n    }\n\n    /* Get the arrays */\n    position_array\n        = table_read_column_array(position_input, NPY_FLOAT64, &num_rows, false);\n    if (position_array == NULL) {\n        goto out;\n    }\n    ancestral_state_array = table_read_column_array(\n        ancestral_state_input, NPY_INT8, 
&ancestral_state_length, false);\n    if (ancestral_state_array == NULL) {\n        goto out;\n    }\n    ancestral_state_offset_array = table_read_offset_array(\n        ancestral_state_offset_input, &num_rows, ancestral_state_length, true);\n    if (ancestral_state_offset_array == NULL) {\n        goto out;\n    }\n\n    metadata_data = NULL;\n    metadata_offset_data = NULL;\n    if ((metadata_input == Py_None) != (metadata_offset_input == Py_None)) {\n        PyErr_SetString(\n            PyExc_TypeError, \"metadata and metadata_offset must be specified together\");\n        goto out;\n    }\n    if (metadata_input != Py_None) {\n        metadata_array\n            = table_read_column_array(metadata_input, NPY_INT8, &metadata_length, false);\n        if (metadata_array == NULL) {\n            goto out;\n        }\n        metadata_data = PyArray_DATA(metadata_array);\n        metadata_offset_array = table_read_offset_array(\n            metadata_offset_input, &num_rows, metadata_length, false);\n        if (metadata_offset_array == NULL) {\n            goto out;\n        }\n        metadata_offset_data = PyArray_DATA(metadata_offset_array);\n    }\n    if (metadata_schema_input != Py_None) {\n        metadata_schema\n            = parse_unicode_arg(metadata_schema_input, &metadata_schema_length);\n        if (metadata_schema == NULL) {\n            goto out;\n        }\n        err = tsk_site_table_set_metadata_schema(\n            table, metadata_schema, metadata_schema_length);\n        if (err != 0) {\n            handle_tskit_error(err);\n            goto out;\n        }\n    }\n\n    if (clear_table) {\n        err = tsk_site_table_clear(table);\n        if (err != 0) {\n            handle_tskit_error(err);\n            goto out;\n        }\n    }\n    err = tsk_site_table_append_columns(table, num_rows, PyArray_DATA(position_array),\n        PyArray_DATA(ancestral_state_array), PyArray_DATA(ancestral_state_offset_array),\n        metadata_data, 
metadata_offset_data);\n    if (err != 0) {\n        handle_tskit_error(err);\n        goto out;\n    }\n    ret = 0;\nout:\n    Py_XDECREF(position_array);\n    Py_XDECREF(ancestral_state_array);\n    Py_XDECREF(ancestral_state_offset_array);\n    Py_XDECREF(metadata_array);\n    Py_XDECREF(metadata_offset_array);\n    return ret;\n}\n\nstatic int\nparse_mutation_table_dict(tsk_mutation_table_t *table, PyObject *dict, bool clear_table)\n{\n    int err;\n    int ret = -1;\n    size_t num_rows = 0;\n    size_t derived_state_length = 0;\n    size_t metadata_length = 0;\n    PyObject *site_input = NULL;\n    PyArrayObject *site_array = NULL;\n    PyObject *derived_state_input = NULL;\n    PyArrayObject *derived_state_array = NULL;\n    PyObject *derived_state_offset_input = NULL;\n    PyArrayObject *derived_state_offset_array = NULL;\n    PyObject *node_input = NULL;\n    PyArrayObject *node_array = NULL;\n    PyObject *time_input = NULL;\n    PyArrayObject *time_array = NULL;\n    double *time_data;\n    PyObject *parent_input = NULL;\n    PyArrayObject *parent_array = NULL;\n    tsk_id_t *parent_data;\n    PyObject *metadata_input = NULL;\n    PyArrayObject *metadata_array = NULL;\n    PyObject *metadata_offset_input = NULL;\n    PyArrayObject *metadata_offset_array = NULL;\n    char *metadata_data;\n    uint64_t *metadata_offset_data;\n    PyObject *metadata_schema_input = NULL;\n    const char *metadata_schema = NULL;\n    Py_ssize_t metadata_schema_length = 0;\n\n    /* Get the input values */\n    site_input = get_dict_value(dict, \"site\", true);\n    if (site_input == NULL) {\n        goto out;\n    }\n    node_input = get_dict_value(dict, \"node\", true);\n    if (node_input == NULL) {\n        goto out;\n    }\n    parent_input = get_dict_value(dict, \"parent\", false);\n    if (parent_input == NULL) {\n        goto out;\n    }\n    time_input = get_dict_value(dict, \"time\", false);\n    if (time_input == NULL) {\n        goto out;\n    }\n    
derived_state_input = get_dict_value(dict, \"derived_state\", true);\n    if (derived_state_input == NULL) {\n        goto out;\n    }\n    derived_state_offset_input = get_dict_value(dict, \"derived_state_offset\", true);\n    if (derived_state_offset_input == NULL) {\n        goto out;\n    }\n    metadata_input = get_dict_value(dict, \"metadata\", false);\n    if (metadata_input == NULL) {\n        goto out;\n    }\n    metadata_offset_input = get_dict_value(dict, \"metadata_offset\", false);\n    if (metadata_offset_input == NULL) {\n        goto out;\n    }\n    metadata_schema_input = get_dict_value(dict, \"metadata_schema\", false);\n    if (metadata_schema_input == NULL) {\n        goto out;\n    }\n\n    /* Get the arrays */\n    site_array = table_read_column_array(site_input, NPY_INT32, &num_rows, false);\n    if (site_array == NULL) {\n        goto out;\n    }\n    derived_state_array = table_read_column_array(\n        derived_state_input, NPY_INT8, &derived_state_length, false);\n    if (derived_state_array == NULL) {\n        goto out;\n    }\n    derived_state_offset_array = table_read_offset_array(\n        derived_state_offset_input, &num_rows, derived_state_length, true);\n    if (derived_state_offset_array == NULL) {\n        goto out;\n    }\n    node_array = table_read_column_array(node_input, NPY_INT32, &num_rows, true);\n    if (node_array == NULL) {\n        goto out;\n    }\n\n    time_data = NULL;\n    if (time_input != Py_None) {\n        time_array = table_read_column_array(time_input, NPY_FLOAT64, &num_rows, true);\n        if (time_array == NULL) {\n            goto out;\n        }\n        time_data = PyArray_DATA(time_array);\n    }\n\n    parent_data = NULL;\n    if (parent_input != Py_None) {\n        parent_array = table_read_column_array(parent_input, NPY_INT32, &num_rows, true);\n        if (parent_array == NULL) {\n            goto out;\n        }\n        parent_data = PyArray_DATA(parent_array);\n    }\n\n    metadata_data = 
NULL;\n    metadata_offset_data = NULL;\n    if ((metadata_input == Py_None) != (metadata_offset_input == Py_None)) {\n        PyErr_SetString(\n            PyExc_TypeError, \"metadata and metadata_offset must be specified together\");\n        goto out;\n    }\n    if (metadata_input != Py_None) {\n        metadata_array\n            = table_read_column_array(metadata_input, NPY_INT8, &metadata_length, false);\n        if (metadata_array == NULL) {\n            goto out;\n        }\n        metadata_data = PyArray_DATA(metadata_array);\n        metadata_offset_array = table_read_offset_array(\n            metadata_offset_input, &num_rows, metadata_length, false);\n        if (metadata_offset_array == NULL) {\n            goto out;\n        }\n        metadata_offset_data = PyArray_DATA(metadata_offset_array);\n    }\n    if (metadata_schema_input != Py_None) {\n        metadata_schema\n            = parse_unicode_arg(metadata_schema_input, &metadata_schema_length);\n        if (metadata_schema == NULL) {\n            goto out;\n        }\n        err = tsk_mutation_table_set_metadata_schema(\n            table, metadata_schema, metadata_schema_length);\n        if (err != 0) {\n            handle_tskit_error(err);\n            goto out;\n        }\n    }\n\n    if (clear_table) {\n        err = tsk_mutation_table_clear(table);\n        if (err != 0) {\n            handle_tskit_error(err);\n            goto out;\n        }\n    }\n    err = tsk_mutation_table_append_columns(table, num_rows, PyArray_DATA(site_array),\n        PyArray_DATA(node_array), parent_data, time_data,\n        PyArray_DATA(derived_state_array), PyArray_DATA(derived_state_offset_array),\n        metadata_data, metadata_offset_data);\n    if (err != 0) {\n        handle_tskit_error(err);\n        goto out;\n    }\n    ret = 0;\nout:\n    Py_XDECREF(site_array);\n    Py_XDECREF(derived_state_array);\n    Py_XDECREF(derived_state_offset_array);\n    Py_XDECREF(metadata_array);\n    
Py_XDECREF(metadata_offset_array);\n    Py_XDECREF(node_array);\n    Py_XDECREF(parent_array);\n    Py_XDECREF(time_array);\n    return ret;\n}\n\nstatic int\nparse_population_table_dict(\n    tsk_population_table_t *table, PyObject *dict, bool clear_table)\n{\n    int err;\n    int ret = -1;\n    size_t num_rows, metadata_length;\n    PyObject *metadata_input = NULL;\n    PyArrayObject *metadata_array = NULL;\n    PyObject *metadata_offset_input = NULL;\n    PyArrayObject *metadata_offset_array = NULL;\n    PyObject *metadata_schema_input = NULL;\n    const char *metadata_schema = NULL;\n    Py_ssize_t metadata_schema_length = 0;\n\n    /* Get the inputs */\n    metadata_input = get_dict_value(dict, \"metadata\", true);\n    if (metadata_input == NULL) {\n        goto out;\n    }\n    metadata_offset_input = get_dict_value(dict, \"metadata_offset\", true);\n    if (metadata_offset_input == NULL) {\n        goto out;\n    }\n    metadata_schema_input = get_dict_value(dict, \"metadata_schema\", false);\n    if (metadata_schema_input == NULL) {\n        goto out;\n    }\n\n    /* Get the arrays */\n    metadata_array\n        = table_read_column_array(metadata_input, NPY_INT8, &metadata_length, false);\n    if (metadata_array == NULL) {\n        goto out;\n    }\n    metadata_offset_array = table_read_offset_array(\n        metadata_offset_input, &num_rows, metadata_length, false);\n    if (metadata_offset_array == NULL) {\n        goto out;\n    }\n    if (metadata_schema_input != Py_None) {\n        metadata_schema\n            = parse_unicode_arg(metadata_schema_input, &metadata_schema_length);\n        if (metadata_schema == NULL) {\n            goto out;\n        }\n        err = tsk_population_table_set_metadata_schema(\n            table, metadata_schema, metadata_schema_length);\n        if (err != 0) {\n            handle_tskit_error(err);\n            goto out;\n        }\n    }\n\n    if (clear_table) {\n        err = tsk_population_table_clear(table);\n   
     if (err != 0) {\n            handle_tskit_error(err);\n            goto out;\n        }\n    }\n    err = tsk_population_table_append_columns(table, num_rows,\n        PyArray_DATA(metadata_array), PyArray_DATA(metadata_offset_array));\n    if (err != 0) {\n        handle_tskit_error(err);\n        goto out;\n    }\n    ret = 0;\nout:\n    Py_XDECREF(metadata_array);\n    Py_XDECREF(metadata_offset_array);\n    return ret;\n}\n\nstatic int\nparse_provenance_table_dict(\n    tsk_provenance_table_t *table, PyObject *dict, bool clear_table)\n{\n    int err;\n    int ret = -1;\n    size_t num_rows, timestamp_length, record_length;\n    PyObject *timestamp_input = NULL;\n    PyArrayObject *timestamp_array = NULL;\n    PyObject *timestamp_offset_input = NULL;\n    PyArrayObject *timestamp_offset_array = NULL;\n    PyObject *record_input = NULL;\n    PyArrayObject *record_array = NULL;\n    PyObject *record_offset_input = NULL;\n    PyArrayObject *record_offset_array = NULL;\n\n    /* Get the inputs */\n    timestamp_input = get_dict_value(dict, \"timestamp\", true);\n    if (timestamp_input == NULL) {\n        goto out;\n    }\n    timestamp_offset_input = get_dict_value(dict, \"timestamp_offset\", true);\n    if (timestamp_offset_input == NULL) {\n        goto out;\n    }\n    record_input = get_dict_value(dict, \"record\", true);\n    if (record_input == NULL) {\n        goto out;\n    }\n    record_offset_input = get_dict_value(dict, \"record_offset\", true);\n    if (record_offset_input == NULL) {\n        goto out;\n    }\n\n    timestamp_array\n        = table_read_column_array(timestamp_input, NPY_INT8, &timestamp_length, false);\n    if (timestamp_array == NULL) {\n        goto out;\n    }\n    timestamp_offset_array = table_read_offset_array(\n        timestamp_offset_input, &num_rows, timestamp_length, false);\n    if (timestamp_offset_array == NULL) {\n        goto out;\n    }\n    record_array\n        = table_read_column_array(record_input, NPY_INT8, 
&record_length, false);\n    if (record_array == NULL) {\n        goto out;\n    }\n    record_offset_array\n        = table_read_offset_array(record_offset_input, &num_rows, record_length, true);\n    if (record_offset_array == NULL) {\n        goto out;\n    }\n\n    if (clear_table) {\n        err = tsk_provenance_table_clear(table);\n        if (err != 0) {\n            handle_tskit_error(err);\n            goto out;\n        }\n    }\n    err = tsk_provenance_table_append_columns(table, num_rows,\n        PyArray_DATA(timestamp_array), PyArray_DATA(timestamp_offset_array),\n        PyArray_DATA(record_array), PyArray_DATA(record_offset_array));\n    if (err != 0) {\n        handle_tskit_error(err);\n        goto out;\n    }\n    ret = 0;\nout:\n    Py_XDECREF(timestamp_array);\n    Py_XDECREF(timestamp_offset_array);\n    Py_XDECREF(record_array);\n    Py_XDECREF(record_offset_array);\n    return ret;\n}\n\nstatic int\nparse_indexes_dict(tsk_table_collection_t *tables, PyObject *dict)\n{\n    int err;\n    int ret = -1;\n    size_t insertion_length, removal_length;\n    PyObject *insertion_input = NULL;\n    PyArrayObject *insertion_array = NULL;\n    PyObject *removal_input = NULL;\n    PyArrayObject *removal_array = NULL;\n\n    /* Get the inputs */\n    insertion_input = get_dict_value(dict, \"edge_insertion_order\", false);\n    if (insertion_input == NULL) {\n        goto out;\n    }\n    removal_input = get_dict_value(dict, \"edge_removal_order\", false);\n    if (removal_input == NULL) {\n        goto out;\n    }\n\n    if ((insertion_input == Py_None) != (removal_input == Py_None)) {\n        PyErr_SetString(PyExc_TypeError,\n            \"edge_insertion_order and edge_removal_order must be specified together\");\n        goto out;\n    }\n\n    if (insertion_input != Py_None) {\n        insertion_array = table_read_column_array(\n            insertion_input, NPY_INT32, &insertion_length, false);\n        if (insertion_array == NULL) {\n            
goto out;\n        }\n        removal_array\n            = table_read_column_array(removal_input, NPY_INT32, &removal_length, false);\n        if (removal_array == NULL) {\n            goto out;\n        }\n        if (insertion_length != removal_length) {\n            PyErr_SetString(PyExc_ValueError,\n                \"edge_insertion_order and edge_removal_order must be the same length\");\n            goto out;\n        }\n        if (insertion_length != tables->edges.num_rows) {\n            PyErr_SetString(PyExc_ValueError,\n                \"edge_insertion_order and edge_removal_order must be \"\n                \"the same length as the number of edges\");\n            goto out;\n        }\n        err = tsk_table_collection_set_indexes(\n            tables, PyArray_DATA(insertion_array), PyArray_DATA(removal_array));\n        if (err != 0) {\n            handle_tskit_error(err);\n            goto out;\n        }\n    }\n    ret = 0;\nout:\n    Py_XDECREF(insertion_array);\n    Py_XDECREF(removal_array);\n    return ret;\n}\n\nstatic int\nparse_reference_sequence_dict(tsk_reference_sequence_t *ref, PyObject *dict)\n{\n    int err;\n    int ret = -1;\n    PyObject *value = NULL;\n    const char *metadata_schema, *data, *url;\n    char *metadata;\n    Py_ssize_t metadata_schema_length, metadata_length, data_length, url_length;\n\n    /* metadata_schema */\n    value = get_dict_value_string(dict, \"metadata_schema\", false);\n    if (value == NULL) {\n        goto out;\n    }\n    if (value != Py_None) {\n        metadata_schema = parse_unicode_arg(value, &metadata_schema_length);\n        if (metadata_schema == NULL) {\n            goto out;\n        }\n        err = tsk_reference_sequence_set_metadata_schema(\n            ref, metadata_schema, (tsk_size_t) metadata_schema_length);\n        if (err != 0) {\n            handle_tskit_error(err);\n            goto out;\n        }\n    }\n\n    /* metadata */\n    value = get_dict_value_bytes(dict, \"metadata\", 
false);\n    if (value == NULL) {\n        goto out;\n    }\n    if (value != Py_None) {\n        err = PyBytes_AsStringAndSize(value, &metadata, &metadata_length);\n        if (err != 0) {\n            goto out;\n        }\n        err = tsk_reference_sequence_set_metadata(ref, metadata, metadata_length);\n        if (err != 0) {\n            handle_tskit_error(err);\n            goto out;\n        }\n    }\n\n    /* data */\n    value = get_dict_value_string(dict, \"data\", false);\n    if (value == NULL) {\n        goto out;\n    }\n    if (value != Py_None) {\n        data = parse_unicode_arg(value, &data_length);\n        if (data == NULL) {\n            goto out;\n        }\n        err = tsk_reference_sequence_set_data(ref, data, (tsk_size_t) data_length);\n        if (err != 0) {\n            handle_tskit_error(err);\n            goto out;\n        }\n    }\n\n    /* url */\n    value = get_dict_value_string(dict, \"url\", false);\n    if (value == NULL) {\n        goto out;\n    }\n    if (value != Py_None) {\n        url = parse_unicode_arg(value, &url_length);\n        if (url == NULL) {\n            goto out;\n        }\n        err = tsk_reference_sequence_set_url(ref, url, (tsk_size_t) url_length);\n        if (err != 0) {\n            handle_tskit_error(err);\n            goto out;\n        }\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic int\nparse_table_collection_dict(tsk_table_collection_t *tables, PyObject *tables_dict)\n{\n    int ret = -1;\n    PyObject *value = NULL;\n    int err;\n    const char *time_units = NULL;\n    char *metadata = NULL;\n    const char *metadata_schema = NULL;\n    Py_ssize_t time_units_length, metadata_length, metadata_schema_length;\n\n    value = get_dict_value(tables_dict, \"sequence_length\", true);\n    if (value == NULL) {\n        goto out;\n    }\n    if (!PyNumber_Check(value)) {\n        PyErr_Format(PyExc_TypeError, \"'sequence_length' is not number\");\n        goto out;\n    }\n    
tables->sequence_length = PyFloat_AsDouble(value);\n\n    /* metadata_schema */\n    value = get_dict_value_string(tables_dict, \"metadata_schema\", false);\n    if (value == NULL) {\n        goto out;\n    }\n    if (value != Py_None) {\n        metadata_schema = parse_unicode_arg(value, &metadata_schema_length);\n        if (metadata_schema == NULL) {\n            goto out;\n        }\n        err = tsk_table_collection_set_metadata_schema(\n            tables, metadata_schema, metadata_schema_length);\n        if (err != 0) {\n            handle_tskit_error(err);\n            goto out;\n        }\n    }\n\n    /* metadata */\n    value = get_dict_value_bytes(tables_dict, \"metadata\", false);\n    if (value == NULL) {\n        goto out;\n    }\n    if (value != Py_None) {\n        err = PyBytes_AsStringAndSize(value, &metadata, &metadata_length);\n        if (err != 0) {\n            goto out;\n        }\n        err = tsk_table_collection_set_metadata(tables, metadata, metadata_length);\n        if (err != 0) {\n            handle_tskit_error(err);\n            goto out;\n        }\n    }\n\n    /* time_units */\n    value = get_dict_value_string(tables_dict, \"time_units\", false);\n    if (value == NULL) {\n        goto out;\n    }\n    if (value != Py_None) {\n        time_units = parse_unicode_arg(value, &time_units_length);\n        if (time_units == NULL) {\n            goto out;\n        }\n        err = tsk_table_collection_set_time_units(tables, time_units, time_units_length);\n        if (err != 0) {\n            handle_tskit_error(err);\n            goto out;\n        }\n    }\n\n    /* individuals */\n    value = get_dict_value_dict(tables_dict, \"individuals\", true);\n    if (value == NULL) {\n        goto out;\n    }\n    if (parse_individual_table_dict(&tables->individuals, value, true) != 0) {\n        goto out;\n    }\n\n    /* nodes */\n    value = get_dict_value_dict(tables_dict, \"nodes\", true);\n    if (value == NULL) {\n        goto 
out;\n    }\n    if (parse_node_table_dict(&tables->nodes, value, true) != 0) {\n        goto out;\n    }\n\n    /* edges */\n    value = get_dict_value_dict(tables_dict, \"edges\", true);\n    if (value == NULL) {\n        goto out;\n    }\n    if (parse_edge_table_dict(&tables->edges, value, true) != 0) {\n        goto out;\n    }\n\n    /* migrations */\n    value = get_dict_value_dict(tables_dict, \"migrations\", true);\n    if (value == NULL) {\n        goto out;\n    }\n    if (parse_migration_table_dict(&tables->migrations, value, true) != 0) {\n        goto out;\n    }\n\n    /* sites */\n    value = get_dict_value_dict(tables_dict, \"sites\", true);\n    if (value == NULL) {\n        goto out;\n    }\n    if (parse_site_table_dict(&tables->sites, value, true) != 0) {\n        goto out;\n    }\n\n    /* mutations */\n    value = get_dict_value_dict(tables_dict, \"mutations\", true);\n    if (value == NULL) {\n        goto out;\n    }\n    if (parse_mutation_table_dict(&tables->mutations, value, true) != 0) {\n        goto out;\n    }\n\n    /* populations */\n    value = get_dict_value_dict(tables_dict, \"populations\", true);\n    if (value == NULL) {\n        goto out;\n    }\n    if (parse_population_table_dict(&tables->populations, value, true) != 0) {\n        goto out;\n    }\n\n    /* provenances */\n    value = get_dict_value_dict(tables_dict, \"provenances\", true);\n    if (value == NULL) {\n        goto out;\n    }\n    if (parse_provenance_table_dict(&tables->provenances, value, true) != 0) {\n        goto out;\n    }\n\n    /* indexes */\n    value = get_dict_value_dict(tables_dict, \"indexes\", false);\n    if (value == NULL) {\n        goto out;\n    }\n    if (value != Py_None) {\n        if (parse_indexes_dict(tables, value) != 0) {\n            goto out;\n        }\n    }\n\n    /* reference_sequence */\n    value = get_dict_value_dict(tables_dict, \"reference_sequence\", false);\n    if (value == NULL) {\n        goto out;\n    }\n    if 
(value != Py_None) {\n        if (parse_reference_sequence_dict(&tables->reference_sequence, value) != 0) {\n            goto out;\n        }\n    }\n    ret = 0;\nout:\n    return ret;\n}\n\ntypedef struct _tsklwt_table_col_t {\n    const char *name;\n    void *data;\n    npy_intp num_rows;\n    int type;\n} tsklwt_table_col_t;\n\ntypedef struct _tsklwt_ragged_col_t {\n    const char *name;\n    void *data;\n    tsk_size_t *offset;\n    npy_intp num_rows;\n    npy_intp data_len;\n    int type;\n} tsklwt_ragged_col_t;\n\ntypedef struct _tsklwt_table_desc_t {\n    const char *name;\n    tsklwt_table_col_t *cols;\n    tsklwt_ragged_col_t *ragged_cols;\n    char *metadata_schema;\n    tsk_size_t metadata_schema_length;\n} tsklwt_table_desc_t;\n\nstatic int\nwrite_table_col(tsklwt_table_col_t *col, PyObject *table_dict)\n{\n    int ret = -1;\n\n    PyArrayObject *array\n        = (PyArrayObject *) PyArray_EMPTY(1, &col->num_rows, col->type, 0);\n    if (array == NULL) {\n        goto out;\n    }\n    memcpy(PyArray_DATA(array), col->data, col->num_rows * PyArray_ITEMSIZE(array));\n    if (PyDict_SetItemString(table_dict, col->name, (PyObject *) array) != 0) {\n        goto out;\n    }\n    ret = 0;\nout:\n    Py_XDECREF(array);\n    return ret;\n}\n\nstatic int\nwrite_ragged_col(tsklwt_ragged_col_t *col, PyObject *table_dict, bool force_offset_64)\n{\n    int ret = -1;\n    char offset_col_name[128];\n    npy_intp offset_len = col->num_rows + 1;\n    PyArrayObject *data_array = NULL;\n    PyArrayObject *offset_array = NULL;\n    bool offset_64 = force_offset_64 || col->offset[col->num_rows] > UINT32_MAX;\n    int offset_type = offset_64 ? 
NPY_UINT64 : NPY_UINT32;\n    uint32_t *dest;\n    npy_intp j;\n\n    data_array = (PyArrayObject *) PyArray_EMPTY(1, &col->data_len, col->type, 0);\n    offset_array = (PyArrayObject *) PyArray_EMPTY(1, &offset_len, offset_type, 0);\n    if (data_array == NULL || offset_array == NULL) {\n        goto out;\n    }\n\n    memcpy(PyArray_DATA(data_array), col->data,\n        col->data_len * PyArray_ITEMSIZE(data_array));\n    if (offset_64) {\n        memcpy(PyArray_DATA(offset_array), col->offset,\n            offset_len * PyArray_ITEMSIZE(offset_array));\n    } else {\n        dest = (uint32_t *) PyArray_DATA(offset_array);\n        for (j = 0; j < offset_len; j++) {\n            dest[j] = col->offset[j];\n        }\n    }\n\n    assert(strlen(col->name) + strlen(\"_offset\") + 2 < sizeof(offset_col_name));\n    strcpy(offset_col_name, col->name);\n    strcat(offset_col_name, \"_offset\");\n\n    if (PyDict_SetItemString(table_dict, col->name, (PyObject *) data_array) != 0) {\n        goto out;\n    }\n    if (PyDict_SetItemString(table_dict, offset_col_name, (PyObject *) offset_array)\n        != 0) {\n        goto out;\n    }\n    ret = 0;\nout:\n    Py_XDECREF(data_array);\n    Py_XDECREF(offset_array);\n    return ret;\n}\n\nstatic int\nwrite_string_to_dict(PyObject *dict, const char *key, const char *str, tsk_size_t length)\n{\n    int ret = -1;\n    PyObject *val = make_Py_Unicode_FromStringAndLength(str, length);\n\n    if (val == NULL) {\n        goto out;\n    }\n    if (PyDict_SetItemString(dict, key, val) != 0) {\n        goto out;\n    }\n    ret = 0;\nout:\n    Py_XDECREF(val);\n    return ret;\n}\n\nstatic int\nwrite_bytes_to_dict(\n    PyObject *dict, const char *key, const char *bytes, tsk_size_t length)\n{\n    int ret = -1;\n    PyObject *val = PyBytes_FromStringAndSize(bytes, length);\n\n    if (val == NULL) {\n        goto out;\n    }\n    if (PyDict_SetItemString(dict, key, val) != 0) {\n        goto out;\n    }\n    ret = 0;\nout:\n    
Py_XDECREF(val);\n    return ret;\n}\n\nstatic PyObject *\nwrite_table_dict(const tsklwt_table_desc_t *table_desc, bool force_offset_64)\n{\n    PyObject *ret = NULL;\n    PyObject *table_dict = NULL;\n    tsklwt_table_col_t *col;\n    tsklwt_ragged_col_t *ragged_col;\n\n    table_dict = PyDict_New();\n    if (table_dict == NULL) {\n        goto out;\n    }\n    if (table_desc->cols != NULL) {\n        for (col = table_desc->cols; col->name != NULL; col++) {\n            if (write_table_col(col, table_dict) != 0) {\n                goto out;\n            }\n        }\n    }\n    if (table_desc->ragged_cols != NULL) {\n        for (ragged_col = table_desc->ragged_cols; ragged_col->name != NULL;\n            ragged_col++) {\n            if (write_ragged_col(ragged_col, table_dict, force_offset_64) != 0) {\n                goto out;\n            }\n        }\n    }\n    if (table_desc->metadata_schema_length > 0) {\n        if (write_string_to_dict(table_dict, \"metadata_schema\",\n                table_desc->metadata_schema, table_desc->metadata_schema_length)\n            != 0) {\n            goto out;\n        }\n    }\n    ret = table_dict;\n    table_dict = NULL;\nout:\n    Py_XDECREF(table_dict);\n    return ret;\n}\n\nstatic int\nwrite_table_arrays(\n    const tsk_table_collection_t *tables, PyObject *dict, bool force_offset_64)\n{\n    int ret = -1;\n    PyObject *table_dict = NULL;\n    size_t j;\n\n    tsklwt_table_col_t individual_cols[] = {\n        { \"flags\", (void *) tables->individuals.flags, tables->individuals.num_rows,\n            NPY_UINT32 },\n        { NULL },\n    };\n\n    tsklwt_ragged_col_t individual_ragged_cols[] = {\n        { \"location\", (void *) tables->individuals.location,\n            tables->individuals.location_offset, tables->individuals.num_rows,\n            tables->individuals.location_length, NPY_FLOAT64 },\n        { \"parents\", (void *) tables->individuals.parents,\n            tables->individuals.parents_offset, 
tables->individuals.num_rows,\n            tables->individuals.parents_length, NPY_INT32 },\n        { \"metadata\", (void *) tables->individuals.metadata,\n            tables->individuals.metadata_offset, tables->individuals.num_rows,\n            tables->individuals.metadata_length, NPY_INT8 },\n        { NULL },\n    };\n\n    tsklwt_table_col_t node_cols[] = {\n        { \"time\", (void *) tables->nodes.time, tables->nodes.num_rows, NPY_FLOAT64 },\n        { \"flags\", (void *) tables->nodes.flags, tables->nodes.num_rows, NPY_UINT32 },\n        { \"population\", (void *) tables->nodes.population, tables->nodes.num_rows,\n            NPY_INT32 },\n        { \"individual\", (void *) tables->nodes.individual, tables->nodes.num_rows,\n            NPY_INT32 },\n        { NULL },\n    };\n\n    tsklwt_ragged_col_t node_ragged_cols[] = {\n        { \"metadata\", (void *) tables->nodes.metadata, tables->nodes.metadata_offset,\n            tables->nodes.num_rows, tables->nodes.metadata_length, NPY_INT8 },\n        { NULL },\n    };\n\n    tsklwt_table_col_t edge_cols[] = {\n        { \"left\", (void *) tables->edges.left, tables->edges.num_rows, NPY_FLOAT64 },\n        { \"right\", (void *) tables->edges.right, tables->edges.num_rows, NPY_FLOAT64 },\n        { \"parent\", (void *) tables->edges.parent, tables->edges.num_rows, NPY_INT32 },\n        { \"child\", (void *) tables->edges.child, tables->edges.num_rows, NPY_INT32 },\n        { NULL },\n    };\n\n    tsklwt_ragged_col_t edge_ragged_cols[] = {\n        { \"metadata\", (void *) tables->edges.metadata, tables->edges.metadata_offset,\n            tables->edges.num_rows, tables->edges.metadata_length, NPY_INT8 },\n        { NULL },\n    };\n\n    tsklwt_table_col_t migration_cols[] = {\n        { \"left\", (void *) tables->migrations.left, tables->migrations.num_rows,\n            NPY_FLOAT64 },\n        { \"right\", (void *) tables->migrations.right, tables->migrations.num_rows,\n            NPY_FLOAT64 },\n        
{ \"node\", (void *) tables->migrations.node, tables->migrations.num_rows,\n            NPY_INT32 },\n        { \"source\", (void *) tables->migrations.source, tables->migrations.num_rows,\n            NPY_INT32 },\n        { \"dest\", (void *) tables->migrations.dest, tables->migrations.num_rows,\n            NPY_INT32 },\n        { \"time\", (void *) tables->migrations.time, tables->migrations.num_rows,\n            NPY_FLOAT64 },\n        { NULL },\n    };\n\n    tsklwt_ragged_col_t migration_ragged_cols[] = {\n        { \"metadata\", (void *) tables->migrations.metadata,\n            tables->migrations.metadata_offset, tables->migrations.num_rows,\n            tables->migrations.metadata_length, NPY_INT8 },\n        { NULL },\n    };\n\n    tsklwt_table_col_t site_cols[] = {\n        { \"position\", (void *) tables->sites.position, tables->sites.num_rows,\n            NPY_FLOAT64 },\n        { NULL },\n    };\n\n    tsklwt_ragged_col_t site_ragged_cols[] = {\n        { \"ancestral_state\", (void *) tables->sites.ancestral_state,\n            tables->sites.ancestral_state_offset, tables->sites.num_rows,\n            tables->sites.ancestral_state_length, NPY_INT8 },\n        { \"metadata\", (void *) tables->sites.metadata, tables->sites.metadata_offset,\n            tables->sites.num_rows, tables->sites.metadata_length, NPY_INT8 },\n        { NULL },\n    };\n\n    tsklwt_table_col_t mutation_cols[] = {\n        { \"site\", (void *) tables->mutations.site, tables->mutations.num_rows,\n            NPY_INT32 },\n        { \"node\", (void *) tables->mutations.node, tables->mutations.num_rows,\n            NPY_INT32 },\n        { \"time\", (void *) tables->mutations.time, tables->mutations.num_rows,\n            NPY_FLOAT64 },\n        { \"parent\", (void *) tables->mutations.parent, tables->mutations.num_rows,\n            NPY_INT32 },\n        { NULL },\n    };\n\n    tsklwt_ragged_col_t mutation_ragged_cols[] = {\n        { \"derived_state\", (void *) 
tables->mutations.derived_state,\n            tables->mutations.derived_state_offset, tables->mutations.num_rows,\n            tables->mutations.derived_state_length, NPY_INT8 },\n        { \"metadata\", (void *) tables->mutations.metadata,\n            tables->mutations.metadata_offset, tables->mutations.num_rows,\n            tables->mutations.metadata_length, NPY_INT8 },\n        { NULL },\n    };\n\n    tsklwt_ragged_col_t population_ragged_cols[] = {\n        { \"metadata\", (void *) tables->populations.metadata,\n            tables->populations.metadata_offset, tables->populations.num_rows,\n            tables->populations.metadata_length, NPY_INT8 },\n        { NULL },\n    };\n\n    tsklwt_ragged_col_t provenance_ragged_cols[] = {\n        { \"timestamp\", (void *) tables->provenances.timestamp,\n            tables->provenances.timestamp_offset, tables->provenances.num_rows,\n            tables->provenances.timestamp_length, NPY_INT8 },\n        { \"record\", (void *) tables->provenances.record,\n            tables->provenances.record_offset, tables->provenances.num_rows,\n            tables->provenances.record_length, NPY_INT8 },\n        { NULL },\n    };\n\n    tsklwt_table_col_t indexes_cols[] = {\n        { \"edge_insertion_order\", (void *) tables->indexes.edge_insertion_order,\n            tables->indexes.num_edges, NPY_INT32 },\n        { \"edge_removal_order\", (void *) tables->indexes.edge_removal_order,\n            tables->indexes.num_edges, NPY_INT32 },\n        { NULL },\n    };\n\n    tsklwt_table_col_t no_indexes_cols[] = {\n        { NULL },\n    };\n\n    tsklwt_table_desc_t table_descs[] = {\n        { \"individuals\", individual_cols, individual_ragged_cols,\n            tables->individuals.metadata_schema,\n            tables->individuals.metadata_schema_length },\n        { \"nodes\", node_cols, node_ragged_cols, tables->nodes.metadata_schema,\n            tables->nodes.metadata_schema_length },\n        { \"edges\", edge_cols, 
edge_ragged_cols, tables->edges.metadata_schema,\n            tables->edges.metadata_schema_length },\n        { \"migrations\", migration_cols, migration_ragged_cols,\n            tables->migrations.metadata_schema,\n            tables->migrations.metadata_schema_length },\n        { \"sites\", site_cols, site_ragged_cols, tables->sites.metadata_schema,\n            tables->sites.metadata_schema_length },\n        { \"mutations\", mutation_cols, mutation_ragged_cols,\n            tables->mutations.metadata_schema,\n            tables->mutations.metadata_schema_length },\n        { \"populations\", NULL, population_ragged_cols,\n            tables->populations.metadata_schema,\n            tables->populations.metadata_schema_length },\n        { \"provenances\", NULL, provenance_ragged_cols, NULL, 0 },\n        /* We don't want to insert empty indexes, return an empty dict if there are none\n         */\n        { \"indexes\",\n            tsk_table_collection_has_index(tables, 0) ? indexes_cols : no_indexes_cols,\n            NULL, NULL, 0 },\n    };\n\n    for (j = 0; j < sizeof(table_descs) / sizeof(*table_descs); j++) {\n        table_dict = write_table_dict(&table_descs[j], force_offset_64);\n        if (table_dict == NULL) {\n            goto out;\n        }\n        if (PyDict_SetItemString(dict, table_descs[j].name, table_dict) != 0) {\n            goto out;\n        }\n        Py_DECREF(table_dict);\n    }\n\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic int\nwrite_top_level_data(\n    const tsk_table_collection_t *tables, PyObject *dict, bool force_offset_64)\n{\n    int ret = -1;\n    PyObject *val = NULL;\n\n    /* Dict representation version */\n    val = Py_BuildValue(\"ll\", 1, 6);\n    if (val == NULL) {\n        goto out;\n    }\n    if (PyDict_SetItemString(dict, \"encoding_version\", val) != 0) {\n        goto out;\n    }\n    Py_DECREF(val);\n    val = NULL;\n\n    val = Py_BuildValue(\"d\", tables->sequence_length);\n    if (val == NULL) {\n 
       goto out;\n    }\n    if (PyDict_SetItemString(dict, \"sequence_length\", val) != 0) {\n        goto out;\n    }\n    Py_DECREF(val);\n    val = NULL;\n\n    if (write_string_to_dict(\n            dict, \"time_units\", tables->time_units, tables->time_units_length)\n        != 0) {\n        goto out;\n    }\n    if (tables->metadata_schema_length > 0) {\n        if (write_string_to_dict(dict, \"metadata_schema\", tables->metadata_schema,\n                tables->metadata_schema_length)\n            != 0) {\n            goto out;\n        }\n    }\n    if (tables->metadata_length > 0) {\n        if (write_bytes_to_dict(\n                dict, \"metadata\", tables->metadata, tables->metadata_length)\n            != 0) {\n            goto out;\n        }\n    }\n\n    ret = 0;\nout:\n    Py_XDECREF(val);\n    return ret;\n}\n\nstatic PyObject *\nwrite_reference_sequence_dict(const tsk_reference_sequence_t *ref, bool force_offset_64)\n{\n    PyObject *ret = NULL;\n    PyObject *dict = NULL;\n\n    dict = PyDict_New();\n    if (dict == NULL) {\n        goto out;\n    }\n\n    if (ref->metadata_schema_length > 0) {\n        if (write_string_to_dict(dict, \"metadata_schema\", ref->metadata_schema,\n                ref->metadata_schema_length)\n            != 0) {\n            goto out;\n        }\n    }\n    if (ref->metadata_length > 0) {\n        if (write_bytes_to_dict(dict, \"metadata\", ref->metadata, ref->metadata_length)\n            != 0) {\n            goto out;\n        }\n    }\n    if (write_string_to_dict(dict, \"data\", ref->data, ref->data_length) != 0) {\n        goto out;\n    }\n    if (write_string_to_dict(dict, \"url\", ref->url, ref->url_length) != 0) {\n        goto out;\n    }\n\n    ret = dict;\n    dict = NULL;\nout:\n    Py_XDECREF(dict);\n    return ret;\n}\n\n/* Returns a dictionary encoding of the specified table collection */\nstatic PyObject *\ndump_tables_dict(tsk_table_collection_t *tables, bool force_offset_64)\n{\n    PyObject 
*ret = NULL;\n    PyObject *dict = NULL;\n    PyObject *ref_dict = NULL;\n    int err;\n\n    dict = PyDict_New();\n    if (dict == NULL) {\n        goto out;\n    }\n\n    err = write_top_level_data(tables, dict, force_offset_64);\n    if (err != 0) {\n        goto out;\n    }\n    if (tsk_table_collection_has_reference_sequence(tables)) {\n        ref_dict = write_reference_sequence_dict(\n            &tables->reference_sequence, force_offset_64);\n        if (ref_dict == NULL) {\n            goto out;\n        }\n        if (PyDict_SetItemString(dict, \"reference_sequence\", ref_dict) != 0) {\n            goto out;\n        }\n        Py_DECREF(ref_dict);\n        ref_dict = NULL;\n    }\n    err = write_table_arrays(tables, dict, force_offset_64);\n    if (err != 0) {\n        goto out;\n    }\n    ret = dict;\n    dict = NULL;\nout:\n    Py_XDECREF(dict);\n    Py_XDECREF(ref_dict);\n    return ret;\n}\n\n/*===================================================================\n * LightweightTableCollection\n *===================================================================\n */\n\nstatic int\nLightweightTableCollection_check_state(LightweightTableCollection *self)\n{\n    int ret = 0;\n    if (self->tables == NULL) {\n        PyErr_SetString(PyExc_SystemError, \"LightweightTableCollection not initialised\");\n        ret = -1;\n    }\n    return ret;\n}\n\nstatic void\nLightweightTableCollection_dealloc(LightweightTableCollection *self)\n{\n    if (self->tables != NULL) {\n        tsk_table_collection_free(self->tables);\n        PyMem_Free(self->tables);\n        self->tables = NULL;\n    }\n    Py_TYPE(self)->tp_free((PyObject *) self);\n}\n\nstatic int\nLightweightTableCollection_init(\n    LightweightTableCollection *self, PyObject *args, PyObject *kwds)\n{\n    int ret = -1;\n    int err;\n    static char *kwlist[] = { \"sequence_length\", NULL };\n    double sequence_length = -1;\n\n    self->tables = NULL;\n    if (!PyArg_ParseTupleAndKeywords(args, 
kwds, \"|d\", kwlist, &sequence_length)) {\n        goto out;\n    }\n    self->tables = PyMem_Malloc(sizeof(*self->tables));\n    if (self->tables == NULL) {\n        PyErr_NoMemory();\n        goto out;\n    }\n    err = tsk_table_collection_init(self->tables, 0);\n    if (err != 0) {\n        handle_tskit_error(err);\n        goto out;\n    }\n    self->tables->sequence_length = sequence_length;\n    ret = 0;\nout:\n    return ret;\n}\n\nstatic PyObject *\nLightweightTableCollection_asdict(\n    LightweightTableCollection *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *ret = NULL;\n    static char *kwlist[] = { \"force_offset_64\", NULL };\n    int force_offset_64 = 0;\n\n    if (!PyArg_ParseTupleAndKeywords(args, kwds, \"|i\", kwlist, &force_offset_64)) {\n        goto out;\n    }\n    if (LightweightTableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    ret = dump_tables_dict(self->tables, force_offset_64);\nout:\n    return ret;\n}\n\nstatic PyObject *\nLightweightTableCollection_fromdict(LightweightTableCollection *self, PyObject *args)\n{\n    int err;\n    PyObject *ret = NULL;\n    PyObject *dict = NULL;\n\n    if (LightweightTableCollection_check_state(self) != 0) {\n        goto out;\n    }\n    if (!PyArg_ParseTuple(args, \"O!\", &PyDict_Type, &dict)) {\n        goto out;\n    }\n    err = parse_table_collection_dict(self->tables, dict);\n    if (err != 0) {\n        goto out;\n    }\n    ret = Py_BuildValue(\"\");\nout:\n    return ret;\n}\n\nstatic PyMethodDef LightweightTableCollection_methods[] = {\n    { .ml_name = \"asdict\",\n        .ml_meth = (PyCFunction) LightweightTableCollection_asdict,\n        .ml_flags = METH_VARARGS | METH_KEYWORDS,\n        .ml_doc = \"Returns the tables encoded as a dictionary.\" },\n    { .ml_name = \"fromdict\",\n        .ml_meth = (PyCFunction) LightweightTableCollection_fromdict,\n        .ml_flags = METH_VARARGS,\n        .ml_doc = \"Populates the internal tables using the specified 
dictionary.\" },\n    { NULL } /* Sentinel */\n};\n\nstatic PyTypeObject LightweightTableCollectionType = {\n    // clang-format off\n    PyVarObject_HEAD_INIT(NULL, 0)\n    .tp_name = \"LightweightTableCollection\",\n    .tp_doc = \"Low-level table collection interchange.\",\n    .tp_basicsize = sizeof(LightweightTableCollection),\n    .tp_flags = Py_TPFLAGS_DEFAULT,\n    .tp_new = PyType_GenericNew,\n    .tp_methods = LightweightTableCollection_methods,\n    .tp_init = (initproc) LightweightTableCollection_init,\n    .tp_dealloc = (destructor) LightweightTableCollection_dealloc,\n    // clang-format on\n};\n\nstatic int\nregister_lwt_class(PyObject *module)\n{\n    if (PyType_Ready(&LightweightTableCollectionType) < 0) {\n        return -1;\n    }\n    Py_INCREF(&LightweightTableCollectionType);\n    PyModule_AddObject(module, \"LightweightTableCollection\",\n        (PyObject *) &LightweightTableCollectionType);\n    return 0;\n}\n"
  },
  {
    "path": "python/pyproject.toml",
    "content": "[build-system]\nrequires = [\"setuptools>=45\", \"wheel\", \"numpy>=2.0\"]\nbuild-backend = \"setuptools.build_meta\"\n\n[project]\nname = \"tskit\"\ndynamic = [\"version\"]\nauthors = [\n    {name = \"Tskit Developers\", email = \"admin@tskit.dev\"},\n]\ndescription = \"The tree sequence toolkit.\"\nreadme = \"README.rst\"\nlicense = \"MIT\"\nclassifiers = [\n    \"Programming Language :: C\",\n    \"Programming Language :: Python\",\n    \"Programming Language :: Python :: 3\",\n    \"Programming Language :: Python :: 3.11\",\n    \"Programming Language :: Python :: 3.12\",\n    \"Programming Language :: Python :: 3.13\",\n    \"Programming Language :: Python :: 3.14\",\n    \"Programming Language :: Python :: 3 :: Only\",\n    \"Development Status :: 5 - Production/Stable\",\n    \"Environment :: Other Environment\",\n    \"Intended Audience :: Science/Research\",\n    \"Operating System :: POSIX\",\n    \"Operating System :: MacOS :: MacOS X\",\n    \"Operating System :: Microsoft :: Windows\",\n    \"Topic :: Scientific/Engineering\",\n    \"Topic :: Scientific/Engineering :: Bio-Informatics\",\n]\nkeywords = [\n    \"population genetics\",\n    \"tree sequence\",\n    \"ancestral recombination graph\",\n    \"evolutionary tree\",\n    \"statistical genetics\",\n    \"phylogenetics\",\n    \"tskit\",\n]\nrequires-python = \">=3.11\"\ndependencies = [\n    \"jsonschema>=3.0.0\",\n    \"numpy>=2\",\n]\n\n[project.urls]\nHomepage = \"https://tskit.dev/tskit\"\nDocumentation = \"https://tskit.dev/tskit/docs/stable\"\nChangelog = \"https://tskit.dev/tskit/docs/stable/changelogs.html\"\n\"Bug Tracker\" = \"https://github.com/tskit-dev/tskit/issues\"\nGitHub = \"https://github.com/tskit-dev/tskit/\"\n\n[project.scripts]\ntskit = \"tskit.cli:tskit_main\"\n\n[tool.setuptools]\npackages = [\"tskit\", \"tskit.jit\"]\n\n[tool.setuptools.dynamic]\nversion = {attr = \"tskit._version.tskit_version\"}\n\n[dependency-groups]\ntest = [\n    \"biopython\",\n    
\"coverage\",\n    \"dendropy\",\n    \"kastore\",\n    \"lshmm\",\n    \"msgpack\",\n    \"msprime\",\n    \"networkx\",\n    \"numba\",\n    \"portion\",\n    \"pytest\",\n    \"pytest-cov\",\n    \"pytest-xdist\",\n    \"tszip\",\n    \"xmlunittest\",\n    \"svgwrite\",\n    \"newick\",\n    \"zarr\",\n]\n\ntest-lwt = [\n    \"cython\",\n    \"pytest\",\n    \"pytest-cov\",\n]\n\ndocs = [\n    \"jupyter-book<2\",\n    \"breathe\",\n    \"sphinx-autodoc-typehints\",\n    \"sphinx-issues\",\n    \"sphinx-argparse\",\n    \"msprime\",\n    \"numba\",\n    \"sphinx-book-theme\",\n    \"pandas\",\n]\n\nwheels = [\n  \"cibuildwheel\",\n]\n\npackaging = [\n    \"twine\",\n    \"validate-pyproject[all]\",\n]\n\n# Fully pin lint requirements for determinism.\nlint = [\n    \"clang-format==21.1.8\",\n    \"ruff==0.15.1\",\n    \"prek==0.3.3\",\n]\n\ndev = [\n    {include-group = \"docs\"},\n    {include-group = \"lint\"},\n    {include-group = \"test\"},\n    {include-group = \"packaging\"},\n]\n\n[tool.pytest.ini_options]\naddopts = \"-n 4\"\ntestpaths = [\"tests\"]\n\n[tool.ruff]\n# Assume Python 3.11\ntarget-version = \"py311\"\n\nline-length = 89\nindent-width = 4\n\n[tool.ruff.lint]\nselect = [\"E\", \"F\", \"B\", \"W\", \"I\", \"N\", \"UP\", \"A\", \"PT\"]\n\nignore = [\n    \"A001\", \"A002\", \"RUF\",\n    \"B905\", #Don't add strict=False to zips (B905)\n    \"E741\", # using 'l' as a variable name\n    \"N806\", \"N802\", \"N803\", # Various nags about uppercase vars\n    \"N818\", # Exceptions not called \"Error\"\n    \"UP032\", # Don't upgrade to f strings\n    \"UP031\", # Allow old-skool format specifiers\n    \"B904\", # raise from-issues\n    \"PT006\", # Wrong type passed to first argument of `pytest.mark.parametrize`\n    \"PT007\", # Wrong values type in `pytest.mark.parametrize` expected `list` of `tuple`\n    \"PT009\", # Replace `assertAlmostEqual(...)` with `assert ...`\n    \"PT018\", # Assertion should be broken down into multiple parts\n    
\"PT030\", # `pytest.warns(UserWarning)` is too broad, set the `match`\n    \"PT009\", # pytest.raises(ValueError) too broad\n    \"PT011\", # pytest.raises(ValueError) too broad\n    \"PT012\", # pytest.raises() block should contain a single simple statement\n    # \"B028\", # No stacklevel for warnings\n    \"B018\", # Found useless expression. Either assign it\n    \"PT031\", # `pytest.warns()` block should contain a single simple statement\n]\n\nfixable = [\"ALL\"]\nunfixable = []\n\n[tool.ruff.lint.isort]\nknown-first-party = [\"_tskit\", \"tskit\", \"lwt_interface\"]\nknown-third-party = [\n  \"msprime\",\n  \"numpy\",\n  \"pytest\",\n]\n\n[tool.cibuildwheel]\nbuild = [\"cp311-*\", \"cp312-*\", \"cp313-*\", \"cp314-*\"]\nbuild-frontend = \"build\"\nbuild-verbosity = 1\ntest-command = [\n  \"python -c \\\"import _tskit\\\"\",\n  \"tskit --help\",\n]\n\n[tool.cibuildwheel.linux]\narchs = [\"x86_64\"]\n"
  },
  {
    "path": "python/setup.py",
    "content": "import os\nimport platform\n\nimport numpy\nfrom setuptools import Extension, setup\n\nIS_WINDOWS = platform.system() == \"Windows\"\n\n\nlibdir = \"lib\"\nkastore_dir = os.path.join(libdir, \"subprojects\", \"kastore\")\ntsk_source_files = [\n    \"core.c\",\n    \"tables.c\",\n    \"trees.c\",\n    \"genotypes.c\",\n    \"stats.c\",\n    \"convert.c\",\n    \"haplotype_matching.c\",\n]\nsources = (\n    [\"_tskitmodule.c\"]\n    + [os.path.join(libdir, \"tskit\", f) for f in tsk_source_files]\n    + [os.path.join(kastore_dir, \"kastore.c\")]\n)\n\ndefines = []\nlibraries = []\nif IS_WINDOWS:\n    libraries.append(\"Advapi32\")\n    defines.append((\"WIN32\", None))\n\n_tskit_module = Extension(\n    \"_tskit\",\n    sources=sources,\n    extra_compile_args=[\"-std=c99\"],\n    libraries=libraries,\n    define_macros=defines,\n    include_dirs=[\"lwt_interface\", libdir, kastore_dir, numpy.get_include()],\n)\n\nsetup(\n    ext_modules=[_tskit_module],\n)\n"
  },
  {
    "path": "python/stress_lowlevel.py",
    "content": "import curses\nimport os\nimport random\nimport resource\nimport sys\nimport time\nimport tracemalloc\nfrom contextlib import redirect_stdout\n\nimport pytest\n\n\"\"\"\nCode to stress the low-level API as much as possible to expose\nany memory leaks or error handling issues.\n\"\"\"\n\n\ndef main(stdscr):\n    if len(sys.argv) > 1:\n        args = sys.argv[1:]\n    else:\n        args = [\"-n0\", \"tests/test_python_c.py\"]\n\n    class StressPlugin:\n        def __init__(self):\n            self.max_rss = 0\n            self.max_rss_iter = 0\n            self.min_rss = 1e100\n            self.iteration = 0\n            self.last_print = time.time()\n            self.memory_start = None\n\n        def pytest_sessionstart(self):\n            if self.memory_start is None:\n                tracemalloc.start()\n                self.memory_start = tracemalloc.take_snapshot()\n\n        def pytest_sessionfinish(self):\n            memory_current = tracemalloc.take_snapshot()\n            rusage = resource.getrusage(resource.RUSAGE_SELF)\n            if self.max_rss < rusage.ru_maxrss:\n                self.max_rss = rusage.ru_maxrss\n                self.max_rss_iter = self.iteration\n            if self.min_rss > rusage.ru_maxrss:\n                self.min_rss = rusage.ru_maxrss\n\n            # We don't want to flood stdout, so we rate-limit to 1 per second.\n            if time.time() - self.last_print > 1:\n                stdscr.clear()\n                rows, cols = stdscr.getmaxyx()\n                stdscr.addstr(\n                    0,\n                    0,\n                    \"iter\\tRSS\\tmin\\tmax\\tmax@iter\"[: cols - 1],\n                )\n                stdscr.addstr(\n                    1,\n                    0,\n                    \"\\t\".join(\n                        map(\n                            str,\n                            [\n                                self.iteration,\n                                
rusage.ru_maxrss,\n                                self.min_rss,\n                                self.max_rss,\n                                self.max_rss_iter,\n                            ],\n                        )\n                    )[: cols - 1],\n                )\n                stats = memory_current.compare_to(self.memory_start, \"traceback\")\n                for i, stat in enumerate(stats[: rows - 3], 1):\n                    stdscr.addstr(i + 2, 0, str(stat)[: cols - 1])\n                self.last_print = time.time()\n                stdscr.refresh()\n                self.iteration += 1\n\n    plugin = StressPlugin()\n    while True:\n        # We don't want any random variation in the amount of memory\n        # used from test-to-test.\n        random.seed(1)\n        with open(os.devnull, \"w\") as devnull:\n            with redirect_stdout(devnull):\n                result = pytest.main(args, plugins=[plugin])\n        if result != 0:\n            exit(\"TESTS FAILED\")\n\n\nif __name__ == \"__main__\":\n    stdscr = curses.initscr()\n    curses.noecho()\n    curses.cbreak()\n\n    try:\n        main(stdscr)\n    finally:\n        curses.echo()\n        curses.nocbreak()\n        curses.endwin()\n"
  },
  {
    "path": "python/tests/__init__.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2023 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\nimport base64\n\nimport tskit\n\nfrom . import tsutil\nfrom .simplify import *  # NOQA\n\n# TODO remove this code and refactor elsewhere.\n\n\nclass PythonTree:\n    \"\"\"\n    Presents the same interface as the Tree object for testing. 
This\n    is tightly coupled with the PythonTreeSequence object below which updates\n    the internal structures during iteration.\n    \"\"\"\n\n    def __init__(self, num_nodes):\n        self.num_nodes = num_nodes\n        self.parent = [tskit.NULL for _ in range(num_nodes)]\n        self.left_child = [tskit.NULL for _ in range(num_nodes)]\n        self.right_child = [tskit.NULL for _ in range(num_nodes)]\n        self.left_sib = [tskit.NULL for _ in range(num_nodes)]\n        self.right_sib = [tskit.NULL for _ in range(num_nodes)]\n        self.num_children = [0 for _ in range(num_nodes)]\n        self.edge = [tskit.NULL for _ in range(num_nodes)]\n        self.left = 0\n        self.right = 0\n        self.index = -1\n        self.left_root = -1\n        # We need a sites function, so this name is taken.\n        self.site_list = []\n\n    @classmethod\n    def from_tree(cls, tree):\n        ret = PythonTree(tree.tree_sequence.num_nodes)\n        ret.left, ret.right = tree.get_interval()\n        ret.site_list = list(tree.sites())\n        ret.index = tree.get_index()\n        ret.left_root = tree.left_root\n        ret.tree = tree\n        for u in range(ret.num_nodes):\n            ret.parent[u] = tree.parent(u)\n            ret.left_child[u] = tree.left_child(u)\n            ret.right_child[u] = tree.right_child(u)\n            ret.left_sib[u] = tree.left_sib(u)\n            ret.right_sib[u] = tree.right_sib(u)\n            ret.num_children[u] = tree.num_children(u)\n            ret.edge[u] = tree.edge(u)\n        assert ret == tree\n        return ret\n\n    @property\n    def roots(self):\n        u = self.left_root\n        roots = []\n        while u != tskit.NULL:\n            roots.append(u)\n            u = self.right_sib[u]\n        return roots\n\n    def children(self, u):\n        v = self.left_child[u]\n        ret = []\n        while v != tskit.NULL:\n            ret.append(v)\n            v = self.right_sib[v]\n        return ret\n\n    def 
get_interval(self):\n        return self.left, self.right\n\n    def get_parent(self, node):\n        return self.parent[node]\n\n    def get_children(self, node):\n        return self.children[node]\n\n    def get_index(self):\n        return self.index\n\n    def get_parent_dict(self):\n        d = {\n            u: self.parent[u]\n            for u in range(self.num_nodes)\n            if self.parent[u] != tskit.NULL\n        }\n        return d\n\n    def sites(self):\n        return iter(self.site_list)\n\n    def __eq__(self, other):\n        return (\n            self.get_parent_dict() == other.get_parent_dict()\n            and self.get_interval() == other.get_interval()\n            and self.roots == other.roots\n            and self.get_index() == other.get_index()\n            and list(self.sites()) == list(other.sites())\n        )\n\n    def __ne__(self, other):\n        return not self.__eq__(other)\n\n\nclass PythonTreeSequence:\n    \"\"\"\n    A python implementation of the TreeSequence object.\n\n    TODO this class is of limited use now and should be factored out as\n    part of a drive towards more modular versions of the tests currently\n    in tests_highlevel.py.\n    \"\"\"\n\n    def __init__(self, tree_sequence, breakpoints=None):\n        self._tree_sequence = tree_sequence\n        self._sites = []\n        # TODO this code here is expressed in terms of the low-level\n        # tree sequence for legacy reasons. 
It probably makes more sense\n        # to describe it in terms of the tables now if we want to have an\n        # independent implementation.\n        ll_ts = self._tree_sequence._ll_tree_sequence\n\n        def make_mutation(id_):\n            (\n                site,\n                node,\n                derived_state,\n                parent,\n                metadata,\n                time,\n                edge,\n                inherited_state,\n            ) = ll_ts.get_mutation(id_)\n            return tskit.Mutation(\n                id=id_,\n                site=site,\n                node=node,\n                time=time,\n                derived_state=derived_state,\n                parent=parent,\n                metadata=metadata,\n                edge=edge,\n                inherited_state=inherited_state,\n                metadata_decoder=tskit.metadata.parse_metadata_schema(\n                    ll_ts.get_table_metadata_schemas().mutation\n                ).decode_row,\n            )\n\n        for j in range(tree_sequence.num_sites):\n            pos, ancestral_state, ll_mutations, id_, metadata = ll_ts.get_site(j)\n            self._sites.append(\n                tskit.Site(\n                    id=id_,\n                    position=pos,\n                    ancestral_state=ancestral_state,\n                    mutations=[make_mutation(ll_mut) for ll_mut in ll_mutations],\n                    metadata=metadata,\n                    metadata_decoder=tskit.metadata.parse_metadata_schema(\n                        ll_ts.get_table_metadata_schemas().site\n                    ).decode_row,\n                )\n            )\n\n    def trees(self):\n        pt = PythonTree(self._tree_sequence.get_num_nodes())\n        pt.index = 0\n        for (left, right), rtt in tsutil.algorithm_R(self._tree_sequence):\n            pt.parent[:] = rtt.parent\n            pt.left_child[:] = rtt.left_child\n            pt.right_child[:] = rtt.right_child\n            
pt.left_sib[:] = rtt.left_sib\n            pt.right_sib[:] = rtt.right_sib\n            pt.num_children[:] = rtt.num_children\n            pt.edge[:] = rtt.edge\n            pt.left_root = rtt.left_child[-1]\n            pt.left = left\n            pt.right = right\n            # Add in all the sites\n            pt.site_list = [\n                site for site in self._sites if left <= site.position < right\n            ]\n            yield pt\n            pt.index += 1\n        pt.index = -1\n\n\nclass MRCACalculator:\n    \"\"\"\n    Class that allows us to compute the nearest common ancestor of arbitrary\n    nodes in an oriented forest.\n\n    This is an implementation of Schieber and Vishkin's nearest common ancestor\n    algorithm from TAOCP volume 4A, pg.164-167 [K11]_. Preprocesses the\n    input tree into a sideways heap in O(n) time and processes queries for the\n    nearest common ancestor between an arbitrary pair of nodes in O(1) time.\n\n    :param oriented_forest: the input oriented forest\n    :type oriented_forest: list of integers\n    \"\"\"\n\n    LAMBDA = 0\n\n    def __init__(self, oriented_forest):\n        # We turn this oriented forest into a 1 based array by adding 1\n        # to everything\n        converted = [0] + [x + 1 for x in oriented_forest]\n        self.__preprocess(converted)\n\n    def __preprocess(self, oriented_forest):\n        \"\"\"\n        Preprocess the oriented forest, so that we can answer mrca queries\n        in constant time.\n        \"\"\"\n        n = len(oriented_forest)\n        child = [self.LAMBDA for i in range(n)]\n        parent = [self.LAMBDA for i in range(n)]\n        sib = [self.LAMBDA for i in range(n)]\n        self.__lambda = [0 for i in range(n)]\n        self.__pi = [0 for i in range(n)]\n        self.__tau = [0 for i in range(n)]\n        self.__beta = [0 for i in range(n)]\n        self.__alpha = [0 for i in range(n)]\n        for u in range(n):\n            v = oriented_forest[u]\n           
 sib[u] = child[v]\n            child[v] = u\n            parent[u] = v\n        p = child[self.LAMBDA]\n        n = 0\n        self.__lambda[0] = -1\n        while p != self.LAMBDA:\n            notDone = True\n            while notDone:\n                n += 1\n                self.__pi[p] = n\n                self.__tau[n] = self.LAMBDA\n                self.__lambda[n] = 1 + self.__lambda[n >> 1]\n                if child[p] != self.LAMBDA:\n                    p = child[p]\n                else:\n                    notDone = False\n            self.__beta[p] = n\n            notDone = True\n            while notDone:\n                self.__tau[self.__beta[p]] = parent[p]\n                if sib[p] != self.LAMBDA:\n                    p = sib[p]\n                    notDone = False\n                else:\n                    p = parent[p]\n                    if p != self.LAMBDA:\n                        h = self.__lambda[n & -self.__pi[p]]\n                        self.__beta[p] = ((n >> h) | 1) << h\n                    else:\n                        notDone = False\n        # Begin the second traversal\n        self.__lambda[0] = self.__lambda[n]\n        self.__pi[self.LAMBDA] = 0\n        self.__beta[self.LAMBDA] = 0\n        self.__alpha[self.LAMBDA] = 0\n        p = child[self.LAMBDA]\n        while p != self.LAMBDA:\n            notDone = True\n            while notDone:\n                a = self.__alpha[parent[p]] | (self.__beta[p] & -self.__beta[p])\n                self.__alpha[p] = a\n                if child[p] != self.LAMBDA:\n                    p = child[p]\n                else:\n                    notDone = False\n            notDone = True\n            while notDone:\n                if sib[p] != self.LAMBDA:\n                    p = sib[p]\n                    notDone = False\n                else:\n                    p = parent[p]\n                    notDone = p != self.LAMBDA\n\n    def get_mrca(self, x, y):\n        \"\"\"\n        
Returns the most recent common ancestor of the nodes x and y,\n        or -1 if the nodes belong to different trees.\n\n        :param x: the first node\n        :param y: the second node\n        :return: the MRCA of nodes x and y\n        \"\"\"\n        # We need to rescale here because SV expects 1-based arrays.\n        return self._sv_mrca(x + 1, y + 1) - 1\n\n    def _sv_mrca(self, x, y):\n        if self.__beta[x] <= self.__beta[y]:\n            h = self.__lambda[self.__beta[y] & -self.__beta[x]]\n        else:\n            h = self.__lambda[self.__beta[x] & -self.__beta[y]]\n        k = self.__alpha[x] & self.__alpha[y] & -(1 << h)\n        h = self.__lambda[k & -k]\n        j = ((self.__beta[x] >> h) | 1) << h\n        if j == self.__beta[x]:\n            xhat = x\n        else:\n            ell = self.__lambda[self.__alpha[x] & ((1 << h) - 1)]\n            xhat = self.__tau[((self.__beta[x] >> ell) | 1) << ell]\n        if j == self.__beta[y]:\n            yhat = y\n        else:\n            ell = self.__lambda[self.__alpha[y] & ((1 << h) - 1)]\n            yhat = self.__tau[((self.__beta[y] >> ell) | 1) << ell]\n        if self.__pi[xhat] <= self.__pi[yhat]:\n            z = xhat\n        else:\n            z = yhat\n        return z\n\n\ndef base64_encode(metadata):\n    \"\"\"\n    Returns the specified metadata bytes object encoded as an ASCII-safe\n    string.\n    \"\"\"\n    return base64.b64encode(metadata).decode(\"utf8\")\n\n\ndef cached_example(ts_func):\n    \"\"\"\n    Utility decorator to cache the result of a single function call\n    returning a tree sequence example.\n    \"\"\"\n    cache = None\n\n    def f(*args):\n        nonlocal cache\n        if cache is None:\n            cache = ts_func(*args)\n        return cache\n\n    return f\n"
  },
  {
    "path": "python/tests/conftest.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2022 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nConfiguration and fixtures for pytest. Only put test-suite wide fixtures in here. Module\nspecific fixtures should live in their modules.\n\nTo use a fixture in a test simply refer to it by name as an argument. This is called\ndependency injection. Note that all fixtures should have the suffix \"_fixture\" to make\nit clear in test code.\n\nFor example to use the `ts` fixture (a tree sequence with data in all fields) in a test:\n\nclass TestClass:\n    def test_something(self, ts_fixture):\n        assert ts_fixture.some_method() == expected\n\nFixtures can be parameterised etc. see https://docs.pytest.org/en/stable/fixture.html\n\nNote that fixtures have a \"scope\" for example `ts_fixture` below is only created once\nper test session and re-used for subsequent tests.\n\"\"\"\n\nimport msprime\nimport pytest\n\nfrom . 
import tsutil\n\n\ndef pytest_addoption(parser):\n    \"\"\"\n    Add options, e.g. to skip tests marked with `@pytest.mark.slow`\n    \"\"\"\n    parser.addoption(\n        \"--skip-slow\", action=\"store_true\", default=False, help=\"Skip slow tests\"\n    )\n    parser.addoption(\n        \"--skip-network\",\n        action=\"store_true\",\n        default=False,\n        help=\"Skip network/FIFO tests\",\n    )\n    parser.addoption(\n        \"--overwrite-expected-visualizations\",\n        action=\"store_true\",\n        default=False,\n        help=\"Overwrite the expected viz files in tests/data/svg/\",\n    )\n    parser.addoption(\n        \"--draw-svg-debug-box\",\n        action=\"store_true\",\n        default=False,\n        help=\"To help debugging, draw lines around the plotboxes in SVG output files\",\n    )\n\n\ndef pytest_configure(config):\n    \"\"\"\n    Add docs on the \"slow\" marker\n    \"\"\"\n    config.addinivalue_line(\"markers\", \"slow: mark test as slow to run\")\n    config.addinivalue_line(\"markers\", \"network: mark test as using network/FIFO\")\n\n\ndef pytest_collection_modifyitems(config, items):\n    if config.getoption(\"--skip-slow\"):\n        skip_slow = pytest.mark.skip(reason=\"--skip-slow specified\")\n        for item in items:\n            if \"slow\" in item.keywords:\n                item.add_marker(skip_slow)\n    if config.getoption(\"--skip-network\"):\n        skip_network = pytest.mark.skip(reason=\"--skip-network specified\")\n        for item in items:\n            if \"network\" in item.keywords:\n                item.add_marker(skip_network)\n\n\n@pytest.fixture\ndef overwrite_viz(request):\n    return request.config.getoption(\"--overwrite-expected-visualizations\")\n\n\n@pytest.fixture\ndef draw_plotbox(request):\n    return request.config.getoption(\"--draw-svg-debug-box\")\n\n\n@pytest.fixture(scope=\"session\")\ndef simple_degree1_ts_fixture():\n    return msprime.simulate(10, 
random_seed=42)\n\n\n@pytest.fixture(scope=\"session\")\ndef simple_degree2_ts_fixture():\n    ts = msprime.simulate(10, recombination_rate=0.2, random_seed=42)\n    assert ts.num_trees == 2\n    return ts\n\n\n@pytest.fixture(scope=\"session\")\ndef ts_fixture():\n    \"\"\"\n    A tree sequence with data in all fields\n    \"\"\"\n    return tsutil.all_fields_ts()\n\n\n@pytest.fixture(scope=\"session\")\ndef ts_fixture_for_simplify():\n    \"\"\"\n    A tree sequence with data in all fields except edge metadata and migrations\n    \"\"\"\n    return tsutil.all_fields_ts(edge_metadata=False, migrations=False)\n\n\n@pytest.fixture(scope=\"session\")\ndef replicate_ts_fixture():\n    \"\"\"\n    A list of tree sequences\n    \"\"\"\n    return list(msprime.simulate(10, num_replicates=10, random_seed=42))\n"
  },
  {
    "path": "python/tests/data/SLiM/README",
    "content": "The files in this directory are generated by SLiM.\n"
  },
  {
    "path": "python/tests/data/SLiM/minimal-example.txt",
    "content": "initialize() {\n    initializeTreeSeq(); \n    initializeMutationRate(0.0);\n    initializeMutationType(\"m1\", 0.5, \"f\", -0.1);\n    initializeGenomicElementType(\"g1\", m1, 1.0);\n    initializeGenomicElement(g1, 0, 9);\n    initializeRecombinationRate(1e-1);\n}\n1 {\n    sim.addSubpop(\"p1\", 5);\n}\n3 {\n    sim.treeSeqOutput(\"tests/data/SLiM/minimal-example.trees\");\n    sim.simulationFinished();\n}\n"
  },
  {
    "path": "python/tests/data/SLiM/single-locus-example.txt",
    "content": "initialize() {\n    initializeTreeSeq(); \n    initializeMutationRate(0.0);\n    initializeMutationType(\"m1\", 0.5, \"f\", -0.1);\n    initializeGenomicElementType(\"g1\", m1, 1.0);\n    initializeGenomicElement(g1, 0, 9);\n    initializeRecombinationRate(0);\n}\n1 {\n    sim.addSubpop(\"p1\", 5);\n}\n3 {\n    sim.treeSeqOutput(\"tests/data/SLiM/single-locus-example.trees\");\n    sim.simulationFinished();\n}\n"
  },
  {
    "path": "python/tests/data/dict-encodings/generate_msprime.py",
    "content": "import pathlib\nimport pickle\n\nimport _msprime\nimport msprime\n\npop_configs = [msprime.PopulationConfiguration(5) for _ in range(2)]\nmigration_matrix = [[0, 1], [1, 0]]\nts = msprime.simulate(\n    population_configurations=pop_configs,\n    migration_matrix=migration_matrix,\n    mutation_rate=1,\n    record_migrations=True,\n    random_seed=1,\n)\nlwt = _msprime.LightweightTableCollection()\nlwt.fromdict(ts.tables.asdict())\n\ntest_dir = pathlib.Path(__file__).parent\nwith open(test_dir / f\"msprime-{msprime.__version__}.pkl\", \"wb\") as f:\n    pickle.dump(lwt.asdict(), f)\n"
  },
  {
    "path": "python/tests/data/simplify-bugs/01-edges.txt",
    "content": "left    right   parent  child\n0.000000\t4.000000\t5\t2,3\t\n4.000000\t9.000000\t5\t3\t\n22.000000\t28.000000\t5\t3\t\n0.000000\t18.000000\t6\t0,1,4\t\n18.000000\t19.000000\t6\t0,1,4,5\t\n19.000000\t28.000000\t6\t0,1,5\n0.000000\t19.000000\t7\t6\t\n19.000000\t28.000000\t7\t2,6\t\n0.000000\t28.000000\t8\t7\t\n0.000000\t28.000000\t9\t8\t\n0.000000\t18.000000\t10\t5,9\t\n18.000000\t28.000000\t10\t9\t\n0.000000\t19.000000\t11\t10\t\n19.000000\t28.000000\t11\t4,10\n0.000000\t9.000000\t12\t11\t\n9.000000\t22.000000\t12\t3,11\n22.000000\t28.000000\t12\t11\t\n0.000000\t28.000000\t13\t12\t\n0.000000\t28.000000\t14\t13\t\n0.000000\t28.000000\t15\t14\t\n0.000000\t4.000000\t16\t15\t\n4.000000\t19.000000\t16\t2,15\n19.000000\t28.000000\t16\t15\t\n"
  },
  {
    "path": "python/tests/data/simplify-bugs/01-mutations.txt",
    "content": "site\tnode\tderived_state\n"
  },
  {
    "path": "python/tests/data/simplify-bugs/01-nodes.txt",
    "content": "is_sample\ttime\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n0\t5.000000\n0\t6.000000\n0\t7.000000\n0\t8.000000\n0\t9.000000\n0\t10.000000\n0\t11.000000\n0\t12.000000\n0\t13.000000\n0\t14.000000\n0\t15.000000\n0\t16.000000\n"
  },
  {
    "path": "python/tests/data/simplify-bugs/01-sites.txt",
    "content": "position\tancestral_state\n"
  },
  {
    "path": "python/tests/data/simplify-bugs/02-edges.txt",
    "content": "left\tright\tparent\tchild\n63.824647\t100.000000\t100\t47,85\n41.913156\t100.000000\t101\t81,83\n0.000000\t100.000000\t102\t88,98\n0.000000\t76.111147\t103\t38,76\n0.000000\t100.000000\t104\t12,63\n0.000000\t100.000000\t105\t3,79\n0.000000\t100.000000\t106\t23,95\n0.000000\t100.000000\t107\t46,61\n0.000000\t63.824647\t108\t47,87\n0.000000\t41.913156\t109\t19,81\n41.913156\t100.000000\t109\t19,101\n65.841615\t100.000000\t110\t62,68\n0.000000\t42.476761\t111\t59,72\n0.000000\t79.893379\t112\t65,92\n0.000000\t100.000000\t113\t60,107\n38.209860\t53.470013\t114\t15,86\n99.748128\t100.000000\t115\t30,96\n86.643371\t95.255452\t116\t51,54\n0.000000\t100.000000\t117\t2,21\n95.255452\t100.000000\t118\t49,51\n91.315428\t100.000000\t119\t74,78\n27.039936\t100.000000\t120\t24,80\n27.368300\t63.824647\t121\t94,108\n63.824647\t100.000000\t121\t87,94\n92.895855\t100.000000\t122\t7,64\n0.000000\t46.386608\t123\t9,74\n0.000000\t85.936096\t124\t56,91\n0.000000\t47.640188\t125\t67,77\n30.422602\t43.241239\t126\t40,55\n0.000000\t6.538844\t127\t4,41\n51.451422\t100.000000\t128\t9,52\n62.577097\t90.247778\t129\t93,99\n0.000000\t7.108776\t130\t85,90\n60.658379\t100.000000\t131\t57,67\n0.000000\t100.000000\t132\t18,109\n11.309867\t79.893379\t133\t112,117\n79.893379\t100.000000\t133\t92,117\n96.284451\t99.748128\t134\t59,96\n42.476761\t43.241239\t135\t72,126\n43.241239\t45.405170\t135\t55,72\n0.000000\t63.435890\t136\t89,104\n4.419653\t34.689388\t137\t30,64\n3.568615\t27.368300\t138\t62,108\n23.785721\t60.658379\t139\t7,57\n60.658379\t92.895855\t139\t7,131\n92.895855\t100.000000\t139\t122,131\n37.043091\t83.777115\t140\t5,49\n63.435890\t100.000000\t141\t33,104\n0.000000\t34.240481\t142\t14,43,45,66,113,136\n34.240481\t58.841850\t142\t14,29,43,45,66,113,136\n58.841850\t63.435890\t142\t14,43,45,66,113,136\n63.435890\t75.398652\t142\t14,43,45,66,89,113\n75.398652\t83.394074\t142\t14,43,45,66,89,96,113\n83.394074\t85.706099\t142\t43,45,66,89,96,113\n85.706099\t85.936096\t142\t45
,66,89,96,113\n85.936096\t92.349588\t142\t45,56,66,89,96,113\n92.349588\t93.764750\t142\t45,56,66,89,96\n93.764750\t96.284451\t142\t45,56,89,96\n96.284451\t99.748128\t142\t45,56,89,134\n99.748128\t100.000000\t142\t45,56,59,89\n0.000000\t20.449369\t143\t24,97\n60.445610\t74.287710\t144\t42,59\n52.173268\t61.719977\t145\t64,99\n25.474716\t25.584357\t146\t51,96\n88.701340\t95.255452\t147\t49,106\n95.255452\t100.000000\t147\t106,118\n0.000000\t16.128261\t148\t55,82\n46.492800\t51.451422\t149\t9,133\n43.241239\t100.000000\t150\t40,142\n16.128261\t20.956228\t151\t82,86\n83.693827\t92.003768\t152\t72,75\n7.108776\t23.785721\t153\t7,90\n23.785721\t93.565018\t153\t90,139\n0.000000\t23.444517\t154\t33,35\n50.484324\t52.173268\t155\t0,64\n14.560625\t15.750456\t156\t70,102\n72.191272\t73.218934\t157\t6,54\n23.051339\t38.269459\t158\t6,105\n83.122541\t89.826128\t158\t25,105\n19.401604\t21.440137\t159\t0,32\n83.534148\t100.000000\t159\t13,65\n86.401076\t100.000000\t160\t1,82\n55.154018\t60.905848\t161\t28,93\n1.397707\t4.419653\t162\t30,37\n4.419653\t30.811847\t162\t37,137\n55.451624\t83.122541\t163\t11,105\n83.122541\t89.826128\t163\t11,158\n89.826128\t100.000000\t163\t11,105\n50.640219\t60.445610\t164\t53,59\n60.445610\t74.287710\t164\t53,144\n74.287710\t96.284451\t164\t53,59\n0.000000\t7.108776\t165\t7,32\n7.108776\t19.401604\t165\t32,153\n20.956228\t27.039936\t166\t80,86\n27.039936\t38.209860\t166\t86,120\n54.284561\t55.451624\t166\t11,120\n55.451624\t100.000000\t166\t120,163\n0.000000\t37.043091\t167\t5,78\n45.914485\t46.492800\t168\t99,133\n46.492800\t51.451422\t168\t99,149\n51.451422\t52.173268\t168\t99,133\n52.173268\t54.775535\t168\t133,145\n0.000000\t1.397707\t169\t13,30\n1.397707\t18.000399\t169\t13,162\n13.905651\t20.721380\t170\t34,69\n23.444517\t55.154018\t171\t33,93\n21.440137\t30.600251\t172\t0,4\n99.132998\t100.000000\t173\t27,160\n0.000000\t22.327425\t174\t96,105\n26.281985\t32.637756\t175\t54,124\n32.070756\t38.209860\t176\t85,166\n38.209860\t51.803381\t176\t85
,120\n37.135096\t73.914710\t177\t103,132\n30.600251\t39.656727\t178\t4,50\n41.362197\t55.154018\t179\t34,171\n55.154018\t63.435890\t179\t33,34\n63.435890\t77.115170\t179\t34,141\n79.777617\t83.693827\t180\t72,133\n39.656727\t47.640188\t181\t50,125\n47.640188\t60.658379\t181\t50,67\n99.753960\t100.000000\t182\t20,93\n74.287710\t100.000000\t183\t42,121\n22.902252\t37.043091\t184\t13,49\n37.043091\t83.534148\t184\t13,140\n34.067864\t34.689388\t185\t137,184\n34.689388\t51.690836\t185\t30,184\n0.000000\t11.300701\t186\t8,17,20,22,42,52,57,71,75,94,117,142,143\n11.300701\t11.309867\t186\t8,17,20,22,42,57,71,75,94,117,142,143\n11.309867\t18.561545\t186\t8,17,20,22,42,57,71,75,94,133,142,143\n18.561545\t20.449369\t186\t17,20,22,42,57,71,75,94,133,142,143\n20.449369\t20.575506\t186\t17,20,22,42,57,71,75,94,97,133,142\n20.575506\t23.785721\t186\t17,20,22,42,57,71,75,97,133,142\n23.785721\t24.710938\t186\t17,20,22,42,71,75,97,133,142\n24.710938\t25.328103\t186\t17,20,22,31,42,71,75,97,133,142\n25.328103\t27.114357\t186\t17,20,22,31,42,71,75,97,133,138,142\n27.114357\t27.368300\t186\t1,17,20,22,31,42,71,75,97,133,138,142\n27.368300\t27.631858\t186\t1,17,20,22,31,42,62,71,75,97,133,142\n27.631858\t32.637756\t186\t1,17,20,31,42,62,71,75,97,133,142\n32.637756\t33.673379\t186\t1,17,20,31,42,54,62,71,75,97,133,142\n33.673379\t34.689388\t186\t1,17,20,31,42,62,71,75,97,133,142\n34.689388\t36.134011\t186\t1,17,20,31,42,62,64,71,75,97,133,142\n36.134011\t42.476761\t186\t1,17,20,31,42,62,64,71,75,97,126,133,142\n42.476761\t42.751756\t186\t1,17,20,31,42,62,64,71,75,97,133,135,142\n42.751756\t43.241239\t186\t1,17,20,31,42,62,64,71,75,97,121,133,135,142\n43.241239\t44.698548\t186\t1,17,20,31,42,62,64,71,75,97,121,133,135,150\n44.698548\t45.405170\t186\t1,11,17,20,31,42,62,64,71,75,97,121,133,135,150\n45.405170\t45.914485\t186\t1,11,17,20,31,42,62,64,71,72,75,97,121,133,150\n45.914485\t46.386608\t186\t1,11,17,20,31,42,62,64,71,72,75,97,121,150\n46.386608\t50.484324\t186\t1,11,17,20,31,42,62,
64,71,72,74,75,97,121,150\n50.484324\t51.344527\t186\t1,11,17,20,31,42,62,71,72,74,75,97,121,150\n51.344527\t53.470013\t186\t1,11,20,31,42,62,71,72,74,75,97,121,150\n53.470013\t54.284561\t186\t1,11,20,31,42,62,71,72,74,75,86,97,121,150\n54.284561\t54.414499\t186\t1,20,31,42,62,71,72,74,75,86,97,121,150\n54.414499\t54.775535\t186\t1,20,31,42,62,72,74,75,86,97,121,150\n54.775535\t58.841850\t186\t1,20,31,42,62,72,74,75,86,97,121,133,150\n58.841850\t58.916166\t186\t1,20,29,31,42,62,72,74,75,86,97,121,133,150\n58.916166\t60.445610\t186\t1,20,31,42,62,72,74,75,86,97,121,133,150\n60.445610\t65.841615\t186\t1,20,31,62,72,74,75,86,97,121,133,150\n65.841615\t67.041287\t186\t1,20,31,72,74,75,86,97,110,121,133,150\n67.041287\t71.852095\t186\t1,20,31,72,74,75,78,86,97,110,121,133,150\n71.852095\t74.244135\t186\t1,20,31,72,74,75,78,86,97,110,121,124,133,150\n74.244135\t74.287710\t186\t1,20,31,74,75,78,86,97,110,121,124,133,150\n74.287710\t76.111147\t186\t1,20,31,74,75,78,86,97,110,124,133,150,183\n76.111147\t79.777617\t186\t1,20,31,38,74,75,78,86,97,110,124,133,150,183\n79.777617\t80.236310\t186\t1,20,31,38,74,75,78,86,97,110,124,150,183\n80.236310\t81.076006\t186\t1,20,31,38,41,74,75,78,86,97,110,124,150,183\n81.076006\t83.693827\t186\t1,20,31,38,41,74,75,78,86,97,110,150,183\n83.693827\t84.317521\t186\t1,20,31,38,41,74,78,86,97,110,150,152,183\n84.317521\t86.401076\t186\t1,20,31,38,41,74,78,86,97,110,152,183\n86.401076\t87.032912\t186\t20,31,38,41,74,78,86,97,110,152,160,183\n87.032912\t91.315428\t186\t20,31,38,41,71,74,78,86,97,110,152,160,183\n91.315428\t92.003768\t186\t20,31,38,41,71,86,97,110,119,152,160,183\n92.003768\t92.357754\t186\t20,31,38,41,71,72,75,86,97,110,119,160,183\n92.357754\t94.331891\t186\t20,31,38,41,58,71,72,75,86,97,110,119,160,183\n94.331891\t94.869872\t186\t20,31,38,39,41,58,71,72,75,86,97,110,119,160,183\n94.869872\t99.132998\t186\t20,31,38,39,41,58,71,72,75,86,97,102,110,119,160,183\n99.132998\t99.753960\t186\t20,31,38,39,41,58,71,72,75,86,97,102,110,
119,173,183\n99.753960\t100.000000\t186\t31,38,39,41,58,71,72,75,86,97,102,110,119,173,183\n81.203782\t82.034281\t187\t30,50\n93.836844\t99.748128\t187\t30,36\n99.748128\t100.000000\t187\t36,115\n93.565018\t94.899812\t188\t14,90\n21.243630\t30.422602\t189\t25,55\n43.757852\t69.373708\t189\t8,25\n88.947794\t100.000000\t190\t28,77\n85.951325\t94.331891\t191\t39,84\n57.108058\t93.565018\t192\t153,164\n93.565018\t96.284451\t192\t139,164\n96.284451\t100.000000\t192\t53,139\n18.561545\t21.243630\t193\t8,55\n23.120737\t43.757852\t193\t8,82\n31.693641\t38.209860\t194\t15,68\n38.209860\t38.778292\t194\t68,114\n81.076006\t85.936096\t195\t124,192\n85.936096\t100.000000\t195\t91,192\n16.654066\t18.561545\t196\t55,68\n18.561545\t21.243630\t196\t68,193\n21.243630\t23.120737\t196\t8,68\n23.120737\t31.693641\t196\t68,193\n81.752010\t95.899004\t197\t26,27\n20.721380\t41.362197\t198\t34,44\n41.362197\t77.115170\t198\t44,179\n77.115170\t79.005768\t198\t34,44\n0.000000\t3.568615\t199\t62,73\n3.568615\t19.497026\t199\t73,138\n0.000000\t3.475795\t200\t16,69\n30.811847\t36.777478\t201\t10,37\n0.000000\t22.902252\t202\t49,84\n51.803381\t63.824647\t202\t84,85\n63.824647\t85.951325\t202\t84,100\n85.951325\t94.331891\t202\t100,191\n94.331891\t94.444668\t202\t84,100\n73.914710\t78.645366\t203\t10,132\n85.706099\t100.000000\t204\t8,43\n0.000000\t21.243630\t205\t25,27\n21.243630\t23.725869\t205\t27,189\n61.095745\t72.191272\t206\t6,177\n72.191272\t73.218934\t206\t157,177\n73.218934\t73.914710\t206\t6,177\n73.914710\t76.111147\t206\t6,103\n76.111147\t100.000000\t206\t6,76\n39.199376\t42.476761\t207\t58,111\n42.476761\t49.108935\t207\t58,59\n0.000000\t14.560625\t208\t70,111\n14.560625\t15.750456\t208\t111,156\n15.750456\t39.199376\t208\t70,111\n39.199376\t46.891849\t208\t70,207\n39.263142\t55.154018\t209\t28,54\n55.154018\t60.905848\t209\t54,161\n60.905848\t72.191272\t209\t28,54\n0.000000\t22.902252\t210\t31,202\n22.902252\t24.710938\t210\t31,84\n34.233492\t35.877983\t211\t16,102\n62.308779\t63.12
6608\t212\t0,202\n0.000000\t4.851737\t213\t103,169\n38.778292\t53.470013\t214\t78,114\n53.470013\t67.041287\t214\t15,78\n78.884401\t99.468331\t215\t4,186\n0.000000\t1.397707\t216\t0,1,6,10,11,15,26,28,29,34,36,37,39,40,44,48,50,51,53,54,58,64,68,80,83,86,93,99,102,106,108,112,123,124,125,127,130,132,148,154,165,167,174,186,199,200,205,208,210,213\n1.397707\t3.475795\t216\t0,1,6,10,11,15,26,28,29,34,36,39,40,44,48,50,51,53,54,58,64,68,80,83,86,93,99,102,106,108,112,123,124,125,127,130,132,148,154,165,167,174,186,199,200,205,208,210,213\n3.475795\t3.568615\t216\t0,1,6,10,11,15,16,26,28,29,34,36,39,40,44,48,50,51,53,54,58,64,68,69,80,83,86,93,99,102,106,108,112,123,124,125,127,130,132,148,154,165,167,174,186,199,205,208,210,213\n3.568615\t4.419653\t216\t0,1,6,10,11,15,16,26,28,29,34,36,39,40,44,48,50,51,53,54,58,64,68,69,80,83,86,93,99,102,106,112,123,124,125,127,130,132,148,154,165,167,174,186,199,205,208,210,213\n4.419653\t4.851737\t216\t0,1,6,10,11,15,16,26,28,29,34,36,39,40,44,48,50,51,53,54,58,68,69,80,83,86,93,99,102,106,112,123,124,125,127,130,132,148,154,165,167,174,186,199,205,208,210,213\n4.851737\t6.538844\t216\t0,1,6,10,11,15,16,26,28,29,34,36,39,40,44,48,50,51,53,54,58,68,69,80,83,86,93,99,102,103,106,112,123,124,125,127,130,132,148,154,165,167,169,174,186,199,205,208,210\n6.538844\t7.108776\t216\t0,1,4,6,10,11,15,16,26,28,29,34,36,39,40,41,44,48,50,51,53,54,58,68,69,80,83,86,93,99,102,103,106,112,123,124,125,130,132,148,154,165,167,169,174,186,199,205,208,210\n7.108776\t11.300701\t216\t0,1,4,6,10,11,15,16,26,28,29,34,36,39,40,41,44,48,50,51,53,54,58,68,69,80,83,85,86,93,99,102,103,106,112,123,124,125,132,148,154,165,167,169,174,186,199,205,208,210\n11.300701\t11.309867\t216\t0,1,4,6,10,11,15,16,26,28,29,34,36,39,40,41,44,48,50,51,52,53,54,58,68,69,80,83,85,86,93,99,102,103,106,112,123,124,125,132,148,154,165,167,169,174,186,199,205,208,210\n11.309867\t13.905651\t216\t0,1,4,6,10,11,15,16,26,28,29,34,36,39,40,41,44,48,50,51,52,53,54,58,68,69,80,83,85,86,93,
99,102,103,106,123,124,125,132,148,154,165,167,169,174,186,199,205,208,210\n13.905651\t14.560625\t216\t0,1,4,6,10,11,15,16,26,28,29,36,39,40,41,44,48,50,51,52,53,54,58,68,80,83,85,86,93,99,102,103,106,123,124,125,132,148,154,165,167,169,170,174,186,199,205,208,210\n14.560625\t15.750456\t216\t0,1,4,6,10,11,15,16,26,28,29,36,39,40,41,44,48,50,51,52,53,54,58,68,80,83,85,86,93,99,103,106,123,124,125,132,148,154,165,167,169,170,174,186,199,205,208,210\n15.750456\t16.128261\t216\t0,1,4,6,10,11,15,16,26,28,29,36,39,40,41,44,48,50,51,52,53,54,58,68,80,83,85,86,93,99,102,103,106,123,124,125,132,148,154,165,167,169,170,174,186,199,205,208,210\n16.128261\t16.654066\t216\t0,1,4,6,10,11,15,16,26,28,29,36,39,40,41,44,48,50,51,52,53,54,55,58,68,80,83,85,93,99,102,103,106,123,124,125,132,151,154,165,167,169,170,174,186,199,205,208,210\n16.654066\t18.000399\t216\t0,1,4,6,10,11,15,16,26,28,29,36,39,40,41,44,48,50,51,52,53,54,58,80,83,85,93,99,102,103,106,123,124,125,132,151,154,165,167,169,170,174,186,196,199,205,208,210\n18.000399\t19.401604\t216\t0,1,4,6,10,11,13,15,16,26,28,29,36,39,40,41,44,48,50,51,52,53,54,58,80,83,85,93,99,102,103,106,123,124,125,132,151,154,162,165,167,170,174,186,196,199,205,208,210\n19.401604\t19.497026\t216\t1,4,6,10,11,13,15,16,26,28,29,36,39,40,41,44,48,50,51,52,53,54,58,80,83,85,93,99,102,103,106,123,124,125,132,151,153,154,159,162,167,170,174,186,196,199,205,208,210\n19.497026\t20.449369\t216\t1,4,6,10,11,13,15,16,26,28,29,36,39,40,41,44,48,50,51,52,53,54,58,73,80,83,85,93,99,102,103,106,123,124,125,132,138,151,153,154,159,162,167,170,174,186,196,205,208,210\n20.449369\t20.575506\t216\t1,4,6,10,11,13,15,16,24,26,28,29,36,39,40,41,44,48,50,51,52,53,54,58,73,80,83,85,93,99,102,103,106,123,124,125,132,138,151,153,154,159,162,167,170,174,186,196,205,208,210\n20.575506\t20.721380\t216\t1,4,6,10,11,13,15,16,24,26,28,29,36,39,40,41,44,48,50,51,52,53,54,58,73,80,83,85,93,94,99,102,103,106,123,124,125,132,138,151,153,154,159,162,167,170,174,186,196,205,208,210\
n20.721380\t20.956228\t216\t1,4,6,10,11,13,15,16,24,26,28,29,36,39,40,41,48,50,51,52,53,54,58,69,73,80,83,85,93,94,99,102,103,106,123,124,125,132,138,151,153,154,159,162,167,174,186,196,198,205,208,210\n20.956228\t21.440137\t216\t1,4,6,10,11,13,15,16,24,26,28,29,36,39,40,41,48,50,51,52,53,54,58,69,73,82,83,85,93,94,99,102,103,106,123,124,125,132,138,153,154,159,162,166,167,174,186,196,198,205,208,210\n21.440137\t22.327425\t216\t1,6,10,11,13,15,16,24,26,28,29,32,36,39,40,41,48,50,51,52,53,54,58,69,73,82,83,85,93,94,99,102,103,106,123,124,125,132,138,153,154,162,166,167,172,174,186,196,198,205,208,210\n22.327425\t22.902252\t216\t1,6,10,11,13,15,16,24,26,28,29,32,36,39,40,41,48,50,51,52,53,54,58,69,73,82,83,85,93,94,96,99,102,103,105,106,123,124,125,132,138,153,154,162,166,167,172,186,196,198,205,208,210\n22.902252\t23.051339\t216\t1,6,10,11,15,16,24,26,28,29,32,36,39,40,41,48,50,51,52,53,54,58,69,73,82,83,85,93,94,96,99,102,103,105,106,123,124,125,132,138,153,154,162,166,167,172,184,186,196,198,205,208,210\n23.051339\t23.120737\t216\t1,10,11,15,16,24,26,28,29,32,36,39,40,41,48,50,51,52,53,54,58,69,73,82,83,85,93,94,96,99,102,103,106,123,124,125,132,138,153,154,158,162,166,167,172,184,186,196,198,205,208,210\n23.120737\t23.444517\t216\t1,10,11,15,16,24,26,28,29,32,36,39,40,41,48,50,51,52,53,54,58,69,73,83,85,93,94,96,99,102,103,106,123,124,125,132,138,153,154,158,162,166,167,172,184,186,196,198,205,208,210\n23.444517\t23.725869\t216\t1,10,11,15,16,24,26,28,29,32,35,36,39,40,41,48,50,51,52,53,54,58,69,73,83,85,94,96,99,102,103,106,123,124,125,132,138,153,158,162,166,167,171,172,184,186,196,198,205,208,210\n23.725869\t24.710938\t216\t1,10,11,15,16,24,26,27,28,29,32,35,36,39,40,41,48,50,51,52,53,54,58,69,73,83,85,94,96,99,102,103,106,123,124,125,132,138,153,158,162,166,167,171,172,184,186,189,196,198,208,210\n24.710938\t25.328103\t216\t1,10,11,15,16,24,26,27,28,29,32,35,36,39,40,41,48,50,51,52,53,54,58,69,73,83,84,85,94,96,99,102,103,106,123,124,125,132,138,153,158,162,16
6,167,171,172,184,186,189,196,198,208\n25.328103\t25.474716\t216\t1,10,11,15,16,24,26,27,28,29,32,35,36,39,40,41,48,50,51,52,53,54,58,69,73,83,84,85,94,96,99,102,103,106,123,124,125,132,153,158,162,166,167,171,172,184,186,189,196,198,208\n25.474716\t25.584357\t216\t1,10,11,15,16,24,26,27,28,29,32,35,36,39,40,41,48,50,52,53,54,58,69,73,83,84,85,94,99,102,103,106,123,124,125,132,146,153,158,162,166,167,171,172,184,186,189,196,198,208\n25.584357\t26.281985\t216\t1,10,11,15,16,24,26,27,28,29,32,35,36,39,40,41,48,50,51,52,53,54,58,69,73,83,84,85,94,96,99,102,103,106,123,124,125,132,153,158,162,166,167,171,172,184,186,189,196,198,208\n26.281985\t27.039936\t216\t1,10,11,15,16,24,26,27,28,29,32,35,36,39,40,41,48,50,51,52,53,58,69,73,83,84,85,94,96,99,102,103,106,123,125,132,153,158,162,166,167,171,172,175,184,186,189,196,198,208\n27.039936\t27.114357\t216\t1,10,11,15,16,26,27,28,29,32,35,36,39,40,41,48,50,51,52,53,58,69,73,83,84,85,94,96,99,102,103,106,123,125,132,153,158,162,166,167,171,172,175,184,186,189,196,198,208\n27.114357\t27.368300\t216\t10,11,15,16,26,27,28,29,32,35,36,39,40,41,48,50,51,52,53,58,69,73,83,84,85,94,96,99,102,103,106,123,125,132,153,158,162,166,167,171,172,175,184,186,189,196,198,208\n27.368300\t27.631858\t216\t10,11,15,16,26,27,28,29,32,35,36,39,40,41,48,50,51,52,53,58,69,73,83,84,85,96,99,102,103,106,121,123,125,132,153,158,162,166,167,171,172,175,184,186,189,196,198,208\n27.631858\t30.422602\t216\t10,11,15,16,22,26,27,28,29,32,35,36,39,40,41,48,50,51,52,53,58,69,73,83,84,85,96,99,102,103,106,121,123,125,132,153,158,162,166,167,171,172,175,184,186,189,196,198,208\n30.422602\t30.600251\t216\t10,11,15,16,22,25,26,27,28,29,32,35,36,39,41,48,50,51,52,53,58,69,73,83,84,85,96,99,102,103,106,121,123,125,126,132,153,158,162,166,167,171,172,175,184,186,196,198,208\n30.600251\t30.811847\t216\t0,10,11,15,16,22,25,26,27,28,29,32,35,36,39,41,48,51,52,53,58,69,73,83,84,85,96,99,102,103,106,121,123,125,126,132,153,158,162,166,167,171,175,178,184,186,196,198,208\n
30.811847\t31.693641\t216\t0,11,15,16,22,25,26,27,28,29,32,35,36,39,41,48,51,52,53,58,69,73,83,84,85,96,99,102,103,106,121,123,125,126,132,137,153,158,166,167,171,175,178,184,186,196,198,201,208\n31.693641\t32.070756\t216\t0,11,16,22,25,26,27,28,29,32,35,36,39,41,48,51,52,53,58,69,73,83,84,85,96,99,102,103,106,121,123,125,126,132,137,153,158,166,167,171,175,178,184,186,193,194,198,201,208\n32.070756\t32.637756\t216\t0,11,16,22,25,26,27,28,29,32,35,36,39,41,48,51,52,53,58,69,73,83,84,96,99,102,103,106,121,123,125,126,132,137,153,158,167,171,175,176,178,184,186,193,194,198,201,208\n32.637756\t33.673379\t216\t0,11,16,22,25,26,27,28,29,32,35,36,39,41,48,51,52,53,58,69,73,83,84,96,99,102,103,106,121,123,124,125,126,132,137,153,158,167,171,176,178,184,186,193,194,198,201,208\n33.673379\t34.067864\t216\t0,11,16,22,25,26,27,28,29,32,35,36,39,41,48,51,52,53,54,58,69,73,83,84,96,99,102,103,106,121,123,124,125,126,132,137,153,158,167,171,176,178,184,186,193,194,198,201,208\n34.067864\t34.233492\t216\t0,11,16,22,25,26,27,28,29,32,35,36,39,41,48,51,52,53,54,58,69,73,83,84,96,99,102,103,106,121,123,124,125,126,132,153,158,167,171,176,178,185,186,193,194,198,201,208\n34.233492\t34.240481\t216\t0,11,22,25,26,27,28,29,32,35,36,39,41,48,51,52,53,54,58,69,73,83,84,96,99,103,106,121,123,124,125,126,132,153,158,167,171,176,178,185,186,193,194,198,201,208,211\n34.240481\t35.877983\t216\t0,11,22,25,26,27,28,32,35,36,39,41,48,51,52,53,54,58,69,73,83,84,96,99,103,106,121,123,124,125,126,132,153,158,167,171,176,178,185,186,193,194,198,201,208,211\n35.877983\t36.134011\t216\t0,11,16,22,25,26,27,28,32,35,36,39,41,48,51,52,53,54,58,69,73,83,84,96,99,102,103,106,121,123,124,125,126,132,153,158,167,171,176,178,185,186,193,194,198,201,208\n36.134011\t36.777478\t216\t0,11,16,22,25,26,27,28,32,35,36,39,41,48,51,52,53,54,58,69,73,83,84,96,99,102,103,106,121,123,124,125,132,153,158,167,171,176,178,185,186,193,194,198,201,208\n36.777478\t37.043091\t216\t0,10,11,16,22,25,26,27,28,32,35,36,37,39,41,48,51
,52,53,54,58,69,73,83,84,96,99,102,103,106,121,123,124,125,132,153,158,167,171,176,178,185,186,193,194,198,208\n37.043091\t37.135096\t216\t0,10,11,16,22,25,26,27,28,32,35,36,37,39,41,48,51,52,53,54,58,69,73,78,83,84,96,99,102,103,106,121,123,124,125,132,153,158,171,176,178,185,186,193,194,198,208\n37.135096\t38.269459\t216\t0,10,11,16,22,25,26,27,28,32,35,36,37,39,41,48,51,52,53,54,58,69,73,78,83,84,96,99,102,106,121,123,124,125,153,158,171,176,177,178,185,186,193,194,198,208\n38.269459\t38.778292\t216\t0,6,10,11,16,22,25,26,27,28,32,35,36,37,39,41,48,51,52,53,54,58,69,73,78,83,84,96,99,102,105,106,121,123,124,125,153,171,176,177,178,185,186,193,194,198,208\n38.778292\t39.199376\t216\t0,6,10,11,16,22,25,26,27,28,32,35,36,37,39,41,48,51,52,53,54,58,68,69,73,83,84,96,99,102,105,106,121,123,124,125,153,171,176,177,178,185,186,193,198,208,214\n39.199376\t39.263142\t216\t0,6,10,11,16,22,25,26,27,28,32,35,36,37,39,41,48,51,52,53,54,68,69,73,83,84,96,99,102,105,106,121,123,124,125,153,171,176,177,178,185,186,193,198,208,214\n39.263142\t39.656727\t216\t0,6,10,11,16,22,25,26,27,32,35,36,37,39,41,48,51,52,53,68,69,73,83,84,96,99,102,105,106,121,123,124,125,153,171,176,177,178,185,186,193,198,208,209,214\n39.656727\t41.362197\t216\t0,4,6,10,11,16,22,25,26,27,32,35,36,37,39,41,48,51,52,53,68,69,73,83,84,96,99,102,105,106,121,123,124,153,171,176,177,181,185,186,193,198,208,209,214\n41.362197\t41.913156\t216\t0,4,6,10,11,16,22,25,26,27,32,35,36,37,39,41,48,51,52,53,68,69,73,83,84,96,99,102,105,106,121,123,124,153,176,177,181,185,186,193,198,208,209,214\n41.913156\t42.751756\t216\t0,4,6,10,11,16,22,25,26,27,32,35,36,37,39,41,48,51,52,53,68,69,73,84,96,99,102,105,106,121,123,124,153,176,177,181,185,186,193,198,208,209,214\n42.751756\t43.757852\t216\t0,4,6,10,11,16,22,25,26,27,32,35,36,37,39,41,48,51,52,53,68,69,73,84,96,99,102,105,106,123,124,153,176,177,181,185,186,193,198,208,209,214\n43.757852\t44.698548\t216\t0,4,6,10,11,16,22,26,27,32,35,36,37,39,41,48,51,52,53,68,69,73,82,84,
96,99,102,105,106,123,124,153,176,177,181,185,186,189,198,208,209,214\n44.698548\t45.405170\t216\t0,4,6,10,16,22,26,27,32,35,36,37,39,41,48,51,52,53,68,69,73,82,84,96,99,102,105,106,123,124,153,176,177,181,185,186,189,198,208,209,214\n45.405170\t45.914485\t216\t0,4,6,10,16,22,26,27,32,35,36,37,39,41,48,51,52,53,55,68,69,73,82,84,96,99,102,105,106,123,124,153,176,177,181,185,186,189,198,208,209,214\n45.914485\t46.386608\t216\t0,4,6,10,16,22,26,27,32,35,36,37,39,41,48,51,52,53,55,68,69,73,82,84,96,102,105,106,123,124,153,168,176,177,181,185,186,189,198,208,209,214\n46.386608\t46.492800\t216\t0,4,6,9,10,16,22,26,27,32,35,36,37,39,41,48,51,52,53,55,68,69,73,82,84,96,102,105,106,124,153,168,176,177,181,185,186,189,198,208,209,214\n46.492800\t46.891849\t216\t0,4,6,10,16,22,26,27,32,35,36,37,39,41,48,51,52,53,55,68,69,73,82,84,96,102,105,106,124,153,168,176,177,181,185,186,189,198,208,209,214\n46.891849\t47.640188\t216\t0,4,6,10,16,22,26,27,32,35,36,37,39,41,48,51,52,53,55,68,69,70,73,82,84,96,102,105,106,124,153,168,176,177,181,185,186,189,198,207,209,214\n47.640188\t49.108935\t216\t0,4,6,10,16,22,26,27,32,35,36,37,39,41,48,51,52,53,55,68,69,70,73,77,82,84,96,102,105,106,124,153,168,176,177,181,185,186,189,198,207,209,214\n49.108935\t50.484324\t216\t0,4,6,10,16,22,26,27,32,35,36,37,39,41,48,51,52,53,55,58,59,68,69,70,73,77,82,84,96,102,105,106,124,153,168,176,177,181,185,186,189,198,209,214\n50.484324\t50.640219\t216\t4,6,10,16,22,26,27,32,35,36,37,39,41,48,51,52,53,55,58,59,68,69,70,73,77,82,84,96,102,105,106,124,153,155,168,176,177,181,185,186,189,198,209,214\n50.640219\t51.344527\t216\t4,6,10,16,22,26,27,32,35,36,37,39,41,48,51,52,55,58,68,69,70,73,77,82,84,96,102,105,106,124,153,155,164,168,176,177,181,185,186,189,198,209,214\n51.344527\t51.451422\t216\t4,6,10,16,17,22,26,27,32,35,36,37,39,41,48,51,52,55,58,68,69,70,73,77,82,84,96,102,105,106,124,153,155,164,168,176,177,181,185,186,189,198,209,214\n51.451422\t51.690836\t216\t4,6,10,16,17,22,26,27,32,35,36,37,39,41,48,
51,55,58,68,69,70,73,77,82,84,96,102,105,106,124,128,153,155,164,168,176,177,181,185,186,189,198,209,214\n51.690836\t51.803381\t216\t4,6,10,16,17,22,26,27,30,32,35,36,37,39,41,48,51,55,58,68,69,70,73,77,82,84,96,102,105,106,124,128,153,155,164,168,176,177,181,184,186,189,198,209,214\n51.803381\t52.173268\t216\t4,6,10,16,17,22,26,27,30,32,35,36,37,39,41,48,51,55,58,68,69,70,73,77,82,96,102,105,106,120,124,128,153,155,164,168,177,181,184,186,189,198,202,209,214\n52.173268\t54.284561\t216\t0,4,6,10,16,17,22,26,27,30,32,35,36,37,39,41,48,51,55,58,68,69,70,73,77,82,96,102,105,106,120,124,128,153,164,168,177,181,184,186,189,198,202,209,214\n54.284561\t54.414499\t216\t0,4,6,10,16,17,22,26,27,30,32,35,36,37,39,41,48,51,55,58,68,69,70,73,77,82,96,102,105,106,124,128,153,164,166,168,177,181,184,186,189,198,202,209,214\n54.414499\t54.775535\t216\t0,4,6,10,16,17,22,26,27,30,32,35,36,37,39,41,48,51,55,58,68,69,70,71,73,77,82,96,102,105,106,124,128,153,164,166,168,177,181,184,186,189,198,202,209,214\n54.775535\t55.451624\t216\t0,4,6,10,16,17,22,26,27,30,32,35,36,37,39,41,48,51,55,58,68,69,70,71,73,77,82,96,102,105,106,124,128,145,153,164,166,177,181,184,186,189,198,202,209,214\n55.451624\t57.108058\t216\t0,4,6,10,16,17,22,26,27,30,32,35,36,37,39,41,48,51,55,58,68,69,70,71,73,77,82,96,102,106,124,128,145,153,164,166,177,181,184,186,189,198,202,209,214\n57.108058\t58.916166\t216\t0,4,6,10,16,17,22,26,27,30,32,35,36,37,39,41,48,51,55,58,68,69,70,71,73,77,82,96,102,106,124,128,145,166,177,181,184,186,189,192,198,202,209,214\n58.916166\t60.658379\t216\t0,4,6,10,16,17,22,26,27,29,30,32,35,36,37,39,41,48,51,55,58,68,69,70,71,73,77,82,96,102,106,124,128,145,166,177,181,184,186,189,192,198,202,209,214\n60.658379\t60.905848\t216\t0,4,6,10,16,17,22,26,27,29,30,32,35,36,37,39,41,48,50,51,55,58,68,69,70,71,73,77,82,96,102,106,124,128,145,166,177,184,186,189,192,198,202,209,214\n60.905848\t61.095745\t216\t0,4,6,10,16,17,22,26,27,29,30,32,35,36,37,39,41,48,50,51,55,58,68,69,70,71,73,77,82,93,96
,102,106,124,128,145,166,177,184,186,189,192,198,202,209,214\n61.095745\t61.719977\t216\t0,4,10,16,17,22,26,27,29,30,32,35,36,37,39,41,48,50,51,55,58,68,69,70,71,73,77,82,93,96,102,106,124,128,145,166,184,186,189,192,198,202,206,209,214\n61.719977\t62.308779\t216\t0,4,10,16,17,22,26,27,29,30,32,35,36,37,39,41,48,50,51,55,58,64,68,69,70,71,73,77,82,93,96,99,102,106,124,128,166,184,186,189,192,198,202,206,209,214\n62.308779\t62.577097\t216\t4,10,16,17,22,26,27,29,30,32,35,36,37,39,41,48,50,51,55,58,64,68,69,70,71,73,77,82,93,96,99,102,106,124,128,166,184,186,189,192,198,206,209,212,214\n62.577097\t63.126608\t216\t4,10,16,17,22,26,27,29,30,32,35,36,37,39,41,48,50,51,55,58,64,68,69,70,71,73,77,82,96,102,106,124,128,129,166,184,186,189,192,198,206,209,212,214\n63.126608\t65.841615\t216\t0,4,10,16,17,22,26,27,29,30,32,35,36,37,39,41,48,50,51,55,58,64,68,69,70,71,73,77,82,96,102,106,124,128,129,166,184,186,189,192,198,202,206,209,214\n65.841615\t67.041287\t216\t0,4,10,16,17,22,26,27,29,30,32,35,36,37,39,41,48,50,51,55,58,64,69,70,71,73,77,82,96,102,106,124,128,129,166,184,186,189,192,198,202,206,209,214\n67.041287\t69.373708\t216\t0,4,10,15,16,17,22,26,27,29,30,32,35,36,37,39,41,48,50,51,55,58,64,69,70,71,73,77,82,96,102,106,124,128,129,166,184,186,189,192,198,202,206,209\n69.373708\t71.852095\t216\t0,4,8,10,15,16,17,22,25,26,27,29,30,32,35,36,37,39,41,48,50,51,55,58,64,69,70,71,73,77,82,96,102,106,124,128,129,166,184,186,192,198,202,206,209\n71.852095\t72.191272\t216\t0,4,8,10,15,16,17,22,25,26,27,29,30,32,35,36,37,39,41,48,50,51,55,58,64,69,70,71,73,77,82,96,102,106,128,129,166,184,186,192,198,202,206,209\n72.191272\t73.218934\t216\t0,4,8,10,15,16,17,22,25,26,27,28,29,30,32,35,36,37,39,41,48,50,51,55,58,64,69,70,71,73,77,82,96,102,106,128,129,166,184,186,192,198,202,206\n73.218934\t73.914710\t216\t0,4,8,10,15,16,17,22,25,26,27,28,29,30,32,35,36,37,39,41,48,50,51,54,55,58,64,69,70,71,73,77,82,96,102,106,128,129,166,184,186,192,198,202,206\n73.914710\t74.244135\t216\t0,4,8
,15,16,17,22,25,26,27,28,29,30,32,35,36,37,39,41,48,50,51,54,55,58,64,69,70,71,73,77,82,96,102,106,128,129,166,184,186,192,198,202,203,206\n74.244135\t75.398652\t216\t0,4,8,15,16,17,22,25,26,27,28,29,30,32,35,36,37,39,41,48,50,51,54,55,58,64,69,70,71,72,73,77,82,96,102,106,128,129,166,184,186,192,198,202,203,206\n75.398652\t77.115170\t216\t0,4,8,15,16,17,22,25,26,27,28,29,30,32,35,36,37,39,41,48,50,51,54,55,58,64,69,70,71,72,73,77,82,102,106,128,129,166,184,186,192,198,202,203,206\n77.115170\t78.645366\t216\t0,4,8,15,16,17,22,25,26,27,28,29,30,32,35,36,37,39,41,48,50,51,54,55,58,64,69,70,71,72,73,77,82,102,106,128,129,141,166,184,186,192,198,202,203,206\n78.645366\t78.884401\t216\t0,4,8,10,15,16,17,22,25,26,27,28,29,30,32,35,36,37,39,41,48,50,51,54,55,58,64,69,70,71,72,73,77,82,102,106,128,129,132,141,166,184,186,192,198,202,206\n78.884401\t79.005768\t216\t0,8,10,15,16,17,22,25,26,27,28,29,30,32,35,36,37,39,41,48,50,51,54,55,58,64,69,70,71,72,73,77,82,102,106,128,129,132,141,166,184,192,198,202,206,215\n79.005768\t79.777617\t216\t0,8,10,15,16,17,22,25,26,27,28,29,30,32,34,35,36,37,39,41,44,48,50,51,54,55,58,64,69,70,71,72,73,77,82,102,106,128,129,132,141,166,184,192,202,206,215\n79.777617\t79.893379\t216\t0,8,10,15,16,17,22,25,26,27,28,29,30,32,34,35,36,37,39,41,44,48,50,51,54,55,58,64,69,70,71,73,77,82,102,106,128,129,132,141,166,180,184,192,202,206,215\n79.893379\t80.236310\t216\t0,8,10,15,16,17,22,25,26,27,28,29,30,32,34,35,36,37,39,41,44,48,50,51,54,55,58,64,65,69,70,71,73,77,82,102,106,128,129,132,141,166,180,184,192,202,206,215\n80.236310\t81.076006\t216\t0,8,10,15,16,17,22,25,26,27,28,29,30,32,34,35,36,37,39,44,48,50,51,54,55,58,64,65,69,70,71,73,77,82,102,106,128,129,132,141,166,180,184,192,202,206,215\n81.076006\t81.203782\t216\t0,8,10,15,16,17,22,25,26,27,28,29,30,32,34,35,36,37,39,44,48,50,51,54,55,58,64,65,69,70,71,73,77,82,102,106,128,129,132,141,166,180,184,195,202,206,215\n81.203782\t81.752010\t216\t0,8,10,15,16,17,22,25,26,27,28,29,32,34,35,36,37,39,
44,48,51,54,55,58,64,65,69,70,71,73,77,82,102,106,128,129,132,141,166,180,184,187,195,202,206,215\n81.752010\t82.034281\t216\t0,8,10,15,16,17,22,25,28,29,32,34,35,36,37,39,44,48,51,54,55,58,64,65,69,70,71,73,77,82,102,106,128,129,132,141,166,180,184,187,195,197,202,206,215\n82.034281\t83.122541\t216\t0,8,10,15,16,17,22,25,28,29,30,32,34,35,36,37,39,44,48,50,51,54,55,58,64,65,69,70,71,73,77,82,102,106,128,129,132,141,166,180,184,195,197,202,206,215\n83.122541\t83.394074\t216\t0,8,10,15,16,17,22,28,29,30,32,34,35,36,37,39,44,48,50,51,54,55,58,64,65,69,70,71,73,77,82,102,106,128,129,132,141,166,180,184,195,197,202,206,215\n83.394074\t83.534148\t216\t0,8,10,14,15,16,17,22,28,29,30,32,34,35,36,37,39,44,48,50,51,54,55,58,64,65,69,70,71,73,77,82,102,106,128,129,132,141,166,180,184,195,197,202,206,215\n83.534148\t83.693827\t216\t0,8,10,14,15,16,17,22,28,29,30,32,34,35,36,37,39,44,48,50,51,54,55,58,64,69,70,71,73,77,82,102,106,128,129,132,140,141,159,166,180,195,197,202,206,215\n83.693827\t83.777115\t216\t0,8,10,14,15,16,17,22,28,29,30,32,34,35,36,37,39,44,48,50,51,54,55,58,64,69,70,71,73,77,82,102,106,128,129,132,133,140,141,159,166,195,197,202,206,215\n83.777115\t84.317521\t216\t0,5,8,10,14,15,16,17,22,28,29,30,32,34,35,36,37,39,44,48,49,50,51,54,55,58,64,69,70,71,73,77,82,102,106,128,129,132,133,141,159,166,195,197,202,206,215\n84.317521\t85.706099\t216\t0,5,8,10,14,15,16,17,22,28,29,30,32,34,35,36,37,39,44,48,49,50,51,54,55,58,64,69,70,71,73,77,82,102,106,128,129,132,133,141,150,159,166,195,197,202,206,215\n85.706099\t85.951325\t216\t0,5,10,14,15,16,17,22,28,29,30,32,34,35,36,37,39,44,48,49,50,51,54,55,58,64,69,70,71,73,77,82,102,106,128,129,132,133,141,150,159,166,195,197,202,204,206,215\n85.951325\t86.401076\t216\t0,5,10,14,15,16,17,22,28,29,30,32,34,35,36,37,44,48,49,50,51,54,55,58,64,69,70,71,73,77,82,102,106,128,129,132,133,141,150,159,166,195,197,202,204,206,215\n86.401076\t86.643371\t216\t0,5,10,14,15,16,17,22,28,29,30,32,34,35,36,37,44,48,49,50,51,54,55,58,64,69,
70,71,73,77,102,106,128,129,132,133,141,150,159,166,195,197,202,204,206,215\n86.643371\t87.032912\t216\t0,5,10,14,15,16,17,22,28,29,30,32,34,35,36,37,44,48,49,50,55,58,64,69,70,71,73,77,102,106,116,128,129,132,133,141,150,159,166,195,197,202,204,206,215\n87.032912\t88.701340\t216\t0,5,10,14,15,16,17,22,28,29,30,32,34,35,36,37,44,48,49,50,55,58,64,69,70,73,77,102,106,116,128,129,132,133,141,150,159,166,195,197,202,204,206,215\n88.701340\t88.947794\t216\t0,5,10,14,15,16,17,22,28,29,30,32,34,35,36,37,44,48,50,55,58,64,69,70,73,77,102,116,128,129,132,133,141,147,150,159,166,195,197,202,204,206,215\n88.947794\t89.826128\t216\t0,5,10,14,15,16,17,22,29,30,32,34,35,36,37,44,48,50,55,58,64,69,70,73,102,116,128,129,132,133,141,147,150,159,166,190,195,197,202,204,206,215\n89.826128\t90.247778\t216\t0,5,10,14,15,16,17,22,25,29,30,32,34,35,36,37,44,48,50,55,58,64,69,70,73,102,116,128,129,132,133,141,147,150,159,166,190,195,197,202,204,206,215\n90.247778\t92.349588\t216\t0,5,10,14,15,16,17,22,25,29,30,32,34,35,36,37,44,48,50,55,58,64,69,70,73,93,99,102,116,128,132,133,141,147,150,159,166,190,195,197,202,204,206,215\n92.349588\t92.357754\t216\t0,5,10,14,15,16,17,22,25,29,30,32,34,35,36,37,44,48,50,55,58,64,69,70,73,93,99,102,113,116,128,132,133,141,147,150,159,166,190,195,197,202,204,206,215\n92.357754\t92.895855\t216\t0,5,10,14,15,16,17,22,25,29,30,32,34,35,36,37,44,48,50,55,64,69,70,73,93,99,102,113,116,128,132,133,141,147,150,159,166,190,195,197,202,204,206,215\n92.895855\t93.565018\t216\t0,5,10,14,15,16,17,22,25,29,30,32,34,35,36,37,44,48,50,55,69,70,73,93,99,102,113,116,128,132,133,141,147,150,159,166,190,195,197,202,204,206,215\n93.565018\t93.764750\t216\t0,5,10,15,16,17,22,25,29,30,32,34,35,36,37,44,48,50,55,69,70,73,93,99,102,113,116,128,132,133,141,147,150,159,166,188,190,195,197,202,204,206,215\n93.764750\t93.836844\t216\t0,5,10,15,16,17,22,25,29,30,32,34,35,36,37,44,48,50,55,66,69,70,73,93,99,102,113,116,128,132,133,141,147,150,159,166,188,190,195,197,202,204,206,215\n9
3.836844\t94.444668\t216\t0,5,10,15,16,17,22,25,29,32,34,35,37,44,48,50,55,66,69,70,73,93,99,102,113,116,128,132,133,141,147,150,159,166,187,188,190,195,197,202,204,206,215\n94.444668\t94.869872\t216\t0,5,10,15,16,17,22,25,29,32,34,35,37,44,48,50,55,66,69,70,73,84,93,99,100,102,113,116,128,132,133,141,147,150,159,166,187,188,190,195,197,204,206,215\n94.869872\t94.899812\t216\t0,5,10,15,16,17,22,25,29,32,34,35,37,44,48,50,55,66,69,70,73,84,93,99,100,113,116,128,132,133,141,147,150,159,166,187,188,190,195,197,204,206,215\n94.899812\t95.255452\t216\t0,5,10,14,15,16,17,22,25,29,32,34,35,37,44,48,50,55,66,69,70,73,84,90,93,99,100,113,116,128,132,133,141,147,150,159,166,187,190,195,197,204,206,215\n95.255452\t95.899004\t216\t0,5,10,14,15,16,17,22,25,29,32,34,35,37,44,48,50,54,55,66,69,70,73,84,90,93,99,100,113,128,132,133,141,147,150,159,166,187,190,195,197,204,206,215\n95.899004\t99.132998\t216\t0,5,10,14,15,16,17,22,25,26,27,29,32,34,35,37,44,48,50,54,55,66,69,70,73,84,90,93,99,100,113,128,132,133,141,147,150,159,166,187,190,195,204,206,215\n99.132998\t99.468331\t216\t0,5,10,14,15,16,17,22,25,26,29,32,34,35,37,44,48,50,54,55,66,69,70,73,84,90,93,99,100,113,128,132,133,141,147,150,159,166,187,190,195,204,206,215\n99.468331\t99.753960\t216\t0,4,5,10,14,15,16,17,22,25,26,29,32,34,35,37,44,48,50,54,55,66,69,70,73,84,90,93,99,100,113,128,132,133,141,147,150,159,166,186,187,190,195,204,206\n99.753960\t100.000000\t216\t0,4,5,10,14,15,16,17,22,25,26,29,32,34,35,37,44,48,50,54,55,66,69,70,73,84,90,99,100,113,128,132,133,141,147,150,159,166,182,186,187,190,195,204,206\n"
  },
  {
    "path": "python/tests/data/simplify-bugs/02-mutations.txt",
    "content": "site\tnode\tderived_state\n"
  },
  {
    "path": "python/tests/data/simplify-bugs/02-nodes.txt",
    "content": "is_sample\ttime\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n1\t0.000000\n0\t0.000194\n0\t0.000317\n0\t0.000403\n0\t0.000539\n0\t0.001031\n0\t0.001435\n0\t0.001762\n0\t0.001774\n0\t0.001809\n0\t0.002119\n0\t0.002788\n0\t0.002811\n0\t0.003626\n0\t0.003640\n0\t0.003920\n0\t0.003996\n0\t0.004180\n0\t0.004187\n0\t0.004326\n0\t0.004453\n0\t0.005014\n0\t0.005035\n0\t0.005512\n0\t0.005679\n0\t0.005842\n0\t0.006024\n0\t0.006182\n0\t0.006282\n0\t0.006540\n0\t0.006850\n0\t0.006989\n0\t0.007400\n0\t0.007440\n0\t0.007559\n0\t0.007880\n0\t0.008043\n0\t0.008337\n0\t0.008406\n0\t0.008968\n0\t0.009216\n0\t0.009236\n0\t0.009300\n0\t0.010000\n0\t0.010592\n0\t0.011448\n0\t0.011471\n0\t0.011991\n0\t0.012237\n0\t0.012290\n0\t0.012429\n0\t0.012484\n0\t0
.013078\n0\t0.013189\n0\t0.014031\n0\t0.014208\n0\t0.014449\n0\t0.014731\n0\t0.015388\n0\t0.015556\n0\t0.015588\n0\t0.015727\n0\t0.015773\n0\t0.015945\n0\t0.016374\n0\t0.016542\n0\t0.016560\n0\t0.016713\n0\t0.017029\n0\t0.017180\n0\t0.017280\n0\t0.017546\n0\t0.017637\n0\t0.017806\n0\t0.017943\n0\t0.017983\n0\t0.018078\n0\t0.018319\n0\t0.018490\n0\t0.018598\n0\t0.018688\n0\t0.019008\n0\t0.019012\n0\t0.019112\n0\t0.019190\n0\t0.019191\n0\t0.019477\n0\t0.020000\n0\t0.020659\n0\t0.020952\n0\t0.021267\n0\t0.021289\n0\t0.021641\n0\t0.021823\n0\t0.022321\n0\t0.022553\n0\t0.022602\n0\t0.023120\n0\t0.023233\n0\t0.024210\n0\t0.024342\n0\t0.024893\n0\t0.024922\n0\t0.024934\n0\t0.025736\n0\t0.025806\n0\t0.025938\n0\t0.026345\n0\t0.026486\n0\t0.026561\n0\t0.026877\n0\t0.027657\n0\t0.028587\n0\t0.029557\n0\t0.029563\n0\t0.029588\n0\t0.029963\n0\t0.030000\n"
  },
  {
    "path": "python/tests/data/simplify-bugs/02-sites.txt",
    "content": "position\tancestral_state\n"
  },
  {
    "path": "python/tests/data/simplify-bugs/03-edges.txt",
    "content": "left\tright\tparent\tchild\n0.000000\t10000.000000\t50\t29,31\n0.000000\t10000.000000\t51\t11,15\n0.000000\t1554.123401\t52\t1,51\n1554.123401\t10000.000000\t52\t1\n0.000000\t1736.203571\t53\t52\n1736.203571\t10000.000000\t53\t51,52\n0.000000\t10000.000000\t54\t4,12,27,38,39,40\n0.000000\t10000.000000\t55\t17,25,45,48,49,50\n0.000000\t10000.000000\t56\t24,55\n0.000000\t1554.123401\t57\t56\n1554.123401\t1736.203571\t57\t51,56\n1736.203571\t10000.000000\t57\t56\n0.000000\t10000.000000\t58\t0,13,22,57\n0.000000\t10000.000000\t59\t2,3,5,6,7,8,9,10,14,16,18,19,20,21,23,26,28,30,32,33,34,35,36,37,41,42,43,44,46,47,53,54,58\n0.000000\t10000.000000\t60\t59\n"
  },
  {
    "path": "python/tests/data/simplify-bugs/03-mutations.txt",
    "content": "site\tnode\tderived_state\n0\t52\t1\n1\t34\t1\n2\t57\t1\n2\t3\t1\n3\t58\t1\n4\t34\t1\n5\t56\t1\n6\t55\t1\n6\t1\t1\n7\t51\t1\n8\t43\t1\n9\t54\t1\n9\t0\t1\n"
  },
  {
    "path": "python/tests/data/simplify-bugs/03-nodes.txt",
    "content": "is_sample\ttime\tpopulation\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n0\t50.000000\t-1\t\n0\t51.000000\t-1\t\n0\t52.000000\t-1\t\n0\t53.000000\t-1\t\n0\t54.000000\t-1\t\n0\t55.000000\t-1\t\n0\t56.000000\t-1\t\n0\t57.000000\t-1\t\n0\t58.000000\t-1\t\n0\t59.000000\t-1\t\n0\t60.000000\t-1\t\n"
  },
  {
    "path": "python/tests/data/simplify-bugs/03-sites.txt",
    "content": "position\tancestral_state\n284.252209\t0\n1313.686815\t0\n1554.123401\t0\n1736.203571\t0\n3310.290546\t0\n4208.672558\t0\n4995.288904\t0\n5187.559857\t0\n5211.162157\t0\n5483.889413\t0\n"
  },
  {
    "path": "python/tests/data/simplify-bugs/04-edges.txt",
    "content": "left\tright\tparent\tchild\n0.000000\t0.500000\t6\t0,1\n0.500000\t1.000000\t6\t4,5\n0.000000\t0.400000\t7\t2,3\n0.000000\t0.500000\t8\t4,5\n0.500000\t1.000000\t8\t0,1\n0.400000\t1.000000\t9\t2,3\n0.400000\t1.000000\t10\t8,9\n0.000000\t0.100000\t13\t6,14\n0.100000\t0.400000\t15\t7,14\n0.000000\t0.100000\t11\t7,13\n0.100000\t0.400000\t11\t6,15\n0.000000\t0.400000\t12\t8,11\n0.400000\t1.000000\t12\t6,10\n"
  },
  {
    "path": "python/tests/data/simplify-bugs/04-mutations.txt",
    "content": "site\tnode\tderived_state\n"
  },
  {
    "path": "python/tests/data/simplify-bugs/04-nodes.txt",
    "content": "is_sample\ttime\tpopulation\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n1\t0.000000\t-1\t\n0\t1.000000\t-1\t\n0\t1.000000\t-1\t\n0\t1.000000\t-1\t\n0\t1.000000\t-1\t\n0\t2.000000\t-1\t\n0\t3.000000\t-1\t\n0\t4.000000\t-1\t\n0\t2.000000\t-1\t\n0\t1.000000\t-1\t\n0\t2.000000\t-1\t\n"
  },
  {
    "path": "python/tests/data/simplify-bugs/04-sites.txt",
    "content": "position\tancestral_state\n"
  },
  {
    "path": "python/tests/data/simplify-bugs/05-edges.txt",
    "content": "left   right  parent  child\n0.0    0.8    5       9\n0.3    1.0    5       10\n0.0    1.0    6       8\n0.0    0.3    6       10\n0.0    0.9    7       11\n0.0    1.0    7       12\n0.8    1.0    7       9\n0.9    1.0    1       11\n0.4    1.0    1       6\n0.0    0.4    4       6\n0.0    1.0    4       7\n0.0    1.0    0       1,2,4,5\n"
  },
  {
    "path": "python/tests/data/simplify-bugs/05-mutations.txt",
    "content": "site\tnode\tderived_state\n"
  },
  {
    "path": "python/tests/data/simplify-bugs/05-nodes.txt",
    "content": "id    is_sample    population    time\n0     0    0        6.0\n1     0    0        2.0\n2     0    0        2.0\n3     0    0        2.0\n4     0    0        2.0\n5     0    0        1.0\n6     0    0        1.0\n7     0    0        1.0\n8     1    0        0.0\n9     1    0        0.0\n10    1    0        0.0\n11    1    0        0.0\n12    1    0        0.0\n"
  },
  {
    "path": "python/tests/data/simplify-bugs/05-sites.txt",
    "content": "position\tancestral_state\n"
  },
  {
    "path": "python/tests/ibd.py",
    "content": "# MIT License\n#\n# Copyright (c) 2020-2024 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nPython implementation of the IBD-finding algorithms.\n\"\"\"\n\nimport argparse\nimport collections\n\nimport numpy as np\n\nimport tskit\n\n\nclass Segment:\n    \"\"\"\n    A class representing a single segment. 
Each segment has a left and right,\n    denoting the loci over which it spans, a node and a next, giving the next\n    in the chain.\n\n    The node it records is the *output* node ID.\n    \"\"\"\n\n    def __init__(self, left=None, right=None, node=None, next_seg=None):\n        self.left = left\n        self.right = right\n        self.node = node\n        self.next = next_seg\n\n    def __str__(self):\n        s = \"({}-{}->{}:next={})\".format(\n            self.left, self.right, self.node, repr(self.next)\n        )\n        return s\n\n    def __repr__(self):\n        return repr((self.left, self.right, self.node))\n\n    def __eq__(self, other):\n        # NOTE: to simplify tests, we DON'T check for equality of 'next'.\n        return (\n            self.left == other.left\n            and self.right == other.right\n            and self.node == other.node\n        )\n\n    def __lt__(self, other):\n        return (self.node, self.left, self.right) < (\n            other.node,\n            other.left,\n            other.right,\n        )\n\n\nclass SegmentList:\n    \"\"\"\n    A class representing a list of segments that are descended from a given ancestral\n    node via a particular child of the ancestor.\n    Each SegmentList keeps track of the first and last segment in the list, head and\n    tail.\n    \"\"\"\n\n    def __init__(self, head=None, tail=None):\n        self.head = head\n        self.tail = tail\n\n    def __str__(self):\n        return repr(self)\n\n    def __repr__(self):\n        tuple_segs = []\n        seg = self.head\n        while seg is not None:\n            tuple_segs.append((seg.left, seg.right, seg.node))\n            seg = seg.next\n        return repr(tuple_segs)\n\n    def extend(self, seglist):\n        \"\"\"\n        Extends this segment list with the segments in the specified list.\n        \"\"\"\n        assert isinstance(seglist, SegmentList)\n        if seglist.head is not None:\n            if self.head is None:\n    
            self.head = seglist.head\n                self.tail = seglist.tail\n            else:\n                self.tail.next = seglist.head\n                self.tail = seglist.tail\n\n    def append(self, segment):\n        \"\"\"\n        Append the specified segment to the end of this list.\n        \"\"\"\n        assert isinstance(segment, Segment)\n        if self.head is None:\n            self.head = segment\n            self.tail = segment\n        else:\n            self.tail.next = segment\n            self.tail = segment\n\n\nclass IbdResult:\n    \"\"\"\n    Class representing the IBD segments in a tree sequence for a given\n    set of sample pairs.\n    \"\"\"\n\n    def __init__(self):\n        self.segments = collections.defaultdict(list)\n\n    def __repr__(self):\n        return repr(self.segments)\n\n    def __str__(self):\n        return repr(self)\n\n    def add_segment_deprecated(self, a, b, seg):\n        # The original version of add_segment that doesn't sort or squash.\n        key = (a, b) if a < b else (b, a)\n        self.segments[key].append(tskit.IdentitySegment(seg.left, seg.right, seg.node))\n\n    def add_segment(self, a, b, seg):\n        key = (a, b) if a < b else (b, a)\n\n        # Get position and add into the correct position.\n        current_segs = self.segments[key]\n        num_segs = len(current_segs)\n\n        if num_segs == 0:\n            self.segments[key].append(\n                tskit.IdentitySegment(seg.left, seg.right, seg.node)\n            )\n        else:\n            # Find the position for the new segment.\n            i = 0\n            while (\n                i < num_segs\n                and current_segs[i].node <= seg.node\n                and current_segs[i].right <= seg.left\n            ):\n                i += 1\n\n            # Calculate boolean values that determine whether to squash\n            # and if so, where.\n            PUT_FIRST = False  # Insert segment at start of list.\n          
  PUT_LAST = False  # Insert segment at end of list.\n            SQUASH_LEFT = False  # Squash with the left segment.\n            SQUASH_RIGHT = False  # Squash with the right segment.\n\n            if i == 0:\n                PUT_FIRST = True\n            if i == num_segs:\n                PUT_LAST = True\n            if not PUT_FIRST:\n                if (\n                    current_segs[i - 1].node == seg.node\n                    and current_segs[i - 1].right == seg.left\n                ):\n                    SQUASH_LEFT = True\n            if not PUT_LAST:\n                if (\n                    seg.node == current_segs[i].node\n                    and seg.right == current_segs[i].left\n                ):\n                    SQUASH_RIGHT = True\n\n            # Insert the new segment and squash if needed.\n            if SQUASH_LEFT and not SQUASH_RIGHT:\n                current_segs[i - 1].right = seg.right\n            elif SQUASH_RIGHT and not SQUASH_LEFT:\n                current_segs[i].left = seg.left\n            elif SQUASH_LEFT and SQUASH_RIGHT:\n                # To squash twice, must pop one of the existing segments.\n                current_segs[i - 1].right = current_segs[i].right\n                current_segs.pop(i)\n            else:\n                self.segments[key].insert(\n                    i, tskit.IdentitySegment(seg.left, seg.right, seg.node)\n                )\n\n\nclass IbdFinder:\n    \"\"\"\n    Finds all IBD relationships between specified sample pairs in a tree sequence.\n    \"\"\"\n\n    def __init__(self, ts, *, within=None, between=None, min_span=0, max_time=None):\n        self.ts = ts\n        self.result = IbdResult()\n        if within is not None and between is not None:\n            raise ValueError(\"within and between are mutually exclusive\")\n\n        self.sample_set_id = np.zeros(ts.num_nodes, dtype=int) - 1\n        self.finding_between = False\n        if between is not None:\n            
self.finding_between = True\n            for set_id, samples in enumerate(between):\n                self.sample_set_id[samples] = set_id\n        else:\n            if within is None:\n                within = ts.samples()\n            self.sample_set_id[within] = 0\n        self.min_span = min_span\n        self.max_time = np.inf if max_time is None else max_time\n        self.A = [SegmentList() for _ in range(ts.num_nodes)]  # Descendant segments\n        for u in range(ts.num_nodes):\n            if self.sample_set_id[u] != -1:\n                self.A[u].append(Segment(0, ts.sequence_length, u))\n        self.tables = self.ts.tables\n\n    def print_state(self):\n        print(\"IBD Finder\")\n        print(\"min_span = \", self.min_span)\n        print(\"max_time   = \", self.max_time)\n        print(\"finding_between = \", self.finding_between)\n        print(\"u\\tset_id\\tA = \")\n        for u, a in enumerate(self.A):\n            print(u, self.sample_set_id[u], a, sep=\"\\t\")\n\n    def run(self, squash=False):\n        node_times = self.tables.nodes.time\n        for e in self.ts.edges():\n            time = node_times[e.parent]\n            if time > self.max_time:\n                # Stop looking for IBD segments once the\n                # processed nodes are older than the max time.\n                break\n            child_segs = SegmentList()\n            s = self.A[e.child].head\n            while s is not None:\n                intvl = (\n                    max(e.left, s.left),\n                    min(e.right, s.right),\n                )\n                # if intvl[1] - intvl[0] > self.min_span:\n                child_segs.append(Segment(intvl[0], intvl[1], s.node))\n                s = s.next\n            self.record_ibd(e.parent, child_segs, squash=squash)\n            self.A[e.parent].extend(child_segs)\n        if self.min_span > 0:\n            self.filter_by_min_span()\n        return self.result.segments\n\n    def record_ibd(self, 
current_parent, child_segs, squash):\n        \"\"\"\n        Given the specified set of child segments for the current parent\n        record the IBD segments that will occur as a result of adding these\n        new segments into the existing list.\n        \"\"\"\n        # Note the implementation here is O(n^2) because we have to compare\n        # every segment with every other one. If the segments were stored in\n        # left-to-right sorted order, we could avoid this and merge them more\n        # efficiently. There is some added complexity in doing this, however.\n        seg0 = self.A[current_parent].head\n        while seg0 is not None:\n            seg1 = child_segs.head\n            while seg1 is not None:\n                left = max(seg0.left, seg1.left)\n                right = min(seg0.right, seg1.right)\n                # If there are any overlapping segments, record as a new\n                # IBD relationship.\n                if self.passes_filters(seg0.node, seg1.node, left, right):\n                    if squash:\n                        self.result.add_segment(\n                            seg0.node,\n                            seg1.node,\n                            Segment(left, right, current_parent),\n                        )\n                    else:\n                        self.result.add_segment_deprecated(\n                            seg0.node,\n                            seg1.node,\n                            Segment(left, right, current_parent),\n                        )\n                seg1 = seg1.next\n            seg0 = seg0.next\n\n    def filter_by_min_span(self):\n        \"\"\"\n        Remove any IBD segments that are smaller than min_span.\n        Note that we can't do this until we have squashed the IBD segments.\n        \"\"\"\n        keys_to_pop = []\n        for key in self.result.segments.keys():\n            self.result.segments[key] = [\n                s for s in self.result.segments[key] if s.right - s.left > 
self.min_span\n            ]\n            if len(self.result.segments[key]) == 0:\n                keys_to_pop.append(key)\n\n        # Remove any keys that now have no IBD segments.\n        for key in keys_to_pop:\n            self.result.segments.pop(key)\n\n    def passes_filters(self, a, b, left, right):\n        if a == b:\n            return False\n        if right - left <= 0:\n            return False\n        if self.finding_between:\n            return self.sample_set_id[a] != self.sample_set_id[b]\n        else:\n            return True\n\n\nif __name__ == \"__main__\":\n    \"\"\"\n    A simple CLI for running IBDFinder on a command line from the `python`\n    subdirectory. Basic usage:\n    > python3 ./tests/ibd.py --infile test.trees\n    \"\"\"\n\n    parser = argparse.ArgumentParser(\n        description=\"Command line interface for the IBDFinder.\"\n    )\n\n    parser.add_argument(\n        \"--infile\",\n        type=str,\n        dest=\"infile\",\n        nargs=1,\n        metavar=\"IN_FILE\",\n        help=\"The tree sequence to be analysed.\",\n    )\n\n    parser.add_argument(\n        \"--min-length\",\n        type=float,\n        dest=\"min_span\",\n        nargs=1,\n        metavar=\"MIN_LENGTH\",\n        help=\"Only segments longer than this cutoff will be returned.\",\n    )\n\n    parser.add_argument(\n        \"--max-time\",\n        type=float,\n        dest=\"max_time\",\n        nargs=1,\n        metavar=\"MAX_TIME\",\n        help=\"Only segments younger this time will be returned.\",\n    )\n\n    parser.add_argument(\n        \"--samples\",\n        type=int,\n        dest=\"samples\",\n        nargs=2,\n        metavar=\"SAMPLES\",\n        help=\"If provided, only this pair's IBD info is returned.\",\n    )\n\n    args = parser.parse_args()\n    ts = tskit.load(args.infile[0])\n    if args.min_span is None:\n        min_span = 0\n    else:\n        min_span = args.min_span[0]\n    if args.max_time is None:\n        max_time 
= None\n    else:\n        max_time = args.max_time[0]\n\n    s = IbdFinder(ts, min_span=min_span, max_time=max_time)\n    all_segs = s.run()\n\n    if args.samples is None:\n        print(all_segs)\n    else:\n        samples = args.samples\n        print(all_segs[(samples[0], samples[1])])\n"
  },
  {
    "path": "python/tests/simplify.py",
    "content": "# MIT License\n#\n# Copyright (c) 2019-2024 Tskit Developers\n# Copyright (c) 2015-2018 University of Oxford\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nPython implementation of the simplify algorithm.\n\"\"\"\n\nimport sys\n\nimport numpy as np\nimport portion\n\nimport tskit\n\n\ndef overlapping_segments(segments):\n    \"\"\"\n    Returns an iterator over the (left, right, X) tuples describing the\n    distinct overlapping segments in the specified set.\n    \"\"\"\n    S = sorted(segments, key=lambda x: x.left)\n    n = len(S)\n    # Insert a sentinel at the end for convenience.\n    S.append(Segment(sys.float_info.max, 0))\n    right = S[0].left\n    X = []\n    j = 0\n    while j < n:\n        # Remove any elements of X with right <= left\n        left = right\n        X = [x for x in X if x.right > left]\n        if len(X) == 0:\n            left = S[j].left\n        while j < n and S[j].left == left:\n            
X.append(S[j])\n            j += 1\n        j -= 1\n        right = min(x.right for x in X)\n        right = min(right, S[j + 1].left)\n        yield left, right, X\n        j += 1\n\n    while len(X) > 0:\n        left = right\n        X = [x for x in X if x.right > left]\n        if len(X) > 0:\n            right = min(x.right for x in X)\n            yield left, right, X\n\n\nclass Segment:\n    \"\"\"\n    A class representing a single segment. Each segment has a left and right,\n    denoting the loci over which it spans, a node and a next, giving the next\n    in the chain.\n\n    The node it records is the *output* node ID.\n    \"\"\"\n\n    def __init__(self, left=None, right=None, node=None, next_segment=None):\n        self.left = left\n        self.right = right\n        self.node = node\n        self.next = next_segment\n\n    def __str__(self):\n        s = \"({}-{}->{}:next={})\".format(\n            self.left, self.right, self.node, repr(self.next)\n        )\n        return s\n\n    def __repr__(self):\n        return repr((self.left, self.right, self.node))\n\n    def __lt__(self, other):\n        return (self.left, self.right, self.node) < (other.left, other.right, self.node)\n\n\nclass Simplifier:\n    \"\"\"\n    Simplifies a tree sequence to its minimal representation given a subset\n    of the leaves.\n    \"\"\"\n\n    def __init__(\n        self,\n        ts,\n        sample,\n        reduce_to_site_topology=False,\n        filter_sites=True,\n        filter_populations=True,\n        filter_individuals=True,\n        keep_unary=False,\n        keep_unary_in_individuals=False,\n        keep_input_roots=False,\n        filter_nodes=True,\n        update_sample_flags=True,\n    ):\n        self.ts = ts\n        self.n = len(sample)\n        self.reduce_to_site_topology = reduce_to_site_topology\n        self.sequence_length = ts.sequence_length\n        self.filter_sites = filter_sites\n        self.filter_populations = filter_populations\n    
    self.filter_individuals = filter_individuals\n        self.filter_nodes = filter_nodes\n        self.update_sample_flags = update_sample_flags\n        self.keep_unary = keep_unary\n        self.keep_unary_in_individuals = keep_unary_in_individuals\n        self.keep_input_roots = keep_input_roots\n        self.num_mutations = ts.num_mutations\n        self.input_sites = list(ts.sites())\n        self.A_head = [None for _ in range(ts.num_nodes)]\n        self.A_tail = [None for _ in range(ts.num_nodes)]\n        self.tables = self.ts.tables.copy()\n        self.tables.clear()\n        self.edge_buffer = {}\n        self.node_id_map = np.zeros(ts.num_nodes, dtype=np.int32) - 1\n        self.is_sample = np.zeros(ts.num_nodes, dtype=np.int8)\n        self.mutation_node_map = [-1 for _ in range(self.num_mutations)]\n        self.samples = set(sample)\n        self.sort_offset = -1\n        # We keep a map of input nodes to mutations.\n        self.mutation_map = [[] for _ in range(ts.num_nodes)]\n        position = ts.sites_position\n        site = ts.mutations_site\n        node = ts.mutations_node\n        for mutation_id in range(ts.num_mutations):\n            site_position = position[site[mutation_id]]\n            self.mutation_map[node[mutation_id]].append((site_position, mutation_id))\n\n        for sample_id in sample:\n            self.is_sample[sample_id] = 1\n\n        if not self.filter_nodes:\n            # NOTE In the C implementation we would really just not touch the\n            # original tables.\n            self.tables.nodes.replace_with(self.ts.tables.nodes)\n            if self.update_sample_flags:\n                flags = self.tables.nodes.flags\n                # Zero out other sample flags\n                flags = np.bitwise_and(\n                    flags, np.uint32(~tskit.NODE_IS_SAMPLE & 0xFFFFFFFF)\n                )\n                flags[sample] |= tskit.NODE_IS_SAMPLE\n                self.tables.nodes.flags = 
flags.astype(np.uint32)\n\n            self.node_id_map[:] = np.arange(ts.num_nodes)\n            for sample_id in sample:\n                self.add_ancestry(sample_id, 0, self.sequence_length, sample_id)\n        else:\n            for sample_id in sample:\n                output_id = self.record_node(sample_id)\n                self.add_ancestry(sample_id, 0, self.sequence_length, output_id)\n\n        self.position_lookup = None\n        if self.reduce_to_site_topology:\n            self.position_lookup = np.hstack([[0], position, [self.sequence_length]])\n\n    def record_node(self, input_id):\n        \"\"\"\n        Adds a new node to the output table corresponding to the specified input\n        node ID.\n        \"\"\"\n        node = self.ts.node(input_id)\n        flags = node.flags\n        if self.update_sample_flags:\n            # Need to zero out the sample flag\n            flags &= ~tskit.NODE_IS_SAMPLE\n            if self.is_sample[input_id]:\n                flags |= tskit.NODE_IS_SAMPLE\n        output_id = self.tables.nodes.append(node.replace(flags=flags))\n        self.node_id_map[input_id] = output_id\n        return output_id\n\n    def rewind_node(self, input_id, output_id):\n        \"\"\"\n        Remove the mapping for the specified input and output node pair. 
This is\n        done because there are no edges referring to the node.\n        \"\"\"\n        assert output_id == len(self.tables.nodes) - 1\n        assert output_id == self.node_id_map[input_id]\n        self.tables.nodes.truncate(output_id)\n        self.node_id_map[input_id] = -1\n\n    def flush_edges(self):\n        \"\"\"\n        Flush the edges to the output table after sorting and squashing\n        any redundant records.\n        \"\"\"\n        num_edges = 0\n        for child in sorted(self.edge_buffer.keys()):\n            for edge in self.edge_buffer[child]:\n                self.tables.edges.append(edge)\n                num_edges += 1\n        self.edge_buffer.clear()\n        return num_edges\n\n    def record_edge(self, left, right, parent, child):\n        \"\"\"\n        Adds an edge to the output list.\n        \"\"\"\n        if self.reduce_to_site_topology:\n            X = self.position_lookup\n            left_index = np.searchsorted(X, left)\n            right_index = np.searchsorted(X, right)\n            # Find the smallest site position index greater than or equal to left\n            # and right, i.e., slide each endpoint of an interval to the right\n            # until they hit a site position. If both left and right map to the\n            # same position then we discard this edge. 
We also discard an\n            # edge if left = 0 and right is less than the first site position.\n            if left_index == right_index or (left_index == 0 and right_index == 1):\n                return\n            # Remap back to zero if the left end maps to the first site.\n            if left_index == 1:\n                left_index = 0\n            left = X[left_index]\n            right = X[right_index]\n        if child not in self.edge_buffer:\n            self.edge_buffer[child] = [tskit.Edge(left, right, parent, child)]\n        else:\n            last = self.edge_buffer[child][-1]\n            if last.right == left:\n                last.right = right\n            else:\n                self.edge_buffer[child].append(tskit.Edge(left, right, parent, child))\n\n    def print_state(self):\n        print(\".................\")\n        print(\"Ancestors: \")\n        num_nodes = len(self.A_tail)\n        for j in range(num_nodes):\n            print(\"\\t\", j, \"->\", end=\"\")\n            x = self.A_head[j]\n            while x is not None:\n                print(f\"({x.left}-{x.right}->{x.node})\", end=\"\")\n                x = x.next\n            print()\n        print(\"Mutation map:\")\n        for u in range(len(self.mutation_map)):\n            v = self.mutation_map[u]\n            if len(v) > 0:\n                print(\"\\t\", u, \"->\", v)\n        print(\"Node ID map: (input->output)\")\n        for input_id, output_id in enumerate(self.node_id_map):\n            print(\"\\t\", input_id, \"->\", output_id)\n        print(\"Mutation node map\")\n        for j in range(self.num_mutations):\n            print(\"\\t\", j, \"->\", self.mutation_node_map[j])\n        print(\"Output:\")\n        print(self.tables)\n        self.check_state()\n\n    def map_mutations(self, left, right, input_id, output_id):\n        \"\"\"\n        Map any mutations for the input node ID on the\n        interval to its output ID.\n        \"\"\"\n        assert 
output_id != -1\n        # TODO we should probably remove these as they are used.\n        # Or else, binary search the list so it's quick.\n        for x, mutation_id in self.mutation_map[input_id]:\n            if left <= x < right:\n                self.mutation_node_map[mutation_id] = output_id\n\n    def add_ancestry(self, input_id, left, right, node):\n        tail = self.A_tail[input_id]\n        if tail is None:\n            x = Segment(left, right, node)\n            self.A_head[input_id] = x\n            self.A_tail[input_id] = x\n        else:\n            if tail.right == left and tail.node == node:\n                tail.right = right\n            else:\n                x = Segment(left, right, node)\n                tail.next = x\n                self.A_tail[input_id] = x\n\n        self.map_mutations(left, right, input_id, node)\n\n    def merge_labeled_ancestors(self, S, input_id):\n        \"\"\"\n        All ancestry segments in S come together into a new parent.\n        The new parent must be assigned and any overlapping segments coalesced.\n        \"\"\"\n        output_id = self.node_id_map[input_id]\n        is_sample = self.is_sample[input_id]\n        if is_sample:\n            # Free up the existing ancestry mapping.\n            x = self.A_tail[input_id]\n            assert x.left == 0 and x.right == self.sequence_length\n            self.A_tail[input_id] = None\n            self.A_head[input_id] = None\n\n        prev_right = 0\n        for left, right, X in overlapping_segments(S):\n            if len(X) == 1:\n                ancestry_node = X[0].node\n                if is_sample:\n                    self.record_edge(left, right, output_id, ancestry_node)\n                    ancestry_node = output_id\n                elif self.keep_unary or (\n                    self.keep_unary_in_individuals\n                    and self.ts.node(input_id).individual >= 0\n                ):\n                    if output_id == -1:\n                
        output_id = self.record_node(input_id)\n                    self.record_edge(left, right, output_id, ancestry_node)\n            else:\n                if output_id == -1:\n                    output_id = self.record_node(input_id)\n                ancestry_node = output_id\n                for x in X:\n                    self.record_edge(left, right, output_id, x.node)\n            if is_sample and left != prev_right:\n                # Fill in any gaps in the ancestry for the sample\n                self.add_ancestry(input_id, prev_right, left, output_id)\n            if self.keep_unary or (\n                self.keep_unary_in_individuals and self.ts.node(input_id).individual >= 0\n            ):\n                ancestry_node = output_id\n            self.add_ancestry(input_id, left, right, ancestry_node)\n            prev_right = right\n\n        if is_sample and prev_right != self.sequence_length:\n            # If a trailing gap exists in the sample ancestry, fill it in.\n            self.add_ancestry(input_id, prev_right, self.sequence_length, output_id)\n        if output_id != -1:\n            num_edges = self.flush_edges()\n            if self.filter_nodes and num_edges == 0 and not is_sample:\n                self.rewind_node(input_id, output_id)\n\n    def extract_ancestry(self, edge):\n        S = []\n        x = self.A_head[edge.child]\n\n        x_head = None\n        x_prev = None\n        while x is not None:\n            if x.right > edge.left and edge.right > x.left:\n                y = Segment(max(x.left, edge.left), min(x.right, edge.right), x.node)\n                # print(\"snip\", y)\n                S.append(y)\n                assert x.left <= y.left\n                assert x.right >= y.right\n                seg_left = None\n                seg_right = None\n                if x.left != y.left:\n                    seg_left = Segment(x.left, y.left, x.node)\n                    if x_prev is None:\n                        x_head 
= seg_left\n                    else:\n                        x_prev.next = seg_left\n                    x_prev = seg_left\n                if x.right != y.right:\n                    x.left = y.right\n                    seg_right = x\n                else:\n                    # Free x\n                    seg_right = x.next\n                if x_prev is None:\n                    x_head = seg_right\n                else:\n                    x_prev.next = seg_right\n                x = seg_right\n            else:\n                if x_prev is None:\n                    x_head = x\n                x_prev = x\n                x = x.next\n        # Note - we had some code to defragment segments in the output\n        # chain here, but couldn't find an example where it needed to\n        # be called. So, looks like squashing isn't necessary here.\n        self.A_head[edge.child] = x_head\n        self.A_tail[edge.child] = x_prev\n        return S\n\n    def process_parent_edges(self, edges):\n        \"\"\"\n        Process all of the edges for a given parent.\n        \"\"\"\n        assert len({e.parent for e in edges}) == 1\n        parent = edges[0].parent\n        S = []\n        for edge in edges:\n            S.extend(self.extract_ancestry(edge))\n        self.merge_labeled_ancestors(S, parent)\n        self.check_state()\n\n    def finalise_sites(self):\n        # Build a map from the old mutation IDs to new IDs. 
Any mutation that\n        # has not been mapped to a node in the new tree sequence will be removed.\n        mutation_id_map = [-1 for _ in range(self.num_mutations)]\n        num_output_mutations = 0\n\n        for site in self.ts.sites():\n            num_output_site_mutations = 0\n            for mut in site.mutations:\n                mapped_node = self.mutation_node_map[mut.id]\n                mapped_parent = -1\n                if mut.parent != -1:\n                    mapped_parent = mutation_id_map[mut.parent]\n                if mapped_node != -1:\n                    mutation_id_map[mut.id] = num_output_mutations\n                    num_output_mutations += 1\n                    num_output_site_mutations += 1\n            output_site = True\n            if self.filter_sites and num_output_site_mutations == 0:\n                output_site = False\n\n            if output_site:\n                for mut in site.mutations:\n                    if mutation_id_map[mut.id] != -1:\n                        mapped_parent = -1\n                        if mut.parent != -1:\n                            mapped_parent = mutation_id_map[mut.parent]\n                        self.tables.mutations.append(\n                            mut.replace(\n                                site=len(self.tables.sites),\n                                node=self.mutation_node_map[mut.id],\n                                parent=mapped_parent,\n                            )\n                        )\n                self.tables.sites.append(site)\n\n    def finalise_references(self):\n        input_populations = self.ts.tables.populations\n        population_id_map = np.arange(len(input_populations) + 1, dtype=np.int32)\n        # Trick to ensure the null population gets mapped to itself.\n        population_id_map[-1] = -1\n        input_individuals = self.ts.tables.individuals\n        individual_id_map = np.arange(len(input_individuals) + 1, dtype=np.int32)\n        # Trick to 
ensure the null individual gets mapped to itself.\n        individual_id_map[-1] = -1\n\n        population_ref_count = np.ones(len(input_populations), dtype=int)\n        if self.filter_populations:\n            population_ref_count[:] = 0\n            population_id_map[:] = -1\n        individual_ref_count = np.ones(len(input_individuals), dtype=int)\n        if self.filter_individuals:\n            individual_ref_count[:] = 0\n            individual_id_map[:] = -1\n\n        for node in self.tables.nodes:\n            if self.filter_populations and node.population != tskit.NULL:\n                population_ref_count[node.population] += 1\n            if self.filter_individuals and node.individual != tskit.NULL:\n                individual_ref_count[node.individual] += 1\n\n        for input_id, count in enumerate(population_ref_count):\n            if count > 0:\n                row = input_populations[input_id]\n                output_id = self.tables.populations.append(row)\n                population_id_map[input_id] = output_id\n        for input_id, count in enumerate(individual_ref_count):\n            if count > 0:\n                row = input_individuals[input_id]\n                output_id = self.tables.individuals.append(row)\n                individual_id_map[input_id] = output_id\n\n        # Remap the population ID references for nodes.\n        nodes = self.tables.nodes\n        nodes.set_columns(\n            flags=nodes.flags,\n            time=nodes.time,\n            metadata=nodes.metadata,\n            metadata_offset=nodes.metadata_offset,\n            individual=individual_id_map[nodes.individual],\n            population=population_id_map[nodes.population],\n        )\n\n        # Remap the parent ids of individuals\n        individuals_copy = self.tables.individuals.copy()\n        self.tables.individuals.clear()\n        for row in individuals_copy:\n            mapped_parents = []\n            for p in row.parents:\n                if p 
== -1:\n                    mapped_parents.append(-1)\n                else:\n                    mapped_parents.append(individual_id_map[p])\n            self.tables.individuals.append(row.replace(parents=mapped_parents))\n\n        # We don't support migrations for now. We'll need to remap these as well.\n        assert self.ts.num_migrations == 0\n\n    def insert_input_roots(self):\n        youngest_root_time = np.inf\n        for input_id in range(len(self.node_id_map)):\n            x = self.A_head[input_id]\n            if x is not None:\n                output_id = self.node_id_map[input_id]\n                if output_id == -1:\n                    output_id = self.record_node(input_id)\n                while x is not None:\n                    if x.node != output_id:\n                        self.record_edge(x.left, x.right, output_id, x.node)\n                        self.map_mutations(x.left, x.right, input_id, output_id)\n                    x = x.next\n                self.flush_edges()\n                root_time = self.tables.nodes.time[output_id]\n                if root_time < youngest_root_time:\n                    youngest_root_time = root_time\n        # We have to sort the edge table from the point where the edges\n        # for the youngest root would be inserted.\n        # Note: it would be nicer to do the sort here, but we have to\n        # wait until the finalise_references method has been called to\n        # make sure all the populations etc have been setup.\n        node_time = self.tables.nodes.time\n        edge_parent = self.tables.edges.parent\n        offset = 0\n        while (\n            offset < len(self.tables.edges)\n            and node_time[edge_parent[offset]] < youngest_root_time\n        ):\n            offset += 1\n        self.sort_offset = offset\n\n    def simplify(self):\n        if self.ts.num_edges > 0:\n            all_edges = list(self.ts.edges())\n            edges = all_edges[:1]\n            for e in 
all_edges[1:]:\n                if e.parent != edges[0].parent:\n                    self.process_parent_edges(edges)\n                    edges = []\n                edges.append(e)\n            self.process_parent_edges(edges)\n        if self.keep_input_roots:\n            self.insert_input_roots()\n        self.finalise_sites()\n        self.finalise_references()\n        if self.sort_offset != -1:\n            self.tables.sort(edge_start=self.sort_offset)\n        ts = self.tables.tree_sequence()\n        return ts, self.node_id_map\n\n    def check_state(self):\n        # print(\"CHECK_STATE\")\n        all_ancestry = []\n        num_nodes = len(self.A_head)\n        for j in range(num_nodes):\n            head = self.A_head[j]\n            tail = self.A_tail[j]\n            if head is None:\n                assert tail is None\n            else:\n                x = head\n                while x.next is not None:\n                    assert x.right <= x.next.left\n                    x = x.next\n                assert x == tail\n                x = head\n                while x is not None:\n                    assert x.left < x.right\n                    all_ancestry.append(portion.openclosed(x.left, x.right))\n                    if x.next is not None:\n                        assert x.right <= x.next.left\n                        # We should also not have any squashable segments.\n                        if x.right == x.next.left:\n                            assert x.node != x.next.node\n                    x = x.next\n        # Make sure we haven't lost ancestry.\n        if len(all_ancestry) > 0:\n            union = all_ancestry[0]\n            for interval in all_ancestry[1:]:\n                union = union.union(interval)\n            assert union.atomic\n            assert union.lower == 0\n            assert union.upper == self.sequence_length\n\n\nclass AncestorMap:\n    \"\"\"\n    Simplifies a tree sequence to show relationships between\n    
samples and a designated set of ancestors.\n    \"\"\"\n\n    def __init__(self, ts, sample, ancestors):\n        self.ts = ts\n        self.samples = set(sample)\n        assert (self.samples).issubset(set(range(0, ts.num_nodes)))\n        self.ancestors = set(ancestors)\n        assert (self.ancestors).issubset(set(range(0, ts.num_nodes)))\n        self.table = tskit.EdgeTable()\n        self.sequence_length = ts.sequence_length\n        self.A_head = [None for _ in range(ts.num_nodes)]\n        self.A_tail = [None for _ in range(ts.num_nodes)]\n        for sample_id in sample:\n            self.add_ancestry(0, self.sequence_length, sample_id, sample_id)\n        self.edge_buffer = {}\n        self.oldest_ancestor_time = max(ts.nodes_time[u] for u in ancestors)\n        self.oldest_sample_time = max(ts.nodes_time[u] for u in sample)\n        self.oldest_node_time = max(self.oldest_ancestor_time, self.oldest_sample_time)\n\n    def link_ancestors(self):\n        if self.ts.num_edges > 0:\n            all_edges = list(self.ts.edges())\n            edges = all_edges[:1]\n            for e in all_edges[1:]:\n                if self.ts.tables.nodes.time[e.parent] > self.oldest_node_time:\n                    break\n                if e.parent != edges[0].parent:\n                    self.process_parent_edges(edges)\n                    edges = []\n                edges.append(e)\n            self.process_parent_edges(edges)\n        return self.table\n\n    def process_parent_edges(self, edges):\n        \"\"\"\n        Process all of the edges for a given parent.\n        \"\"\"\n        assert len({e.parent for e in edges}) == 1\n        parent = edges[0].parent\n        S = []\n        for edge in edges:\n            x = self.A_head[edge.child]\n            while x is not None:\n                if x.right > edge.left and edge.right > x.left:\n                    y = Segment(max(x.left, edge.left), min(x.right, edge.right), x.node)\n                    S.append(y)\n 
               x = x.next\n        self.merge_labeled_ancestors(S, parent)\n        self.check_state()\n\n    def merge_labeled_ancestors(self, S, input_id):\n        \"\"\"\n        All ancestry segments in S come together into a new parent.\n        The new parent must be assigned and any overlapping segments coalesced.\n        \"\"\"\n        is_sample = input_id in self.samples\n        if is_sample:\n            # Free up the existing ancestry mapping.\n            x = self.A_tail[input_id]\n            assert x.left == 0 and x.right == self.sequence_length\n            self.A_tail[input_id] = None\n            self.A_head[input_id] = None\n\n        is_ancestor = input_id in self.ancestors\n        prev_right = 0\n        for left, right, X in overlapping_segments(S):\n            if is_ancestor or is_sample:\n                for x in X:\n                    ancestry_node = x.node\n                    self.record_edge(left, right, input_id, ancestry_node)\n                self.add_ancestry(left, right, input_id, input_id)\n\n                if is_sample and left != prev_right:\n                    # Fill in any gaps in the ancestry for the sample.\n                    self.add_ancestry(prev_right, left, input_id, input_id)\n\n            else:\n                for x in X:\n                    ancestry_node = x.node\n                    # Add sample ancestry for the currently-processed segment set.\n                    self.add_ancestry(left, right, ancestry_node, input_id)\n            prev_right = right\n\n        if is_sample and prev_right != self.sequence_length:\n            # If a trailing gap exists in the sample ancestry, fill it in.\n            self.add_ancestry(prev_right, self.sequence_length, input_id, input_id)\n        if input_id != -1:\n            self.flush_edges()\n\n    def record_edge(self, left, right, parent, child):\n        \"\"\"\n        Adds an edge to the output list.\n        \"\"\"\n        if child not in self.edge_buffer:\n  
          self.edge_buffer[child] = [tskit.Edge(left, right, parent, child)]\n        else:\n            last = self.edge_buffer[child][-1]\n            if last.right == left:\n                last.right = right\n            else:\n                self.edge_buffer[child].append(tskit.Edge(left, right, parent, child))\n\n    def add_ancestry(self, left, right, node, current_node):\n        tail = self.A_tail[current_node]\n        if tail is None:\n            x = Segment(left, right, node)\n            self.A_head[current_node] = x\n            self.A_tail[current_node] = x\n        else:\n            if tail.right == left and tail.node == node:\n                tail.right = right\n            else:\n                x = Segment(left, right, node)\n                tail.next = x\n                self.A_tail[current_node] = x\n\n    def flush_edges(self):\n        \"\"\"\n        Flush the edges to the output table after sorting and squashing\n        any redundant records.\n        \"\"\"\n        num_edges = 0\n        for child in sorted(self.edge_buffer.keys()):\n            for edge in self.edge_buffer[child]:\n                self.table.append(edge)\n                num_edges += 1\n        self.edge_buffer.clear()\n        return num_edges\n\n    def check_state(self):\n        num_nodes = len(self.A_head)\n        for j in range(num_nodes):\n            head = self.A_head[j]\n            tail = self.A_tail[j]\n            if head is None:\n                assert tail is None\n            else:\n                x = head\n                while x.next is not None:\n                    x = x.next\n                assert x == tail\n                x = head.next\n                while x is not None:\n                    assert x.left < x.right\n                    if x.next is not None:\n                        if self.ancestors is None:\n                            assert x.right <= x.next.left\n                        # We should also not have any squashable 
segments.\n                        if x.right == x.next.left:\n                            assert x.node != x.next.node\n                    x = x.next\n\n    def print_state(self):\n        print(\".................\")\n        print(\"Ancestors: \")\n        num_nodes = len(self.A_tail)\n        for j in range(num_nodes):\n            print(\"\\t\", j, \"->\", end=\"\")\n            x = self.A_head[j]\n            while x is not None:\n                print(f\"({x.left}-{x.right}->{x.node})\", end=\"\")\n                x = x.next\n            print()\n        print(\"Output:\")\n        print(self.table)\n        self.check_state()\n\n\nif __name__ == \"__main__\":\n    # Simple CLI for running simplifier/ancestor mapping above.\n    class_to_implement = sys.argv[1]\n    assert class_to_implement == \"Simplifier\" or class_to_implement == \"AncestorMap\"\n    ts = tskit.load(sys.argv[2])\n\n    if class_to_implement == \"Simplifier\":\n        samples = list(map(int, sys.argv[3:]))\n\n        print(\"When keep_unary = True:\")\n        s = Simplifier(ts, samples, keep_unary=True)\n        # s.print_state()\n        tss, _ = s.simplify()\n        tables = tss.dump_tables()\n        print(tables.nodes)\n        print(tables.edges)\n        print(tables.sites)\n        print(tables.mutations)\n\n        print(\"\\nWhen keep_unary = False\")\n        s = Simplifier(ts, samples, keep_unary=False)\n        # s.print_state()\n        tss, _ = s.simplify()\n        tables = tss.dump_tables()\n        print(tables.nodes)\n        print(tables.edges)\n        print(tables.sites)\n        print(tables.mutations)\n\n        print(\"\\nWhen keep_unary_in_individuals = True\")\n        s = Simplifier(ts, samples, keep_unary_in_individuals=True)\n        # s.print_state()\n        tss, _ = s.simplify()\n        tables = tss.dump_tables()\n        print(tables.nodes)\n        print(tables.edges)\n        print(tables.sites)\n        print(tables.mutations)\n\n    elif 
class_to_implement == \"AncestorMap\":\n        samples = sys.argv[3]\n        samples = samples.split(\",\")\n        samples = list(map(int, samples))\n\n        ancestors = sys.argv[4]\n        ancestors = ancestors.split(\",\")\n        ancestors = list(map(int, ancestors))\n\n        s = AncestorMap(ts, samples, ancestors)\n        tss = s.link_ancestors()\n        # tables = tss.dump_tables()\n        # print(tables.nodes)\n        print(tss)\n"
  },
  {
    "path": "python/tests/test_avl_tree.py",
    "content": "# MIT License\n#\n# Copyright (c) 2021-2024 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nA Python version of the C AVL tree code for development purposes.\n\nBased on Knuth's AVL tree code in TAOCP volume 3, adapted from\nhttps://commandlinefanatic.com/cgi-bin/showarticle.cgi?article=art070\n\nNote there is a bug in that Python translation which is missing\nP.B = 0 at the end of A9.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport dataclasses\nfrom typing import Any\n\nimport numpy as np\nimport pytest\n\n# The nodes of the tree are assumed to contain KEY, LLINK, and RLINK fields.\n# We also have a new field\n#\n# B(P) = balance factor of NODE(P)\n#\n# the height of the right subtree minus the height of the left subtree; this field\n# always contains either +1, 0, or -1.  
A special header node also appears at the top\n# of the tree, in location HEAD; the value of RLINK(HEAD) is a pointer to the root\n# of the tree, and LLINK(HEAD) is used to keep track of the overall height of the tree.\n# We assume that the tree is nonempty, namely that RLINK(HEAD) != ^.\n\n\n@dataclasses.dataclass(eq=False)\nclass Node:\n    key: Any = None\n    llink: Node = None\n    rlink: Node = None\n    balance: int = 0\n\n    def __str__(self):\n        llink = None if self.llink is None else self.llink.key\n        rlink = None if self.rlink is None else self.rlink.key\n        return (\n            f\"Node(key={self.key}, balance={self.balance}, llink={llink}, rlink={rlink})\"\n        )\n\n\n# For convenience in description, the algorithm uses the notation LINK(a,P)\n# as a synonym for LLINK(P) if a = -1, and for RLINK(P) if a = +1.\n\n\ndef get_link(a, P):\n    if a == -1:\n        return P.llink\n    else:\n        return P.rlink\n\n\ndef set_link(a, P, val):\n    if a == -1:\n        P.llink = val\n    else:\n        P.rlink = val\n\n\nclass AvlTree:\n    def __init__(self):\n        self.head = Node()\n        self.size = 0\n        self.height = 0\n\n    @property\n    def root(self):\n        return self.head.rlink\n\n    def __str__(self):\n        stack = [(self.head, 0)]\n        s = f\"size = {self.size} height = {self.height}\\n\"\n        while len(stack) > 0:\n            node, depth = stack.pop()\n            s += (\"  \" * depth) + f\"KEY={node.key} B={node.balance}\\n\"\n            for child in [node.llink, node.rlink]:\n                if child is not None:\n                    stack.append((child, depth + 1))\n        return s\n\n    def ordered_keys(self):\n        \"\"\"\n        Return the keys in sorted order. 
This is done by an in-order\n        traversal of the nodes.\n        \"\"\"\n\n        def inorder(node):\n            if node is not None:\n                yield from inorder(node.llink)\n                yield node.key\n                yield from inorder(node.rlink)\n\n        yield from inorder(self.root)\n\n    def search(self, key):\n        P = self.root\n        while P is not None:\n            if key == P.key:\n                break\n            elif key < P.key:\n                P = P.llink\n            else:\n                P = P.rlink\n        return P\n\n    def __insert_empty(self, key):\n        self.head.rlink = Node()\n        self.head.rlink.key = key\n        self.size = 1\n        self.height = 1\n        return self.head.rlink\n\n    def __insert(self, K):\n        # A1. [Initialize.] Set T <- HEAD, S <- P <- RLINK(HEAD).\n        # (The pointer variable P will move down the tree; S will point\n        # to the place where rebalancing may be necessary, and\n        # T always points to the parent of S.)\n        T = self.head\n        S = P = self.head.rlink\n\n        # A2. [Compare.] If K < KEY(P), go to A3; if K > KEY(P), go to A4; and if\n        # K = KEY(P), the search terminates successfully.\n        while True:\n            if K == P.key:\n                return P\n            elif K < P.key:\n                # A3. [Move left.] Set Q <- LLINK(P). If Q = ^, set Q <= AVAIL and\n                # LLINK(P) <- Q and go to step A5. Otherwise if B(Q) != 0, set T <-\n                # P and S <- Q. Finally set P <- Q and return to step A2.\n                Q = P.llink\n                if Q is None:\n                    Q = Node()\n                    P.llink = Q\n                    break\n            # A4. [Move right.] Set Q <- RLINK(P). If Q = ^, set Q <= AVAIL and\n            # RLINK(P) <- Q and go to step A5. Otherwise if B(Q) != 0, set T <- P\n            # and S <- Q. 
Finally set P <- Q and return to step A2.\n            elif K > P.key:\n                Q = P.rlink\n                if Q is None:\n                    Q = Node()\n                    P.rlink = Q\n                    break\n            if Q.balance != 0:\n                T = P\n                S = Q\n            P = Q\n        # A5. [Insert.] Set KEY(Q) <- K, LLINK(Q) <- RLINK(Q) <- ^, and B(Q) <- 0.\n        Q.key = K\n        Q.llink = Q.rlink = None\n        Q.balance = 0\n\n        # A6. [Adjust balance factors.] If K < KEY(S) set a <- -1, otherwise set a\n        # <- +1. Then set R <- P <- LINK(a,S), and repeatedly do the following\n        # operations zero or more times until P = Q: If K < KEY(P) set B(P) <- -1\n        # and P <- LLINK(P); if K > KEY(P), set B(P) <- +1 and P <- RLINK(P).\n        if K < S.key:\n            a = -1\n        else:\n            a = 1\n        R = P = get_link(a, S)\n        while P != Q:\n            if K < P.key:\n                P.balance = -1\n                P = P.llink\n            elif K > P.key:\n                P.balance = 1\n                P = P.rlink\n\n        # A7. [Balancing act.] Several cases now arise:\n        #\n        #  i) If B(S) = 0, set B(S) <- a, LLINK(HEAD) <- LLINK(HEAD) + 1, and\n        #  terminate the algorithm.\n        #\n        if S.balance == 0:\n            S.balance = a\n            self.height += 1\n\n        # ii) If B(S) = -a, set B(S) <- 0 and terminate the algorithm.\n\n        elif S.balance == -a:\n            S.balance = 0\n\n        # iii) If B(S) = a, go to step A8 if B(R) = a, to A9 if B(R) = -a.\n        else:\n            if R.balance == a:\n                # A8. [Single rotation.] Set P <- R, LINK(a,S) <- LINK(-a,R),\n                # LINK(-a,R) <- S,B(S) <- B(R) <- 0. 
Go to A10.\n                P = R\n                set_link(a, S, get_link(-a, R))\n                set_link(-a, R, S)\n                S.balance = R.balance = 0\n            elif R.balance == -a:\n                # A9. [Double rotation.] Set P <- LINK(-a,R),\n                #  LINK(-a,R) <- LINK(a,P),LINK(a,P) <- R, LINK(a,S)\n                #  <- LINK(-a,P), LINK(-a,P) <- S. Now set\n                #\n                #               { (-a,0), if B(P) =  a;\n                #  (B(S),B(R))<-{ ( 0,0), if B(P) =  0;\n                #               { ( 0,a), if B(P) = -a;\n                #\n                #  and then set B(P) <- 0\n                P = get_link(-a, R)\n                set_link(-a, R, get_link(a, P))\n                set_link(a, P, R)\n                set_link(a, S, get_link(-a, P))\n                set_link(-a, P, S)\n                if P.balance == a:\n                    S.balance = -a\n                    R.balance = 0\n                elif P.balance == 0:\n                    S.balance = 0\n                    R.balance = 0\n                else:\n                    S.balance = 0\n                    R.balance = a\n                P.balance = 0\n\n            # A10. [Finishing touch.] 
If S = RLINK(T) then set RLINK(T) <- P,\n            # otherwise set LLINK(T) <- P.\n            if S == T.rlink:\n                T.rlink = P\n            else:\n                T.llink = P\n\n        return Q\n\n    def insert(self, key):\n        if self.size == 0:\n            return self.__insert_empty(key)\n        return self.__insert(key)\n\n\nclass TestAvlTree:\n    def verify_tree(self, tree):\n        \"\"\"\n        Check that the tree fits the AVL tree properties.\n        \"\"\"\n        # The height of a node is its maximum distance to a leaf\n        node_height = {}\n\n        def compute_height(node):\n            if node is None:\n                return 0\n            val = 1 + max([compute_height(node.llink), compute_height(node.rlink)])\n            node_height[node] = val\n            return val\n\n        compute_height(tree.head.rlink)\n        assert tree.height == max(node_height.values())\n        assert tree.height == node_height[tree.head.rlink]\n        # print(tree)\n\n        # The balance factor B is the height of the right subtree\n        # minus the height of the left subtree\n        stack = [tree.head.rlink]\n        while len(stack) > 0:\n            node = stack.pop()\n            # print(node, node_height[node])\n            assert node.balance in [-1, 0, 1]\n            lheight = None\n            if node.llink is not None:\n                lheight = node_height[node.llink]\n                stack.append(node.llink)\n            rheight = None\n            if node.rlink is not None:\n                rheight = node_height[node.rlink]\n                stack.append(node.rlink)\n            if lheight is not None and rheight is not None:\n                balance_factor = rheight - lheight\n                assert node.balance == balance_factor\n            elif lheight is None and rheight is None:\n                assert node_height[node] == 1\n                assert node.balance == 0\n            else:\n                # if one 
child is None, the height of this node must be 2\n                assert node_height[node] == 2\n                if lheight is None:\n                    assert node.balance == 1\n                else:\n                    assert node.balance == -1\n\n    def verify(self, keys):\n        tree = AvlTree()\n        key_set = set()\n        for k in keys:\n            node = tree.search(k)\n            if k in key_set:\n                assert node is not None\n                assert node.key == k\n            else:\n                assert node is None\n            node = tree.insert(k)\n            key_set.add(k)\n            self.verify_tree(tree)\n            assert tree.search(k) is node\n        for k in range(100):\n            node = tree.search(k)\n            if k in key_set:\n                assert node is not None\n                assert node.key == k\n            else:\n                assert node is None\n        ordered_keys = list(tree.ordered_keys())\n        assert ordered_keys == list(sorted(set(keys)))\n\n        # Implement the inorder on an existing list to mimic C algorithm\n        l2 = [None for _ in ordered_keys]\n\n        def visit(node, index, out):\n            if node is None:\n                return index\n            index = visit(node.llink, index, out)\n            out[index] = node.key\n            return visit(node.rlink, index + 1, out)\n\n        visit(tree.root, 0, l2)\n        assert l2 == ordered_keys\n\n    @pytest.mark.parametrize(\"n\", [0, 1, 10, 33, 64, 127, 133])\n    def test_sequential(self, n):\n        self.verify(range(n))\n\n    @pytest.mark.parametrize(\"n\", [0, 1, 10, 33, 64, 127, 133])\n    def test_sequential_reversed(self, n):\n        self.verify(range(n)[::-1])\n\n    @pytest.mark.parametrize(\"n\", [0, 1, 10, 33, 64, 127, 133])\n    def test_random_integers(self, n):\n        rng = np.random.RandomState(42)\n        values = rng.randint(-100, 100, size=n)\n        self.verify(values)\n\n    
@pytest.mark.parametrize(\"n\", [0, 1, 10, 33, 64, 127, 133])\n    def test_random_floats(self, n):\n        rng = np.random.RandomState(42)\n        values = rng.random(size=n)\n        self.verify(values)\n"
  },
  {
    "path": "python/tests/test_balance_metrics.py",
    "content": "# MIT License\n#\n# Copyright (c) 2024 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTests for tree balance/imbalance metrics.\n\"\"\"\n\nimport math\n\nimport numpy as np\nimport pytest\n\nimport tests\nimport tskit\nfrom tests.tsutil import get_example_tree_sequences\n\n# ↑ See https://github.com/tskit-dev/tskit/issues/1804 for when\n# we can remove this.\n\n\ndef sackin_index_definition(tree):\n    return sum(tree.depth(u) for u in tree.leaves())\n\n\ndef colless_index_definition(tree):\n    is_binary = all(\n        tree.num_children(u) == 2 for u in tree.nodes() if tree.is_internal(u)\n    )\n    if tree.num_roots != 1:\n        raise ValueError(\"Colless index not defined for multiroot trees\")\n    if not is_binary:\n        raise ValueError(\"Colless index not defined for nonbinary trees\")\n\n    return sum(\n        abs(\n            len(list(tree.leaves(tree.left_child(u))))\n            - 
len(list(tree.leaves(tree.right_child(u))))\n        )\n        for u in tree.nodes()\n        if tree.is_internal(u)\n    )\n\n\ndef b1_index_definition(tree):\n    return sum(\n        1 / max(tree.path_length(n, leaf) for leaf in tree.leaves(n))\n        for n in tree.nodes()\n        if tree.parent(n) != tskit.NULL and tree.is_internal(n)\n    )\n\n\ndef b2_index_definition(tree, base=10):\n    if tree.num_roots != 1:\n        raise ValueError(\"B2 index is only defined for trees with one root\")\n    proba = [\n        np.prod([1 / tree.num_children(u) for u in tree.ancestors(leaf)])\n        for leaf in tree.leaves()\n    ]\n    return -sum(p * math.log(p, base) for p in proba)\n\n\nclass TestDefinitions:\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_sackin(self, ts):\n        for tree in ts.trees():\n            assert tree.sackin_index() == sackin_index_definition(tree)\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_colless(self, ts):\n        for tree in ts.trees():\n            is_binary = all(\n                tree.num_children(u) == 2 for u in tree.nodes() if tree.is_internal(u)\n            )\n            if tree.num_roots != 1 or not is_binary:\n                with pytest.raises(tskit.LibraryError):\n                    tree.colless_index()\n                with pytest.raises(ValueError):\n                    colless_index_definition(tree)\n            else:\n                assert tree.colless_index() == colless_index_definition(tree)\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_b1(self, ts):\n        for tree in ts.trees():\n            assert tree.b1_index() == pytest.approx(b1_index_definition(tree))\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_b2(self, ts):\n        for tree in ts.trees():\n            if tree.num_roots != 1:\n                with pytest.raises(tskit.LibraryError, 
match=\"MULTIROOT\"):\n                    tree.b2_index()\n                with pytest.raises(ValueError):\n                    b2_index_definition(tree)\n            else:\n                assert tree.b2_index() == pytest.approx(b2_index_definition(tree))\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    @pytest.mark.parametrize(\"base\", [0.1, 1.1, 2, 10, math.e, np.array([3])[0]])\n    def test_b2_base(self, ts, base):\n        for tree in ts.trees():\n            if tree.num_roots != 1:\n                with pytest.raises(tskit.LibraryError, match=\"MULTIROOT\"):\n                    tree.b2_index(base)\n                with pytest.raises(ValueError):\n                    b2_index_definition(tree, base)\n            else:\n                assert tree.b2_index(base) == pytest.approx(\n                    b2_index_definition(tree, base)\n                )\n\n\nclass TestBalancedBinaryOdd:\n    # 2.00┊   4   ┊\n    #     ┊ ┏━┻┓  ┊\n    # 1.00┊ ┃  3  ┊\n    #     ┊ ┃ ┏┻┓ ┊\n    # 0.00┊ 0 1 2 ┊\n    #     0      1\n    @tests.cached_example\n    def tree(self):\n        return tskit.Tree.generate_balanced(3)\n\n    def test_sackin(self):\n        assert self.tree().sackin_index() == 5\n\n    def test_colless(self):\n        assert self.tree().colless_index() == 1\n\n    def test_b1(self):\n        assert self.tree().b1_index() == 1\n\n    def test_b2(self):\n        assert self.tree().b2_index(base=10) == pytest.approx(0.4515, rel=1e-3)\n\n\nclass TestBalancedBinaryEven:\n    # 2.00┊    6    ┊\n    #     ┊  ┏━┻━┓  ┊\n    # 1.00┊  4   5  ┊\n    #     ┊ ┏┻┓ ┏┻┓ ┊\n    # 0.00┊ 0 1 2 3 ┊\n    #     0         1\n    @tests.cached_example\n    def tree(self):\n        return tskit.Tree.generate_balanced(4)\n\n    def test_sackin(self):\n        assert self.tree().sackin_index() == 8\n\n    def test_colless(self):\n        assert self.tree().colless_index() == 0\n\n    def test_b1(self):\n        assert self.tree().b1_index() == 2\n\n    def 
test_b2(self):\n        assert self.tree().b2_index() == pytest.approx(0.602, rel=1e-3)\n\n    @pytest.mark.parametrize(\n        (\"base\", \"expected\"),\n        [\n            (2, 2),\n            (3, 1.2618595071429148),\n            (4, 1.0),\n            (5, 0.8613531161467861),\n            (10, 0.6020599913279623),\n            (100, 0.30102999566398114),\n            (1000000, 0.10034333188799373),\n            (2.718281828459045, 1.3862943611198906),\n        ],\n    )\n    def test_b2_base(self, base, expected):\n        assert self.tree().b2_index(base) == expected\n\n    @pytest.mark.parametrize(\"base\", [0, -0.001, -1, -1e-6, -1e200])\n    def test_b2_bad_base(self, base):\n        with pytest.raises(ValueError):\n            self.tree().b2_index(base=base)\n\n    def test_b2_base1(self):\n        with pytest.raises(ZeroDivisionError):\n            self.tree().b2_index(base=1)\n\n\nclass TestBalancedTernary:\n    # 2.00┊        12         ┊\n    #     ┊   ┏━━━━━╋━━━━━┓   ┊\n    # 1.00┊   9    10    11   ┊\n    #     ┊ ┏━╋━┓ ┏━╋━┓ ┏━╋━┓ ┊\n    # 0.00┊ 0 1 2 3 4 5 6 7 8 ┊\n    #     0                   1\n    @tests.cached_example\n    def tree(self):\n        return tskit.Tree.generate_balanced(9, arity=3)\n\n    def test_sackin(self):\n        assert self.tree().sackin_index() == 18\n\n    def test_colless(self):\n        with pytest.raises(tskit.LibraryError, match=\"UNDEFINED_NONBINARY\"):\n            self.tree().colless_index()\n\n    def test_b1(self):\n        assert self.tree().b1_index() == 3\n\n    def test_b2(self):\n        assert self.tree().b2_index() == pytest.approx(0.954, rel=1e-3)\n\n\nclass TestStarN10:\n    # 1.00┊         10          ┊\n    #     ┊ ┏━┳━┳━┳━┳┻┳━┳━┳━┳━┓ ┊\n    # 0.00┊ 0 1 2 3 4 5 6 7 8 9 ┊\n    #     0                     1\n    @tests.cached_example\n    def tree(self):\n        return tskit.Tree.generate_star(10)\n\n    def test_sackin(self):\n        assert self.tree().sackin_index() == 10\n\n    def 
test_colless(self):\n        with pytest.raises(tskit.LibraryError, match=\"UNDEFINED_NONBINARY\"):\n            self.tree().colless_index()\n\n    def test_b1(self):\n        assert self.tree().b1_index() == 0\n\n    def test_b2(self):\n        assert self.tree().b2_index() == pytest.approx(0.9999, rel=1e-3)\n\n\nclass TestCombN5:\n    # 4.00┊   8       ┊\n    #     ┊ ┏━┻━┓     ┊\n    # 3.00┊ ┃   7     ┊\n    #     ┊ ┃ ┏━┻━┓   ┊\n    # 2.00┊ ┃ ┃   6   ┊\n    #     ┊ ┃ ┃ ┏━┻┓  ┊\n    # 1.00┊ ┃ ┃ ┃  5  ┊\n    #     ┊ ┃ ┃ ┃ ┏┻┓ ┊\n    # 0.00┊ 0 1 2 3 4 ┊\n    #     0           1\n    @tests.cached_example\n    def tree(self):\n        return tskit.Tree.generate_comb(5)\n\n    def test_sackin(self):\n        assert self.tree().sackin_index() == 14\n\n    def test_colless(self):\n        assert self.tree().colless_index() == 6\n\n    def test_b1(self):\n        assert self.tree().b1_index() == pytest.approx(1.833, rel=1e-3)\n\n    def test_b2(self):\n        assert self.tree().b2_index() == pytest.approx(0.564, rel=1e-3)\n\n\nclass TestMultiRootBinary:\n    # 3.00┊            15     ┊\n    #     ┊          ┏━━┻━┓   ┊\n    # 2.00┊   11     ┃   14   ┊\n    #     ┊  ┏━┻━┓   ┃  ┏━┻┓  ┊\n    # 1.00┊  9  10  12  ┃ 13  ┊\n    #     ┊ ┏┻┓ ┏┻┓ ┏┻┓ ┃ ┏┻┓ ┊\n    # 0.00┊ 0 1 2 3 4 5 6 7 8 ┊\n    #     0                   1\n    @tests.cached_example\n    def tree(self):\n        tables = tskit.Tree.generate_balanced(9, arity=2).tree_sequence.dump_tables()\n        edges = tables.edges.copy()\n        tables.edges.clear()\n        for edge in edges:\n            if edge.parent != 16:\n                tables.edges.append(edge)\n        return tables.tree_sequence().first()\n\n    def test_sackin(self):\n        assert self.tree().sackin_index() == 20\n\n    def test_colless(self):\n        with pytest.raises(tskit.LibraryError, match=\"UNDEFINED_MULTIROOT\"):\n            self.tree().colless_index()\n\n    def test_b1(self):\n        assert self.tree().b1_index() == 4.5\n\n    def 
test_b2(self):\n        with pytest.raises(tskit.LibraryError, match=\"UNDEFINED_MULTIROOT\"):\n            self.tree().b2_index()\n\n\nclass TestEmpty:\n    @tests.cached_example\n    def tree(self):\n        tables = tskit.TableCollection(1)\n        return tables.tree_sequence().first()\n\n    def test_sackin(self):\n        assert self.tree().sackin_index() == 0\n\n    def test_colless(self):\n        with pytest.raises(tskit.LibraryError, match=\"UNDEFINED_MULTIROOT\"):\n            self.tree().colless_index()\n\n    def test_b1(self):\n        assert self.tree().b1_index() == 0\n\n    def test_b2(self):\n        with pytest.raises(tskit.LibraryError, match=\"UNDEFINED_MULTIROOT\"):\n            self.tree().b2_index()\n\n\nclass TestTreeInNullState:\n    @tests.cached_example\n    def tree(self):\n        tree = tskit.Tree.generate_comb(5)\n        tree.clear()\n        return tree\n\n    def test_sackin(self):\n        assert self.tree().sackin_index() == 0\n\n    def test_colless(self):\n        with pytest.raises(tskit.LibraryError, match=\"UNDEFINED_MULTIROOT\"):\n            self.tree().colless_index()\n\n    def test_b1(self):\n        assert self.tree().b1_index() == 0\n\n    def test_b2(self):\n        with pytest.raises(tskit.LibraryError, match=\"UNDEFINED_MULTIROOT\"):\n            self.tree().b2_index()\n\n\nclass TestAllRootsN5:\n    @tests.cached_example\n    def tree(self):\n        tables = tskit.TableCollection(1)\n        for _ in range(5):\n            tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        return tables.tree_sequence().first()\n\n    def test_sackin(self):\n        assert self.tree().sackin_index() == 0\n\n    def test_colless(self):\n        with pytest.raises(tskit.LibraryError, match=\"UNDEFINED_MULTIROOT\"):\n            self.tree().colless_index()\n\n    def test_b1(self):\n        assert self.tree().b1_index() == 0\n\n    def test_b2(self):\n        with pytest.raises(tskit.LibraryError, 
match=\"UNDEFINED_MULTIROOT\"):\n            self.tree().b2_index()\n"
  },
  {
    "path": "python/tests/test_cli.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2025 Tskit Developers\n# Copyright (c) 2017 University of Oxford\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTest cases for the command line interfaces to tskit\n\"\"\"\n\nimport io\nimport os\nimport sys\nimport tempfile\nimport unittest\nfrom unittest import mock\n\nimport msprime\nimport pytest\n\nimport tskit\nimport tskit.cli as cli\n\nfrom . 
import tsutil\n\n\nclass TestException(Exception):\n    __test__ = False\n    \"\"\"\n    Custom exception we can throw for testing.\n    \"\"\"\n\n\ndef capture_output(func, *args, **kwargs):\n    \"\"\"\n    Runs the specified function and arguments, and returns the\n    tuple (stdout, stderr) as strings.\n    \"\"\"\n    buffer_class = io.StringIO\n    stdout = sys.stdout\n    sys.stdout = buffer_class()\n    stderr = sys.stderr\n    sys.stderr = buffer_class()\n\n    try:\n        # Recent versions of MacOS seem to have issues with us calling signal\n        # during tests.\n        with mock.patch(\"signal.signal\"):\n            func(*args, **kwargs)\n        stdout_output = sys.stdout.getvalue()\n        stderr_output = sys.stderr.getvalue()\n    finally:\n        sys.stdout.close()\n        sys.stdout = stdout\n        sys.stderr.close()\n        sys.stderr = stderr\n    return stdout_output, stderr_output\n\n\nclass TestCli(unittest.TestCase):\n    \"\"\"\n    Superclass of tests for the CLI needing temp files.\n    \"\"\"\n\n    def setUp(self):\n        fd, self.temp_file = tempfile.mkstemp(prefix=\"tsk_cli_testcase_\")\n        os.close(fd)\n\n    def tearDown(self):\n        os.unlink(self.temp_file)\n\n\nclass TestTskitArgumentParser:\n    \"\"\"\n    Tests for the argument parsers in msp.\n    \"\"\"\n\n    def test_individuals_default_values(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"individuals\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence])\n        assert args.tree_sequence == tree_sequence\n        assert args.precision == 6\n\n    def test_individuals_short_args(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"individuals\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence, \"-p\", \"8\"])\n        assert args.tree_sequence == tree_sequence\n        assert args.precision == 8\n\n    def 
test_individuals_long_args(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"individuals\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence, \"--precision\", \"5\"])\n        assert args.tree_sequence == tree_sequence\n        assert args.precision == 5\n\n    def test_nodes_default_values(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"nodes\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence])\n        assert args.tree_sequence == tree_sequence\n        assert args.precision == 6\n\n    def test_nodes_short_args(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"nodes\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence, \"-p\", \"8\"])\n        assert args.tree_sequence == tree_sequence\n        assert args.precision == 8\n\n    def test_nodes_long_args(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"nodes\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence, \"--precision\", \"5\"])\n        assert args.tree_sequence == tree_sequence\n        assert args.precision == 5\n\n    def test_edges_default_values(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"edges\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence])\n        assert args.tree_sequence == tree_sequence\n        assert args.precision == 6\n\n    def test_edges_short_args(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"edges\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence, \"-p\", \"8\"])\n        assert args.tree_sequence == tree_sequence\n        assert args.precision == 8\n\n    def test_edges_long_args(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"edges\"\n        tree_sequence = \"test.trees\"\n        args = 
parser.parse_args([cmd, tree_sequence, \"--precision\", \"5\"])\n        assert args.tree_sequence == tree_sequence\n        assert args.precision == 5\n\n    def test_sites_default_values(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"sites\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence])\n        assert args.tree_sequence == tree_sequence\n        assert args.precision == 6\n\n    def test_sites_short_args(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"sites\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence, \"-p\", \"8\"])\n        assert args.tree_sequence == tree_sequence\n        assert args.precision == 8\n\n    def test_sites_long_args(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"sites\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence, \"--precision\", \"5\"])\n        assert args.tree_sequence == tree_sequence\n        assert args.precision == 5\n\n    def test_mutations_default_values(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"mutations\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence])\n        assert args.tree_sequence == tree_sequence\n        assert args.precision == 6\n\n    def test_mutations_short_args(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"mutations\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence, \"-p\", \"4\"])\n        assert args.tree_sequence == tree_sequence\n        assert args.precision == 4\n\n    def test_mutations_long_args(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"mutations\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence, \"--precision\", \"9\"])\n        assert args.tree_sequence == tree_sequence\n        assert args.precision == 
9\n\n    def test_provenances_default_values(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"provenances\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence])\n        assert args.tree_sequence == tree_sequence\n        assert not args.human\n\n    def test_provenances_short_args(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"provenances\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence, \"-H\"])\n        assert args.tree_sequence == tree_sequence\n        assert args.human\n\n    def test_provenances_long_args(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"provenances\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence, \"--human\"])\n        assert args.tree_sequence == tree_sequence\n        assert args.human\n\n    @pytest.mark.skip(reason=\"fasta output temporarily disabled\")\n    def test_fasta_default_values(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"fasta\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence])\n        assert args.tree_sequence == tree_sequence\n        assert args.wrap == 60\n\n    @pytest.mark.skip(reason=\"fasta output temporarily disabled\")\n    def test_fasta_short_args(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"fasta\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence, \"-w\", \"100\"])\n        assert args.tree_sequence == tree_sequence\n        assert args.wrap == 100\n\n    @pytest.mark.skip(reason=\"fasta output temporarily disabled\")\n    def test_fasta_long_args(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"fasta\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence, \"--wrap\", \"50\"])\n        assert args.tree_sequence == tree_sequence\n        assert 
args.wrap == 50\n\n    @pytest.mark.parametrize(\n        \"flags,expected\",\n        (\n            [[], None],\n            [[\"-P\", \"2\"], 2],\n            [[\"--ploidy\", \"5\"], 5],\n        ),\n    )\n    def test_vcf_ploidy(self, flags, expected):\n        parser = cli.get_tskit_parser()\n        cmd = \"vcf\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence, *flags])\n        assert args.tree_sequence == tree_sequence\n        assert args.ploidy == expected\n\n    @pytest.mark.parametrize(\n        \"flags,expected\",\n        (\n            [[], \"1\"],\n            [[\"-c\", \"chrX\"], \"chrX\"],\n            [[\"--contig-id\", \"chr20\"], \"chr20\"],\n        ),\n    )\n    def test_vcf_contig_id(self, flags, expected):\n        parser = cli.get_tskit_parser()\n        cmd = \"vcf\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence, *flags])\n        assert args.tree_sequence == tree_sequence\n        assert args.contig_id == expected\n\n    @pytest.mark.parametrize(\n        \"flags,expected\",\n        (\n            [[], False],\n            [[\"-0\"], True],\n            [[\"--allow-position-zero\"], True],\n        ),\n    )\n    def test_vcf_allow_position_zero(self, flags, expected):\n        parser = cli.get_tskit_parser()\n        cmd = \"vcf\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence, *flags])\n        assert args.tree_sequence == tree_sequence\n        assert args.allow_position_zero == expected\n\n    def test_info_default_values(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"info\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence])\n        assert args.tree_sequence == tree_sequence\n\n    def test_populations_default_values(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"populations\"\n        tree_sequence = 
\"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence])\n        assert args.tree_sequence == tree_sequence\n\n    def test_migrations_default_values(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"migrations\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence])\n        assert args.tree_sequence == tree_sequence\n\n    def test_migrations_short_args(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"migrations\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence, \"-p\", \"2\"])\n        assert args.tree_sequence == tree_sequence\n        assert args.precision == 2\n\n    def test_migrations_long_args(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"migrations\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence, \"--precision\", \"5\"])\n        assert args.tree_sequence == tree_sequence\n        assert args.precision == 5\n\n    def test_trees_default_values(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"trees\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence])\n        assert args.tree_sequence == tree_sequence\n        assert args.precision == 6\n        assert not args.draw\n\n    def test_trees_short_args(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"trees\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence, \"-d\", \"-p\", \"8\"])\n        assert args.tree_sequence == tree_sequence\n        assert args.precision == 8\n        assert args.draw\n\n    def test_trees_long_args(self):\n        parser = cli.get_tskit_parser()\n        cmd = \"trees\"\n        tree_sequence = \"test.trees\"\n        args = parser.parse_args([cmd, tree_sequence, \"--precision\", \"5\", \"--draw\"])\n        assert args.tree_sequence == tree_sequence\n        assert 
args.precision == 5\n        assert args.draw\n\n\nclass TestTskitConversionOutput(unittest.TestCase):\n    \"\"\"\n    Tests the output of tskit to ensure it's correct.\n    \"\"\"\n\n    @classmethod\n    def setUpClass(cls):\n        ts = msprime.simulate(\n            length=1,\n            recombination_rate=2,\n            mutation_rate=2,\n            random_seed=1,\n            migration_matrix=[[0, 1], [1, 0]],\n            population_configurations=[\n                msprime.PopulationConfiguration(5) for _ in range(2)\n            ],\n            record_migrations=True,\n        )\n        assert ts.num_migrations > 0\n        cls._tree_sequence = tsutil.insert_random_ploidy_individuals(\n            ts, samples_only=True\n        )\n        fd, cls._tree_sequence_file = tempfile.mkstemp(prefix=\"tsk_cli\", suffix=\".trees\")\n        os.close(fd)\n        cls._tree_sequence.dump(cls._tree_sequence_file)\n\n    @classmethod\n    def tearDownClass(cls):\n        os.unlink(cls._tree_sequence_file)\n\n    def verify_individuals(self, output_individuals, precision):\n        with tempfile.TemporaryFile(\"w+\") as f:\n            self._tree_sequence.dump_text(individuals=f, precision=precision)\n            f.seek(0)\n            output = f.read().splitlines()\n        assert output == output_individuals\n\n    def test_individuals(self):\n        cmd = \"individuals\"\n        precision = 8\n        stdout, stderr = capture_output(\n            cli.tskit_main, [cmd, self._tree_sequence_file, \"-p\", str(precision)]\n        )\n        assert len(stderr) == 0\n        output_individuals = stdout.splitlines()\n        self.verify_individuals(output_individuals, precision)\n\n    def verify_nodes(self, output_nodes, precision):\n        with tempfile.TemporaryFile(\"w+\") as f:\n            self._tree_sequence.dump_text(nodes=f, precision=precision)\n            f.seek(0)\n            output = f.read().splitlines()\n        assert output == output_nodes\n\n    
def test_nodes(self):\n        cmd = \"nodes\"\n        precision = 8\n        stdout, stderr = capture_output(\n            cli.tskit_main, [cmd, self._tree_sequence_file, \"-p\", str(precision)]\n        )\n        assert len(stderr) == 0\n        output_nodes = stdout.splitlines()\n        self.verify_nodes(output_nodes, precision)\n\n    def verify_edges(self, output_edges, precision):\n        with tempfile.TemporaryFile(\"w+\") as f:\n            self._tree_sequence.dump_text(edges=f, precision=precision)\n            f.seek(0)\n            output = f.read().splitlines()\n        assert output == output_edges\n\n    def test_edges(self):\n        cmd = \"edges\"\n        precision = 8\n        stdout, stderr = capture_output(\n            cli.tskit_main, [cmd, self._tree_sequence_file, \"-p\", str(precision)]\n        )\n        assert len(stderr) == 0\n        output_edges = stdout.splitlines()\n        self.verify_edges(output_edges, precision)\n\n    def verify_sites(self, output_sites, precision):\n        with tempfile.TemporaryFile(\"w+\") as f:\n            self._tree_sequence.dump_text(sites=f, precision=precision)\n            f.seek(0)\n            output = f.read().splitlines()\n        assert output == output_sites\n\n    def test_sites(self):\n        cmd = \"sites\"\n        precision = 8\n        stdout, stderr = capture_output(\n            cli.tskit_main, [cmd, self._tree_sequence_file, \"-p\", str(precision)]\n        )\n        assert len(stderr) == 0\n        output_sites = stdout.splitlines()\n        self.verify_sites(output_sites, precision)\n\n    def verify_mutations(self, output_mutations, precision):\n        with tempfile.TemporaryFile(\"w+\") as f:\n            self._tree_sequence.dump_text(mutations=f, precision=precision)\n            f.seek(0)\n            output = f.read().splitlines()\n        assert output == output_mutations\n\n    def test_mutations(self):\n        cmd = \"mutations\"\n        precision = 8\n        
stdout, stderr = capture_output(\n            cli.tskit_main, [cmd, self._tree_sequence_file, \"-p\", str(precision)]\n        )\n        assert len(stderr) == 0\n        output_mutations = stdout.splitlines()\n        self.verify_mutations(output_mutations, precision)\n\n    def verify_migrations(self, output_migrations, precision):\n        with tempfile.TemporaryFile(\"w+\") as f:\n            self._tree_sequence.dump_text(migrations=f, precision=precision)\n            f.seek(0)\n            output = f.read().splitlines()\n        assert output == output_migrations\n\n    def test_migrations(self):\n        cmd = \"migrations\"\n        precision = 4\n        stdout, stderr = capture_output(\n            cli.tskit_main, [cmd, self._tree_sequence_file, \"-p\", str(precision)]\n        )\n        assert len(stderr) == 0\n        output_migrations = stdout.splitlines()\n        self.verify_migrations(output_migrations, precision)\n\n    def verify_provenances(self, output_provenances):\n        with tempfile.TemporaryFile(\"w+\") as f:\n            self._tree_sequence.dump_text(provenances=f)\n            f.seek(0)\n            output = f.read().splitlines()\n        assert output == output_provenances\n\n    def test_provenances(self):\n        cmd = \"provenances\"\n        stdout, stderr = capture_output(cli.tskit_main, [cmd, self._tree_sequence_file])\n        assert len(stderr) == 0\n        output_provenances = stdout.splitlines()\n        self.verify_provenances(output_provenances)\n\n    def test_provenances_human(self):\n        cmd = \"provenances\"\n        stdout, stderr = capture_output(\n            cli.tskit_main, [cmd, \"-H\", self._tree_sequence_file]\n        )\n        assert len(stderr) == 0\n        output_provenances = stdout.splitlines()\n        # TODO Check the actual output here.\n        assert len(output_provenances) > 0\n\n    def verify_fasta(self, output_fasta):\n        with tempfile.TemporaryFile(\"w+\") as f:\n            
self._tree_sequence.write_fasta(f)\n            f.seek(0)\n            fasta = f.read()\n        assert output_fasta == fasta\n\n    @pytest.mark.skip(reason=\"fasta output temporarily disabled\")\n    def test_fasta(self):\n        cmd = \"fasta\"\n        stdout, stderr = capture_output(cli.tskit_main, [cmd, self._tree_sequence_file])\n        assert len(stderr) == 0\n        self.verify_fasta(stdout)\n\n    def verify_vcf(self, output_vcf):\n        with tempfile.TemporaryFile(\"w+\") as f:\n            self._tree_sequence.write_vcf(f, allow_position_zero=True)\n            f.seek(0)\n            vcf = f.read()\n        assert output_vcf == vcf\n\n    def test_vcf(self):\n        cmd = \"vcf\"\n        stdout, stderr = capture_output(\n            cli.tskit_main, [cmd, \"-0\", self._tree_sequence_file]\n        )\n        assert len(stderr) == 0\n        self.verify_vcf(stdout)\n\n    def verify_info(self, ts, output_info):\n        assert str(ts) == output_info\n\n    def test_info(self):\n        cmd = \"info\"\n        stdout, stderr = capture_output(cli.tskit_main, [cmd, self._tree_sequence_file])\n        assert len(stderr) == 0\n        ts = tskit.load(self._tree_sequence_file)\n        self.verify_info(ts, stdout[:-1])\n\n    def test_trees_no_draw(self):\n        cmd = \"trees\"\n        stdout, stderr = capture_output(cli.tskit_main, [cmd, self._tree_sequence_file])\n        assert len(stderr) == 0\n        ts = tskit.load(self._tree_sequence_file)\n        assert len(stdout.splitlines()) == 3 * ts.num_trees\n\n    def test_trees_draw(self):\n        cmd = \"trees\"\n        stdout, stderr = capture_output(\n            cli.tskit_main, [cmd, \"-d\", self._tree_sequence_file]\n        )\n        assert len(stderr) == 0\n        ts = tskit.load(self._tree_sequence_file)\n        assert len(stdout.splitlines()) > 3 * ts.num_trees\n\n\nclass TestVCFZeroPosition:\n    \"\"\"\n    Tests that we can write VCF files with position 0.\n    \"\"\"\n\n    def 
test_zero_position(self, tmp_path):\n        ts = msprime.simulate(10, mutation_rate=1, random_seed=1)\n        ts.dump(tmp_path / \"test.trees\")\n        with pytest.raises(ValueError):\n            capture_output(cli.tskit_main, [\"vcf\", str(tmp_path / \"test.trees\")])\n        stdout, stderr = capture_output(\n            cli.tskit_main, [\"vcf\", \"-0\", str(tmp_path / \"test.trees\")]\n        )\n        assert len(stderr) == 0\n\n\nclass TestBadFile:\n    \"\"\"\n    Tests that we deal with IO errors appropriately.\n    \"\"\"\n\n    def verify(self, command):\n        with mock.patch(\"sys.exit\", side_effect=TestException) as mocked_exit:\n            with pytest.raises(TestException):\n                capture_output(cli.tskit_main, [\"info\", \"/no/such/file\"])\n            mocked_exit.assert_called_once_with(\n                \"Load error: [Errno 2] No such file or directory: '/no/such/file'\"\n            )\n\n    def test_info(self):\n        self.verify(\"info\")\n\n    def test_fasta(self):\n        self.verify(\"fasta\")\n\n    def test_vcf(self):\n        self.verify(\"vcf\")\n\n    def test_nodes(self):\n        self.verify(\"nodes\")\n\n    def test_edges(self):\n        self.verify(\"edges\")\n\n    def test_sites(self):\n        self.verify(\"sites\")\n\n    def test_mutations(self):\n        self.verify(\"mutations\")\n\n    def test_migrations(self):\n        self.verify(\"migrations\")\n\n    def test_provenances(self):\n        self.verify(\"provenances\")\n"
  },
  {
    "path": "python/tests/test_coalrate.py",
    "content": "# MIT License\n#\n# Copyright (c) 2024 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTest cases for coalescence rate calculation in tskit.\n\"\"\"\n\nimport itertools\n\nimport msprime\nimport numpy as np\nimport pytest\n\nimport tests\nimport tskit\nfrom tests import tsutil\n\n\ndef _single_tree_example(L, T):\n    \"\"\"\n    For testing numerical issues with sequence scaling\n    \"\"\"\n    tables = tskit.TableCollection(sequence_length=L)\n    tables.nodes.set_columns(\n        time=np.array([0.0] * 8 + [0.1, 0.2, 0.2, 0.6, 0.8, 1.0]) * T,\n        flags=np.repeat([1, 0], [8, 6]).astype(\"uint32\"),\n    )\n    tables.edges.set_columns(\n        left=np.repeat([0], 13),\n        right=np.repeat([L], 13),\n        parent=np.array([8, 8, 9, 9, 10, 10, 11, 11, 11, 12, 12, 13, 13], dtype=\"int32\"),\n        child=np.array([1, 2, 3, 8, 0, 7, 4, 5, 10, 6, 11, 9, 12], dtype=\"int32\"),\n    )\n    
tables.populations.add_row()\n    tables.populations.add_row()\n    tables.nodes.population = np.array(\n        [0, 1, 1, 1, 0, 0, 1, 0] + [tskit.NULL] * 6, dtype=\"int32\"\n    )\n    return tables.tree_sequence()\n\n\n# --- prototype --- #\n\n\ndef _nonmissing_window_span(ts, windows):\n    num_windows = windows.size - 1\n    sequence_length = ts.sequence_length\n    missing_span = np.zeros(num_windows)\n    missing = 0.0\n    num_edges = 0\n    w = 0\n    position = tsutil.TreeIndexes(ts)\n    while position.interval.right < sequence_length:\n        position.next()\n        left, right = position.interval.left, position.interval.right\n        out_range, in_range = position.out_range, position.in_range\n        for _ in range(out_range.start, out_range.stop):  # edges_out\n            num_edges -= 1\n        for _ in range(in_range.start, in_range.stop):  # edges_in\n            num_edges += 1\n        if num_edges == 0:\n            missing += right - left\n        while w < num_windows and windows[w + 1] <= right:  # flush window\n            missing_span[w] = missing\n            missing = 0.0\n            if num_edges == 0:\n                x = max(0, right - windows[w + 1])\n                missing_span[w] -= x\n                missing += x\n            w += 1\n    window_span = np.diff(windows) - missing_span\n    return window_span\n\n\ndef _pair_coalescence_weights(\n    coalescing_pairs,\n    nodes_time,\n):\n    return coalescing_pairs\n\n\ndef _pair_coalescence_rates(\n    coalescing_pairs,\n    nodes_time,\n    time_windows,\n):\n    \"\"\"\n    Estimate pair coalescence rate from empirical CDF. `coalescing_pairs` and\n    `nodes_time` are assumed to have been aggregated into time bins (by\n    summation/averaging respectively). 
The terminal bin(s) use a different\n    estimator (the mean time since the start of the first terminal bin).\n    \"\"\"\n    assert time_windows.size - 1 == coalescing_pairs.size\n    assert time_windows.size - 1 == nodes_time.size\n    assert np.all(np.diff(time_windows) > 0)\n    assert np.isfinite(time_windows[0])\n    assert time_windows[-1] == np.inf\n    num_time_windows = time_windows.size - 1\n    coalescence_rate = np.full(num_time_windows, np.nan)\n    coalesced = 0.0\n    for j in np.arange(num_time_windows, 0, -1):  # find last window containing nodes\n        if not np.isnan(nodes_time[j - 1]):\n            break\n    for i in range(j):\n        a, b = time_windows[i : i + 2]\n        assert 0.0 <= coalescing_pairs[i] <= 1.0\n        if i + 1 == j:\n            coalescence_rate[i] = 1 / (nodes_time[i] - a)\n            break\n        else:\n            rate = -np.log(1 - coalescing_pairs[i] / (1 - coalesced)) / (b - a)\n            assert rate >= 0\n            coalescence_rate[i] = abs(rate)\n        coalesced += coalescing_pairs[i]\n    return coalescence_rate\n\n\ndef _pair_coalescence_quantiles(\n    coalescing_pairs,\n    nodes_time,\n    quantiles,\n):\n    \"\"\"\n    Estimate `quantiles` of the distribution of `nodes_time` weighted by\n    `coalescing_pairs`, by inverting the empirical CDF. 
Nodes are assumed\n    to be sorted in ascending time order.\n    \"\"\"\n    assert nodes_time.size == coalescing_pairs.size\n    assert np.all(np.diff(quantiles) > 0)\n    assert np.all(np.logical_and(0 <= quantiles, quantiles <= 1))\n    num_nodes = coalescing_pairs.size\n    num_quantiles = quantiles.size\n    output = np.full(num_quantiles, np.nan)\n    i, j = 0, 0\n    coalesced = 0.0\n    time = -np.inf\n    while i < num_nodes:\n        if coalescing_pairs[i] > 0:\n            coalesced += coalescing_pairs[i]\n            assert nodes_time[i] > time\n            time = nodes_time[i]\n            while j < num_quantiles and quantiles[j] <= coalesced:\n                output[j] = time\n                j += 1\n        i += 1\n    if quantiles[-1] == 1.0:\n        output[-1] = time\n    return output\n\n\ndef _pair_coalescence_stat(\n    ts,\n    summary_func,\n    summary_func_dim,\n    summary_func_kwargs,\n    sample_sets=None,\n    indexes=None,\n    windows=None,\n    time_windows=None,\n    span_normalise=True,\n    pair_normalise=False,\n):\n    \"\"\"\n    Apply `summary_func(node_weights, node_times, node_order, **summary_func_kwargs)` to\n    the empirical distribution of pair coalescence times for each index / window.\n    \"\"\"\n\n    if sample_sets is None:\n        sample_sets = [list(ts.samples())]\n    for s in sample_sets:\n        if len(s) == 0:\n            raise ValueError(\"Sample sets must contain at least one element\")\n        if not (min(s) >= 0 and max(s) < ts.num_nodes):\n            raise ValueError(\"Sample is out of bounds\")\n\n    drop_middle_dimension = False\n    if indexes is None:\n        drop_middle_dimension = True\n        if len(sample_sets) == 1:\n            indexes = [(0, 0)]\n        elif len(sample_sets) == 2:\n            indexes = [(0, 1)]\n        else:\n            raise ValueError(\n                \"Must specify indexes if there are more than two sample sets\"\n            )\n    for i in indexes:\n        
if not len(i) == 2:\n            raise ValueError(\"Sample set indexes must be length two\")\n        if not (min(i) >= 0 and max(i) < len(sample_sets)):\n            raise ValueError(\"Sample set index is out of bounds\")\n\n    drop_left_dimension = False\n    if windows is None:\n        drop_left_dimension = True\n        windows = np.array([0.0, ts.sequence_length])\n    if not (isinstance(windows, np.ndarray) and windows.size > 1):\n        raise ValueError(\"Windows must be an array of breakpoints\")\n    if not (windows[0] == 0.0 and windows[-1] == ts.sequence_length):\n        raise ValueError(\"First and last window breaks must be sequence boundary\")\n    if not np.all(np.diff(windows) > 0):\n        raise ValueError(\"Window breaks must be strictly increasing\")\n\n    if isinstance(time_windows, str) and time_windows == \"nodes\":\n        nodes_map = np.arange(ts.num_nodes)\n        num_time_windows = ts.num_nodes\n    else:\n        if not (isinstance(time_windows, np.ndarray) and time_windows.size > 1):\n            raise ValueError(\"Time windows must be an array of breakpoints\")\n        if not np.all(np.diff(time_windows) > 0):\n            raise ValueError(\"Time windows must be strictly increasing\")\n        if ts.time_units == tskit.TIME_UNITS_UNCALIBRATED:\n            raise ValueError(\"Time windows require calibrated node times\")\n        nodes_map = np.searchsorted(time_windows, ts.nodes_time, side=\"right\") - 1\n        nodes_oob = np.logical_or(nodes_map < 0, nodes_map >= time_windows.size)\n        nodes_map[nodes_oob] = tskit.NULL\n        num_time_windows = time_windows.size - 1\n\n    num_nodes = ts.num_nodes\n    num_windows = windows.size - 1\n    num_sample_sets = len(sample_sets)\n    num_indexes = len(indexes)\n\n    edges_child = ts.edges_child\n    edges_parent = ts.edges_parent\n    nodes_time = ts.nodes_time\n    sequence_length = ts.sequence_length\n    output_size = summary_func_dim\n    samples = 
np.concatenate(sample_sets)\n\n    nodes_parent = np.full(num_nodes, tskit.NULL)\n    nodes_sample = np.zeros((num_nodes, num_sample_sets))\n    nodes_weight = np.zeros((num_time_windows, num_indexes))\n    nodes_values = np.zeros((num_time_windows, num_indexes))\n    coalescing_pairs = np.zeros((num_time_windows, num_indexes))\n    coalescence_time = np.zeros((num_time_windows, num_indexes))\n    output = np.zeros((num_windows, output_size, num_indexes))\n    visited = np.full(num_nodes, False)\n\n    total_pairs = np.zeros(num_indexes)\n    sizes = [len(s) for s in sample_sets]\n    for i, (j, k) in enumerate(indexes):\n        if j == k:\n            total_pairs[i] = sizes[j] * (sizes[k] - 1) / 2\n        else:\n            total_pairs[i] = sizes[j] * sizes[k]\n\n    if span_normalise:\n        window_span = _nonmissing_window_span(ts, windows)\n\n    for i, s in enumerate(sample_sets):  # initialize\n        nodes_sample[s, i] = 1\n    sample_counts = nodes_sample.copy()\n\n    w = 0\n    position = tsutil.TreeIndexes(ts)\n    while position.interval.right < sequence_length:\n        position.next()\n        left, right = position.interval.left, position.interval.right\n        out_range, in_range = position.out_range, position.in_range\n        remainder = sequence_length - left\n\n        for b in range(out_range.start, out_range.stop):  # edges_out\n            e = out_range.order[b]\n            p = edges_parent[e]\n            c = edges_child[e]\n            nodes_parent[c] = tskit.NULL\n            inside = sample_counts[c]\n            while p != tskit.NULL:\n                u = nodes_map[p]\n                t = nodes_time[p]\n                if u != tskit.NULL:\n                    outside = sample_counts[p] - sample_counts[c] - nodes_sample[p]\n                    for i, (j, k) in enumerate(indexes):\n                        weight = inside[j] * outside[k]\n                        if j != k:\n                            weight += inside[k] * 
outside[j]\n                        coalescing_pairs[u, i] -= weight * remainder\n                        coalescence_time[u, i] -= weight * remainder * t\n                c, p = p, nodes_parent[p]\n            p = edges_parent[e]\n            while p != tskit.NULL:\n                sample_counts[p] -= inside\n                p = nodes_parent[p]\n\n        for b in range(in_range.start, in_range.stop):  # edges_in\n            e = in_range.order[b]\n            p = edges_parent[e]\n            c = edges_child[e]\n            nodes_parent[c] = p\n            inside = sample_counts[c]\n            while p != tskit.NULL:\n                sample_counts[p] += inside\n                p = nodes_parent[p]\n            p = edges_parent[e]\n            while p != tskit.NULL:\n                u = nodes_map[p]\n                t = nodes_time[p]\n                if u != tskit.NULL:\n                    outside = sample_counts[p] - sample_counts[c] - nodes_sample[p]\n                    for i, (j, k) in enumerate(indexes):\n                        weight = inside[j] * outside[k]\n                        if j != k:\n                            weight += inside[k] * outside[j]\n                        coalescing_pairs[u, i] += weight * remainder\n                        coalescence_time[u, i] += weight * remainder * t\n                c, p = p, nodes_parent[p]\n\n        while w < num_windows and windows[w + 1] <= right:  # flush window\n            remainder = sequence_length - windows[w + 1]\n            nodes_weight[:] = coalescing_pairs[:]\n            nodes_values[:] = coalescence_time[:]\n            coalescing_pairs[:] = 0.0\n            coalescence_time[:] = 0.0\n            for c in samples:\n                p = nodes_parent[c]\n                while not visited[c] and p != tskit.NULL:\n                    u = nodes_map[p]\n                    t = nodes_time[p]\n                    if u != tskit.NULL:\n                        inside = sample_counts[c]\n                    
    outside = sample_counts[p] - sample_counts[c] - nodes_sample[p]\n                        for i, (j, k) in enumerate(indexes):\n                            weight = inside[j] * outside[k]\n                            if j != k:\n                                weight += inside[k] * outside[j]\n                            x = weight * remainder / 2\n                            nodes_weight[u, i] -= x\n                            nodes_values[u, i] -= t * x\n                            coalescing_pairs[u, i] += x\n                            coalescence_time[u, i] += t * x\n                    visited[c] = True\n                    p, c = nodes_parent[p], p\n            for c in samples:\n                p = nodes_parent[c]\n                while visited[c] and p != tskit.NULL:\n                    visited[c] = False\n                    p, c = nodes_parent[p], p\n            for i in range(num_indexes):  # normalise values\n                nonzero = nodes_weight[:, i] > 0\n                nodes_values[nonzero, i] /= nodes_weight[nonzero, i]\n                nodes_values[~nonzero, i] = np.nan\n            if span_normalise:\n                nodes_weight /= window_span[w]\n            if pair_normalise:\n                nodes_weight /= total_pairs[np.newaxis, :]\n            for i in range(num_indexes):  # apply function to empirical distribution\n                output[w, :, i] = summary_func(\n                    nodes_weight[:, i],\n                    nodes_values[:, i],\n                    **summary_func_kwargs,\n                )\n            w += 1\n\n    output = output.transpose(0, 2, 1)\n    if drop_middle_dimension:\n        output = output.squeeze(1)\n    if drop_left_dimension:\n        output = output.squeeze(0)\n\n    return output\n\n\ndef proto_pair_coalescence_counts(\n    ts,\n    sample_sets=None,\n    indexes=None,\n    windows=None,\n    span_normalise=True,\n    pair_normalise=False,\n    time_windows=\"nodes\",\n):\n    \"\"\"\n    
Prototype for ts.pair_coalescence_counts.\n\n    Calculate the number of coalescing sample pairs per node, summed over\n    trees and weighted by tree span.\n\n    The number of coalescing pairs may be calculated within or between the\n    non-overlapping lists of samples contained in `sample_sets`. In the\n    latter case, pairs are counted if they have exactly one member in each\n    of two sample sets. If `sample_sets` is omitted, a single group\n    containing all samples is assumed.\n\n    The argument `indexes` may be used to specify which pairs of sample\n    sets to compute the statistic between, and in what order. If\n    `indexes=None`, then `indexes` is assumed to equal `[(0,0)]` for a\n    single sample set and `[(0,1)]` for two sample sets. For more than two\n    sample sets, `indexes` must be explicitly passed.\n\n    The argument `time_windows` may be used to count coalescence\n    events within time intervals (if an array of breakpoints is supplied)\n    rather than for individual nodes (the default).\n\n    The output array has dimension `(windows, indexes, nodes)` with\n    dimensions dropped when the corresponding argument is set to None.\n\n    :param list sample_sets: A list of lists of Node IDs, specifying the\n        groups of nodes to compute the statistic with, or None.\n    :param list indexes: A list of 2-tuples, or None.\n    :param list windows: An increasing list of breakpoints between the\n        sequence windows to compute the statistic in, or None.\n    :param bool span_normalise: Whether to divide the result by the span of\n        the window (defaults to True).\n    :param bool pair_normalise: Whether to divide the result by the total\n        number of pairs for a given index (defaults to False).\n    :param time_windows: Either a string \"nodes\" or an increasing\n        list of breakpoints between time intervals.\n    \"\"\"\n\n    if isinstance(time_windows, str) and time_windows == \"nodes\":\n        summary_func_dim = 
ts.num_nodes\n    else:\n        if not (isinstance(time_windows, np.ndarray) and time_windows.size > 1):\n            raise ValueError(\"Time windows must be an array of breakpoints\")\n        if not np.all(np.diff(time_windows) > 0):\n            raise ValueError(\"Time windows must be strictly increasing\")\n        if ts.time_units == tskit.TIME_UNITS_UNCALIBRATED:\n            raise ValueError(\"Time windows require calibrated node times\")\n        summary_func_dim = time_windows.size - 1\n\n    summary_func = _pair_coalescence_weights\n    summary_func_kwargs = {}\n\n    return _pair_coalescence_stat(\n        ts,\n        summary_func=summary_func,\n        summary_func_dim=summary_func_dim,\n        summary_func_kwargs=summary_func_kwargs,\n        sample_sets=sample_sets,\n        indexes=indexes,\n        windows=windows,\n        time_windows=time_windows,\n        span_normalise=span_normalise,\n        pair_normalise=pair_normalise,\n    )\n\n\ndef proto_pair_coalescence_rates(\n    ts,\n    time_windows,\n    sample_sets=None,\n    indexes=None,\n    windows=None,\n):\n    r\"\"\"\n    Prototype for ts.pair_coalescence_rates.\n\n    Estimate the rate at which pairs of samples coalesce within time windows,\n    from the empirical CDF of pair coalescence times.  
Assuming that pair\n    coalescence events follow a nonhomogeneous Poisson process, the empirical\n    rate for a time window :math:`[a, b)` where `ecdf(b) < 1` is,\n\n    .. math::\n\n        log(1 - \\frac{ecdf(b) - ecdf(a)}{1 - ecdf(a)}) / (a - b)\n\n    If the last coalescence event is within `[a, b)` so that `ecdf(b) = 1`, then\n    an estimate of the empirical rate is\n\n    .. math::\n\n        (\\mathbb{E}[t | t > a] - a)^{-1}\n\n    where :math:`\\mathbb{E}[t | t > a]` is the average pair coalescence time\n    conditional on coalescence after the start of the last epoch.\n\n    The first breakpoint in `time_windows` must start at the age of the\n    samples, and the last must end at infinity.\n\n    Pair coalescence rates may be calculated within or between the\n    non-overlapping lists of samples contained in `sample_sets`. In the\n    latter case, pairs are counted if they have exactly one member in each\n    of two sample sets. If `sample_sets` is omitted, a single group\n    containing all samples is assumed.\n\n    The argument `indexes` may be used to specify which pairs of sample\n    sets to compute the statistic between, and in what order. If\n    `indexes=None`, then `indexes` is assumed to equal `[(0,0)]` for a\n    single sample set and `[(0,1)]` for two sample sets. 
For more than two\n    sample sets, `indexes` must be explicitly passed.\n\n    The output array has dimension `(windows, indexes, time_windows)` with\n    dimensions dropped when the corresponding argument is set to None.\n\n    :param time_windows: An increasing list of breakpoints between time\n        intervals, starting at the age of the samples and ending at\n        infinity.\n    :param list sample_sets: A list of lists of Node IDs, specifying the\n        groups of nodes to compute the statistic with, or None.\n    :param list indexes: A list of 2-tuples, or None.\n    :param list windows: An increasing list of breakpoints between the\n        sequence windows to compute the statistic in, or None.\n    \"\"\"\n    # TODO^^^\n\n    if not (isinstance(time_windows, np.ndarray) and time_windows.size > 1):\n        raise ValueError(\"Time windows must be an array of breakpoints\")\n    if not np.all(np.diff(time_windows) > 0):\n        raise ValueError(\"Time windows must be strictly increasing\")\n    if ts.time_units == tskit.TIME_UNITS_UNCALIBRATED:\n        raise ValueError(\"Time windows require calibrated node times\")\n\n    summary_func = _pair_coalescence_rates\n    summary_func_dim = time_windows.size - 1\n    summary_func_kwargs = {\"time_windows\": time_windows}\n\n    return _pair_coalescence_stat(\n        ts,\n        summary_func=summary_func,\n        summary_func_dim=summary_func_dim,\n        summary_func_kwargs=summary_func_kwargs,\n        sample_sets=sample_sets,\n        indexes=indexes,\n        windows=windows,\n        time_windows=time_windows,\n        span_normalise=True,\n        pair_normalise=True,\n    )\n\n\ndef proto_pair_coalescence_quantiles(\n    ts,\n    quantiles,\n    sample_sets=None,\n    indexes=None,\n    windows=None,\n):\n    \"\"\"\n    Prototype for ts.pair_coalescence_quantiles.\n\n    Estimate quantiles of pair coalescence times by inverting the empirical\n    CDF. 
This is equivalent to the \"inverted_cdf\" method of `numpy.quantile`\n    applied to node times, with weights proportional to the number of\n    coalescing pairs per node (averaged over trees). The weights are calculated\n    using `pair_coalescence_counts`.\n\n    Quantiles of pair coalescence times may be calculated within or\n    between the non-overlapping lists of samples contained in `sample_sets`. In\n    the latter case, pairs are counted if they have exactly one member in each\n    of two sample sets. If `sample_sets` is omitted, a single group containing\n    all samples is assumed.\n\n    The argument `indexes` may be used to specify which pairs of sample sets to\n    compute coalescences between, and in what order. If `indexes=None`, then\n    `indexes` is assumed to equal `[(0,0)]` for a single sample set and\n    `[(0,1)]` for two sample sets. For more than two sample sets, `indexes`\n    must be explicitly passed.\n\n    The output array has dimension `(windows, indexes, quantiles)` with\n    dimensions dropped when the corresponding argument is set to None.\n\n    :param quantiles: A list of breakpoints between [0, 1].\n    :param list sample_sets: A list of lists of Node IDs, specifying the\n        groups of nodes to compute the statistic with, or None.\n    :param list indexes: A list of 2-tuples, or None.\n    :param list windows: An increasing list of breakpoints between the\n        sequence windows to compute the statistic in, or None.\n    \"\"\"\n\n    if not isinstance(quantiles, np.ndarray):\n        raise ValueError(\"Quantiles must be an array of breakpoints\")\n    if not np.all(np.logical_and(quantiles >= 0, quantiles <= 1.0)):\n        raise ValueError(\"Quantiles must be in [0, 1]\")\n\n    summary_func = _pair_coalescence_quantiles\n    summary_func_dim = quantiles.size\n    summary_func_kwargs = {\"quantiles\": quantiles}\n    time_windows = np.append(\n        np.unique(ts.nodes_time), np.inf\n    )  # sort nodes in time 
order\n\n    return _pair_coalescence_stat(\n        ts,\n        summary_func=summary_func,\n        summary_func_dim=summary_func_dim,\n        summary_func_kwargs=summary_func_kwargs,\n        sample_sets=sample_sets,\n        indexes=indexes,\n        windows=windows,\n        time_windows=time_windows,\n        span_normalise=True,\n        pair_normalise=True,\n    )\n\n\n# --- testing --- #\n\n\ndef naive_pair_coalescence_counts(ts, sample_set_0, sample_set_1):\n    \"\"\"\n    Naive implementation of ts.pair_coalescence_counts.\n\n    Count pairwise coalescences tree by tree, by enumerating nodes in each\n    tree. For a binary node, the number of pairs of samples that coalesce in a\n    given node is the product of the number of samples subtended by the left\n    and right child. For higher arities, the count is summed over all possible\n    pairs of children.\n    \"\"\"\n    output = np.zeros(ts.num_nodes)\n    for t in ts.trees():\n        sample_counts = np.zeros((ts.num_nodes, 2), dtype=np.int32)\n        pair_counts = np.zeros(ts.num_nodes)\n        for p in t.postorder():\n            samples = list(t.samples(p))\n            sample_counts[p, 0] = np.intersect1d(samples, sample_set_0).size\n            sample_counts[p, 1] = np.intersect1d(samples, sample_set_1).size\n            for i, j in itertools.combinations(t.children(p), 2):\n                pair_counts[p] += sample_counts[i, 0] * sample_counts[j, 1]\n                pair_counts[p] += sample_counts[i, 1] * sample_counts[j, 0]\n        output += pair_counts * t.span\n    return output\n\n\ndef _numpy_weighted_quantile(values, weights, quantiles):\n    \"\"\"\n    Requires numpy 2.0. 
Enforcing `weights > 0` avoids odd behaviour where\n    numpy assigns the 0th quantile to the sample minimum, even if this minimum\n    has zero weight.\n    \"\"\"\n    assert np.all(weights >= 0.0)\n    return np.quantile(\n        values[weights > 0],\n        quantiles,\n        weights=weights[weights > 0] / weights.sum(),\n        method=\"inverted_cdf\",\n    )\n\n\ndef _numpy_hazard_rate(values, weights, breaks):\n    \"\"\"\n    Estimate hazard rate from empirical CDF over intervals\n    \"\"\"\n    assert np.all(weights >= 0)\n    assert np.all(np.diff(breaks) >= 0)\n    assert np.isfinite(breaks[0])  # should equal sample time\n    assert ~np.isfinite(breaks[-1])\n    assert np.sum(weights) < 1.0 or np.isclose(np.sum(weights), 1.0)\n    values = values[weights > 0]\n    weights = weights[weights > 0]\n    assert breaks[0] < np.min(values)\n    max_value = np.max(values)\n    rates = np.full(breaks.size - 1, np.nan)\n    for i, (a, b) in enumerate(zip(breaks[:-1], breaks[1:])):\n        if a < max_value <= b:  # terminal window\n            keep = values >= a\n            mean = np.sum(values[keep] * weights[keep]) / np.sum(weights[keep])\n            rates[i] = 1.0 / (mean - a)\n            break\n        else:\n            wa = np.sum(weights[values < a])\n            wb = np.sum(weights[values < b])\n            rates[i] = np.log(1 - (wb - wa) / (1 - wa)) / (b - a)\n            assert rates[i] <= 0.0\n            rates[i] = abs(rates[i])\n    return rates\n\n\ndef convert_to_nonsuccinct(ts):\n    \"\"\"\n    Give the edges and internal nodes in each tree distinct IDs\n    \"\"\"\n    tables = tskit.TableCollection(sequence_length=ts.sequence_length)\n    for _ in range(ts.num_populations):\n        tables.populations.add_row()\n    nodes_count = 0\n    for n in ts.samples():\n        tables.nodes.add_row(\n            time=ts.nodes_time[n],\n            flags=ts.nodes_flags[n],\n            population=ts.nodes_population[n],\n        )\n        
nodes_count += 1\n    for t in ts.trees():\n        nodes_map = {n: n for n in ts.samples()}\n        for n in t.nodes():\n            if t.num_samples(n) > 1:\n                tables.nodes.add_row(\n                    time=ts.nodes_time[n],\n                    flags=ts.nodes_flags[n],\n                    population=ts.nodes_population[n],\n                )\n                nodes_map[n] = nodes_count\n                nodes_count += 1\n        for n in t.nodes():\n            if t.edge(n) != tskit.NULL:\n                tables.edges.add_row(\n                    parent=nodes_map[t.parent(n)],\n                    child=nodes_map[n],\n                    left=t.interval.left,\n                    right=t.interval.right,\n                )\n    tables.sort()\n    ts_unroll = tables.tree_sequence()\n    assert nodes_count == ts_unroll.num_nodes\n    return ts_unroll\n\n\nclass TestCoalescingPairsOneTree:\n    \"\"\"\n    Test against worked example (single tree)\n    \"\"\"\n\n    def example_ts(self):\n        \"\"\"\n        10.0┊         13      ┊\n            ┊       ┏━━┻━━┓   ┊\n         8.0┊      12     ┃   ┊\n            ┊     ┏━┻━┓   ┃   ┊\n         6.0┊    11   ┃   ┃   ┊\n            ┊  ┏━━╋━┓ ┃   ┃   ┊\n         2.0┊ 10  ┃ ┃ ┃   9   ┊\n            ┊ ┏┻┓ ┃ ┃ ┃  ┏┻━┓ ┊\n         1.0┊ ┃ ┃ ┃ ┃ ┃  8  ┃ ┊\n            ┊ ┃ ┃ ┃ ┃ ┃ ┏┻┓ ┃ ┊\n         0.0┊ 0 7 4 5 6 1 2 3 ┊\n            ┊ A A A A B B B B ┊\n        \"\"\"\n        tables = tskit.TableCollection(sequence_length=100)\n        tables.nodes.set_columns(\n            time=np.array([0] * 8 + [1, 2, 2, 6, 8, 10]),\n            flags=np.repeat([1, 0], [8, 6]).astype(\"uint32\"),\n        )\n        tables.edges.set_columns(\n            left=np.repeat([0], 13),\n            right=np.repeat([100], 13),\n            parent=np.array(\n                [8, 8, 9, 9, 10, 10, 11, 11, 11, 12, 12, 13, 13], dtype=\"int32\"\n            ),\n            child=np.array([1, 2, 3, 8, 0, 7, 4, 5, 10, 6, 11, 9, 12], 
dtype=\"int32\"),\n        )\n        tables.populations.add_row()\n        tables.populations.add_row()\n        tables.nodes.population = np.array(\n            [0, 1, 1, 1, 0, 0, 1, 0] + [tskit.NULL] * 6, dtype=\"int32\"\n        )\n        return tables.tree_sequence()\n\n    def test_total_pairs(self):\n        \"\"\"\n        ┊         15 pairs ┊\n        ┊       ┏━━┻━━┓    ┊\n        ┊       4     ┃    ┊\n        ┊     ┏━┻━┓   ┃    ┊\n        ┊     5   ┃   ┃    ┊\n        ┊  ┏━━╋━┓ ┃   ┃    ┊\n        ┊  1  ┃ ┃ ┃   2    ┊\n        ┊ ┏┻┓ ┃ ┃ ┃  ┏┻━┓  ┊\n        ┊ ┃ ┃ ┃ ┃ ┃  1  ┃  ┊\n        ┊ ┃ ┃ ┃ ┃ ┃ ┏┻┓ ┃  ┊\n        ┊ 0 0 0 0 0 0 0 0  ┊\n        \"\"\"\n        ts = self.example_ts()\n        check = np.array([0.0] * 8 + [1, 2, 1, 5, 4, 15])\n        implm = ts.pair_coalescence_counts()\n        np.testing.assert_allclose(implm, check)\n        # TODO: remove with prototype\n        proto = proto_pair_coalescence_counts(ts)\n        np.testing.assert_allclose(proto, check)\n\n    def test_population_pairs(self):\n        \"\"\"\n        ┊ AA       0 pairs ┊ AB      12 pairs ┊ BB       3 pairs ┊\n        ┊       ┏━━┻━━┓    ┊       ┏━━┻━━┓    ┊       ┏━━┻━━┓    ┊\n        ┊       0     ┃    ┊       4     ┃    ┊       0     ┃    ┊\n        ┊     ┏━┻━┓   ┃    ┊     ┏━┻━┓   ┃    ┊     ┏━┻━┓   ┃    ┊\n        ┊     5   ┃   ┃    ┊     0   ┃   ┃    ┊     0   ┃   ┃    ┊\n        ┊  ┏━━╋━┓ ┃   ┃    ┊  ┏━━╋━┓ ┃   ┃    ┊  ┏━━╋━┓ ┃   ┃    ┊\n        ┊  1  ┃ ┃ ┃   0    ┊  0  ┃ ┃ ┃   0    ┊  0  ┃ ┃ ┃   2    ┊\n        ┊ ┏┻┓ ┃ ┃ ┃  ┏┻━┓  ┊ ┏┻┓ ┃ ┃ ┃  ┏┻━┓  ┊ ┏┻┓ ┃ ┃ ┃  ┏┻━┓  ┊\n        ┊ ┃ ┃ ┃ ┃ ┃  0  ┃  ┊ ┃ ┃ ┃ ┃ ┃  0  ┃  ┊ ┃ ┃ ┃ ┃ ┃  1  ┃  ┊\n        ┊ ┃ ┃ ┃ ┃ ┃ ┏┻┓ ┃  ┊ ┃ ┃ ┃ ┃ ┃ ┏┻┓ ┃  ┊ ┃ ┃ ┃ ┃ ┃ ┏┻┓ ┃  ┊\n        ┊ A A A A B B B B  ┊ A A A A B B B B  ┊ A A A A B B B B  ┊\n        \"\"\"\n        ts = self.example_ts()\n        ss0 = np.flatnonzero(ts.nodes_population == 0)\n        ss1 = np.flatnonzero(ts.nodes_population == 1)\n        indexes = [(0, 0), (0, 1), 
(1, 1)]\n        implm = ts.pair_coalescence_counts(sample_sets=[ss0, ss1], indexes=indexes)\n        check = np.full(implm.shape, np.nan)\n        check[0] = np.array([0.0] * 8 + [0, 0, 1, 5, 0, 0])\n        check[1] = np.array([0.0] * 8 + [0, 0, 0, 0, 4, 12])\n        check[2] = np.array([0.0] * 8 + [1, 2, 0, 0, 0, 3])\n        np.testing.assert_allclose(implm, check)\n        # TODO: remove with prototype\n        proto = proto_pair_coalescence_counts(\n            ts, sample_sets=[ss0, ss1], indexes=indexes\n        )\n        np.testing.assert_allclose(proto, check)\n\n    def test_internal_samples(self):\n        \"\"\"\n        ┊          Not     ┊         24 pairs ┊\n        ┊       ┏━━┻━━┓    ┊       ┏━━┻━━┓    ┊\n        ┊       N     ┃    ┊       5     ┃    ┊\n        ┊     ┏━┻━┓   ┃    ┊     ┏━┻━┓   ┃    ┊\n        ┊     S   ┃   ┃    ┊     5   ┃   ┃    ┊\n        ┊  ┏━━╋━┓ ┃   ┃    ┊  ┏━━╋━┓ ┃   ┃    ┊\n        ┊  N  ┃ ┃ ┃   Samp ┊  1  ┃ ┃ ┃   2    ┊\n        ┊ ┏┻┓ ┃ ┃ ┃  ┏┻━┓  ┊ ┏┻┓ ┃ ┃ ┃  ┏┻━┓  ┊\n        ┊ ┃ ┃ ┃ ┃ ┃  N  ┃  ┊ ┃ ┃ ┃ ┃ ┃  1  ┃  ┊\n        ┊ ┃ ┃ ┃ ┃ ┃ ┏┻┓ ┃  ┊ ┃ ┃ ┃ ┃ ┃ ┏┻┓ ┃  ┊\n        ┊ S S S S S S S S  ┊ 0 0 0 0 0 0 0 0  ┊\n        \"\"\"\n        ts = self.example_ts()\n        tables = ts.dump_tables()\n        nodes_flags = tables.nodes.flags.copy()\n        nodes_flags[9] = tskit.NODE_IS_SAMPLE\n        nodes_flags[11] = tskit.NODE_IS_SAMPLE\n        tables.nodes.flags = nodes_flags\n        ts = tables.tree_sequence()\n        assert ts.num_samples == 10\n        implm = ts.pair_coalescence_counts(span_normalise=False)\n        check = np.array([0] * 8 + [1, 2, 1, 5, 5, 24]) * ts.sequence_length\n        np.testing.assert_allclose(implm, check)\n        # TODO: remove with prototype\n        proto = proto_pair_coalescence_counts(ts, span_normalise=False)\n        np.testing.assert_allclose(proto, check)\n\n    def test_windows(self):\n        ts = self.example_ts()\n        check = np.array([0.0] * 8 + [1, 2, 1, 5, 4, 15]) * 
ts.sequence_length / 2\n        implm = ts.pair_coalescence_counts(\n            windows=np.linspace(0, ts.sequence_length, 3), span_normalise=False\n        )\n        np.testing.assert_allclose(implm[0], check)\n        np.testing.assert_allclose(implm[1], check)\n        # TODO: remove with prototype\n        proto = proto_pair_coalescence_counts(\n            ts, windows=np.linspace(0, ts.sequence_length, 3), span_normalise=False\n        )\n        np.testing.assert_allclose(proto[0], check)\n        np.testing.assert_allclose(proto[1], check)\n\n    def test_time_windows(self):\n        \"\"\"\n           ┊         15 pairs ┊\n           ┊       ┏━━┻━━┓    ┊\n           ┊       4     ┃    ┊\n        7.0┊-----┏━┻━┓---┃----┊\n           ┊     5   ┃   ┃    ┊\n        5.0┊--┏━━╋━┓-┃---┃----┊\n           ┊  1  ┃ ┃ ┃   2    ┊\n           ┊ ┏┻┓ ┃ ┃ ┃  ┏┻━┓  ┊\n           ┊ ┃ ┃ ┃ ┃ ┃  1  ┃  ┊\n           ┊ ┃ ┃ ┃ ┃ ┃ ┏┻┓ ┃  ┊\n        0.0┊ 0 0 0 0 0 0 0 0  ┊\n        \"\"\"\n        ts = self.example_ts()\n        time_windows = np.array([0.0, 5.0, 7.0, np.inf])\n        check = np.array([4, 5, 19]) * ts.sequence_length\n        implm = ts.pair_coalescence_counts(\n            span_normalise=False, time_windows=time_windows\n        )\n        np.testing.assert_allclose(implm, check)\n        # TODO: remove with prototype\n        proto = proto_pair_coalescence_counts(\n            ts, span_normalise=False, time_windows=time_windows\n        )\n        np.testing.assert_allclose(proto, check)\n\n    def test_pair_normalise(self):\n        ts = self.example_ts()\n        ss0 = np.flatnonzero(ts.nodes_population == 0)\n        ss1 = np.flatnonzero(ts.nodes_population == 1)\n        indexes = [(0, 0), (0, 1), (1, 1)]\n        implm = ts.pair_coalescence_counts(\n            sample_sets=[ss0, ss1],\n            indexes=indexes,\n            pair_normalise=True,\n        )\n        check = np.full(implm.shape, np.nan)\n        check[0] = np.array([0.0] * 8 + [0, 0, 1, 5, 
0, 0])\n        check[1] = np.array([0.0] * 8 + [0, 0, 0, 0, 4, 12])\n        check[2] = np.array([0.0] * 8 + [1, 2, 0, 0, 0, 3])\n        total_pairs = np.array([6, 16, 6])\n        check /= total_pairs[:, np.newaxis]\n        np.testing.assert_allclose(implm, check)\n        # TODO: remove with prototype\n        proto = proto_pair_coalescence_counts(\n            ts,\n            sample_sets=[ss0, ss1],\n            indexes=indexes,\n            pair_normalise=True,\n        )\n        np.testing.assert_allclose(proto, check)\n\n    def test_multiple_roots(self):\n        ts = self.example_ts().decapitate(6.0)\n        implm = ts.pair_coalescence_counts(pair_normalise=True)\n        total_pairs = ts.num_samples * (ts.num_samples - 1) / 2\n        check = np.array([0.0] * 8 + [1, 2, 1, 5, 0, 0, 0, 0])\n        check /= total_pairs\n        np.testing.assert_allclose(implm, check)\n        # TODO: remove with prototype\n        proto = proto_pair_coalescence_counts(ts, pair_normalise=True)\n        np.testing.assert_allclose(proto, check)\n\n\nclass TestCoalescingPairsTwoTree:\n    \"\"\"\n    Test against worked example (two trees)\n    \"\"\"\n\n    def example_ts(self, S, L):\n        \"\"\"\n           0         S         L\n        4.0┊   7     ┊   7     ┊\n           ┊ ┏━┻━┓   ┊ ┏━┻━┓   ┊\n        3.0┊ ┃   6   ┊ ┃   ┃   ┊\n           ┊ ┃ ┏━┻┓  ┊ ┃   ┃   ┊\n        2.0┊ ┃ ┃  5  ┊ ┃   5   ┊\n           ┊ ┃ ┃ ┏┻┓ ┊ ┃  ┏┻━┓ ┊\n        1.0┊ ┃ ┃ ┃ ┃ ┊ ┃  4  ┃ ┊\n           ┊ ┃ ┃ ┃ ┃ ┊ ┃ ┏┻┓ ┃ ┊\n        0.0┊ 0 1 2 3 ┊ 0 1 2 3 ┊\n             A A B B   A A B B\n        \"\"\"\n        tables = tskit.TableCollection(sequence_length=L)\n        tables.nodes.set_columns(\n            time=np.array([0, 0, 0, 0, 1.0, 2.0, 3.0, 4.0]),\n            flags=np.array([1, 1, 1, 1, 0, 0, 0, 0], dtype=\"uint32\"),\n        )\n        tables.edges.set_columns(\n            left=np.array([S, S, 0, 0, S, 0, 0, 0, S, 0]),\n            right=np.array([L, L, S, L, L, S, S, L, L, 
S]),\n            parent=np.array([4, 4, 5, 5, 5, 6, 6, 7, 7, 7], dtype=\"int32\"),\n            child=np.array([1, 2, 2, 3, 4, 1, 5, 0, 5, 6], dtype=\"int32\"),\n        )\n        return tables.tree_sequence()\n\n    def test_total_pairs(self):\n        \"\"\"\n        ┊   3 pairs   3     ┊\n        ┊ ┏━┻━┓     ┏━┻━┓   ┊\n        ┊ ┃   2     ┃   ┃   ┊\n        ┊ ┃ ┏━┻┓    ┃   ┃   ┊\n        ┊ ┃ ┃  1    ┃   2   ┊\n        ┊ ┃ ┃ ┏┻┓   ┃  ┏┻━┓ ┊\n        ┊ ┃ ┃ ┃ ┃   ┃  1  ┃ ┊\n        ┊ ┃ ┃ ┃ ┃   ┃ ┏┻┓ ┃ ┊\n        ┊ 0 0 0 0   0 0 0 0 ┊\n        0         S         L\n        \"\"\"\n        L, S = 1e8, 1.0\n        ts = self.example_ts(S, L)\n        implm = ts.pair_coalescence_counts(span_normalise=False)\n        check = np.array([0] * 4 + [1 * (L - S), 2 * (L - S) + 1 * S, 2 * S, 3 * L])\n        np.testing.assert_allclose(implm, check)\n        # TODO: remove with prototype\n        proto = proto_pair_coalescence_counts(ts, span_normalise=False)\n        np.testing.assert_allclose(proto, check)\n\n    def test_population_pairs(self):\n        \"\"\"\n        ┊AA                 ┊AB                 ┊BB                 ┊\n        ┊   1 pairs   1     ┊   2 pairs   2     ┊   0 pairs   0     ┊\n        ┊ ┏━┻━┓     ┏━┻━┓   ┊ ┏━┻━┓     ┏━┻━┓   ┊ ┏━┻━┓     ┏━┻━┓   ┊\n        ┊ ┃   0     ┃   ┃   ┊ ┃   2     ┃   ┃   ┊ ┃   0     ┃   ┃   ┊\n        ┊ ┃ ┏━┻┓    ┃   ┃   ┊ ┃ ┏━┻┓    ┃   ┃   ┊ ┃ ┏━┻┓    ┃   ┃   ┊\n        ┊ ┃ ┃  0    ┃   0   ┊ ┃ ┃  0    ┃   1   ┊ ┃ ┃  1    ┃   1   ┊\n        ┊ ┃ ┃ ┏┻┓   ┃  ┏┻━┓ ┊ ┃ ┃ ┏┻┓   ┃  ┏┻━┓ ┊ ┃ ┃ ┏┻┓   ┃  ┏┻━┓ ┊\n        ┊ ┃ ┃ ┃ ┃   ┃  0  ┃ ┊ ┃ ┃ ┃ ┃   ┃  1  ┃ ┊ ┃ ┃ ┃ ┃   ┃  0  ┃ ┊\n        ┊ ┃ ┃ ┃ ┃   ┃ ┏┻┓ ┃ ┊ ┃ ┃ ┃ ┃   ┃ ┏┻┓ ┃ ┊ ┃ ┃ ┃ ┃   ┃ ┏┻┓ ┃ ┊\n        ┊ A A B B   A A B B ┊ A A B B   A A B B ┊ A A B B   A A B B ┊\n        0         S         L         S         L         S         L\n        \"\"\"\n        L, S = 1e8, 1.0\n        ts = self.example_ts(S, L)\n        indexes = [(0, 0), (0, 1), (1, 1)]\n        implm = 
ts.pair_coalescence_counts(\n            sample_sets=[[0, 1], [2, 3]], indexes=indexes, span_normalise=False\n        )\n        check = np.empty(implm.shape)\n        check[0] = np.array([0] * 4 + [0, 0, 0, 1 * L])\n        check[1] = np.array([0] * 4 + [1 * (L - S), 1 * (L - S), 2 * S, 2 * L])\n        check[2] = np.array([0] * 4 + [0, 1 * L, 0, 0])\n        np.testing.assert_allclose(implm, check)\n        # TODO: remove with prototype\n        proto = proto_pair_coalescence_counts(\n            ts, sample_sets=[[0, 1], [2, 3]], indexes=indexes, span_normalise=False\n        )\n        np.testing.assert_allclose(proto, check)\n\n    def test_internal_samples(self):\n        \"\"\"\n        ┊   Not       N     ┊   4 pairs   4     ┊\n        ┊ ┏━┻━┓     ┏━┻━┓   ┊ ┏━┻━┓     ┏━┻━┓   ┊\n        ┊ ┃   N     ┃   ┃   ┊ ┃   3     ┃   ┃   ┊\n        ┊ ┃ ┏━┻┓    ┃   ┃   ┊ ┃ ┏━┻┓    ┃   ┃   ┊\n        ┊ ┃ ┃  Samp ┃   S   ┊ ┃ ┃  1    ┃   2   ┊\n        ┊ ┃ ┃ ┏┻┓   ┃  ┏┻━┓ ┊ ┃ ┃ ┏┻┓   ┃  ┏┻━┓ ┊\n        ┊ ┃ ┃ ┃ ┃   ┃  N  ┃ ┊ ┃ ┃ ┃ ┃   ┃  1  ┃ ┊\n        ┊ ┃ ┃ ┃ ┃   ┃ ┏┻┓ ┃ ┊ ┃ ┃ ┃ ┃   ┃ ┏┻┓ ┃ ┊\n        ┊ S S S S   S S S S ┊ 0 0 0 0   0 0 0 0 ┊\n        \"\"\"\n        L, S = 200, 100\n        ts = self.example_ts(S, L)\n        tables = ts.dump_tables()\n        nodes_flags = tables.nodes.flags.copy()\n        nodes_flags[5] = tskit.NODE_IS_SAMPLE\n        tables.nodes.flags = nodes_flags\n        ts = tables.tree_sequence()\n        assert ts.num_samples == 5\n        implm = ts.pair_coalescence_counts(span_normalise=False)\n        check = np.array([0.0] * 4 + [(L - S), S + 2 * (L - S), 3 * S, 4 * L])\n        np.testing.assert_allclose(implm, check)\n        # TODO: remove with prototype\n        proto = proto_pair_coalescence_counts(ts, span_normalise=False)\n        np.testing.assert_allclose(proto, check)\n\n    def test_windows(self):\n        \"\"\"\n        ┊   3 pairs   3     ┊\n        ┊ ┏━┻━┓     ┏━┻━┓   ┊\n        ┊ ┃   2     ┃   ┃   ┊\n        ┊ ┃ ┏━┻┓    ┃   ┃ 
  ┊\n        ┊ ┃ ┃  1    ┃   2   ┊\n        ┊ ┃ ┃ ┏┻┓   ┃  ┏┻━┓ ┊\n        ┊ ┃ ┃ ┃ ┃   ┃  1  ┃ ┊\n        ┊ ┃ ┃ ┃ ┃   ┃ ┏┻┓ ┃ ┊\n        ┊ 0 0 0 0   0 0 0 0 ┊\n        0         S         L\n        \"\"\"\n        L, S = 200, 100\n        ts = self.example_ts(S, L)\n        windows = np.array(list(ts.breakpoints()))\n        check_0 = np.array([0.0] * 4 + [0, 1, 2, 3]) * S\n        check_1 = np.array([0.0] * 4 + [1, 2, 0, 3]) * (L - S)\n        implm = ts.pair_coalescence_counts(windows=windows, span_normalise=False)\n        np.testing.assert_allclose(implm[0], check_0)\n        np.testing.assert_allclose(implm[1], check_1)\n        # TODO: remove with prototype\n        proto = proto_pair_coalescence_counts(ts, windows=windows, span_normalise=False)\n        np.testing.assert_allclose(proto[0], check_0)\n        np.testing.assert_allclose(proto[1], check_1)\n\n    def test_time_windows(self):\n        \"\"\"\n           ┊   3 pairs   3     ┊\n        3.5┊-┏━┻━┓---┊-┏━┻━┓---┊\n           ┊ ┃   2   ┊ ┃   ┃   ┊\n           ┊ ┃ ┏━┻┓  ┊ ┃   ┃   ┊\n           ┊ ┃ ┃  1  ┊ ┃   2   ┊\n        1.5┊-┃-┃-┏┻┓-┊-┃--┏┻━┓-┊\n           ┊ ┃ ┃ ┃ ┃ ┊ ┃  1  ┃ ┊\n           ┊ ┃ ┃ ┃ ┃ ┊ ┃ ┏┻┓ ┃ ┊\n        0.0┊ 0 0 0 0 ┊ 0 0 0 0 ┊\n           0         S         L\n        \"\"\"\n        L, S = 200, 100\n        ts = self.example_ts(S, L)\n        time_windows = np.array([0.0, 1.5, 3.5, np.inf])\n        windows = np.array(list(ts.breakpoints()))\n        check_0 = np.array([0.0, 3.0, 3.0]) * S\n        check_1 = np.array([1.0, 2.0, 3.0]) * (L - S)\n        implm = ts.pair_coalescence_counts(\n            span_normalise=False,\n            windows=windows,\n            time_windows=time_windows,\n        )\n        np.testing.assert_allclose(implm[0], check_0)\n        np.testing.assert_allclose(implm[1], check_1)\n        # TODO: remove with prototype\n        proto = proto_pair_coalescence_counts(\n            ts,\n            span_normalise=False,\n            windows=windows,\n   
         time_windows=time_windows,\n        )\n        np.testing.assert_allclose(proto[0], check_0)\n        np.testing.assert_allclose(proto[1], check_1)\n\n    def test_pair_normalise(self):\n        L, S = 200, 100\n        ts = self.example_ts(S, L)\n        indexes = [(0, 0), (0, 1), (1, 1)]\n        implm = ts.pair_coalescence_counts(\n            sample_sets=[[0, 1], [2, 3]],\n            indexes=indexes,\n            span_normalise=False,\n            pair_normalise=True,\n        )\n        check = np.empty(implm.shape)\n        check[0] = np.array([0] * 4 + [0, 0, 0, 1 * L])\n        check[1] = np.array([0] * 4 + [1 * (L - S), 1 * (L - S), 2 * S, 2 * L])\n        check[2] = np.array([0] * 4 + [0, 1 * L, 0, 0])\n        total_pairs = np.array([1, 4, 1])\n        check /= total_pairs[:, np.newaxis]\n        np.testing.assert_allclose(implm, check)\n        # TODO: remove with prototype\n        proto = proto_pair_coalescence_counts(\n            ts,\n            sample_sets=[[0, 1], [2, 3]],\n            indexes=indexes,\n            span_normalise=False,\n            pair_normalise=True,\n        )\n        np.testing.assert_allclose(proto, check)\n\n    def test_multiple_roots(self):\n        L, S = 200, 100\n        ts = self.example_ts(S, L).decapitate(2.0)\n        implm = ts.pair_coalescence_counts(pair_normalise=True, span_normalise=False)\n        total_pairs = ts.num_samples * (ts.num_samples - 1) / 2\n        check = np.array([0.0] * 4 + [1 * (L - S), 2 * (L - S) + 1 * S, 0, 0, 0, 0])\n        check /= total_pairs\n        np.testing.assert_allclose(implm, check)\n        # TODO: remove with prototype\n        proto = proto_pair_coalescence_counts(\n            ts, pair_normalise=True, span_normalise=False\n        )\n        np.testing.assert_allclose(proto, check)\n\n\nclass TestCoalescingPairsSimulated:\n    \"\"\"\n    Test against a naive implementation on simulated data.\n    \"\"\"\n\n    @tests.cached_example\n    def example_ts(self):\n 
       n = 10\n        model = msprime.BetaCoalescent(alpha=1.5)  # polytomies\n        tables = msprime.sim_ancestry(\n            samples=n,\n            recombination_rate=1e-8,\n            sequence_length=1e6,\n            population_size=1e4,\n            random_seed=1024,\n            model=model,\n        ).dump_tables()\n        tables.populations.add_row(metadata={\"name\": \"foo\", \"description\": \"bar\"})\n        tables.populations.add_row(metadata={\"name\": \"bar\", \"description\": \"foo\"})\n        tables.nodes.population = np.repeat(\n            [0, 1, 2, tskit.NULL],\n            [n, n // 2, n - n // 2, tables.nodes.num_rows - 2 * n],\n        ).astype(\"int32\")\n        ts = tables.tree_sequence()\n        assert ts.num_trees > 1\n        return ts\n\n    @staticmethod\n    def _check_total_pairs(ts, windows):\n        samples = list(ts.samples())\n        implm = ts.pair_coalescence_counts(windows=windows, span_normalise=False)\n        dim = (windows.size - 1, ts.num_nodes)\n        check = np.full(dim, np.nan)\n        for w, (a, b) in enumerate(zip(windows[:-1], windows[1:])):\n            tsw = ts.keep_intervals(np.array([[a, b]]), simplify=False)\n            check[w] = naive_pair_coalescence_counts(tsw, samples, samples) / 2\n        np.testing.assert_allclose(implm, check)\n        # TODO: remove with prototype\n        proto = proto_pair_coalescence_counts(ts, windows=windows, span_normalise=False)\n        np.testing.assert_allclose(proto, check)\n\n    @staticmethod\n    def _check_subset_pairs(ts, windows):\n        ss0 = np.flatnonzero(ts.nodes_population == 0)\n        ss1 = np.flatnonzero(ts.nodes_population == 1)\n        idx = [(0, 1), (1, 1), (0, 0)]\n        implm = ts.pair_coalescence_counts(\n            sample_sets=[ss0, ss1], indexes=idx, windows=windows, span_normalise=False\n        )\n        dim = (windows.size - 1, len(idx), ts.num_nodes)\n        check = np.full(dim, np.nan)\n        for w, (a, b) in 
enumerate(zip(windows[:-1], windows[1:])):\n            tsw = ts.keep_intervals(np.array([[a, b]]), simplify=False)\n            check[w, 0] = naive_pair_coalescence_counts(tsw, ss0, ss1)\n            check[w, 1] = naive_pair_coalescence_counts(tsw, ss1, ss1) / 2\n            check[w, 2] = naive_pair_coalescence_counts(tsw, ss0, ss0) / 2\n        np.testing.assert_allclose(implm, check)\n        # TODO: remove with prototype\n        proto = proto_pair_coalescence_counts(\n            ts,\n            sample_sets=[ss0, ss1],\n            indexes=idx,\n            windows=windows,\n            span_normalise=False,\n        )\n        np.testing.assert_allclose(proto, check)\n\n    def test_sequence(self):\n        ts = self.example_ts()\n        windows = np.array([0.0, ts.sequence_length])\n        self._check_total_pairs(ts, windows)\n        self._check_subset_pairs(ts, windows)\n\n    def test_missing_interval(self):\n        \"\"\"\n        test case where three segments have all samples missing\n        \"\"\"\n        ts = self.example_ts()\n        windows = np.array([0.0, ts.sequence_length])\n        intervals = np.array([[0.0, 0.1], [0.4, 0.6], [0.9, 1.0]]) * ts.sequence_length\n        ts = ts.delete_intervals(intervals)\n        self._check_total_pairs(ts, windows)\n        self._check_subset_pairs(ts, windows)\n\n    def test_missing_leaves(self):\n        \"\"\"\n        test case where 1/2 of samples are missing\n        \"\"\"\n        t = self.example_ts().dump_tables()\n        ss0 = np.flatnonzero(t.nodes.population == 0)\n        remove = np.isin(t.edges.child, ss0)\n        assert np.any(remove)\n        t.edges.set_columns(\n            left=t.edges.left[~remove],\n            right=t.edges.right[~remove],\n            parent=t.edges.parent[~remove],\n            child=t.edges.child[~remove],\n        )\n        t.sort()\n        ts = t.tree_sequence()\n        windows = np.array([0.0, ts.sequence_length])\n        self._check_total_pairs(ts, 
windows)\n        self._check_subset_pairs(ts, windows)\n\n    def test_multiple_roots(self):\n        \"\"\"\n        test case where all trees have multiple roots\n        \"\"\"\n        ts = self.example_ts()\n        ts = ts.decapitate(np.quantile(ts.nodes_time, 0.75))\n        windows = np.array([0.0, ts.sequence_length])\n        self._check_total_pairs(ts, windows)\n        self._check_subset_pairs(ts, windows)\n\n    def test_windows(self):\n        ts = self.example_ts()\n        windows = np.linspace(0.0, ts.sequence_length, 9)\n        self._check_total_pairs(ts, windows)\n        self._check_subset_pairs(ts, windows)\n\n    def test_windows_are_trees(self):\n        \"\"\"\n        test case where window breakpoints coincide with tree breakpoints\n        \"\"\"\n        ts = self.example_ts()\n        windows = np.array(list(ts.breakpoints()))\n        self._check_total_pairs(ts, windows)\n        self._check_subset_pairs(ts, windows)\n\n    def test_windows_inside_trees(self):\n        \"\"\"\n        test case where windows are nested within trees\n        \"\"\"\n        ts = self.example_ts()\n        windows = np.array(list(ts.breakpoints()))\n        windows = np.sort(np.append(windows[:-1] / 2 + windows[1:] / 2, windows))\n        self._check_total_pairs(ts, windows)\n        self._check_subset_pairs(ts, windows)\n\n    def test_nonsuccinct_sequence(self):\n        \"\"\"\n        test case where each tree has distinct nodes\n        \"\"\"\n        ts = convert_to_nonsuccinct(self.example_ts())\n        windows = np.linspace(0, ts.sequence_length, 9)\n        self._check_total_pairs(ts, windows)\n        self._check_subset_pairs(ts, windows)\n\n    def test_span_normalise(self):\n        \"\"\"\n        test case where span is normalised\n        \"\"\"\n        ts = self.example_ts()\n        windows = np.array([0.0, 0.33, 1.0]) * ts.sequence_length\n        window_size = np.diff(windows)\n        implm = 
ts.pair_coalescence_counts(windows=windows, span_normalise=False)\n        check = ts.pair_coalescence_counts(windows=windows) * window_size[:, np.newaxis]\n        np.testing.assert_allclose(implm, check)\n        # TODO: remove with prototype\n        proto = proto_pair_coalescence_counts(ts, windows=windows, span_normalise=False)\n        np.testing.assert_allclose(proto, check)\n\n    def test_span_normalise_with_missing_flanks(self):\n        \"\"\"\n        test case where span is normalised and there are flanking intervals without trees\n        \"\"\"\n        ts = self.example_ts()\n        missing = np.array([[0.0, 0.1], [0.8, 1.0]]) * ts.sequence_length\n        ts = ts.delete_intervals(missing)\n        windows = np.array([0.0, 0.33, 1.0]) * ts.sequence_length\n        window_size = np.diff(windows) - np.diff(missing, axis=1).flatten()\n        check = (\n            ts.pair_coalescence_counts(windows=windows, span_normalise=False)\n            / window_size[:, np.newaxis]\n        )\n        implm = ts.pair_coalescence_counts(windows=windows, span_normalise=True)\n        np.testing.assert_allclose(implm, check)\n        # TODO: remove with prototype\n        proto = proto_pair_coalescence_counts(ts, windows=windows, span_normalise=True)\n        np.testing.assert_allclose(proto, check)\n\n    def test_span_normalise_with_missing_interior(self):\n        \"\"\"\n        test that span normalisation correctly calculates internal missing data\n        \"\"\"\n        ts = msprime.sim_ancestry(samples=1, discrete_genome=False)\n        missing_interval = np.array([[0.3, 0.6]]) * ts.sequence_length\n        windows = np.array([0.0, 0.31, 1.0]) * ts.sequence_length\n        time_windows = np.array([0.0, np.inf])\n        ts = ts.delete_intervals(missing_interval)\n        check = np.ones(windows.size - 1)\n        implm = ts.pair_coalescence_counts(\n            windows=windows,\n            time_windows=time_windows,\n            span_normalise=True,\n     
   ).flatten()\n        np.testing.assert_array_almost_equal(implm, check)\n        proto = proto_pair_coalescence_counts(\n            ts,\n            windows=windows,\n            time_windows=time_windows,\n            span_normalise=True,\n        ).flatten()\n        np.testing.assert_array_almost_equal(proto, check)\n\n    def test_empty_windows(self):\n        \"\"\"\n        test that windows without nodes contain zeros\n        \"\"\"\n        ts = self.example_ts()\n        missing = np.array([[0.0, 0.1], [0.8, 1.0]]) * ts.sequence_length\n        ts = ts.delete_intervals(missing)\n        windows = np.concatenate(missing)\n        check = ts.pair_coalescence_counts(windows=windows, span_normalise=False)\n        implm = ts.pair_coalescence_counts(windows=windows, span_normalise=True)\n        np.testing.assert_allclose(check[0], 0.0)\n        np.testing.assert_allclose(check[2], 0.0)\n        np.testing.assert_allclose(implm[0], 0.0)\n        np.testing.assert_allclose(implm[2], 0.0)\n\n    def test_pair_normalise(self):\n        ts = self.example_ts()\n        windows = np.array([0.0, 0.33, 1.0]) * ts.sequence_length\n        window_size = np.diff(windows)\n        total_pairs = ts.num_samples * (ts.num_samples - 1) / 2\n        implm = ts.pair_coalescence_counts(\n            windows=windows, span_normalise=False, pair_normalise=True\n        )\n        check = ts.pair_coalescence_counts(windows=windows) * window_size[:, np.newaxis]\n        check /= total_pairs\n        np.testing.assert_allclose(implm, check)\n        # TODO: remove with prototype\n        proto = proto_pair_coalescence_counts(\n            ts, windows=windows, span_normalise=False, pair_normalise=True\n        )\n        np.testing.assert_allclose(proto, check)\n\n    def test_internal_nodes_are_samples(self):\n        \"\"\"\n        test case where some samples are descendants of other samples\n        \"\"\"\n        ts = self.example_ts()\n        tables = ts.dump_tables()\n    
    nodes_flags = tables.nodes.flags.copy()\n        nodes_sample = np.arange(ts.num_samples, ts.num_nodes, 10)\n        nodes_flags[nodes_sample] = tskit.NODE_IS_SAMPLE\n        tables.nodes.flags = nodes_flags\n        ts_modified = tables.tree_sequence()\n        assert ts_modified.num_samples > ts.num_samples\n        windows = np.linspace(0.0, 1.0, 9) * ts_modified.sequence_length\n        self._check_total_pairs(ts_modified, windows)\n        self._check_subset_pairs(ts_modified, windows)\n\n    def test_time_windows(self):\n        ts = self.example_ts()\n        total_pair_count = ts.pair_coalescence_counts(\n            time_windows=np.array([0.0, np.inf]),\n            span_normalise=False,\n        )[0]\n        samples = list(ts.samples())\n        time_windows = np.quantile(ts.nodes_time, [0.0, 0.25, 0.5, 0.75])\n        time_windows = np.append(time_windows, np.inf)\n        implm = ts.pair_coalescence_counts(\n            span_normalise=False, time_windows=time_windows\n        )\n        assert np.isclose(np.sum(implm), total_pair_count)\n        check = naive_pair_coalescence_counts(ts, samples, samples).squeeze() / 2\n        nodes_map = np.searchsorted(time_windows, ts.nodes_time, side=\"right\") - 1\n        check = np.bincount(nodes_map, weights=check)\n        np.testing.assert_allclose(implm, check)\n        # TODO: remove with prototype\n        proto = proto_pair_coalescence_counts(\n            ts, span_normalise=False, time_windows=time_windows\n        )\n        assert np.isclose(np.sum(proto), total_pair_count)\n        np.testing.assert_allclose(proto, check)\n\n    def test_time_windows_truncated(self):\n        \"\"\"\n        test case where some nodes fall outside of time bins\n        \"\"\"\n        ts = self.example_ts()\n        total_pair_count = ts.pair_coalescence_counts(\n            time_windows=np.array([0.0, np.inf]),\n            span_normalise=False,\n        )[0]\n        samples = list(ts.samples())\n        
time_windows = np.quantile(ts.nodes_time, [0.5, 0.75])\n        assert time_windows[0] > 0.0\n        time_windows = np.append(time_windows, np.inf)\n        implm = ts.pair_coalescence_counts(\n            span_normalise=False, time_windows=time_windows\n        )\n        assert np.sum(implm) < total_pair_count\n        check = naive_pair_coalescence_counts(ts, samples, samples).squeeze() / 2\n        nodes_map = np.searchsorted(time_windows, ts.nodes_time, side=\"right\") - 1\n        oob = np.logical_or(nodes_map < 0, nodes_map >= time_windows.size)\n        check = np.bincount(nodes_map[~oob], weights=check[~oob])\n        np.testing.assert_allclose(implm, check)\n        # TODO: remove with prototype\n        proto = proto_pair_coalescence_counts(\n            ts, span_normalise=False, time_windows=time_windows\n        )\n        assert np.sum(proto) < total_pair_count\n        np.testing.assert_allclose(proto, check)\n\n    def test_time_windows_unique(self):\n        ts = self.example_ts()\n        total_pair_count = ts.pair_coalescence_counts(\n            time_windows=np.array([0.0, np.inf]),\n            span_normalise=False,\n        )[0]\n        samples = list(ts.samples())\n        time_windows = np.unique(ts.nodes_time)\n        time_windows = np.append(time_windows, np.inf)\n        implm = ts.pair_coalescence_counts(\n            span_normalise=False, time_windows=time_windows\n        )\n        assert np.isclose(np.sum(implm), total_pair_count)\n        check = naive_pair_coalescence_counts(ts, samples, samples).squeeze() / 2\n        nodes_map = np.searchsorted(time_windows, ts.nodes_time, side=\"right\") - 1\n        check = np.bincount(nodes_map, weights=check)\n        np.testing.assert_allclose(implm, check)\n        # TODO: remove with prototype\n        proto = proto_pair_coalescence_counts(\n            ts, span_normalise=False, time_windows=time_windows\n        )\n        assert np.isclose(np.sum(proto), total_pair_count)\n        
np.testing.assert_allclose(proto, check)\n\n    def test_diversity(self):\n        \"\"\"\n        test that weighted mean of node times equals branch diversity\n        \"\"\"\n        ts = self.example_ts()\n        windows = np.linspace(0.0, ts.sequence_length, 9)\n        check = ts.diversity(mode=\"branch\", windows=windows)\n        implm = ts.pair_coalescence_counts(windows=windows)\n        implm = 2 * (implm @ ts.nodes_time) / implm.sum(axis=1)\n        np.testing.assert_allclose(implm, check)\n        # TODO: remove with prototype\n        proto = proto_pair_coalescence_counts(ts, windows=windows)\n        proto = 2 * (proto @ ts.nodes_time) / proto.sum(axis=1)\n        np.testing.assert_allclose(proto, check)\n\n    def test_divergence(self):\n        \"\"\"\n        test that weighted mean of node times equals branch divergence\n        \"\"\"\n        ts = self.example_ts()\n        ss0 = np.flatnonzero(ts.nodes_population == 0)\n        ss1 = np.flatnonzero(ts.nodes_population == 1)\n        windows = np.linspace(0.0, ts.sequence_length, 9)\n        check = ts.divergence(sample_sets=[ss0, ss1], mode=\"branch\", windows=windows)\n        implm = ts.pair_coalescence_counts(sample_sets=[ss0, ss1], windows=windows)\n        implm = 2 * (implm @ ts.nodes_time) / implm.sum(axis=1)\n        np.testing.assert_allclose(implm, check)\n        # TODO: remove with prototype\n        proto = proto_pair_coalescence_counts(\n            ts, sample_sets=[ss0, ss1], windows=windows\n        )\n        proto = 2 * (proto @ ts.nodes_time) / proto.sum(axis=1)\n        np.testing.assert_allclose(proto, check)\n\n\nclass TestCoalescingPairsUsage:\n    \"\"\"\n    Test invalid inputs\n    \"\"\"\n\n    @tests.cached_example\n    def example_ts(self):\n        return msprime.sim_ancestry(\n            samples=10,\n            recombination_rate=1e-8,\n            sequence_length=1e5,\n            population_size=1e4,\n            random_seed=1024,\n        )\n\n    def 
test_bad_windows(self):\n        ts = self.example_ts()\n        with pytest.raises(ValueError, match=\"too small depth\"):\n            ts.pair_coalescence_counts(windows=\"whatever\")\n        with pytest.raises(ValueError, match=\"must have at least 2 elements\"):\n            ts.pair_coalescence_counts(windows=[0.0])\n        with pytest.raises(tskit.LibraryError, match=\"must be increasing list\"):\n            ts.pair_coalescence_counts(\n                windows=np.array([0.0, 0.3, 0.2, 1.0]) * ts.sequence_length\n            )\n        with pytest.raises(tskit.LibraryError, match=\"must be increasing list\"):\n            ts.pair_coalescence_counts(windows=np.array([0.0, 2.0]) * ts.sequence_length)\n\n    def test_bad_sample_sets(self):\n        ts = self.example_ts()\n        with pytest.raises(tskit.LibraryError, match=\"out of bounds\"):\n            ts.pair_coalescence_counts(sample_sets=[[0, ts.num_nodes]])\n\n    def test_bad_indexes(self):\n        ts = self.example_ts()\n        with pytest.raises(tskit.LibraryError, match=\"out of bounds\"):\n            ts.pair_coalescence_counts(indexes=[(0, 1)])\n        with pytest.raises(ValueError, match=\"must be a k x 2 array\"):\n            ts.pair_coalescence_counts(indexes=[(0, 0, 0)])\n\n    def test_no_indexes(self):\n        ts = self.example_ts()\n        ss = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]\n        with pytest.raises(ValueError, match=\"more than two sample sets\"):\n            ts.pair_coalescence_counts(sample_sets=ss)\n\n    def test_oob_samples(self):\n        ts = self.example_ts()\n        sample_sets = [np.arange(ts.num_samples + 1)]\n        with pytest.raises(tskit.LibraryError, match=\"are not samples\"):\n            ts.pair_coalescence_counts(sample_sets=sample_sets)\n\n    def test_uncalibrated_time(self):\n        tables = self.example_ts().dump_tables()\n        tables.time_units = tskit.TIME_UNITS_UNCALIBRATED\n        ts = tables.tree_sequence()\n        with 
pytest.raises(ValueError, match=\"require calibrated node times\"):\n            ts.pair_coalescence_counts(time_windows=np.array([0.0, np.inf]))\n\n    @pytest.mark.parametrize(\"time_windows\", [[], [0.0], [[0.0, 1.0]], \"whatever\"])\n    def test_bad_time_windows(self, time_windows):\n        ts = self.example_ts()\n        with pytest.raises(ValueError, match=\"too small depth\"):\n            ts.pair_coalescence_counts(time_windows=\"time_windows\")\n\n    def test_unsorted_time_windows(self):\n        ts = self.example_ts()\n        time_windows = np.array([0.0, 12.0, 6.0, np.inf])\n        with pytest.raises(ValueError, match=\"monotonically increasing or decreasing\"):\n            ts.pair_coalescence_counts(time_windows=time_windows)\n\n    def test_empty_time_windows(self):\n        ts = self.example_ts()\n        time_windows = [np.max(ts.nodes_time) + 1, np.max(ts.nodes_time) + 2]\n        time_windows = np.append(time_windows, np.inf)\n        with pytest.raises(ValueError, match=\"has null values for all nodes\"):\n            ts.pair_coalescence_counts(time_windows=time_windows)\n\n    def test_output_dim(self):\n        \"\"\"\n        test that output dimensions corresponding to None arguments are dropped\n        \"\"\"\n        ts = self.example_ts()\n        ss = [[0, 1, 2], [3, 4, 5]]\n        implm = ts.pair_coalescence_counts(sample_sets=ss, windows=None, indexes=None)\n        assert implm.shape == (ts.num_nodes,)\n        windows = np.linspace(0.0, ts.sequence_length, 2)\n        implm = ts.pair_coalescence_counts(sample_sets=ss, windows=windows, indexes=None)\n        assert implm.shape == (1, ts.num_nodes)\n        indexes = [(0, 1), (1, 1)]\n        implm = ts.pair_coalescence_counts(\n            sample_sets=ss, windows=windows, indexes=indexes\n        )\n        assert implm.shape == (1, 2, ts.num_nodes)\n        implm = ts.pair_coalescence_counts(sample_sets=ss, windows=None, indexes=indexes)\n        assert implm.shape == (2, 
ts.num_nodes)\n\n    def test_extra_time_windows(self):\n        \"\"\"\n        test that output dimensions match number of time windows\n        and windows without nodes have zero counts\n        \"\"\"\n        ts = self.example_ts()\n        ss = [[0, 1, 2], [3, 4, 5]]\n        max_time = ts.nodes_time.max()\n        time_windows = np.linspace(0, max_time * 2, 10)\n        implm = ts.pair_coalescence_counts(\n            sample_sets=ss,\n            windows=None,\n            indexes=None,\n            time_windows=time_windows,\n        )\n        assert implm.shape == (time_windows.size - 1,)\n        max_idx = np.searchsorted(time_windows, max_time, side=\"right\")\n        np.testing.assert_allclose(implm[max_idx:], 0.0)\n\n\nclass TestPairCoalescenceQuantiles:\n    \"\"\"\n    Test quantile reduction\n    \"\"\"\n\n    @tests.cached_example\n    def example_ts(self):\n        n = 10\n        model = msprime.BetaCoalescent(alpha=1.5)  # polytomies\n        tables = msprime.sim_ancestry(\n            samples=n,\n            recombination_rate=1e-8,\n            sequence_length=1e6,\n            population_size=1e4,\n            random_seed=1024,\n            model=model,\n        ).dump_tables()\n        tables.populations.add_row(metadata={\"name\": \"foo\", \"description\": \"bar\"})\n        tables.nodes.population = np.repeat(\n            [0, 1, tskit.NULL], [n, n, tables.nodes.num_rows - 2 * n]\n        ).astype(\"int32\")\n        ts = tables.tree_sequence()\n        assert ts.num_trees > 1\n        return ts\n\n    def test_quantiles(self):\n        ts = self.example_ts()\n        quantiles = np.linspace(0, 1, 10)\n        weights = ts.pair_coalescence_counts()\n        check = _numpy_weighted_quantile(ts.nodes_time, weights, quantiles)\n        implm = ts.pair_coalescence_quantiles(quantiles)\n        np.testing.assert_allclose(implm, check)\n        # TODO: remove with prototype\n        proto = proto_pair_coalescence_quantiles(ts, 
quantiles=quantiles)\n        np.testing.assert_allclose(proto, check)\n\n    def test_windows(self):\n        ts = self.example_ts()\n        quantiles = np.linspace(0, 1, 10)\n        windows = np.array([0, 0.5, 1.0]) * ts.sequence_length\n        implm = ts.pair_coalescence_quantiles(quantiles, windows=windows)\n        weights = ts.pair_coalescence_counts(windows=windows)\n        check = np.empty_like(implm)\n        for i, w in enumerate(weights):\n            check[i] = _numpy_weighted_quantile(ts.nodes_time, w, quantiles)\n        np.testing.assert_allclose(implm, check)\n\n    def test_sample_sets(self):\n        ts = self.example_ts()\n        sample_sets = [\n            np.flatnonzero(ts.nodes_population[: ts.num_samples] == i) for i in range(2)\n        ]\n        quantiles = np.linspace(0, 1, 10)\n        indexes = [(0, 1)]\n        implm = ts.pair_coalescence_quantiles(\n            quantiles, sample_sets=sample_sets, indexes=indexes\n        )\n        weights = ts.pair_coalescence_counts(sample_sets=sample_sets, indexes=indexes)\n        check = _numpy_weighted_quantile(ts.nodes_time, weights.flatten(), quantiles)\n        np.testing.assert_allclose(implm.flatten(), check)\n        # check default\n        implm = ts.pair_coalescence_quantiles(quantiles, sample_sets=sample_sets)\n        np.testing.assert_allclose(implm, check)\n\n    def test_observations_are_quantiles(self):\n        \"\"\"\n        case where quantiles fall on observations\n        \"\"\"\n        ts = self.example_ts()\n        weights = ts.pair_coalescence_counts()\n        quantiles = np.unique(weights / np.sum(weights))\n        check = _numpy_weighted_quantile(ts.nodes_time, weights, quantiles)\n        implm = ts.pair_coalescence_quantiles(quantiles)\n        np.testing.assert_allclose(implm, check)\n\n    def test_errors(self):\n        ts = self.example_ts()\n        sample_sets = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]\n        quantiles = np.linspace(0, 1, 10)\n        with 
pytest.raises(ValueError, match=\"more than two sample sets\"):\n            ts.pair_coalescence_quantiles(quantiles, sample_sets=sample_sets)\n        tables = ts.dump_tables()\n        tables.time_units = tskit.TIME_UNITS_UNCALIBRATED\n        with pytest.raises(ValueError, match=\"require calibrated node times\"):\n            tables.tree_sequence().pair_coalescence_quantiles(quantiles=np.array([0.5]))\n\n    def test_long_sequence(self):\n        ts = _single_tree_example(L=1e8, T=10)\n        windows = np.linspace(0, ts.sequence_length, 100)\n        time_windows = np.array([0, np.inf])\n        # check that there is roundoff error present\n        weights = ts.pair_coalescence_counts(\n            windows=windows,\n            time_windows=time_windows,\n            pair_normalise=True,\n            span_normalise=True,\n        )\n        assert np.all(np.isclose(weights, 1.0))\n        assert not np.all(weights == 1.0)\n        # check that we don't error out\n        quantiles = np.linspace(0, 1, 10)\n        quants = ts.pair_coalescence_quantiles(windows=windows, quantiles=quantiles)\n        ck_quants = _numpy_weighted_quantile(\n            ts.nodes_time,\n            ts.pair_coalescence_counts(pair_normalise=True),\n            quantiles,\n        )\n        np.testing.assert_allclose(quants, np.tile(ck_quants, (windows.size - 1, 1)))\n\n    def test_empty_windows(self):\n        \"\"\"\n        test case where a window has no nodes\n        \"\"\"\n        ts = self.example_ts()\n        missing = np.array([[0.0, 0.1], [0.8, 1.0]]) * ts.sequence_length\n        ts = ts.delete_intervals(missing)\n        windows = np.concatenate(missing)\n        quantiles = np.linspace(0, 1, 10)\n        check = ts.pair_coalescence_quantiles(windows=windows, quantiles=quantiles)\n        assert np.all(np.isnan(check[0]))\n        assert np.all(np.isnan(check[2]))\n\n\nclass TestPairCoalescenceRates:\n    \"\"\"\n    Test coalescence rate reduction\n    \"\"\"\n\n    
@tests.cached_example\n    def example_ts(self):\n        n = 10\n        tables = msprime.sim_ancestry(\n            samples=n,\n            recombination_rate=1e-8,\n            sequence_length=1e6,\n            population_size=1e4,\n            random_seed=1025,\n        ).dump_tables()\n        tables.populations.add_row(metadata={\"name\": \"foo\", \"description\": \"bar\"})\n        tables.nodes.population = np.repeat(\n            [0, 1, tskit.NULL], [n, n, tables.nodes.num_rows - 2 * n]\n        ).astype(\"int32\")\n        ts = tables.tree_sequence()\n        assert ts.num_trees > 1\n        return ts\n\n    def test_simulated(self):\n        ts = self.example_ts()\n        quantiles = np.linspace(0, 1, 5)\n        weights = ts.pair_coalescence_counts(pair_normalise=True)\n        breaks = _numpy_weighted_quantile(ts.nodes_time, weights, quantiles)\n        breaks[0], breaks[-1] = 0.0, np.inf\n        check = _numpy_hazard_rate(ts.nodes_time, weights, breaks)\n        implm = ts.pair_coalescence_rates(breaks)\n        np.testing.assert_allclose(implm, check)\n\n    def test_windowed(self):\n        ts = self.example_ts()\n        quantiles = np.linspace(0, 1, 5)\n        weights = ts.pair_coalescence_counts(pair_normalise=True)\n        breaks = _numpy_weighted_quantile(ts.nodes_time, weights, quantiles)\n        breaks[0], breaks[-1] = 0.0, np.inf\n        windows = np.linspace(0, ts.sequence_length, 4)\n        implm = ts.pair_coalescence_rates(breaks, windows=windows)\n        check = np.empty_like(implm)\n        weights = ts.pair_coalescence_counts(pair_normalise=True, windows=windows)\n        for i, w in enumerate(weights):\n            check[i] = _numpy_hazard_rate(ts.nodes_time, w, breaks)\n        np.testing.assert_allclose(implm, check)\n\n    def test_truncated(self):\n        ts = self.example_ts()\n        max_time = np.max(ts.nodes_time)\n        breaks = np.array([0.0, 0.5, 1.0, 2, np.inf]) * np.ceil(max_time)\n        weights = 
ts.pair_coalescence_counts(pair_normalise=True)\n        check = _numpy_hazard_rate(ts.nodes_time, weights, breaks)\n        implm = ts.pair_coalescence_rates(breaks)\n        np.testing.assert_allclose(implm, check)\n\n    def test_empty(self):\n        ts = self.example_ts()\n        i = ts.num_nodes // 2\n        assert ts.nodes_time[i] < ts.nodes_time[i + 1]\n        empty_time_window = [\n            ts.nodes_time[i] * 0.75 + ts.nodes_time[i + 1] * 0.25,\n            ts.nodes_time[i] * 0.25 + ts.nodes_time[i + 1] * 0.75,\n        ]\n        max_time = np.max(ts.nodes_time)\n        breaks = np.array([0.0, *empty_time_window, max_time + 1, max_time + 2, np.inf])\n        weights = ts.pair_coalescence_counts(pair_normalise=True)\n        check = _numpy_hazard_rate(ts.nodes_time, weights, breaks)\n        implm = ts.pair_coalescence_rates(breaks)\n        np.testing.assert_allclose(implm, check)\n\n    def test_single(self):\n        ts = self.example_ts()\n        breaks = np.array([0.0, np.inf])\n        indexes = [(0, 0)]\n        weights = ts.pair_coalescence_counts(pair_normalise=True)\n        check = _numpy_hazard_rate(ts.nodes_time, weights, breaks).reshape(-1, 1)\n        implm = ts.pair_coalescence_rates(breaks, indexes=indexes)\n        np.testing.assert_allclose(implm, check)\n\n    def test_indexes(self):\n        ts = self.example_ts()\n        breaks = np.array([0.0, np.inf])\n        sample_sets = [[0, 1, 2], [3, 4, 5]]\n        weights = ts.pair_coalescence_counts(\n            sample_sets=sample_sets, pair_normalise=True\n        )\n        check = _numpy_hazard_rate(ts.nodes_time, weights, breaks)\n        implm = ts.pair_coalescence_rates(breaks, sample_sets=sample_sets)\n        np.testing.assert_allclose(implm, check)\n\n    def test_errors(self):\n        ts = self.example_ts()\n        sample_sets = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]\n        time_windows = np.array([0, np.inf])\n        with pytest.raises(ValueError, match=\"more than two 
sample sets\"):\n            ts.pair_coalescence_rates(time_windows, sample_sets=sample_sets)\n        tables = ts.dump_tables()\n        tables.time_units = tskit.TIME_UNITS_UNCALIBRATED\n        with pytest.raises(ValueError, match=\"require calibrated node times\"):\n            tables.tree_sequence().pair_coalescence_rates(\n                time_windows=np.array([0.0, np.inf])\n            )\n\n    def test_long_sequence(self):\n        ts = _single_tree_example(L=1e8, T=10)\n        windows = np.linspace(0, ts.sequence_length, 100)\n        time_windows = np.array([0, np.inf])\n        # check that there is roundoff error present\n        weights = ts.pair_coalescence_counts(\n            windows=windows,\n            time_windows=time_windows,\n            pair_normalise=True,\n            span_normalise=True,\n        )\n        assert np.all(np.isclose(weights, 1.0))\n        assert not np.all(weights == 1.0)\n        # check that we don't error out\n        rates = ts.pair_coalescence_rates(windows=windows, time_windows=time_windows)\n        ck_rates = _numpy_hazard_rate(\n            ts.nodes_time,\n            ts.pair_coalescence_counts(pair_normalise=True),\n            time_windows,\n        )\n        np.testing.assert_allclose(\n            rates.flatten(), np.repeat(ck_rates, windows.size - 1)\n        )\n\n    def test_extra_time_windows(self):\n        \"\"\"\n        test that output dimensions match number of time windows\n        and windows without nodes have NaN rates\n        \"\"\"\n        ts = self.example_ts()\n        ss = [[0, 1, 2], [3, 4, 5]]\n        max_time = ts.nodes_time.max()\n        time_windows = np.append(np.linspace(0, max_time * 2, 10), np.inf)\n        implm = ts.pair_coalescence_rates(\n            time_windows,\n            sample_sets=ss,\n            windows=None,\n            indexes=None,\n        )\n        assert implm.shape == (time_windows.size - 1,)\n        max_idx = np.searchsorted(time_windows, max_time, 
side=\"right\")\n        assert np.all(np.isnan(implm[max_idx:]))\n\n    def test_missing_sequence(self):\n        \"\"\"\n        test that missing intervals are ignored when calculating rates\n        \"\"\"\n        ts = self.example_ts()\n        missing = np.array([[0.0, 0.1], [0.9, 1.0]]) * ts.sequence_length\n        ts = ts.delete_intervals(missing)\n        windows = np.array([0.0, 0.5, 1.0]) * ts.sequence_length\n        ts_trim = ts.trim()\n        windows_trim = np.array([0.0, 0.5, 1.0]) * ts_trim.sequence_length\n        time_windows = np.linspace(0, ts.nodes_time.max() * 2, 10)\n        time_windows[-1] = np.inf\n        implm = ts.pair_coalescence_rates(time_windows, windows=windows)\n        check = ts_trim.pair_coalescence_rates(time_windows, windows=windows_trim)\n        np.testing.assert_allclose(implm, check)\n\n    def test_empty_windows(self):\n        \"\"\"\n        test case where a window has no nodes\n        \"\"\"\n        ts = self.example_ts()\n        missing = np.array([[0.0, 0.1], [0.8, 1.0]]) * ts.sequence_length\n        ts = ts.delete_intervals(missing)\n        windows = np.concatenate(missing)\n        time_windows = np.linspace(0, ts.nodes_time.max() * 2, 10)\n        time_windows[-1] = np.inf\n        check = ts.pair_coalescence_rates(time_windows, windows=windows)\n        assert np.all(np.isnan(check[0]))\n        assert np.all(np.isnan(check[2]))\n"
  },
  {
    "path": "python/tests/test_combinatorics.py",
    "content": "#\n# MIT License\n#\n# Copyright (c) 2020-2023 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTest cases for combinatorial algorithms.\n\"\"\"\n\nimport collections\nimport io\nimport itertools\nimport json\nimport math\nimport random\n\nimport msprime\nimport numpy as np\nimport pytest\n\nimport tests.test_wright_fisher as wf\nimport tskit\nimport tskit.combinatorics as comb\nfrom tests import test_stats\nfrom tskit.combinatorics import Rank, RankTree\n\n\nclass TestCombination:\n    def test_combination_with_replacement_rank_unrank(self):\n        for n in range(9):\n            for k in range(n):\n                nums = list(range(n))\n                combs = itertools.combinations_with_replacement(nums, k)\n                for exp_rank, c in enumerate(combs):\n                    c = list(c)\n                    actual_rank = comb.Combination.with_replacement_rank(c, n)\n                    assert 
actual_rank == exp_rank\n                    unranked = comb.Combination.with_replacement_unrank(exp_rank, n, k)\n                    assert unranked == c\n\n    def test_combination_rank_unrank(self):\n        for n in range(11):\n            for k in range(n):\n                nums = list(range(n))\n                for rank, c in enumerate(itertools.combinations(nums, k)):\n                    c = list(c)\n                    assert comb.Combination.rank(c, nums) == rank\n                    assert comb.Combination.unrank(rank, nums, k) == c\n\n    def test_combination_unrank_errors(self):\n        self.verify_unrank_errors(1, 1, 1)\n        self.verify_unrank_errors(2, 0, 1)\n\n    def verify_unrank_errors(self, rank, n, k):\n        with pytest.raises(ValueError):\n            comb.Combination.unrank(rank, list(range(n)), k)\n\n\nclass TestPartition:\n    def test_rule_asc(self):\n        self.verify_rule_asc(1, [[1]])\n        self.verify_rule_asc(2, [[1, 1], [2]])\n        self.verify_rule_asc(3, [[1, 1, 1], [1, 2], [3]])\n        self.verify_rule_asc(4, [[1, 1, 1, 1], [1, 1, 2], [1, 3], [2, 2], [4]])\n        self.verify_rule_asc(\n            5,\n            [[1, 1, 1, 1, 1], [1, 1, 1, 2], [1, 1, 3], [1, 2, 2], [1, 4], [2, 3], [5]],\n        )\n        self.verify_rule_asc(\n            6,\n            [\n                [1, 1, 1, 1, 1, 1],\n                [1, 1, 1, 1, 2],\n                [1, 1, 1, 3],\n                [1, 1, 2, 2],\n                [1, 1, 4],\n                [1, 2, 3],\n                [1, 5],\n                [2, 2, 2],\n                [2, 4],\n                [3, 3],\n                [6],\n            ],\n        )\n\n    def verify_rule_asc(self, n, partitions):\n        assert list(comb.rule_asc(n)) == partitions\n\n    def test_partitions(self):\n        assert list(comb.partitions(0)) == []\n        for n in range(1, 7):\n            assert list(comb.partitions(n)) == list(comb.rule_asc(n))[:-1]\n\n    def 
test_group_partition(self):\n        assert comb.group_partition([1]) == [[1]]\n        assert comb.group_partition([1, 2]) == [[1], [2]]\n        assert comb.group_partition([1, 1, 1]) == [[1, 1, 1]]\n        assert comb.group_partition([1, 1, 2, 3, 3]) == [[1, 1], [2], [3, 3]]\n\n\nclass TestRankTree:\n    @pytest.mark.parametrize(\"n\", range(11))\n    def test_num_shapes(self, n):\n        all_trees = RankTree.all_unlabelled_trees(n)\n        assert len(list(all_trees)) == comb.num_shapes(n)\n\n    @pytest.mark.parametrize(\"n\", range(2, 8))\n    def test_num_labellings(self, n):\n        for tree in RankTree.all_unlabelled_trees(n):\n            tree = tree.label_unrank(0)\n            tree2 = tree.to_tsk_tree()\n            n_labellings = sum(1 for _ in RankTree.all_labellings(tree))\n            assert n_labellings == RankTree.from_tsk_tree(tree2).num_labellings()\n\n    def test_num_labelled_trees(self):\n        # Number of leaf-labelled trees with n leaves on OEIS\n        n_trees = [0, 1, 1, 4, 26, 236, 2752, 39208]\n        for i, expected in zip(range(len(n_trees)), n_trees):\n            actual = sum(1 for _ in RankTree.all_labelled_trees(i))\n            assert actual == expected\n\n    def test_all_labelled_trees_3(self):\n        expected = [\"(0,1,2);\", \"(0,(1,2));\", \"(1,(0,2));\", \"(2,(0,1));\"]\n        actual = [t.newick() for t in RankTree.all_labelled_trees(3)]\n        assert expected == actual\n\n    def test_all_labelled_trees_4(self):\n        expected = [\n            # 1 + 1 + 1 + 1 (partition of num leaves)\n            \"(0,1,2,3);\",\n            # 1 + 1 + 2\n            \"(0,1,(2,3));\",\n            \"(0,2,(1,3));\",\n            \"(0,3,(1,2));\",\n            \"(1,2,(0,3));\",\n            \"(1,3,(0,2));\",\n            \"(2,3,(0,1));\",\n            # 1 + 3\n            # partition of 3 = 1 + 1 + 1\n            \"(0,(1,2,3));\",\n            \"(1,(0,2,3));\",\n            \"(2,(0,1,3));\",\n            \"(3,(0,1,2));\",\n   
         # partition of 3 = 1 + 2\n            \"(0,(1,(2,3)));\",\n            \"(0,(2,(1,3)));\",\n            \"(0,(3,(1,2)));\",\n            \"(1,(0,(2,3)));\",\n            \"(1,(2,(0,3)));\",\n            \"(1,(3,(0,2)));\",\n            \"(2,(0,(1,3)));\",\n            \"(2,(1,(0,3)));\",\n            \"(2,(3,(0,1)));\",\n            \"(3,(0,(1,2)));\",\n            \"(3,(1,(0,2)));\",\n            \"(3,(2,(0,1)));\",\n            # 2 + 2\n            \"((0,1),(2,3));\",\n            \"((0,2),(1,3));\",\n            \"((0,3),(1,2));\",\n        ]\n        actual = [t.newick() for t in RankTree.all_labelled_trees(4)]\n        assert expected == actual\n\n    def test_generate_trees_roundtrip(self):\n        n = 5\n        all_rank_trees = RankTree.all_labelled_trees(n)\n        all_tsk_trees = tskit.all_trees(n)\n        for rank_tree, tsk_tree in zip(all_rank_trees, all_tsk_trees):\n            assert rank_tree == RankTree.from_tsk_tree(tsk_tree)\n\n    def test_generate_treeseq_roundtrip(self):\n        n = 5\n        span = 9\n        all_rank_trees = RankTree.all_labelled_trees(n)\n        all_tsk_trees = tskit.all_trees(n, span=span)\n        for rank_tree, tsk_tree in zip(all_rank_trees, all_tsk_trees):\n            ts1 = tsk_tree.tree_sequence\n            ts2 = rank_tree.to_tsk_tree(span=span).tree_sequence\n            assert ts1.tables.equals(ts2.tables, ignore_provenance=True)\n\n    def test_all_shapes_roundtrip(self):\n        n = 5\n        all_rank_tree_shapes = RankTree.all_unlabelled_trees(n)\n        all_tsk_tree_shapes = tskit.all_tree_shapes(n)\n        for rank_tree, tsk_tree in zip(all_rank_tree_shapes, all_tsk_tree_shapes):\n            assert rank_tree.shape_equal(RankTree.from_tsk_tree(tsk_tree))\n\n    def test_all_labellings_roundtrip(self):\n        n = 5\n        rank_tree = RankTree.unrank(n, (comb.num_shapes(n) - 1, 0))\n        tsk_tree = rank_tree.to_tsk_tree()\n        rank_tree_labellings = 
RankTree.all_labellings(rank_tree)\n        tsk_tree_labellings = tskit.all_tree_labellings(tsk_tree)\n        for rank_t, tsk_t in zip(rank_tree_labellings, tsk_tree_labellings):\n            assert rank_t == RankTree.from_tsk_tree(tsk_t)\n\n    @pytest.mark.parametrize(\"n\", range(6))\n    def test_unrank_labelled(self, n):\n        for shape_rank, t in enumerate(RankTree.all_unlabelled_trees(n)):\n            for label_rank, labelled_tree in enumerate(RankTree.all_labellings(t)):\n                unranked = RankTree.unrank(n, (shape_rank, label_rank))\n                assert labelled_tree == unranked\n\n    @pytest.mark.parametrize(\"n\", range(10))\n    def test_unrank_unlabelled(self, n):\n        for shape_rank in range(comb.num_shapes(n)):\n            rank = Rank(shape_rank, 0)\n            unranked = RankTree.unrank(n, rank)\n            assert rank == unranked.rank()\n\n            rank = (shape_rank, comb.num_labellings(n, shape_rank) - 1)\n            unranked = RankTree.unrank(n, rank)\n            assert rank == unranked.rank()\n\n    def test_unrank_errors(self):\n        self.verify_unrank_errors((-1, 0), 1)\n        self.verify_unrank_errors((0, -1), 1)\n        self.verify_unrank_errors((-1, 0), 2)\n        self.verify_unrank_errors((0, -1), 2)\n        self.verify_unrank_errors((-1, 0), 10)\n        self.verify_unrank_errors((0, -1), 10)\n\n        self.verify_unrank_errors((0, 1), 1)\n        self.verify_unrank_errors((1, 0), 2)\n        self.verify_unrank_errors((0, 1), 2)\n        self.verify_unrank_errors((2, 0), 3)\n        self.verify_unrank_errors((0, 1), 3)\n        self.verify_unrank_errors((1, 3), 3)\n\n        invalid_shape = (comb.num_shapes(10), 0)\n        self.verify_unrank_errors(invalid_shape, 10)\n        invalid_labelling = (0, comb.num_labellings(10, 0))\n        self.verify_unrank_errors(invalid_labelling, 10)\n\n    def verify_unrank_errors(self, rank, n):\n        with pytest.raises(ValueError):\n            RankTree.unrank(n, 
rank)\n        with pytest.raises(ValueError):\n            tskit.Tree.unrank(n, rank)\n\n    @pytest.mark.parametrize(\"n\", range(6))\n    def test_shape_rank(self, n):\n        for rank, tree in enumerate(RankTree.all_unlabelled_trees(n)):\n            assert tree.shape_rank() == rank\n\n    @pytest.mark.parametrize(\"n\", range(6))\n    def test_shape_unrank(self, n):\n        for rank, tree in enumerate(RankTree.all_unlabelled_trees(n)):\n            t = RankTree.shape_unrank(n, rank)\n            assert tree.shape_equal(t)\n\n    @pytest.mark.parametrize(\"n\", range(2, 9))\n    def test_shape_unrank_tsk_tree(self, n):\n        for shape_rank, tree in enumerate(RankTree.all_unlabelled_trees(n)):\n            tsk_tree = tskit.Tree.unrank(n, (shape_rank, 0))\n            assert shape_rank == tree.shape_rank()\n            shape_rank, _ = tsk_tree.rank()\n            assert shape_rank == tree.shape_rank()\n\n    @pytest.mark.parametrize(\"n\", range(7))\n    def test_label_rank(self, n):\n        for tree in RankTree.all_unlabelled_trees(n):\n            for rank, labelled_tree in enumerate(RankTree.all_labellings(tree)):\n                assert labelled_tree.label_rank() == rank\n\n    @pytest.mark.parametrize(\"n\", range(7))\n    def test_label_unrank(self, n):\n        for shape_rank, tree in enumerate(RankTree.all_unlabelled_trees(n)):\n            for label_rank, labelled_tree in enumerate(RankTree.all_labellings(tree)):\n                rank = (shape_rank, label_rank)\n                unranked = tree.label_unrank(label_rank)\n                assert labelled_tree.rank() == rank\n                assert unranked.rank() == rank\n\n    def test_rank_names(self):\n        shape = 1\n        label = 0\n        n = 3\n        tree = tskit.Tree.unrank(n, (shape, label))\n        rank = tree.rank()\n        assert rank.shape == shape\n        assert rank.label == label\n\n    @pytest.mark.parametrize(\"n\", range(6))\n    def test_unrank_rank_round_trip(self, n):\n 
       for shape_rank in range(comb.num_shapes(n)):\n            tree = RankTree.shape_unrank(n, shape_rank)\n            tree = tree.label_unrank(0)\n            assert tree.shape_rank() == shape_rank\n            for label_rank in range(tree.num_labellings()):\n                tree = tree.label_unrank(label_rank)\n                assert tree.label_rank() == label_rank\n                tsk_tree = tree.label_unrank(label_rank).to_tsk_tree()\n                _, tsk_label_rank = tsk_tree.rank()\n                assert tsk_label_rank == label_rank\n\n    def test_is_canonical(self):\n        for n in range(7):\n            for tree in RankTree.all_labelled_trees(n):\n                assert tree.is_canonical()\n\n        shape_not_canonical = RankTree(\n            children=[\n                RankTree(children=[], label=0),\n                RankTree(\n                    children=[\n                        RankTree(\n                            children=[\n                                RankTree(children=[], label=1),\n                                RankTree(children=[], label=2),\n                            ]\n                        ),\n                        RankTree(children=[], label=3),\n                    ]\n                ),\n            ]\n        )\n        assert not shape_not_canonical.is_canonical()\n\n        labels_not_canonical = RankTree(\n            children=[\n                RankTree(children=[], label=0),\n                RankTree(\n                    children=[\n                        RankTree(\n                            children=[\n                                RankTree(children=[], label=2),\n                                RankTree(children=[], label=3),\n                            ]\n                        ),\n                        RankTree(\n                            children=[\n                                RankTree(children=[], label=1),\n                                RankTree(children=[], label=4),\n                  
          ]\n                        ),\n                    ]\n                ),\n            ]\n        )\n        assert not labels_not_canonical.is_canonical()\n\n    @pytest.mark.parametrize(\"n\", range(7))\n    def test_unranking_is_canonical(self, n):\n        for shape_rank in range(comb.num_shapes(n)):\n            for label_rank in range(comb.num_labellings(n, shape_rank)):\n                t = RankTree.shape_unrank(n, shape_rank)\n                assert t.is_canonical()\n                t = t.label_unrank(label_rank)\n                assert t.is_canonical()\n                t = tskit.Tree.unrank(n, (shape_rank, label_rank))\n                assert RankTree.from_tsk_tree(t).is_canonical()\n\n    @pytest.mark.parametrize(\"n\", range(5))\n    def test_to_from_tsk_tree(self, n):\n        for tree in RankTree.all_labelled_trees(n):\n            assert tree.is_canonical()\n            tsk_tree = tree.to_tsk_tree()\n            reconstructed = RankTree.from_tsk_tree(tsk_tree)\n            assert tree.is_canonical()\n            assert tree == reconstructed\n\n    @pytest.mark.parametrize(\"n\", range(6))\n    def test_to_tsk_tree_internal_nodes(self, n):\n        branch_length = 1234\n        for tree in RankTree.all_labelled_trees(n):\n            tsk_tree = tree.to_tsk_tree(branch_length=branch_length)\n            internal_nodes = [\n                u for u in tsk_tree.nodes(order=\"postorder\") if tsk_tree.is_internal(u)\n            ]\n            assert np.all(internal_nodes == n + np.arange(len(internal_nodes)))\n            for u in tsk_tree.nodes():\n                if tsk_tree.is_internal(u):\n                    max_child_time = max(tsk_tree.time(v) for v in tsk_tree.children(u))\n                    assert tsk_tree.time(u) == max_child_time + branch_length\n                else:\n                    assert tsk_tree.time(u) == 0\n\n    def test_from_unary_tree(self):\n        tables = tskit.TableCollection(sequence_length=1)\n        c = 
tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        p = tables.nodes.add_row(time=1)\n        tables.edges.add_row(left=0, right=1, parent=p, child=c)\n\n        t = tables.tree_sequence().first()\n        with pytest.raises(ValueError):\n            RankTree.from_tsk_tree(t)\n\n    def test_to_tsk_tree_errors(self):\n        alpha_tree = RankTree.unrank(3, (0, 0), [\"A\", \"B\", \"C\"])\n        out_of_bounds_tree = RankTree.unrank(3, (0, 0), [2, 3, 4])\n        with pytest.raises(ValueError):\n            alpha_tree.to_tsk_tree()\n        with pytest.raises(ValueError):\n            out_of_bounds_tree.to_tsk_tree()\n\n    def test_rank_errors_multiple_roots(self):\n        tables = tskit.TableCollection(sequence_length=1.0)\n\n        # Nodes\n        sv = [True, True]\n        tv = [0.0, 0.0]\n\n        for is_sample, t in zip(sv, tv):\n            flags = tskit.NODE_IS_SAMPLE if is_sample else 0\n            tables.nodes.add_row(flags=flags, time=t)\n\n        ts = tables.tree_sequence()\n        with pytest.raises(ValueError):\n            ts.first().rank()\n\n    def test_span(self):\n        n = 5\n        span = 8\n        # Create a start tree, with a single root\n        tsk_tree = tskit.Tree.unrank(n, (0, 0), span=span)\n        assert tsk_tree.tree_sequence.num_nodes == n + 1\n        assert tsk_tree.interval.left == 0\n        assert tsk_tree.interval.right == span\n        assert tsk_tree.tree_sequence.sequence_length == span\n\n    def test_big_trees(self):\n        n = 14\n        shape = 22\n        labelling = 0\n        tree = RankTree.unrank(n, (shape, labelling))\n        tsk_tree = tskit.Tree.unrank(n, (shape, labelling))\n        assert tree.rank() == tsk_tree.rank()\n\n        n = 10\n        shape = 95\n        labelling = comb.num_labellings(n, shape) // 2\n        tree = RankTree.unrank(n, (shape, labelling))\n        tsk_tree = tskit.Tree.unrank(n, (shape, labelling))\n        assert tree.rank() == tsk_tree.rank()\n\n    
def test_symmetrical_trees(self):\n        for n in range(2, 18, 2):\n            last_rank = comb.num_shapes(n) - 1\n            t = RankTree.shape_unrank(n, last_rank)\n            assert t.is_symmetrical()\n\n    def test_equal(self):\n        unlabelled_leaf = RankTree(children=[])\n        assert unlabelled_leaf == unlabelled_leaf\n        assert unlabelled_leaf.shape_equal(unlabelled_leaf)\n\n        leaf_zero = RankTree(children=[], label=0)\n        leaf_one = RankTree(children=[], label=1)\n        leaf_two = RankTree(children=[], label=2)\n        assert leaf_zero == leaf_zero\n        assert leaf_zero != leaf_one\n        assert leaf_zero.shape_equal(leaf_one)\n\n        tree1 = RankTree(children=[leaf_zero, leaf_one])\n        assert tree1 == tree1\n        assert tree1 != unlabelled_leaf\n        assert not tree1.shape_equal(unlabelled_leaf)\n\n        tree2 = RankTree(children=[leaf_two, leaf_one])\n        assert tree1 != tree2\n        assert tree1.shape_equal(tree2)\n\n    def test_is_symmetrical(self):\n        unlabelled_leaf = RankTree(children=[])\n        assert unlabelled_leaf.is_symmetrical()\n        three_leaf_asym = RankTree(\n            children=[\n                unlabelled_leaf,\n                RankTree(children=[unlabelled_leaf, unlabelled_leaf]),\n            ]\n        )\n        assert not three_leaf_asym.is_symmetrical()\n        six_leaf_sym = RankTree(children=[three_leaf_asym, three_leaf_asym])\n        assert six_leaf_sym.is_symmetrical()\n\n\nclass TestPartialTopologyCounter:\n    def test_add_sibling_topologies_simple(self):\n        a = RankTree(children=[], label=\"A\")\n        b = RankTree(children=[], label=\"B\")\n        ab = RankTree(children=[a, b])\n\n        a_counter = comb.TopologyCounter()\n        a_counter[\"A\"][a.rank()] = 1\n        assert a_counter == comb.TopologyCounter.from_sample(\"A\")\n\n        b_counter = comb.TopologyCounter()\n        b_counter[\"B\"][b.rank()] = 1\n        assert b_counter == 
comb.TopologyCounter.from_sample(\"B\")\n\n        partial_counter = comb.PartialTopologyCounter()\n        partial_counter.add_sibling_topologies(a_counter)\n        partial_counter.add_sibling_topologies(b_counter)\n\n        expected = comb.TopologyCounter()\n        expected[\"A\"][a.rank()] = 1\n        expected[\"B\"][b.rank()] = 1\n        expected[\"A\", \"B\"][ab.rank()] = 1\n        joined_counter = partial_counter.join_all_combinations()\n        assert joined_counter == expected\n\n    def test_add_sibling_topologies_polytomy(self):\n        \"\"\"\n        Goes through the topology-merging step at the root\n        of this tree:\n                    |\n                    |\n            +----+-----+----+\n            |    |     |    |\n            |    |     |    |\n            |    |     |  +---+\n            |    |     |  |   |\n            |    |     |  |   |\n            A    A     B  A   C\n        \"\"\"\n        partial_counter = comb.PartialTopologyCounter()\n        a = RankTree(children=[], label=\"A\")\n        c = RankTree(children=[], label=\"C\")\n        ac = RankTree(children=[a, c])\n\n        expected = collections.defaultdict(collections.Counter)\n\n        a_counter = comb.TopologyCounter.from_sample(\"A\")\n        b_counter = comb.TopologyCounter.from_sample(\"B\")\n        ac_counter = comb.TopologyCounter()\n        ac_counter[\"A\"][a.rank()] = 1\n        ac_counter[\"C\"][c.rank()] = 1\n        ac_counter[\"A\", \"C\"][ac.rank()] = 1\n\n        partial_counter.add_sibling_topologies(a_counter)\n        expected[(\"A\",)] = collections.Counter({(((\"A\",), (0, 0)),): 1})\n        assert partial_counter.partials == expected\n\n        partial_counter.add_sibling_topologies(a_counter)\n        expected[(\"A\",)][(((\"A\",), (0, 0)),)] += 1\n        assert partial_counter.partials == expected\n\n        partial_counter.add_sibling_topologies(b_counter)\n        expected[(\"B\",)][(((\"B\",), (0, 0)),)] = 1\n        
expected[(\"A\", \"B\")][(((\"A\",), (0, 0)), ((\"B\",), (0, 0)))] = 2\n        assert partial_counter.partials == expected\n\n        partial_counter.add_sibling_topologies(ac_counter)\n        expected[(\"A\",)][(((\"A\",), (0, 0)),)] += 1\n        expected[(\"C\",)][(((\"C\",), (0, 0)),)] = 1\n        expected[(\"A\", \"B\")][(((\"A\",), (0, 0)), ((\"B\",), (0, 0)))] += 1\n        expected[(\"A\", \"C\")][(((\"A\",), (0, 0)), ((\"C\",), (0, 0)))] = 2\n        expected[(\"A\", \"C\")][(((\"A\", \"C\"), (0, 0)),)] = 1\n        expected[(\"B\", \"C\")][(((\"B\",), (0, 0)), ((\"C\",), (0, 0)))] = 1\n        expected[(\"A\", \"B\", \"C\")][\n            (((\"A\",), (0, 0)), ((\"B\",), (0, 0)), ((\"C\",), (0, 0)))\n        ] = 2\n        expected[(\"A\", \"B\", \"C\")][(((\"A\", \"C\"), (0, 0)), ((\"B\",), (0, 0)))] = 1\n        assert partial_counter.partials == expected\n\n        expected_topologies = comb.TopologyCounter()\n        expected_topologies[\"A\"][(0, 0)] = 3\n        expected_topologies[\"B\"][(0, 0)] = 1\n        expected_topologies[\"C\"][(0, 0)] = 1\n        expected_topologies[\"A\", \"B\"][(0, 0)] = 3\n        expected_topologies[\"A\", \"C\"][(0, 0)] = 3\n        expected_topologies[\"B\", \"C\"][(0, 0)] = 1\n        expected_topologies[\"A\", \"B\", \"C\"][(0, 0)] = 2\n        expected_topologies[\"A\", \"B\", \"C\"][(1, 1)] = 1\n        joined_topologies = partial_counter.join_all_combinations()\n        assert joined_topologies == expected_topologies\n\n    def test_join_topologies(self):\n        a = RankTree(children=[], label=\"A\")\n        b = RankTree(children=[], label=\"B\")\n        c = RankTree(children=[], label=\"C\")\n        a_tuple = ((\"A\"), a.rank())\n        b_tuple = ((\"B\"), b.rank())\n        c_tuple = ((\"C\"), c.rank())\n        ab_tuple = ((\"A\", \"B\"), RankTree(children=[a, b]).rank())\n        ac_tuple = ((\"A\", \"C\"), RankTree(children=[a, c]).rank())\n        bc_tuple = ((\"B\", \"C\"), RankTree(children=[b, 
c]).rank())\n\n        self.verify_join_topologies((a_tuple, b_tuple), (0, 0))\n        self.verify_join_topologies((b_tuple, a_tuple), (0, 0))\n        self.verify_join_topologies((b_tuple, c_tuple), (0, 0))\n\n        self.verify_join_topologies((a_tuple, b_tuple, c_tuple), (0, 0))\n        self.verify_join_topologies((a_tuple, bc_tuple), (1, 0))\n        self.verify_join_topologies((b_tuple, ac_tuple), (1, 1))\n        self.verify_join_topologies((c_tuple, ab_tuple), (1, 2))\n\n    def verify_join_topologies(self, topologies, expected_topology):\n        actual_topology = comb.PartialTopologyCounter.join_topologies(topologies)\n        assert actual_topology == expected_topology\n\n\nclass TestCountTopologies:\n    def verify_topologies(self, ts, sample_sets=None, expected=None):\n        if sample_sets is None:\n            sample_sets = [ts.samples(population=pop.id) for pop in ts.populations()]\n        topologies = [t.count_topologies(sample_sets) for t in ts.trees()]\n        inc_topologies = list(ts.count_topologies(sample_sets))\n        # count_topologies calculates the embedded topologies for every\n        # combination of populations, so we need to check the results\n        # of subsampling for every combination.\n        for num_sample_sets in range(1, len(sample_sets) + 1):\n            for i, t in enumerate(ts.trees()):\n                just_t = ts.keep_intervals([t.interval], simplify=False)\n                for sample_set_indexes in itertools.combinations(\n                    range(len(sample_sets)), num_sample_sets\n                ):\n                    actual_topologies = topologies[i][sample_set_indexes]\n                    actual_inc_topologies = inc_topologies[i][sample_set_indexes]\n                    if len(t.roots) == 1:\n                        subsampled_topologies = self.subsample_topologies(\n                            just_t, sample_sets, sample_set_indexes\n                        )\n                        assert 
actual_topologies == subsampled_topologies\n                    if expected is not None:\n                        assert actual_topologies == expected[i][sample_set_indexes]\n                    assert actual_topologies == actual_inc_topologies\n\n    def test_no_iterate(self):\n        with pytest.raises(TypeError, match=\"not iterable\"):\n            for _ in tskit.Tree.generate_star(3).count_topologies():\n                pass\n\n    def subsample_topologies(self, ts, sample_sets, sample_set_indexes):\n        subsample_sets = [sample_sets[i] for i in sample_set_indexes]\n        topologies = collections.Counter()\n        for subsample in itertools.product(*subsample_sets):\n            for pop_tree in ts.simplify(samples=subsample).trees():\n                # regions before and after keep interval have all samples as roots\n                # so don't count those\n                # The single tree of interest should have one root\n                if len(pop_tree.roots) == 1:\n                    topologies[pop_tree.rank()] += 1\n        return topologies\n\n    def test_single_population(self):\n        n = 10\n        ts = msprime.simulate(n, recombination_rate=10)\n        expected = comb.TopologyCounter()\n        expected[0] = collections.Counter({(0, 0): n})\n        self.verify_topologies(ts, expected=[expected] * ts.num_trees)\n\n    def test_three_populations(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time    population  individual  metadata\n        0   1   0.000000    0   -1\n        1   1   0.000000    1   -1\n        2   1   0.000000    1   -1\n        3   1   0.000000    2   -1\n        4   1   0.000000    2   -1\n        5   1   0.000000    0   -1\n        6   0   1.000000    0   -1\n        7   0   2.000000    0   -1\n        8   0   2.000000    0   -1\n        9   0   3.000000    0   -1\n        10  0   4.000000    0   -1\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n    
    left    right   parent  child\n        0.000000    1.000000    6  4\n        0.000000    1.000000    6  5\n        0.000000    1.000000    7  1\n        0.000000    1.000000    7  2\n        0.000000    1.000000    8  3\n        0.000000    1.000000    8  6\n        0.000000    1.000000    9  7\n        0.000000    1.000000    9  8\n        0.000000    1.000000    10  0\n        0.000000    1.000000    10  9\n        \"\"\"\n        )\n        ts = tskit.load_text(\n            nodes, edges, sequence_length=1, strict=False, base64_metadata=False\n        )\n\n        expected = comb.TopologyCounter()\n        expected[0] = collections.Counter({(0, 0): 2})\n        expected[1] = collections.Counter({(0, 0): 2})\n        expected[2] = collections.Counter({(0, 0): 2})\n        expected[0, 1] = collections.Counter({(0, 0): 4})\n        expected[0, 2] = collections.Counter({(0, 0): 4})\n        expected[1, 2] = collections.Counter({(0, 0): 4})\n        expected[0, 1, 2] = collections.Counter({(1, 0): 4, (1, 1): 4})\n        self.verify_topologies(ts, expected=[expected])\n\n    def test_multiple_roots(self):\n        tables = tskit.TableCollection(sequence_length=1.0)\n        tables.populations.add_row()\n        tables.populations.add_row()\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, population=0)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, population=1)\n\n        # Not samples so they are ignored\n        tables.nodes.add_row(time=1)\n        tables.nodes.add_row(time=1, population=1)\n\n        expected = comb.TopologyCounter()\n        expected[0] = collections.Counter({(0, 0): 1})\n        expected[1] = collections.Counter({(0, 0): 1})\n        self.verify_topologies(tables.tree_sequence(), expected=[expected])\n\n    def test_no_sample_subtrees(self):\n        tables = tskit.TableCollection(sequence_length=1.0)\n        c1 = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        c2 = 
tables.nodes.add_row(time=0)\n        c3 = tables.nodes.add_row(time=0)\n        p1 = tables.nodes.add_row(time=1)\n        p2 = tables.nodes.add_row(time=1)\n\n        tables.edges.add_row(left=0, right=1, parent=p1, child=c2)\n        tables.edges.add_row(left=0, right=1, parent=p1, child=c3)\n        tables.edges.add_row(left=0, right=1, parent=p2, child=c1)\n\n        expected = comb.TopologyCounter()\n        expected[0] = collections.Counter({(0, 0): 1})\n        self.verify_topologies(tables.tree_sequence(), expected=[expected])\n\n    def test_no_full_topology(self):\n        tables = tskit.TableCollection(sequence_length=1.0)\n        tables.populations.add_row()\n        tables.populations.add_row()\n        tables.populations.add_row()\n        child1 = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, population=0)\n        child2 = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, population=1)\n        parent = tables.nodes.add_row(time=1)\n        tables.edges.add_row(left=0, right=1, parent=parent, child=child1)\n        tables.edges.add_row(left=0, right=1, parent=parent, child=child2)\n\n        # Left as root so there is no topology with all three populations\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, population=2)\n\n        expected = comb.TopologyCounter()\n        for pop_combo in [(0,), (1,), (2,), (0, 1)]:\n            expected[pop_combo] = collections.Counter({(0, 0): 1})\n        self.verify_topologies(tables.tree_sequence(), expected=[expected])\n\n    def test_polytomies(self):\n        tables = tskit.TableCollection(sequence_length=1.0)\n        tables.populations.add_row()\n        tables.populations.add_row()\n        tables.populations.add_row()\n        c1 = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, population=0)\n        c2 = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, population=1)\n        c3 = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, 
population=2)\n        p = tables.nodes.add_row(time=1)\n        tables.edges.add_row(left=0, right=1, parent=p, child=c1)\n        tables.edges.add_row(left=0, right=1, parent=p, child=c2)\n        tables.edges.add_row(left=0, right=1, parent=p, child=c3)\n\n        expected = comb.TopologyCounter()\n        for pop_combos in [0, 1, 2, (0, 1), (0, 2), (1, 2), (0, 1, 2)]:\n            expected[pop_combos] = collections.Counter({(0, 0): 1})\n        self.verify_topologies(tables.tree_sequence(), expected=[expected])\n\n    def test_custom_key(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time    population  individual  metadata\n        0   1   0.000000    0   -1\n        1   1   0.000000    0   -1\n        2   1   0.000000    0   -1\n        3   1   0.000000    0   -1\n        4   1   0.000000    0   -1\n        5   0   1.000000    0   -1\n        6   0   1.000000    0   -1\n        7   0   2.000000    0   -1\n        8   0   3.000000    0   -1\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.000000    1.000000    5  0\n        0.000000    1.000000    5  1\n        0.000000    1.000000    6  2\n        0.000000    1.000000    6  3\n        0.000000    1.000000    7  5\n        0.000000    1.000000    7  6\n        0.000000    1.000000    8  4\n        0.000000    1.000000    8  7\n        \"\"\"\n        )\n        ts = tskit.load_text(\n            nodes, edges, sequence_length=1, strict=False, base64_metadata=False\n        )\n\n        sample_sets = [[0, 1], [2, 3], [4]]\n\n        expected = comb.TopologyCounter()\n        expected[0] = collections.Counter({(0, 0): 2})\n        expected[1] = collections.Counter({(0, 0): 2})\n        expected[2] = collections.Counter({(0, 0): 1})\n        expected[0, 1] = collections.Counter({(0, 0): 4})\n        expected[0, 2] = collections.Counter({(0, 0): 2})\n        expected[1, 2] = collections.Counter({(0, 
0): 2})\n        expected[0, 1, 2] = collections.Counter({(1, 2): 4})\n\n        tree_topologies = ts.first().count_topologies(sample_sets)\n        treeseq_topologies = list(ts.count_topologies(sample_sets))\n        assert tree_topologies == expected\n        assert treeseq_topologies == [expected]\n\n    def test_ignores_non_sample_leaves(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time    population  individual  metadata\n        0   1   0.000000    0   -1\n        1   0   0.000000    0   -1\n        2   1   0.000000    0   -1\n        3   0   0.000000    0   -1\n        4   1   0.000000    0   -1\n        5   0   1.000000    0   -1\n        6   0   1.000000    0   -1\n        7   0   2.000000    0   -1\n        8   0   3.000000    0   -1\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.000000    1.000000    5  0\n        0.000000    1.000000    5  1\n        0.000000    1.000000    6  2\n        0.000000    1.000000    6  3\n        0.000000    1.000000    7  5\n        0.000000    1.000000    7  6\n        0.000000    1.000000    8  4\n        0.000000    1.000000    8  7\n        \"\"\"\n        )\n        ts = tskit.load_text(\n            nodes, edges, sequence_length=1, strict=False, base64_metadata=False\n        )\n\n        sample_sets = [[0], [2], [4]]\n\n        expected = comb.TopologyCounter()\n        expected[0] = collections.Counter({(0, 0): 1})\n        expected[1] = collections.Counter({(0, 0): 1})\n        expected[2] = collections.Counter({(0, 0): 1})\n        expected[0, 1] = collections.Counter({(0, 0): 1})\n        expected[0, 2] = collections.Counter({(0, 0): 1})\n        expected[1, 2] = collections.Counter({(0, 0): 1})\n        expected[0, 1, 2] = collections.Counter({(1, 2): 1})\n\n        tree_topologies = ts.first().count_topologies(sample_sets)\n        treeseq_topologies = list(ts.count_topologies(sample_sets))\n 
       assert tree_topologies == expected\n        assert treeseq_topologies == [expected]\n\n    def test_internal_samples_errors(self):\n        tables = tskit.TableCollection(sequence_length=1.0)\n\n        c1 = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        c2 = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        p = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=1)\n\n        tables.edges.add_row(left=0, right=1, parent=p, child=c1)\n        tables.edges.add_row(left=0, right=1, parent=p, child=c2)\n\n        self.verify_value_error(tables.tree_sequence())\n\n    def test_non_sample_nodes_errors(self):\n        tables = tskit.TableCollection(sequence_length=1.0)\n\n        c1 = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        c2 = tables.nodes.add_row(time=0)\n        p = tables.nodes.add_row(time=1)\n\n        tables.edges.add_row(left=0, right=1, parent=p, child=c1)\n        tables.edges.add_row(left=0, right=1, parent=p, child=c2)\n\n        sample_sets = [[0], [1]]\n        self.verify_value_error(tables.tree_sequence(), sample_sets)\n\n        sample_sets = [[0], [tables.nodes.num_rows]]\n        self.verify_node_out_of_bounds_error(tables.tree_sequence(), sample_sets)\n\n    def verify_value_error(self, ts, sample_sets=None):\n        with pytest.raises(ValueError):\n            ts.first().count_topologies(sample_sets)\n        with pytest.raises(ValueError):\n            list(ts.count_topologies(sample_sets))\n\n    def verify_node_out_of_bounds_error(self, ts, sample_sets=None):\n        with pytest.raises(ValueError):\n            ts.first().count_topologies(sample_sets)\n        with pytest.raises(IndexError):\n            list(ts.count_topologies(sample_sets))\n\n    def test_standard_msprime_migrations(self):\n        for num_populations in range(2, 5):\n            samples = [5] * num_populations\n            ts = self.simulate_multiple_populations(samples)\n            
self.verify_topologies(ts)\n\n    def simulate_multiple_populations(self, sample_sizes):\n        d = len(sample_sizes)\n        M = 0.2\n        m = M / (2 * (d - 1))\n\n        migration_matrix = [\n            [m if k < d and k == i + 1 else 0 for k in range(d)] for i in range(d)\n        ]\n\n        pop_configurations = [\n            msprime.PopulationConfiguration(sample_size=size) for size in sample_sizes\n        ]\n        return msprime.simulate(\n            population_configurations=pop_configurations,\n            migration_matrix=migration_matrix,\n            recombination_rate=0.1,\n        )\n\n    def test_msprime_dtwf(self):\n        migration_matrix = np.zeros((4, 4))\n        population_configurations = [\n            msprime.PopulationConfiguration(\n                sample_size=10, initial_size=10, growth_rate=0\n            ),\n            msprime.PopulationConfiguration(\n                sample_size=10, initial_size=10, growth_rate=0\n            ),\n            msprime.PopulationConfiguration(\n                sample_size=10, initial_size=10, growth_rate=0\n            ),\n            msprime.PopulationConfiguration(\n                sample_size=0, initial_size=10, growth_rate=0\n            ),\n        ]\n        demographic_events = [\n            msprime.PopulationParametersChange(population=1, time=0.1, initial_size=5),\n            msprime.PopulationParametersChange(population=0, time=0.2, initial_size=5),\n            msprime.MassMigration(time=1.1, source=0, dest=2),\n            msprime.MassMigration(time=1.2, source=1, dest=3),\n            msprime.MigrationRateChange(time=2.1, rate=0.3, matrix_index=(2, 3)),\n            msprime.MigrationRateChange(time=2.2, rate=0.3, matrix_index=(3, 2)),\n        ]\n        ts = msprime.simulate(\n            migration_matrix=migration_matrix,\n            population_configurations=population_configurations,\n            demographic_events=demographic_events,\n            random_seed=2,\n       
     model=\"dtwf\",\n        )\n\n        self.verify_topologies(ts)\n\n    def test_forward_time_wright_fisher_unsimplified_all_sample_sets(self):\n        tables = wf.wf_sim(\n            4,\n            5,\n            seed=1,\n            deep_history=False,\n            initial_generation_samples=False,\n            num_loci=10,\n        )\n        tables.sort()\n        ts = tables.tree_sequence()\n        for S in test_stats.set_partitions(list(ts.samples())):\n            self.verify_topologies(ts, sample_sets=S)\n\n    def test_forward_time_wright_fisher_unsimplified(self):\n        tables = wf.wf_sim(\n            20,\n            15,\n            seed=1,\n            deep_history=False,\n            initial_generation_samples=False,\n            num_loci=20,\n        )\n        tables.sort()\n        ts = tables.tree_sequence()\n        samples = ts.samples()\n        self.verify_topologies(ts, sample_sets=[samples[:10], samples[10:]])\n\n    def test_forward_time_wright_fisher_simplified(self):\n        tables = wf.wf_sim(\n            30,\n            10,\n            seed=1,\n            deep_history=False,\n            initial_generation_samples=False,\n            num_loci=5,\n        )\n        tables.sort()\n        ts = tables.tree_sequence()\n        samples = ts.samples()\n        self.verify_topologies(ts, sample_sets=[samples[:10], samples[10:]])\n\n\nclass TestTreeNode:\n    \"\"\"\n    Tests for the TreeNode class used to build simple trees in memory.\n    \"\"\"\n\n    def verify_tree(self, root, labels):\n        # Note this doesn't check any statistical properties of the returned\n        # trees, just that a single instance returned is a valid binary tree.\n        # Structural properties are best verified using the tskit API, and so\n        # we test these properties elsewhere.\n        stack = [root]\n        num_nodes = 0\n        recovered_labels = []\n        while len(stack) > 0:\n            node = stack.pop()\n            
num_nodes += 1\n            if node.label is not None:\n                assert len(node.children) == 0\n                recovered_labels.append(node.label)\n            for child in node.children:\n                assert child.parent == node\n                stack.append(child)\n        assert sorted(recovered_labels) == list(labels)\n\n    @pytest.mark.parametrize(\"n\", range(1, 16))\n    def test_random_binary_tree(self, n):\n        rng = random.Random(32)\n        labels = range(n)\n        root = comb.TreeNode.random_binary_tree(labels, rng)\n        self.verify_tree(root, range(n))\n\n    @pytest.mark.parametrize(\"n\", range(1, 16))\n    def test_balanced_binary(self, n):\n        root = comb.TreeNode.balanced_tree(range(n), 2)\n        self.verify_tree(root, range(n))\n\n    @pytest.mark.parametrize(\"arity\", range(2, 8))\n    def test_balanced_arity(self, arity):\n        labels = range(30)\n        root = comb.TreeNode.balanced_tree(labels, arity)\n        self.verify_tree(root, labels)\n\n\ndef num_leaf_labelled_binary_trees(n):\n    \"\"\"\n    Returns the number of leaf labelled binary trees with n leaves.\n\n    TODO: this would probably be helpful to have in the combinatorics\n    module.\n\n    https://oeis.org/A005373/\n    \"\"\"\n    return int(math.factorial(2 * n - 3) / (2 ** (n - 2) * math.factorial(n - 2)))\n\n\nclass TestPolytomySplitting:\n    \"\"\"\n    Test the ability to randomly split polytomies\n    \"\"\"\n\n    # A complex ts with polytomies\n    #\n    # 1.00┊    6      ┊      6    ┊       6   ┊           ┊      6    ┊\n    #     ┊ ┏━┳┻┳━┓   ┊   ┏━┳┻┳━┓ ┊    ┏━━╋━┓ ┊           ┊   ┏━┳┻┳━┓ ┊\n    # 0.50┊ 5 ┃ ┃ ┃   ┊   5 ┃ ┃ ┃ ┊    5  ┃ ┃ ┊      5    ┊   ┃ ┃ ┃ ┃ ┊\n    #     ┊ ┃ ┃ ┃ ┃ . ┊   ┃ ┃ ┃ ┃ ┊ . ┏┻┓ ┃ ┃ ┊ . ┏━┳┻┳━┓ ┊ . 
┃ ┃ ┃ ┃ ┊\n    # 0.00┊ 0 2 3 4 1 ┊ 0 1 2 3 4 ┊ 0 1 2 3 4 ┊ 0 1 2 3 4 ┊ 0 1 2 3 4 ┊\n    #   0.00        0.20        0.40        0.60        0.80        1.00\n    nodes_polytomy_44344 = \"\"\"\\\n    id      is_sample   population      time\n    0       1           0               0.0\n    1       1           0               0.0\n    2       1           0               0.0\n    3       1           0               0.0\n    4       1           0               0.0\n    5       0           0               0.5\n    6       0           0               1.0\n    \"\"\"\n    edges_polytomy_44344 = \"\"\"\\\n    id      left     right    parent  child\n    0       0.0      0.2      5       0\n    1       0.0      0.8      5       1\n    2       0.0      0.4      6       2\n    3       0.4      0.8      5       2\n    4       0.0      0.6      6       3,4\n    5       0.0      0.6      6       5\n    6       0.6      0.8      5       3,4\n    7       0.8      1.0      6       1,2,3,4\n    \"\"\"\n\n    def ts_polytomy_44344(self):\n        return tskit.load_text(\n            nodes=io.StringIO(self.nodes_polytomy_44344),\n            edges=io.StringIO(self.edges_polytomy_44344),\n            strict=False,\n        )\n\n    def verify_trees(self, source_tree, split_tree, epsilon=None):\n        N = 0\n        for u in split_tree.nodes():\n            assert split_tree.num_children(u) < 3\n            N += 1\n            if u >= source_tree.tree_sequence.num_nodes:\n                # This is a new node\n                branch_length = split_tree.branch_length(u)\n                if epsilon is not None:\n                    assert epsilon == pytest.approx(branch_length)\n                else:\n                    assert branch_length > 0\n                    assert 0 == pytest.approx(branch_length)\n\n        assert N == len(list(split_tree.leaves())) * 2 - 1\n        for u in source_tree.nodes():\n            if source_tree.num_children(u) <= 2:\n                assert 
source_tree.children(u) == split_tree.children(u)\n            else:\n                assert len(split_tree.children(u)) == 2\n\n    @pytest.mark.parametrize(\"n\", [2, 3, 4, 5, 6])\n    def test_resolve_star(self, n):\n        tree = tskit.Tree.generate_star(n)\n        self.verify_trees(tree, tree.split_polytomies(random_seed=12))\n\n    def test_large_epsilon(self):\n        tree = tskit.Tree.generate_star(10, branch_length=100)\n        eps = 10\n        split = tree.split_polytomies(random_seed=12234, epsilon=eps)\n        self.verify_trees(tree, split, epsilon=eps)\n\n    def test_small_epsilon(self):\n        tree = tskit.Tree.generate_star(10, branch_length=1e-20)\n        eps = 1e-22\n        split = tree.split_polytomies(random_seed=12234, epsilon=eps)\n        self.verify_trees(tree, split, epsilon=eps)\n\n    def test_nextafter_near_zero(self):\n        tree = tskit.Tree.generate_star(3, branch_length=np.finfo(float).tiny)\n        split = tree.split_polytomies(random_seed=234)\n        self.verify_trees(tree, split)\n\n    def test_nextafter_large_tree(self):\n        tree = tskit.Tree.generate_star(100)\n        split = tree.split_polytomies(random_seed=32)\n        self.verify_trees(tree, split)\n        for u in tree.nodes():\n            if tree.parent(u) != tskit.NULL and not tree.is_leaf(u):\n                parent_time = tree.time(tree.parent(u))\n                child_time = tree.time(u)\n                assert child_time == np.nextafter(parent_time, 0)\n            if tree.is_leaf(u):\n                assert tree.branch_length(u) == pytest.approx(1)\n\n    def test_epsilon_near_one(self):\n        tree = tskit.Tree.generate_star(3, branch_length=1)\n        split = tree.split_polytomies(random_seed=234, epsilon=np.finfo(float).eps)\n        self.verify_trees(tree, split)\n\n    def verify_tree_sequence_splits(self, ts):\n        n_poly = 0\n        for e in ts.edgesets():\n            if len(e.children) > 2:\n                n_poly += 1\n      
  assert n_poly > 3\n        assert ts.num_trees > 3\n        for tree in ts.trees():\n            binary_tree = tree.split_polytomies(random_seed=11)\n            assert binary_tree.interval == tree.interval\n            for u in binary_tree.nodes():\n                assert binary_tree.num_children(u) < 3\n            for u in tree.nodes():\n                assert binary_tree.time(u) == tree.time(u)\n            resolved_ts = binary_tree.tree_sequence\n            assert resolved_ts.sequence_length == ts.sequence_length\n            assert resolved_ts.num_trees <= 3\n            if tree.interval.left == 0:\n                assert resolved_ts.num_trees == 2\n                null_tree = resolved_ts.last()\n                assert null_tree.num_roots == ts.num_samples\n            elif tree.interval.right == ts.sequence_length:\n                assert resolved_ts.num_trees == 2\n                null_tree = resolved_ts.first()\n                assert null_tree.num_roots == ts.num_samples\n            else:\n                null_tree = resolved_ts.first()\n                assert null_tree.num_roots == ts.num_samples\n                null_tree.next()\n                assert null_tree.num_roots == tree.num_roots\n                null_tree.next()\n                assert null_tree.num_roots == ts.num_samples\n\n    def test_complex_examples(self):\n        self.verify_tree_sequence_splits(self.ts_polytomy_44344())\n\n    def test_nonbinary_simulation(self):\n        demographic_events = [\n            msprime.SimpleBottleneck(time=1.0, population=0, proportion=0.95)\n        ]\n        ts = msprime.simulate(\n            20,\n            recombination_rate=10,\n            mutation_rate=5,\n            demographic_events=demographic_events,\n            random_seed=7,\n        )\n        self.verify_tree_sequence_splits(ts)\n\n    def test_seeds(self):\n        base = tskit.Tree.generate_star(5)\n        t1 = base.split_polytomies(random_seed=1234)\n        t2 = 
base.split_polytomies(random_seed=1234)\n        assert t1.tree_sequence.tables.equals(\n            t2.tree_sequence.tables, ignore_timestamps=True\n        )\n        t2 = base.split_polytomies(random_seed=1)\n        assert not t1.tree_sequence.tables.equals(\n            t2.tree_sequence.tables, ignore_provenance=True\n        )\n\n    def test_internal_polytomy(self):\n        #       9\n        # ┏━┳━━━┻┳━━━━┓\n        # ┃ ┃    8    ┃\n        # ┃ ┃ ┏━━╋━━┓ ┃\n        # ┃ ┃ ┃  7  ┃ ┃\n        # ┃ ┃ ┃ ┏┻┓ ┃ ┃\n        # 0 1 2 3 5 4 6\n        t1 = tskit.Tree.unrank(7, (6, 25))\n        t2 = t1.split_polytomies(random_seed=1234)\n        assert t2.parent(3) == 7\n        assert t2.parent(5) == 7\n        assert t2.root == 9\n        for u in t2.nodes():\n            assert t2.num_children(u) in [0, 2]\n\n    def test_binary_tree(self):\n        t1 = msprime.simulate(10, random_seed=1234).first()\n        t2 = t1.split_polytomies(random_seed=1234)\n        tables = t1.tree_sequence.dump_tables()\n        tables.assert_equals(t2.tree_sequence.tables, ignore_provenance=True)\n\n    def test_bad_method(self):\n        tree = tskit.Tree.generate_star(3)\n        with pytest.raises(ValueError, match=\"Method\"):\n            tree.split_polytomies(method=\"something_else\")\n\n    @pytest.mark.parametrize(\"epsilon\", [10, 1.1, 1.0])\n    def test_epsilon_too_large(self, epsilon):\n        tree = tskit.Tree.generate_star(3)\n        msg = (\n            \"Cannot resolve the degree 3 polytomy rooted at node 3 \"\n            \"with minimum time difference of 1.0 to the resolved leaves. 
\"\n            f\"The fixed epsilon value of {epsilon} is too large, resulting in the \"\n            \"parent time being less than the child time.\"\n        )\n        with pytest.raises(\n            tskit.LibraryError,\n            match=msg,\n        ):\n            tree.split_polytomies(epsilon=epsilon, random_seed=12)\n\n    def test_epsilon_too_small(self):\n        tree = tskit.Tree.generate_star(3)\n        msg = (\n            \"Cannot resolve the degree 3 polytomy rooted at node 3 \"\n            \"with minimum time difference of 1.0 to the resolved leaves. \"\n            \"The fixed epsilon value of 0 is too small, resulting in the \"\n            \"parent and child times being equal within the limits of \"\n            \"numerical precision.\"\n        )\n        with pytest.raises(\n            tskit.LibraryError,\n            match=msg,\n        ):\n            tree.split_polytomies(epsilon=0, random_seed=12)\n\n    def test_unsplittable_branch(self):\n        branch_length = np.nextafter(0, 1)\n        tree = tskit.Tree.generate_star(3, branch_length=branch_length)\n        msg = (\n            \"Cannot resolve the degree 3 polytomy rooted at node 3 with \"\n            \"minimum time difference of 5e-324 to the resolved leaves. 
\"\n            \"The time difference between nodes is so small that more nodes \"\n            \"cannot be inserted between within the limits of floating point \"\n            \"precision.\"\n        )\n        with pytest.raises(\n            tskit.LibraryError,\n            match=msg,\n        ):\n            tree.split_polytomies(random_seed=12)\n\n    def test_epsilon_for_mutations(self):\n        tables = tskit.Tree.generate_star(3).tree_sequence.dump_tables()\n        root_time = tables.nodes.time[-1]\n        assert root_time == 1\n        site = tables.sites.add_row(position=0.5, ancestral_state=\"0\")\n        tables.mutations.add_row(site=site, time=0.9, node=0, derived_state=\"1\")\n        tables.mutations.add_row(site=site, time=0.9, node=1, derived_state=\"1\")\n        tree = tables.tree_sequence().first()\n        with pytest.raises(\n            tskit.LibraryError,\n            match=\"not small enough to create new nodes below a polytomy\",\n        ):\n            tree.split_polytomies(epsilon=0.5, random_seed=123)\n\n    def test_mutation_within_eps_parent(self):\n        tables = tskit.Tree.generate_star(3).tree_sequence.dump_tables()\n        site = tables.sites.add_row(position=0.5, ancestral_state=\"0\")\n        branch_length = np.nextafter(1, 0)\n        tables.mutations.add_row(\n            site=site, time=branch_length, node=0, derived_state=\"1\"\n        )\n        tables.mutations.add_row(\n            site=site, time=branch_length, node=1, derived_state=\"1\"\n        )\n        tree = tables.tree_sequence().first()\n        with pytest.raises(\n            tskit.LibraryError,\n            match=\"Cannot split polytomy: mutation with numerical precision\",\n        ):\n            tree.split_polytomies(random_seed=123)\n\n    def test_provenance(self):\n        tree = tskit.Tree.generate_star(4)\n        ts_split = tree.split_polytomies(random_seed=14).tree_sequence\n        record = 
json.loads(ts_split.provenance(ts_split.num_provenances - 1).record)\n        assert record[\"parameters\"][\"command\"] == \"split_polytomies\"\n        ts_split = tree.split_polytomies(\n            random_seed=12, record_provenance=False\n        ).tree_sequence\n        record = json.loads(ts_split.provenance(ts_split.num_provenances - 1).record)\n        assert record[\"parameters\"][\"command\"] != \"split_polytomies\"\n\n    def test_kwargs(self):\n        tree = tskit.Tree.generate_star(4)\n        split_tree = tree.split_polytomies(random_seed=14, tracked_samples=[0, 1])\n        assert split_tree.num_tracked_samples() == 2\n\n    @pytest.mark.slow\n    @pytest.mark.parametrize(\"n\", [3, 4, 5])\n    def test_all_topologies(self, n):\n        N = num_leaf_labelled_binary_trees(n)\n        ranks = collections.Counter()\n        for seed in range(20 * N):\n            star = tskit.Tree.generate_star(n)\n            random_tree = star.split_polytomies(random_seed=seed)\n            ranks[random_tree.rank()] += 1\n        # There are N possible binary trees here, we should have seen them\n        # all with high probability after 20 N attempts.\n        assert len(ranks) == N\n\n\nclass TreeGeneratorTestBase:\n    \"\"\"\n    Abstract superclass of tree generator test methods.\n\n    Concrete subclasses should define a \"method_name\" class variable.\n    \"\"\"\n\n    def method(self, n, **kwargs):\n        return getattr(tskit.Tree, self.method_name)(n, **kwargs)\n\n    @pytest.mark.parametrize(\"n\", range(2, 10))\n    def test_leaves(self, n):\n        tree = self.method(n)\n        assert list(tree.leaves()) == list(range(n))\n\n    def test_bad_n(self):\n        for n in [-1, 0, np.array([1, 2])]:\n            with pytest.raises(ValueError):\n                self.method(n)\n        for n in [None, \"\", []]:\n            with pytest.raises(TypeError):\n                self.method(n)\n\n    def test_bad_span(self):\n        with 
pytest.raises(tskit.LibraryError):\n            self.method(2, span=0)\n\n    def test_bad_branch_length(self):\n        with pytest.raises(tskit.LibraryError):\n            self.method(2, branch_length=0)\n\n    @pytest.mark.parametrize(\"span\", [0.1, 1, 100])\n    def test_span(self, span):\n        tree = self.method(5, span=span)\n        assert tree.tree_sequence.sequence_length == span\n\n    @pytest.mark.parametrize(\"branch_length\", [0.25, 1, 100])\n    def test_branch_length(self, branch_length):\n        tree = self.method(5, branch_length=branch_length)\n        for u in tree.nodes():\n            if u != tree.root:\n                assert tree.branch_length(u) >= branch_length\n\n    def test_provenance(self):\n        ts = self.method(2).tree_sequence\n        assert ts.num_provenances == 1\n        record = json.loads(ts.provenance(0).record)\n        assert record[\"parameters\"][\"command\"] == self.method_name\n        ts = self.method(2, record_provenance=False).tree_sequence\n        assert ts.num_provenances == 0\n\n    @pytest.mark.parametrize(\"n\", range(2, 10))\n    def test_rank_unrank_round_trip(self, n):\n        tree1 = self.method(n)\n        rank = tree1.rank()\n        tree2 = tskit.Tree.unrank(n, rank)\n        tables1 = tree1.tree_sequence.tables\n        tables2 = tree2.tree_sequence.tables\n        tables1.assert_equals(tables2, ignore_provenance=True)\n\n    def test_kwargs(self):\n        tree = self.method(3, tracked_samples=[0, 1])\n        assert tree.num_tracked_samples() == 2\n\n\nclass TestGenerateStar(TreeGeneratorTestBase):\n    method_name = \"generate_star\"\n\n    @pytest.mark.parametrize(\"n\", range(2, 10))\n    def test_unrank_equal(self, n):\n        for extra_params in [{}, {\"span\": 2.5}, {\"branch_length\": 3}]:\n            ts = tskit.Tree.generate_star(n, **extra_params).tree_sequence\n            equiv_ts = tskit.Tree.unrank(n, (0, 0), **extra_params).tree_sequence\n            assert 
ts.tables.equals(equiv_ts.tables, ignore_provenance=True)\n\n    def test_branch_length_semantics(self):\n        branch_length = 10\n        ts = tskit.Tree.generate_star(7, branch_length=branch_length).tree_sequence\n        time = ts.tables.nodes.time\n        edges = ts.tables.edges\n        length = time[edges.parent] - time[edges.child]\n        assert np.all(length == branch_length)\n\n\nclass TestGenerateBalanced(TreeGeneratorTestBase):\n    method_name = \"generate_balanced\"\n\n    @pytest.mark.parametrize(\"arity\", range(2, 10))\n    def test_arity_leaves(self, arity):\n        n = 20\n        tree = tskit.Tree.generate_balanced(n, arity=arity)\n        assert list(tree.leaves()) == list(range(n))\n\n    @pytest.mark.parametrize(\"n\", range(1, 13))\n    def test_binary_unrank_equal(self, n):\n        for extra_params in [{}, {\"span\": 2.5}, {\"branch_length\": 3}]:\n            ts = tskit.Tree.generate_balanced(n, **extra_params).tree_sequence\n            N = tskit.combinatorics.num_shapes(n)\n            equiv_ts = tskit.Tree.unrank(n, (N - 1, 0), **extra_params).tree_sequence\n            assert ts.tables.equals(equiv_ts.tables, ignore_provenance=True)\n\n    @pytest.mark.parametrize(\n        (\"n\", \"arity\"), [(2, 2), (8, 2), (27, 3), (29, 3), (11, 5), (5, 10)]\n    )\n    def test_rank_unrank_round_trip_arity(self, n, arity):\n        tree1 = tskit.Tree.generate_balanced(n, arity=arity)\n        rank = tree1.rank()\n        tree2 = tskit.Tree.unrank(n, rank)\n        tables1 = tree1.tree_sequence.tables\n        tables2 = tree2.tree_sequence.tables\n        tables1.assert_equals(tables2, ignore_provenance=True)\n\n    def test_bad_arity(self):\n        for arity in [-1, 0, 1]:\n            with pytest.raises(ValueError):\n                tskit.Tree.generate_balanced(10, arity=arity)\n\n    def test_branch_length_semantics(self):\n        branch_length = 10\n        tree = tskit.Tree.generate_balanced(8, branch_length=branch_length)\n        
for u in tree.nodes():\n            for v in tree.children(u):\n                # Special case cause n is a power of 2\n                assert tree.time(u) == tree.time(v) + branch_length\n\n\nclass TestGenerateRandomBinary(TreeGeneratorTestBase):\n    method_name = \"generate_random_binary\"\n\n    def method(self, n, **kwargs):\n        return tskit.Tree.generate_random_binary(n, random_seed=53, **kwargs)\n\n    @pytest.mark.slow\n    @pytest.mark.parametrize(\"n\", [3, 4, 5])\n    def test_all_topologies(self, n):\n        N = num_leaf_labelled_binary_trees(n)\n        ranks = collections.Counter()\n        for seed in range(20 * N):\n            random_tree = tskit.Tree.generate_random_binary(n, random_seed=seed)\n            ranks[random_tree.rank()] += 1\n        # There are N possible binary trees here, we should have seen them\n        # all with high probability after 20 N attempts.\n        assert len(ranks) == N\n\n    @pytest.mark.parametrize(\"n\", range(2, 10))\n    def test_leaves(self, n):\n        tree = tskit.Tree.generate_random_binary(n, random_seed=1234)\n        # The leaves should be a permutation of range(n)\n        assert list(sorted(tree.leaves())) == list(range(n))\n\n    @pytest.mark.parametrize(\"seed\", range(1, 20))\n    def test_rank_unrank_round_trip_seeds(self, seed):\n        n = 10\n        tree1 = tskit.Tree.generate_random_binary(n, random_seed=seed)\n        rank = tree1.rank()\n        tree2 = tskit.Tree.unrank(n, rank)\n        tables1 = tree1.tree_sequence.tables\n        tables2 = tree2.tree_sequence.tables\n        tables1.assert_equals(tables2, ignore_provenance=True)\n\n\nclass TestGenerateComb(TreeGeneratorTestBase):\n    method_name = \"generate_comb\"\n\n    # Hard-code in some pre-computed ranks for the comb(n) tree.\n    @pytest.mark.parametrize([\"n\", \"rank\"], [(2, 0), (3, 1), (4, 3), (5, 8), (6, 20)])\n    def test_unrank_equal(self, n, rank):\n        for extra_params in [{}, {\"span\": 2.5}, 
{\"branch_length\": 3}]:\n            ts = tskit.Tree.generate_comb(n, **extra_params).tree_sequence\n            equiv_ts = tskit.Tree.unrank(n, (rank, 0), **extra_params).tree_sequence\n            assert ts.tables.equals(equiv_ts.tables, ignore_provenance=True)\n\n    def test_branch_length_semantics(self):\n        branch_length = 10\n        tree = tskit.Tree.generate_comb(2, branch_length=branch_length)\n        assert tree.time(tree.root) == branch_length\n\n\nclass TestEqualChunks:\n    @pytest.mark.parametrize((\"n\", \"k\"), [(2, 1), (4, 2), (9, 3), (100, 10)])\n    def test_evenly_divisible(self, n, k):\n        lst = range(n)\n        chunks = list(comb.equal_chunks(lst, k))\n        assert len(chunks) == k\n        for chunk in chunks:\n            assert len(chunk) == n // k\n        assert list(itertools.chain(*chunks)) == list(range(n))\n\n    @pytest.mark.parametrize(\"n\", range(1, 5))\n    def test_one_chunk(self, n):\n        lst = list(range(n))\n        chunks = list(comb.equal_chunks(lst, 1))\n        assert chunks == [lst]\n\n    @pytest.mark.parametrize((\"n\", \"k\"), [(1, 2), (5, 6), (10, 20), (5, 100)])\n    def test_empty_chunks(self, n, k):\n        lst = range(n)\n        chunks = list(comb.equal_chunks(lst, k))\n        assert len(chunks) == n\n        for chunk in chunks:\n            assert len(chunk) == 1\n        assert list(itertools.chain(*chunks)) == list(range(n))\n\n    @pytest.mark.parametrize((\"n\", \"k\"), [(3, 2), (10, 3), (11, 5), (13, 10)])\n    def test_trailing_chunk(self, n, k):\n        lst = range(n)\n        chunks = list(comb.equal_chunks(lst, k))\n        assert len(chunks) == k\n        assert list(itertools.chain(*chunks)) == list(range(n))\n\n    def test_empty_list(self):\n        assert len(list(comb.equal_chunks([], 1))) == 0\n        assert len(list(comb.equal_chunks([], 2))) == 0\n\n    def test_bad_num_chunks(self):\n        for bad_num_chunks in [0, -1, 0.1]:\n            with 
pytest.raises(ValueError):\n                list(comb.equal_chunks([1], bad_num_chunks))\n"
  },
  {
    "path": "python/tests/test_dict_encoding.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2020 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTest cases for the low-level dictionary encoding used to move\ndata around in C.\n\"\"\"\n\nimport pathlib\nimport pickle\n\nimport _tskit\nimport lwt_interface.dict_encoding_testlib\nimport tskit\n\nlwt_interface.dict_encoding_testlib.lwt_module = _tskit\n# Bring the tests defined in dict_encoding_testlib into the current namespace\n# so pytest will find and execute them.\nfrom lwt_interface.dict_encoding_testlib import *  # noqa\n\n\ndef test_pickled_examples():\n    seen_msprime = False\n    test_dir = pathlib.Path(__file__).parent / \"data/dict-encodings\"\n    for filename in test_dir.glob(\"*.pkl\"):\n        if \"msprime\" in str(filename):\n            seen_msprime = True\n        with open(test_dir / filename, \"rb\") as f:\n            d = pickle.load(f)\n            lwt = _tskit.LightweightTableCollection()\n    
        lwt.fromdict(d)\n            tskit.TableCollection.fromdict(d)\n    # Check we've done something\n    assert seen_msprime\n"
  },
  {
    "path": "python/tests/test_distance_metrics.py",
    "content": "# MIT License\n#\n# Copyright (c) 2024 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTests for tree distance metrics.\n\"\"\"\n\nimport io\nimport itertools\nimport math\nimport unittest\n\nimport dendropy\nimport msprime\nimport numpy as np\nimport pytest\nfrom dendropy.calculate import treecompare\n\nimport _tskit\nimport tests\nimport tests.tsutil as tsutil\nimport tskit\n\n\ndef c_kc_distance(tree1, tree2, lambda_=0):\n    \"\"\"\n    Simplified version of the naive_kc_distance() function above.\n    Written without Python features to aid writing C implementation.\n    \"\"\"\n    samples = tree1.tree_sequence.samples()\n    if tree1.tree_sequence.num_samples != tree2.tree_sequence.num_samples:\n        raise ValueError(\"Trees must have the same samples\")\n    for sample1, sample2 in zip(samples, tree2.tree_sequence.samples()):\n        if sample1 != sample2:\n            raise ValueError(\"Trees must 
have the same samples\")\n    if not len(tree1.roots) == len(tree2.roots) == 1:\n        raise ValueError(\"Trees must have one root\")\n    for tree in [tree1, tree2]:\n        for u in range(tree.tree_sequence.num_nodes):\n            if tree.num_children(u) == 1:\n                raise ValueError(\"Unary nodes are not supported\")\n\n    n = tree1.tree_sequence.num_samples\n    vecs1 = KCVectors(n)\n    fill_kc_vectors(tree1, vecs1)\n    vecs2 = KCVectors(n)\n    fill_kc_vectors(tree2, vecs2)\n    return norm_kc_vectors(vecs1, vecs2, lambda_)\n\n\ndef naive_kc_distance(tree1, tree2, lambda_=0):\n    \"\"\"\n    Returns the Kendall-Colijn distance between the specified pair of trees.\n    lambda_ determines weight of topology vs branch lengths in calculating\n    the distance. Set lambda_ at 0 to only consider topology, set at 1 to\n    only consider branch lengths. See Kendall & Colijn (2016):\n    https://academic.oup.com/mbe/article/33/10/2735/2925548\n    \"\"\"\n    samples = tree1.tree_sequence.samples()\n    if not np.array_equal(samples, tree2.tree_sequence.samples()):\n        raise ValueError(\"Trees must have the same samples\")\n    if not len(tree1.roots) == len(tree2.roots) == 1:\n        raise ValueError(\"Trees must have one root\")\n    for tree in [tree1, tree2]:\n        for u in tree.nodes():\n            if tree.num_children(u) == 1:\n                raise ValueError(\"Unary nodes are not supported\")\n\n    n = samples.shape[0]\n    N = (n * (n - 1)) // 2\n    m = [np.zeros(N + n), np.zeros(N + n)]\n    M = [np.zeros(N + n), np.zeros(N + n)]\n    for tree_index, tree in enumerate([tree1, tree2]):\n        for sample in range(n):\n            m[tree_index][N + sample] = 1\n            M[tree_index][N + sample] = tree.branch_length(sample)\n\n        for n1, n2 in itertools.combinations(range(n), 2):\n            mrca = tree.mrca(samples[n1], samples[n2])\n            depth = 0\n            u = tree.parent(mrca)\n            while u != 
tskit.NULL:\n                depth += 1\n                u = tree.parent(u)\n            pair_index = n1 * (n1 - 2 * n + 1) // -2 + n2 - n1 - 1\n            m[tree_index][pair_index] = depth\n            M[tree_index][pair_index] = tree.time(tree.root) - tree.time(mrca)\n\n    return np.linalg.norm((1 - lambda_) * (m[0] - m[1]) + lambda_ * (M[0] - M[1]))\n\n\nclass KCVectors:\n    \"\"\"\n    Manages the two vectors (m and M) of a tree used to compute the\n    KC distance between trees. For any two samples, u and v,\n    m and M capture the distance of mrca(u, v) to the root in\n    number of edges and time, respectively.\n\n    See Kendall & Colijn (2016):\n    https://academic.oup.com/mbe/article/33/10/2735/2925548\n    \"\"\"\n\n    def __init__(self, n):\n        self.n = n\n        self.N = (self.n * (self.n - 1)) // 2\n        self.m = np.zeros(self.N + self.n)\n        self.M = np.zeros(self.N + self.n)\n\n\ndef fill_kc_vectors(tree, kc_vecs):\n    sample_index_map = np.zeros(tree.tree_sequence.num_nodes)\n    for j, u in enumerate(tree.tree_sequence.samples()):\n        sample_index_map[u] = j\n    for root in tree.roots:\n        stack = [(tree.root, 0)]\n        while len(stack) > 0:\n            u, depth = stack.pop()\n            if tree.is_sample(u):\n                time = tree.branch_length(u)\n                update_kc_vectors_single_leaf(kc_vecs, u, time, sample_index_map)\n\n            c1 = tree.left_child(u)\n            while c1 != tskit.NULL:\n                stack.append((c1, depth + 1))\n                c2 = tree.right_sib(c1)\n                while c2 != tskit.NULL:\n                    update_kc_vectors_all_pairs(\n                        tree, kc_vecs, c1, c2, depth, tree.time(root) - tree.time(u)\n                    )\n                    c2 = tree.right_sib(c2)\n                c1 = tree.right_sib(c1)\n\n\ndef update_kc_vectors_single_leaf(kc_vecs, u, time, sample_index_map):\n    u_index = int(sample_index_map[u])\n    
kc_vecs.m[kc_vecs.N + u_index] = 1\n    kc_vecs.M[kc_vecs.N + u_index] = time\n\n\ndef update_kc_vectors_all_pairs(tree, kc_vecs, c1, c2, depth, time):\n    s1_index = tree.left_sample(c1)\n    while True:\n        s2_index = tree.left_sample(c2)\n        while True:\n            update_kc_vectors_pair(kc_vecs, s1_index, s2_index, depth, time)\n            if s2_index == tree.right_sample(c2):\n                break\n            s2_index = tree.next_sample(s2_index)\n        if s1_index == tree.right_sample(c1):\n            break\n        s1_index = tree.next_sample(s1_index)\n\n\ndef update_kc_vectors_pair(kc_vecs, n1, n2, depth, time):\n    if n1 > n2:\n        n1, n2 = n2, n1\n    pair_index = n2 - n1 - 1 + (-1 * n1 * (n1 - 2 * kc_vecs.n + 1)) // 2\n\n    kc_vecs.m[pair_index] = depth\n    kc_vecs.M[pair_index] = time\n\n\ndef norm_kc_vectors(kc_vecs1, kc_vecs2, lambda_):\n    vT1 = 0\n    vT2 = 0\n    distance_sum = 0\n    for i in range(kc_vecs1.n + kc_vecs1.N):\n        vT1 = (kc_vecs1.m[i] * (1 - lambda_)) + (lambda_ * kc_vecs1.M[i])\n        vT2 = (kc_vecs2.m[i] * (1 - lambda_)) + (lambda_ * kc_vecs2.M[i])\n        distance_sum += (vT1 - vT2) ** 2\n\n    return math.sqrt(distance_sum)\n\n\nclass TestKCMetric(unittest.TestCase):\n    \"\"\"\n    Tests on the KC metric distances.\n    \"\"\"\n\n    def test_same_tree_zero_distance(self):\n        for n in range(2, 10):\n            for seed in range(1, 10):\n                ts = msprime.simulate(n, random_seed=seed)\n                tree = next(ts.trees(sample_lists=True))\n                assert naive_kc_distance(tree, tree) == 0\n                assert c_kc_distance(tree, tree) == 0\n                assert tree.kc_distance(tree) == 0\n                ts = msprime.simulate(n, random_seed=seed)\n                tree2 = next(ts.trees(sample_lists=True))\n                assert naive_kc_distance(tree, tree2) == 0\n                assert c_kc_distance(tree, tree2) == 0\n                assert 
tree.kc_distance(tree2) == 0\n\n    def test_sample_2_zero_distance(self):\n        # All trees with 2 leaves must be equal distance from each other.\n        for seed in range(1, 10):\n            ts1 = msprime.simulate(2, random_seed=seed)\n            tree1 = next(ts1.trees(sample_lists=True))\n            ts2 = msprime.simulate(2, random_seed=seed + 1)\n            tree2 = next(ts2.trees(sample_lists=True))\n            assert naive_kc_distance(tree1, tree2, 0) == 0\n            assert c_kc_distance(tree1, tree2, 0) == 0\n            assert tree1.kc_distance(tree2, 0) == 0\n\n    def test_different_samples_error(self):\n        tree1 = next(msprime.simulate(10, random_seed=1).trees(sample_lists=True))\n        tree2 = next(msprime.simulate(2, random_seed=1).trees(sample_lists=True))\n        with pytest.raises(ValueError):\n            naive_kc_distance(tree1, tree2)\n        with pytest.raises(ValueError):\n            c_kc_distance(tree1, tree2)\n        with pytest.raises(_tskit.LibraryError):\n            tree1.kc_distance(tree2)\n\n        ts1 = msprime.simulate(10, random_seed=1)\n        nmap = np.arange(0, ts1.num_nodes)[::-1]\n        ts2 = tsutil.permute_nodes(ts1, nmap)\n        tree1 = next(ts1.trees(sample_lists=True))\n        tree2 = next(ts2.trees(sample_lists=True))\n        with pytest.raises(ValueError):\n            naive_kc_distance(tree1, tree2)\n        with pytest.raises(ValueError):\n            c_kc_distance(tree1, tree2)\n        with pytest.raises(_tskit.LibraryError):\n            tree1.kc_distance(tree2)\n\n        unsimplified_ts = msprime.simulate(\n            10, random_seed=1, recombination_rate=10, record_full_arg=True\n        )\n        trees = unsimplified_ts.trees(sample_lists=True)\n        tree1 = next(trees)\n        tree2 = next(trees)\n        with pytest.raises(ValueError):\n            naive_kc_distance(tree1, tree2)\n        with pytest.raises(ValueError):\n            c_kc_distance(tree1, tree2)\n        with 
pytest.raises(_tskit.LibraryError):\n            tree1.kc_distance(tree2)\n\n    def validate_trees(self, n):\n        for seed in range(1, 10):\n            ts1 = msprime.simulate(n, random_seed=seed)\n            ts2 = msprime.simulate(n, random_seed=seed + 1)\n            tree1 = next(ts1.trees(sample_lists=True))\n            tree2 = next(ts2.trees(sample_lists=True))\n            kc1 = naive_kc_distance(tree1, tree2)\n            kc2 = c_kc_distance(tree1, tree2)\n            kc3 = tree1.kc_distance(tree2)\n            self.assertAlmostEqual(kc1, kc2)\n            self.assertAlmostEqual(kc1, kc3)\n            self.assertAlmostEqual(kc1, naive_kc_distance(tree2, tree1))\n            self.assertAlmostEqual(kc2, c_kc_distance(tree2, tree1))\n            self.assertAlmostEqual(kc3, tree2.kc_distance(tree1))\n\n    def test_sample_3(self):\n        self.validate_trees(3)\n\n    def test_sample_4(self):\n        self.validate_trees(4)\n\n    def test_sample_10(self):\n        self.validate_trees(10)\n\n    def test_sample_20(self):\n        self.validate_trees(20)\n\n    def validate_nonbinary_trees(self, n):\n        demographic_events = [\n            msprime.SimpleBottleneck(0.02, 0, proportion=0.25),\n            msprime.SimpleBottleneck(0.2, 0, proportion=1),\n        ]\n\n        for seed in range(1, 10):\n            ts = msprime.simulate(\n                n, random_seed=seed, demographic_events=demographic_events\n            )\n            # Check if this is really nonbinary\n            found = False\n            for edgeset in ts.edgesets():\n                if len(edgeset.children) > 2:\n                    found = True\n                    break\n            assert found\n            tree1 = next(ts.trees(sample_lists=True))\n\n            ts = msprime.simulate(\n                n, random_seed=seed + 1, demographic_events=demographic_events\n            )\n            tree2 = next(ts.trees(sample_lists=True))\n            self.do_kc_distance(tree1, 
tree2)\n            # compare to a binary tree also\n\n            ts = msprime.simulate(n, random_seed=seed + 1)\n            tree2 = next(ts.trees(sample_lists=True))\n            self.do_kc_distance(tree1, tree2)\n\n    def test_non_binary_sample_10(self):\n        self.validate_nonbinary_trees(10)\n\n    def test_non_binary_sample_20(self):\n        self.validate_nonbinary_trees(20)\n\n    def test_non_binary_sample_30(self):\n        self.validate_nonbinary_trees(30)\n\n    def verify_result(self, tree1, tree2, lambda_, result, places=None):\n        kc1 = naive_kc_distance(tree1, tree2, lambda_)\n        kc2 = c_kc_distance(tree1, tree2, lambda_)\n        kc3 = tree1.kc_distance(tree2, lambda_)\n        self.assertAlmostEqual(kc1, result, places=places)\n        self.assertAlmostEqual(kc2, result, places=places)\n        self.assertAlmostEqual(kc3, result, places=places)\n\n        kc1 = naive_kc_distance(tree2, tree1, lambda_)\n        kc2 = c_kc_distance(tree2, tree1, lambda_)\n        kc3 = tree2.kc_distance(tree1, lambda_)\n        self.assertAlmostEqual(kc1, result, places=places)\n        self.assertAlmostEqual(kc2, result, places=places)\n        self.assertAlmostEqual(kc3, result, places=places)\n\n    def test_known_kc_sample_3(self):\n        # Test with hardcoded known values\n        tables_1 = tskit.TableCollection(sequence_length=1.0)\n        tables_2 = tskit.TableCollection(sequence_length=1.0)\n\n        # Nodes\n        sv = [True, True, True, False, False]\n        tv_1 = [0.0, 0.0, 0.0, 2.0, 3.0]\n        tv_2 = [0.0, 0.0, 0.0, 4.0, 6.0]\n\n        for is_sample, t1, t2 in zip(sv, tv_1, tv_2):\n            flags = tskit.NODE_IS_SAMPLE if is_sample else 0\n            tables_1.nodes.add_row(flags=flags, time=t1)\n            tables_2.nodes.add_row(flags=flags, time=t2)\n\n        # Edges\n        lv = [0.0, 0.0, 0.0, 0.0]\n        rv = [1.0, 1.0, 1.0, 1.0]\n        pv = [3, 3, 4, 4]\n        cv = [0, 1, 2, 3]\n\n        for left, right, p, 
c in zip(lv, rv, pv, cv):\n            tables_1.edges.add_row(left=left, right=right, parent=p, child=c)\n            tables_2.edges.add_row(left=left, right=right, parent=p, child=c)\n\n        tree_1 = next(tables_1.tree_sequence().trees(sample_lists=True))\n        tree_2 = next(tables_2.tree_sequence().trees(sample_lists=True))\n        self.verify_result(tree_1, tree_2, 0, 0)\n        self.verify_result(tree_1, tree_2, 1, 4.243, places=3)\n\n    def test_10_samples(self):\n        nodes_1 = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time    population  individual  metadata\n        0   1   0.000000    0   -1  b''\n        1   1   0.000000    0   -1  b''\n        2   1   0.000000    0   -1  b''\n        3   1   0.000000    0   -1  b''\n        4   1   0.000000    0   -1  b''\n        5   1   0.000000    0   -1  b''\n        6   1   0.000000    0   -1  b''\n        7   1   0.000000    0   -1  b''\n        8   1   0.000000    0   -1  b''\n        9   1   0.000000    0   -1  b''\n        10  0   0.047734    0   -1  b''\n        11  0   0.061603    0   -1  b''\n        12  0   0.189503    0   -1  b''\n        13  0   0.275885    0   -1  b''\n        14  0   0.518301    0   -1  b''\n        15  0   0.543143    0   -1  b''\n        16  0   0.865193    0   -1  b''\n        17  0   1.643658    0   -1  b''\n        18  0   2.942350    0   -1  b''\n        \"\"\"\n        )\n        edges_1 = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.000000    10000.000000    10  0\n        0.000000    10000.000000    10  2\n        0.000000    10000.000000    11  9\n        0.000000    10000.000000    11  10\n        0.000000    10000.000000    12  3\n        0.000000    10000.000000    12  7\n        0.000000    10000.000000    13  5\n        0.000000    10000.000000    13  11\n        0.000000    10000.000000    14  1\n        0.000000    10000.000000    14  8\n        0.000000    10000.000000    15  4\n        0.000000    
10000.000000    15  14\n        0.000000    10000.000000    16  13\n        0.000000    10000.000000    16  15\n        0.000000    10000.000000    17  6\n        0.000000    10000.000000    17  12\n        0.000000    10000.000000    18  16\n        0.000000    10000.000000    18  17\n        \"\"\"\n        )\n        ts_1 = tskit.load_text(\n            nodes_1, edges_1, sequence_length=10000, strict=False, base64_metadata=False\n        )\n        nodes_2 = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time    population  individual  metadata\n        0   1   0.000000    0   -1  b''\n        1   1   0.000000    0   -1  b''\n        2   1   0.000000    0   -1  b''\n        3   1   0.000000    0   -1  b''\n        4   1   0.000000    0   -1  b''\n        5   1   0.000000    0   -1  b''\n        6   1   0.000000    0   -1  b''\n        7   1   0.000000    0   -1  b''\n        8   1   0.000000    0   -1  b''\n        9   1   0.000000    0   -1  b''\n        10  0   0.210194    0   -1  b''\n        11  0   0.212217    0   -1  b''\n        12  0   0.223341    0   -1  b''\n        13  0   0.272703    0   -1  b''\n        14  0   0.443553    0   -1  b''\n        15  0   0.491653    0   -1  b''\n        16  0   0.729369    0   -1  b''\n        17  0   1.604113    0   -1  b''\n        18  0   1.896332    0   -1  b''\n        \"\"\"\n        )\n        edges_2 = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.000000    10000.000000    10  5\n        0.000000    10000.000000    10  7\n        0.000000    10000.000000    11  3\n        0.000000    10000.000000    11  4\n        0.000000    10000.000000    12  6\n        0.000000    10000.000000    12  9\n        0.000000    10000.000000    13  10\n        0.000000    10000.000000    13  12\n        0.000000    10000.000000    14  8\n        0.000000    10000.000000    14  11\n        0.000000    10000.000000    15  1\n        0.000000    10000.000000    15  2\n        
0.000000    10000.000000    16  13\n        0.000000    10000.000000    16  14\n        0.000000    10000.000000    17  0\n        0.000000    10000.000000    17  16\n        0.000000    10000.000000    18  15\n        0.000000    10000.000000    18  17\n        \"\"\"\n        )\n        ts_2 = tskit.load_text(\n            nodes_2, edges_2, sequence_length=10000, strict=False, base64_metadata=False\n        )\n\n        tree_1 = next(ts_1.trees(sample_lists=True))\n        tree_2 = next(ts_2.trees(sample_lists=True))\n        self.verify_result(tree_1, tree_2, 0, 12.85, places=2)\n        self.verify_result(tree_1, tree_2, 1, 10.64, places=2)\n\n    def test_15_samples(self):\n        nodes_1 = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time    population  individual  metadata\n        0   1   0.000000    0   -1\n        1   1   0.000000    0   -1\n        2   1   0.000000    0   -1\n        3   1   0.000000    0   -1\n        4   1   0.000000    0   -1\n        5   1   0.000000    0   -1\n        6   1   0.000000    0   -1\n        7   1   0.000000    0   -1\n        8   1   0.000000    0   -1\n        9   1   0.000000    0   -1\n        10  1   0.000000    0   -1\n        11  1   0.000000    0   -1\n        12  1   0.000000    0   -1\n        13  1   0.000000    0   -1\n        14  1   0.000000    0   -1\n        15  0   0.026043    0   -1\n        16  0   0.032662    0   -1\n        17  0   0.072032    0   -1\n        18  0   0.086792    0   -1\n        19  0   0.130699    0   -1\n        20  0   0.177640    0   -1\n        21  0   0.199800    0   -1\n        22  0   0.236391    0   -1\n        23  0   0.342445    0   -1\n        24  0   0.380356    0   -1\n        25  0   0.438502    0   -1\n        26  0   0.525632    0   -1\n        27  0   1.180078    0   -1\n        28  0   2.548099    0   -1\n        \"\"\"\n        )\n        edges_1 = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.000000    
10000.000000    15  6\n        0.000000    10000.000000    15  13\n        0.000000    10000.000000    16  1\n        0.000000    10000.000000    16  4\n        0.000000    10000.000000    17  0\n        0.000000    10000.000000    17  7\n        0.000000    10000.000000    18  2\n        0.000000    10000.000000    18  17\n        0.000000    10000.000000    19  5\n        0.000000    10000.000000    19  9\n        0.000000    10000.000000    20  12\n        0.000000    10000.000000    20  15\n        0.000000    10000.000000    21  8\n        0.000000    10000.000000    21  20\n        0.000000    10000.000000    22  11\n        0.000000    10000.000000    22  21\n        0.000000    10000.000000    23  10\n        0.000000    10000.000000    23  22\n        0.000000    10000.000000    24  14\n        0.000000    10000.000000    24  16\n        0.000000    10000.000000    25  18\n        0.000000    10000.000000    25  19\n        0.000000    10000.000000    26  23\n        0.000000    10000.000000    26  24\n        0.000000    10000.000000    27  25\n        0.000000    10000.000000    27  26\n        0.000000    10000.000000    28  3\n        0.000000    10000.000000    28  27\n        \"\"\"\n        )\n        ts_1 = tskit.load_text(\n            nodes_1, edges_1, sequence_length=10000, strict=False, base64_metadata=False\n        )\n\n        nodes_2 = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time    population  individual  metadata\n        0   1   0.000000    0   -1\n        1   1   0.000000    0   -1\n        2   1   0.000000    0   -1\n        3   1   0.000000    0   -1\n        4   1   0.000000    0   -1\n        5   1   0.000000    0   -1\n        6   1   0.000000    0   -1\n        7   1   0.000000    0   -1\n        8   1   0.000000    0   -1\n        9   1   0.000000    0   -1\n        10  1   0.000000    0   -1\n        11  1   0.000000    0   -1\n        12  1   0.000000    0   -1\n        13  1   0.000000    0   -1\n        14 
 1   0.000000    0   -1\n        15  0   0.011443    0   -1\n        16  0   0.055694    0   -1\n        17  0   0.061677    0   -1\n        18  0   0.063416    0   -1\n        19  0   0.163014    0   -1\n        20  0   0.223445    0   -1\n        21  0   0.251724    0   -1\n        22  0   0.268749    0   -1\n        23  0   0.352039    0   -1\n        24  0   0.356134    0   -1\n        25  0   0.399454    0   -1\n        26  0   0.409174    0   -1\n        27  0   2.090839    0   -1\n        28  0   3.772716    0   -1\n        \"\"\"\n        )\n        edges_2 = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.000000    10000.000000    15  6\n        0.000000    10000.000000    15  8\n        0.000000    10000.000000    16  9\n        0.000000    10000.000000    16  12\n        0.000000    10000.000000    17  3\n        0.000000    10000.000000    17  4\n        0.000000    10000.000000    18  13\n        0.000000    10000.000000    18  16\n        0.000000    10000.000000    19  2\n        0.000000    10000.000000    19  11\n        0.000000    10000.000000    20  1\n        0.000000    10000.000000    20  17\n        0.000000    10000.000000    21  0\n        0.000000    10000.000000    21  18\n        0.000000    10000.000000    22  10\n        0.000000    10000.000000    22  15\n        0.000000    10000.000000    23  14\n        0.000000    10000.000000    23  21\n        0.000000    10000.000000    24  5\n        0.000000    10000.000000    24  7\n        0.000000    10000.000000    25  19\n        0.000000    10000.000000    25  22\n        0.000000    10000.000000    26  24\n        0.000000    10000.000000    26  25\n        0.000000    10000.000000    27  20\n        0.000000    10000.000000    27  23\n        0.000000    10000.000000    28  26\n        0.000000    10000.000000    28  27\n        \"\"\"\n        )\n        ts_2 = tskit.load_text(\n            nodes_2, edges_2, sequence_length=10000, strict=False, 
base64_metadata=False\n        )\n\n        tree_1 = next(ts_1.trees(sample_lists=True))\n        tree_2 = next(ts_2.trees(sample_lists=True))\n\n        self.verify_result(tree_1, tree_2, 0, 19.95, places=2)\n        self.verify_result(tree_1, tree_2, 1, 17.74, places=2)\n\n    def test_nobinary_trees(self):\n        nodes_1 = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time    population  individual  metadata\n        0   1   0.000000    -1  -1   e30=\n        1   1   0.000000    -1  -1   e30=\n        2   1   0.000000    -1  -1   e30=\n        3   1   0.000000    -1  -1   e30=\n        4   1   0.000000    -1  -1   e30=\n        5   1   0.000000    -1  -1   e30=\n        6   1   0.000000    -1  -1   e30=\n        7   1   0.000000    -1  -1   e30=\n        8   1   0.000000    -1  -1   e30=\n        9   1   0.000000    -1  -1\n        10  1   0.000000    -1  -1\n        11  1   0.000000    -1  -1\n        12  1   0.000000    -1  -1\n        13  1   0.000000    -1  -1\n        14  1   0.000000    -1  -1\n        15  0   2.000000    -1  -1\n        16  0   4.000000    -1  -1\n        17  0   11.000000   -1  -1\n        18  0   12.000000   -1  -1\n        \"\"\"\n        )\n        edges_1 = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.000000    10000.000000    15  8\n        0.000000    10000.000000    15  10\n        0.000000    10000.000000    16  6\n        0.000000    10000.000000    16  12\n        0.000000    10000.000000    16  15\n        0.000000    10000.000000    17  0\n        0.000000    10000.000000    17  1\n        0.000000    10000.000000    17  2\n        0.000000    10000.000000    17  3\n        0.000000    10000.000000    17  4\n        0.000000    10000.000000    17  5\n        0.000000    10000.000000    17  7\n        0.000000    10000.000000    17  9\n        0.000000    10000.000000    17  11\n        0.000000    10000.000000    17  13\n        0.000000    10000.000000    17  14\n       
 0.000000    10000.000000    18  16\n        0.000000    10000.000000    18  17\n        \"\"\"\n        )\n        ts_1 = tskit.load_text(\n            nodes_1, edges_1, sequence_length=10000, strict=False, base64_metadata=False\n        )\n\n        nodes_2 = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time    population  individual  metadata\n        0   1   0.000000    -1  -1   e30=\n        1   1   0.000000    -1  -1   e30=\n        2   1   0.000000    -1  -1   e30=\n        3   1   0.000000    -1  -1   e30=\n        4   1   0.000000    -1  -1   e30=\n        5   1   0.000000    -1  -1   e30=\n        6   1   0.000000    -1  -1   e30=\n        7   1   0.000000    -1  -1   e30=\n        8   1   0.000000    -1  -1   e30=\n        9   1   0.000000    -1  -1   e30=\n        10  1   0.000000    -1  -1  e30=\n        11  1   0.000000    -1  -1  e30=\n        12  1   0.000000    -1  -1  e30=\n        13  1   0.000000    -1  -1  e30=\n        14  1   0.000000    -1  -1  e30=\n        15  0   2.000000    -1  -1\n        16  0   2.000000    -1  -1\n        17  0   3.000000    -1  -1\n        18  0   3.000000    -1  -1\n        19  0   4.000000    -1  -1\n        20  0   4.000000    -1  -1\n        21  0   11.000000   -1  -1\n        22  0   12.000000   -1  -1\n        \"\"\"\n        )\n        edges_2 = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.000000    10000.000000    15  12\n        0.000000    10000.000000    15  14\n        0.000000    10000.000000    16  0\n        0.000000    10000.000000    16  7\n        0.000000    10000.000000    17  6\n        0.000000    10000.000000    17  15\n        0.000000    10000.000000    18  4\n        0.000000    10000.000000    18  8\n        0.000000    10000.000000    18  13\n        0.000000    10000.000000    19  11\n        0.000000    10000.000000    19  18\n        0.000000    10000.000000    20  1\n        0.000000    10000.000000    20  5\n        0.000000    
10000.000000    20  9\n        0.000000    10000.000000    20  10\n        0.000000    10000.000000    21  2\n        0.000000    10000.000000    21  3\n        0.000000    10000.000000    21  16\n        0.000000    10000.000000    21  17\n        0.000000    10000.000000    21  20\n        0.000000    10000.000000    22  19\n        0.000000    10000.000000    22  21\n        \"\"\"\n        )\n        ts_2 = tskit.load_text(\n            nodes_2, edges_2, sequence_length=10000, strict=False, base64_metadata=False\n        )\n        tree_1 = next(ts_1.trees(sample_lists=True))\n        tree_2 = next(ts_2.trees(sample_lists=True))\n        self.verify_result(tree_1, tree_2, 0, 9.434, places=3)\n        self.verify_result(tree_1, tree_2, 1, 44, places=1)\n\n    def test_multiple_roots(self):\n        tables = tskit.TableCollection(sequence_length=1.0)\n\n        # Nodes\n        sv = [True, True]\n        tv = [0.0, 0.0]\n\n        for is_sample, t in zip(sv, tv):\n            flags = tskit.NODE_IS_SAMPLE if is_sample else 0\n            tables.nodes.add_row(flags=flags, time=t)\n\n        ts = tables.tree_sequence()\n\n        with pytest.raises(ValueError):\n            naive_kc_distance(ts.first(), ts.first(), 0)\n        with pytest.raises(ValueError):\n            c_kc_distance(ts.first(), ts.first(), 0)\n        with pytest.raises(_tskit.LibraryError):\n            ts.first().kc_distance(ts.first(), 0)\n\n    def do_kc_distance(self, t1, t2, lambda_=0):\n        kc1 = naive_kc_distance(t1, t2, lambda_)\n        kc2 = c_kc_distance(t1, t2, lambda_)\n        kc3 = t1.kc_distance(t2, lambda_)\n        self.assertAlmostEqual(kc1, kc2)\n        self.assertAlmostEqual(kc1, kc3)\n\n        kc1 = naive_kc_distance(t2, t1, lambda_)\n        kc2 = c_kc_distance(t2, t1, lambda_)\n        kc3 = t2.kc_distance(t1, lambda_)\n        self.assertAlmostEqual(kc1, kc2)\n        self.assertAlmostEqual(kc1, kc3)\n\n    def test_non_initial_samples(self):\n        ts1 = 
msprime.simulate(10, random_seed=1)\n        nmap = np.arange(0, ts1.num_nodes)[::-1]\n        ts2 = tsutil.permute_nodes(ts1, nmap)\n        t1 = next(ts2.trees(sample_lists=True))\n        t2 = next(ts2.trees(sample_lists=True))\n        self.do_kc_distance(t1, t2)\n\n    def test_internal_samples(self):\n        ts1 = msprime.simulate(10, random_seed=1)\n        ts2 = tsutil.jiggle_samples(ts1)\n        t1 = next(ts2.trees(sample_lists=True))\n        t2 = next(ts2.trees(sample_lists=True))\n\n        naive_kc_distance(t1, t2)\n        c_kc_distance(t1, t2)\n        t1.kc_distance(t2)\n\n    def test_root_sample(self):\n        tables1 = tskit.TableCollection(sequence_length=1.0)\n        tables1.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        only_root = next(tables1.tree_sequence().trees(sample_lists=True))\n        assert only_root.kc_distance(only_root) == 0\n        assert only_root.kc_distance(only_root, lambda_=1) == 0\n\n    def test_non_sample_leaf(self):\n        tables = tskit.TableCollection(sequence_length=1.0)\n        c1 = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        c2 = tables.nodes.add_row(time=0)\n        p = tables.nodes.add_row(time=1)\n        tables.edges.add_row(left=0, right=1, parent=p, child=c1)\n        tables.edges.add_row(left=0, right=1, parent=p, child=c2)\n        ts = tables.tree_sequence()\n        tree = next(ts.trees(sample_lists=True))\n        assert ts.kc_distance(ts) == 0\n        assert tree.kc_distance(tree) == 0\n\n        # mirrored\n        tables = tskit.TableCollection(sequence_length=1.0)\n        c1 = tables.nodes.add_row(time=0)\n        c2 = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        p = tables.nodes.add_row(time=1)\n        tables.edges.add_row(left=0, right=1, parent=p, child=c1)\n        tables.edges.add_row(left=0, right=1, parent=p, child=c2)\n        ts = tables.tree_sequence()\n        tree = next(ts.trees(sample_lists=True))\n        assert 
ts.kc_distance(ts) == 0\n        assert tree.kc_distance(tree) == 0\n\n    def test_ignores_subtrees_with_no_samples(self):\n        nodes_1 = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time    population  individual  metadata\n        0   0   0.000000    0   -1\n        1   0   0.000000    0   -1\n        2   0   0.000000    0   -1\n        3   1   0.000000    0   -1\n        4   0   0.000000    0   -1\n        5   0   0.000000    0   -1\n        6   1   1.000000    0   -1\n        7   1   2.000000    0   -1\n        8   0   2.000000    0   -1\n        9   0   3.000000    0   -1\n        \"\"\"\n        )\n        edges_1 = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.000000    1.000000    6  0\n        0.000000    1.000000    6  1\n        0.000000    1.000000    7  2\n        0.000000    1.000000    7  6\n        0.000000    1.000000    8  4\n        0.000000    1.000000    8  5\n        0.000000    1.000000    9  3\n        0.000000    1.000000    9  7\n        0.000000    1.000000    9  8\n        \"\"\"\n        )\n        redundant = tskit.load_text(\n            nodes_1, edges_1, sequence_length=1, strict=False, base64_metadata=False\n        )\n\n        nodes_2 = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time    population  individual  metadata\n        0   0   0.000000    0   -1\n        1   0   0.000000    0   -1\n        2   0   0.000000    0   -1\n        3   1   0.000000    0   -1\n        4   0   0.000000    0   -1\n        5   0   0.000000    0   -1\n        6   1   1.000000    0   -1\n        7   1   2.000000    0   -1\n        8   0   2.000000    0   -1\n        9   0   3.000000    0   -1\n        \"\"\"\n        )\n        edges_2 = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.000000    1.000000    7  2\n        0.000000    1.000000    7  6\n        0.000000    1.000000    9  3\n        0.000000    1.000000    9  7\n        \"\"\"\n 
       )\n        simplified = tskit.load_text(\n            nodes_2, edges_2, sequence_length=1, strict=False, base64_metadata=False\n        )\n        assert redundant.kc_distance(simplified, 0) == 0\n        assert redundant.kc_distance(simplified, 1) == 0\n\n\ndef ts_kc_distance(ts1, ts2, lambda_=0):\n    check_kc_tree_sequence_inputs(ts1, ts2)\n\n    total = 0\n    left = 0\n    tree1_iter = ts1.trees(sample_lists=True)\n    tree1 = next(tree1_iter)\n    for tree2 in ts2.trees(sample_lists=True):\n        while tree1.interval.right < tree2.interval.right:\n            span = tree1.interval.right - left\n            total += tree1.kc_distance(tree2, lambda_) * span\n\n            left = tree1.interval.right\n            tree1 = next(tree1_iter)\n        span = tree2.interval.right - left\n        left = tree2.interval.right\n        total += tree1.kc_distance(tree2, lambda_) * span\n\n    return total / ts1.sequence_length\n\n\ndef ts_kc_distance_incremental(ts1, ts2, lambda_=0):\n    check_kc_tree_sequence_inputs(ts1, ts2)\n\n    sample_maps = [dict(), dict()]\n    for i, ts in enumerate([ts1, ts2]):\n        for j, u in enumerate(ts.samples()):\n            sample_maps[i][u] = j\n\n    total = 0\n    left = 0\n\n    t1_vecs = KCVectors(ts1.num_samples)\n    t2_vecs = KCVectors(ts2.num_samples)\n\n    t1_depths = np.zeros(ts1.num_nodes)\n    t2_depths = np.zeros(ts2.num_nodes)\n\n    edge_diffs_iter_1 = ts1.edge_diffs()\n    tree_iter_1 = ts1.trees(sample_lists=True)\n    t1, t1_diffs = next(tree_iter_1), next(edge_diffs_iter_1)\n    update_kc_incremental(t1, t1_vecs, t1_diffs, sample_maps[0], t1_depths)\n    for t2, t2_diffs in zip(ts2.trees(sample_lists=True), ts2.edge_diffs()):\n        update_kc_incremental(t2, t2_vecs, t2_diffs, sample_maps[1], t2_depths)\n        while t1_diffs[0][1] < t2_diffs[0][1]:\n            span = t1_diffs[0][1] - left\n            total += norm_kc_vectors(t1_vecs, t2_vecs, lambda_) * span\n\n            left = t1_diffs[0][1]\n   
         t1, t1_diffs = next(tree_iter_1), next(edge_diffs_iter_1)\n            update_kc_incremental(t1, t1_vecs, t1_diffs, sample_maps[0], t1_depths)\n        span = t2_diffs[0][1] - left\n        left = t2_diffs[0][1]\n        total += norm_kc_vectors(t1_vecs, t2_vecs, lambda_) * span\n\n    return total / ts1.sequence_length\n\n\n# tree is the result of removing/inserting the edges in edge_diffs\ndef update_kc_incremental(tree, kc, edge_diffs, sample_index_map, depths):\n    _, edges_out, edges_in = edge_diffs\n\n    # Update state of detached subtrees.\n    for e in reversed(edges_out):\n        u = e.child\n        depths[u] = 0\n\n        # Only update detached subtrees that remain detached. Otherwise,\n        # they must be reattached by an incoming edge and will be\n        # updated below. We're looking into the future here by seeing\n        # that u remains detached after all the incoming edges are\n        # inserted into `tree`.\n        if tree.parent(u) == tskit.NULL:\n            update_kc_subtree_state(tree, kc, u, sample_index_map, depths)\n\n    # Propagate state change down into reattached subtrees.\n    for e in reversed(edges_in):\n        u = e.child\n        assert depths[u] == 0\n        depths[u] = depths[e.parent] + 1\n        update_kc_subtree_state(tree, kc, u, sample_index_map, depths)\n\n        # The per-leaf elements of KC only change when the edge directly\n        # above the leaf changes, so are handled separately from the\n        # propagated state used for leaf-pair elements.\n        if tree.is_leaf(u):\n            time = tree.branch_length(u)\n            update_kc_vectors_single_leaf(kc, u, time, sample_index_map)\n\n\ndef update_kc_subtree_state(tree, kc, u, sample_index_map, depths):\n    \"\"\"\n    Update the depths of the nodes in this subtree. 
When a leaf is hit,\n    update the KC vector elements associated with that leaf.\n    \"\"\"\n    stack = [u]\n    while len(stack) > 0:\n        v = stack.pop()\n        if tree.is_leaf(v):\n            update_kc_pairs_with_leaf(tree, kc, v, sample_index_map, depths)\n        else:\n            c = tree.left_child(v)\n            while c != -1:\n                # Terminate iteration at nodes that are currently considered\n                # roots by the edge diffs. Nodes with a depth of 0 are\n                # temporary root nodes made by breaking an outgoing edge\n                # that have yet to be inserted by a later incoming edge.\n                if depths[c] != 0:\n                    depths[c] = depths[v] + 1\n                    stack.append(c)\n                c = tree.right_sib(c)\n\n\ndef update_kc_pairs_with_leaf(tree, kc, leaf, sample_index_map, depths):\n    \"\"\"\n    Perform an upward traversal from `leaf` to the root, updating the KC\n    vector elements for pairs of `leaf` with every other leaf in the tree.\n    \"\"\"\n    root_time = tree.time(tree.root)\n    p = tree.parent(leaf)\n    c = leaf\n    while p != -1:\n        time = root_time - tree.time(p)\n        depth = depths[p]\n        for sibling in tree.children(p):\n            if sibling != c:\n                update_kc_vectors_all_pairs(tree, kc, leaf, sibling, depth, time)\n        c, p = p, tree.parent(p)\n\n\ndef check_kc_tree_sequence_inputs(ts1, ts2):\n    if not np.array_equal(ts1.samples(), ts2.samples()):\n        raise ValueError(\"Trees must have the same samples\")\n    if ts1.sequence_length != ts2.sequence_length:\n        raise ValueError(\"Can't compare with sequences of different lengths\")\n\n    tree1_iter = ts1.trees(sample_lists=True)\n    tree1 = next(tree1_iter)\n    for tree2 in ts2.trees(sample_lists=True):\n        while tree1.interval.right < tree2.interval.right:\n            check_kc_tree_inputs(tree1, tree2)\n            tree1 = next(tree1_iter)\n       
 check_kc_tree_inputs(tree1, tree2)\n\n\ndef check_kc_tree_inputs(tree1, tree2):\n    if not len(tree1.roots) == len(tree2.roots) == 1:\n        raise ValueError(\"Trees must have one root\")\n    for tree in [tree1, tree2]:\n        for u in tree.nodes():\n            if tree.num_children(u) == 1:\n                raise ValueError(\"Unary nodes are not supported\")\n\n\nclass TestKCSequenceMetric(unittest.TestCase):\n    \"\"\"\n    Tests the KC Metric on a tree sequence.\n    \"\"\"\n\n    def test_0_distance_from_self(self):\n        ts = msprime.simulate(10)\n        assert ts_kc_distance(ts, ts) == 0\n\n    def verify_errors(self, ts1, ts2):\n        with pytest.raises(ValueError):\n            ts_kc_distance(ts1, ts2)\n        with pytest.raises(ValueError):\n            ts_kc_distance_incremental(ts1, ts2)\n        with pytest.raises(_tskit.LibraryError):\n            ts1.kc_distance(ts2)\n\n    def test_errors_diff_seq_length(self):\n        ts1 = msprime.simulate(10, length=1)\n        ts2 = msprime.simulate(10, length=2)\n        self.verify_errors(ts1, ts2)\n\n    def test_errors_diff_num_samples(self):\n        ts1 = msprime.simulate(10, length=1)\n        ts2 = msprime.simulate(12, length=2)\n        self.verify_errors(ts1, ts2)\n\n    def test_errors_different_sample_lists(self):\n        tables_1 = tskit.TableCollection(sequence_length=2.0)\n        tables_2 = tskit.TableCollection(sequence_length=2.0)\n\n        sv1 = [True, True, True, False, False]\n        tv1 = [0.0, 0.0, 0.0, 1.0, 2.0]\n        sv2 = [True, True, False, False, True]\n        tv2 = [0.0, 0.0, 1.0, 2.0, 0.0]\n        for is_sample, t in zip(sv1, tv1):\n            flags = tskit.NODE_IS_SAMPLE if is_sample else 0\n            tables_1.nodes.add_row(flags=flags, time=t)\n        for is_sample, t in zip(sv2, tv2):\n            flags = tskit.NODE_IS_SAMPLE if is_sample else 0\n            tables_2.nodes.add_row(flags=flags, time=t)\n\n        lv = [0.0, 0.0, 0.0, 0.0]\n        rv = 
[1.0, 1.0, 1.0, 1.0]\n        pv1 = [3, 3, 4, 4]\n        cv1 = [0, 1, 2, 3]\n        for left, right, p, c in zip(lv, rv, pv1, cv1):\n            tables_1.edges.add_row(left=left, right=right, parent=p, child=c)\n\n        pv2 = [2, 2, 3, 3]\n        cv2 = [0, 1, 2, 4]\n        for left, right, p, c in zip(lv, rv, pv2, cv2):\n            tables_2.edges.add_row(left=left, right=right, parent=p, child=c)\n\n        ts1 = tables_1.tree_sequence()\n        ts2 = tables_2.tree_sequence()\n        self.verify_errors(ts1, ts2)\n\n        unsimplified_ts = msprime.simulate(\n            10, random_seed=1, recombination_rate=10, record_full_arg=True\n        )\n        self.verify_errors(unsimplified_ts, unsimplified_ts)\n\n    def test_errors_unary_nodes(self):\n        tables = tskit.TableCollection(sequence_length=2.0)\n\n        sv = [True, False, False]\n        tv = [0.0, 1.0, 2.0]\n        for is_sample, t in zip(sv, tv):\n            flags = tskit.NODE_IS_SAMPLE if is_sample else 0\n            tables.nodes.add_row(flags=flags, time=t)\n\n        lv = [0.0, 0.0, 0.0]\n        rv = [1.0, 1.0, 1.0]\n        pv = [1, 2]\n        cv = [0, 1]\n        for left, right, p, c in zip(lv, rv, pv, cv):\n            tables.edges.add_row(left=left, right=right, parent=p, child=c)\n\n        ts = tables.tree_sequence()\n        self.verify_errors(ts, ts)\n\n    def test_errors_different_samples(self):\n        ts1 = msprime.simulate(10, random_seed=1)\n        ts2 = tsutil.jiggle_samples(ts1)\n        self.verify_errors(ts1, ts2)\n\n    def verify_result(self, ts1, ts2, lambda_, result, places=None):\n        kc1 = ts_kc_distance(ts1, ts2, lambda_)\n        kc2 = ts_kc_distance_incremental(ts1, ts2, lambda_)\n        kc3 = ts1.kc_distance(ts2, lambda_)\n        self.assertAlmostEqual(kc1, result, places=places)\n        self.assertAlmostEqual(kc2, result, places=places)\n        self.assertAlmostEqual(kc3, result, places=places)\n\n        kc1 = ts_kc_distance(ts2, ts1, 
lambda_)\n        kc2 = ts_kc_distance_incremental(ts2, ts1, lambda_)\n        kc3 = ts2.kc_distance(ts1, lambda_)\n        self.assertAlmostEqual(kc1, result, places=places)\n        self.assertAlmostEqual(kc2, result, places=places)\n        self.assertAlmostEqual(kc3, result, places=places)\n\n    def verify_same_kc(self, ts1, ts2, lambda_=0):\n        kc1 = ts_kc_distance(ts1, ts2, lambda_)\n        kc2 = ts_kc_distance_incremental(ts1, ts2, lambda_)\n        kc3 = ts1.kc_distance(ts2, lambda_)\n        self.assertAlmostEqual(kc1, kc2)\n        self.assertAlmostEqual(kc2, kc3)\n\n        kc1 = ts_kc_distance(ts2, ts1, lambda_)\n        kc2 = ts_kc_distance_incremental(ts2, ts1, lambda_)\n        kc3 = ts2.kc_distance(ts1, lambda_)\n        self.assertAlmostEqual(kc1, kc2)\n        self.assertAlmostEqual(kc2, kc3)\n\n    def validate_trees(self, n):\n        for seed in range(1, 10):\n            ts1 = msprime.simulate(n, random_seed=seed, recombination_rate=1)\n            ts2 = msprime.simulate(n, random_seed=seed + 1, recombination_rate=1)\n            self.verify_same_kc(ts2, ts1)\n            self.verify_same_kc(ts1, ts2)\n            self.verify_same_kc(ts1, ts1)  # Test sequences with equal breakpoints\n\n    def test_sample_5(self):\n        self.validate_trees(5)\n\n    def test_sample_10(self):\n        self.validate_trees(10)\n\n    def test_sample_20(self):\n        self.validate_trees(20)\n\n    def validate_nonbinary_trees(self, n):\n        demographic_events = [\n            msprime.SimpleBottleneck(0.02, 0, proportion=0.25),\n            msprime.SimpleBottleneck(0.2, 0, proportion=1),\n        ]\n\n        for seed in range(1, 10):\n            ts1 = msprime.simulate(\n                n,\n                random_seed=seed,\n                demographic_events=demographic_events,\n                recombination_rate=1,\n            )\n            # Check if this is really nonbinary\n            found = False\n            for edgeset in 
ts1.edgesets():\n                if len(edgeset.children) > 2:\n                    found = True\n                    break\n            assert found\n\n            ts2 = msprime.simulate(\n                n,\n                random_seed=seed + 1,\n                demographic_events=demographic_events,\n                recombination_rate=1,\n            )\n            self.verify_same_kc(ts1, ts2)\n\n            # compare to a binary tree also\n            ts2 = msprime.simulate(n, recombination_rate=1, random_seed=seed + 1)\n            self.verify_same_kc(ts1, ts2)\n\n    def test_non_binary_sample_10(self):\n        self.validate_nonbinary_trees(10)\n\n    def test_non_binary_sample_20(self):\n        self.validate_nonbinary_trees(20)\n\n    def test_permit_internal_samples(self):\n        tables = tskit.TableCollection(1.0)\n        tables.nodes.add_row(flags=1)\n        tables.nodes.add_row(flags=1)\n        tables.nodes.add_row(flags=1, time=1)\n        tables.edges.add_row(0, 1, 2, 0)\n        tables.edges.add_row(0, 1, 2, 1)\n        ts = tables.tree_sequence()\n        assert ts.kc_distance(ts) == 0\n        assert ts_kc_distance_incremental(ts, ts) == 0\n\n    def test_known_kc_sample_trees_different_shapes(self):\n        tables_1 = tskit.TableCollection(sequence_length=2.0)\n        tables_2 = tskit.TableCollection(sequence_length=2.0)\n\n        # Nodes\n        sv = [True, True, True, True, False, False, False]\n        tv = [0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0]\n        for is_sample, t in zip(sv, tv):\n            flags = tskit.NODE_IS_SAMPLE if is_sample else 0\n            tables_1.nodes.add_row(flags=flags, time=t)\n            tables_2.nodes.add_row(flags=flags, time=t)\n\n        # First tree edges\n        pv1 = [4, 4, 5, 5, 6, 6, 5, 6]\n        cv1 = [2, 3, 1, 4, 0, 5, 0, 4]\n        lv1 = [0, 0, 0, 0, 0, 0, 1, 1]\n        rv1 = [2, 2, 2, 1, 1, 2, 2, 2]\n\n        # Second tree edges\n        pv2 = [4, 4, 5, 5, 6, 6, 5, 6]\n        cv2 = [2, 3, 
0, 1, 4, 5, 4, 0]\n        lv2 = [0, 0, 0, 0, 0, 0, 1, 1]\n        rv2 = [2, 2, 1, 2, 1, 2, 2, 2]\n\n        for left, right, p, c in zip(lv1, rv1, pv1, cv1):\n            tables_1.edges.add_row(left=left, right=right, parent=p, child=c)\n        for left, right, p, c in zip(lv2, rv2, pv2, cv2):\n            tables_2.edges.add_row(left=left, right=right, parent=p, child=c)\n\n        tables_1.sort()\n        tables_2.sort()\n        ts_1 = tables_1.tree_sequence()\n        ts_2 = tables_2.tree_sequence()\n        self.verify_result(ts_1, ts_2, 0, 2.0)\n\n    def test_known_kc_sample_trees_same_shape_different_times(self):\n        tables_1 = tskit.TableCollection(sequence_length=1.0)\n        tables_2 = tskit.TableCollection(sequence_length=1.0)\n\n        # Nodes\n        sv = [True, True, True, False, False]\n        tv_1 = [0.0, 0.0, 0.0, 2.0, 3.0]\n        tv_2 = [0.0, 0.0, 0.0, 4.0, 6.0]\n\n        for is_sample, t1, t2 in zip(sv, tv_1, tv_2):\n            flags = tskit.NODE_IS_SAMPLE if is_sample else 0\n            tables_1.nodes.add_row(flags=flags, time=t1)\n            tables_2.nodes.add_row(flags=flags, time=t2)\n\n        # Edges\n        lv = [0.0, 0.0, 0.0, 0.0]\n        rv = [1.0, 1.0, 1.0, 1.0]\n        pv = [3, 3, 4, 4]\n        cv = [0, 1, 2, 3]\n\n        for left, right, p, c in zip(lv, rv, pv, cv):\n            tables_1.edges.add_row(left=left, right=right, parent=p, child=c)\n            tables_2.edges.add_row(left=left, right=right, parent=p, child=c)\n\n        ts_1 = tables_1.tree_sequence()\n        ts_2 = tables_2.tree_sequence()\n\n        self.verify_result(ts_1, ts_2, 0, 0)\n        self.verify_result(ts_1, ts_2, 1, 4.243, places=3)\n\n    def test_known_kc_same_tree_twice_same_metric(self):\n        tables_1 = tskit.TableCollection(sequence_length=2.0)\n        tables_2 = tskit.TableCollection(sequence_length=2.0)\n\n        # Nodes\n        sv = [True, True, True, False, False]\n        tv_1 = [0.0, 0.0, 0.0, 2.0, 3.0]\n        tv_2 
= [0.0, 0.0, 0.0, 4.0, 6.0]\n\n        for is_sample, t1, t2 in zip(sv, tv_1, tv_2):\n            flags = tskit.NODE_IS_SAMPLE if is_sample else 0\n            tables_1.nodes.add_row(flags=flags, time=t1)\n            tables_2.nodes.add_row(flags=flags, time=t2)\n\n        # Edges\n        pv = [3, 3, 4, 4]\n        cv = [0, 1, 2, 3]\n\n        for p, c in zip(pv, cv):\n            tables_1.edges.add_row(left=0, right=1, parent=p, child=c)\n            tables_1.edges.add_row(left=1, right=2, parent=p, child=c)\n            tables_2.edges.add_row(left=0, right=0.5, parent=p, child=c)\n            tables_2.edges.add_row(left=0.5, right=2, parent=p, child=c)\n\n        ts_1 = tables_1.tree_sequence()\n        ts_2 = tables_2.tree_sequence()\n        self.verify_result(ts_1, ts_2, 0, 0)\n        self.verify_result(ts_1, ts_2, 1, 4.243, places=3)\n\n    def test_remove_root(self):\n        tables_1 = tskit.TableCollection(sequence_length=10.0)\n        tables_2 = tskit.TableCollection(sequence_length=10.0)\n\n        # Nodes\n        sv1 = [True, True, True, True, True, False, False, False, False, False]\n        tv1 = [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0]\n\n        sv2 = [True, True, True, True, True, False, False, False, False]\n        tv2 = [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0]\n\n        for is_sample, t in zip(sv1, tv1):\n            flags = tskit.NODE_IS_SAMPLE if is_sample else 0\n            tables_1.nodes.add_row(flags=flags, time=t)\n        for is_sample, t in zip(sv2, tv2):\n            flags = tskit.NODE_IS_SAMPLE if is_sample else 0\n            tables_2.nodes.add_row(flags=flags, time=t)\n\n        # Edges\n        pv1 = [5, 5, 6, 6, 7, 7, 8, 8, 8, 9, 9]\n        cv1 = [0, 1, 3, 4, 2, 5, 2, 6, 7, 5, 8]\n        lv1 = [0, 0, 0, 0, 5, 5, 0, 0, 5, 0, 0]\n        rv1 = [10, 10, 10, 10, 10, 10, 5, 10, 10, 5, 5]\n\n        pv2 = [5, 5, 6, 6, 7, 7, 8, 8]\n        cv2 = [0, 1, 2, 3, 4, 5, 6, 7]\n        lv2 = [0, 0, 0, 0, 0, 0, 0, 0]\n      
  rv2 = [10, 10, 10, 10, 10, 10, 10, 10]\n\n        for p, c, l, r in zip(pv1, cv1, lv1, rv1):\n            tables_1.edges.add_row(left=l, right=r, parent=p, child=c)\n\n        for p, c, l, r in zip(pv2, cv2, lv2, rv2):\n            tables_2.edges.add_row(left=l, right=r, parent=p, child=c)\n\n        ts_1 = tables_1.tree_sequence()\n        ts_2 = tables_2.tree_sequence()\n        distance = (math.sqrt(8) * 5 + math.sqrt(6) * 5) / 10\n        self.verify_result(ts_1, ts_2, 0, distance)\n\n    def test_ignores_subtrees_with_no_samples(self):\n        nodes_1 = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time    population  individual  metadata\n        0   0   0.000000    0   -1\n        1   0   0.000000    0   -1\n        2   0   0.000000    0   -1\n        3   1   0.000000    0   -1\n        4   0   0.000000    0   -1\n        5   0   0.000000    0   -1\n        6   1   1.000000    0   -1\n        7   1   2.000000    0   -1\n        8   0   2.000000    0   -1\n        9   0   3.000000    0   -1\n        \"\"\"\n        )\n        edges_1 = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.000000    1.000000    6  0\n        0.000000    1.000000    6  1\n        0.000000    1.000000    7  2\n        0.000000    1.000000    7  6\n        0.000000    1.000000    8  4\n        0.000000    1.000000    8  5\n        0.000000    1.000000    9  3\n        0.000000    1.000000    9  7\n        0.000000    1.000000    9  8\n        \"\"\"\n        )\n        redundant = tskit.load_text(\n            nodes_1, edges_1, sequence_length=1, strict=False, base64_metadata=False\n        )\n\n        nodes_2 = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time    population  individual  metadata\n        0   0   0.000000    0   -1\n        1   0   0.000000    0   -1\n        2   0   0.000000    0   -1\n        3   1   0.000000    0   -1\n        4   0   0.000000    0   -1\n        5   0   0.000000    0   -1\n        
6   1   1.000000    0   -1\n        7   1   2.000000    0   -1\n        8   0   2.000000    0   -1\n        9   0   3.000000    0   -1\n        \"\"\"\n        )\n        edges_2 = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.000000    1.000000    7  2\n        0.000000    1.000000    7  6\n        0.000000    1.000000    9  3\n        0.000000    1.000000    9  7\n        \"\"\"\n        )\n        simplified = tskit.load_text(\n            nodes_2, edges_2, sequence_length=1, strict=False, base64_metadata=False\n        )\n        t1 = next(redundant.trees(sample_lists=True))\n        t2 = next(simplified.trees(sample_lists=True))\n        assert t1.kc_distance(t2, 0) == 0\n        assert t1.kc_distance(t2, 1) == 0\n\n\n# Test the RF distance metrics:\n# TODO: integrate with the KC tests\n\n\nclass TestTreeSameSamples:\n    # Tree1\n    # 2.00┊    6    ┊\n    #     ┊  ┏━┻━┓  ┊\n    # 1.00┊  4   5  ┊\n    #     ┊ ┏┻┓ ┏┻┓ ┊\n    # 0.00┊ 0 1 2 3 ┊\n    #     0         1\n    #\n    # Tree2\n    # 3.00┊   6     ┊\n    #     ┊ ┏━┻━┓   ┊\n    # 2.00┊ ┃   5   ┊\n    #     ┊ ┃ ┏━┻┓  ┊\n    # 1.00┊ ┃ ┃  4  ┊\n    #     ┊ ┃ ┃ ┏┻┓ ┊\n    # 0.00┊ 0 1 2 3 ┊\n    #     0         1\n\n    @tests.cached_example\n    def tree(self):\n        return tskit.Tree.generate_balanced(4)\n\n    @tests.cached_example\n    def tree_other(self):\n        return tskit.Tree.generate_comb(4)\n\n    def test_rf_distance(self):\n        assert self.tree().rf_distance(self.tree_other()) == 2\n\n\nclass TestTreeDifferentSamples:\n    # Tree1\n    # 2.00┊     6     ┊\n    #     ┊   ┏━┻━┓   ┊\n    # 1.00┊   4   5   ┊\n    #     ┊  ┏┻┓ ┏┻┓  ┊\n    # 0.00┊  0 1 2 3  ┊\n    #     0           1\n    #\n    # Tree2\n    # 4.00┊   8       ┊\n    #     ┊ ┏━┻━┓     ┊\n    # 3.00┊ ┃   7     ┊\n    #     ┊ ┃ ┏━┻━┓   ┊\n    # 2.00┊ ┃ ┃   6   ┊\n    #     ┊ ┃ ┃ ┏━┻┓  ┊\n    # 1.00┊ ┃ ┃ ┃  5  ┊\n    #     ┊ ┃ ┃ ┃ ┏┻┓ ┊\n    # 0.00┊ 0 1 2 3 4 ┊\n    #     0           1\n\n  
  @tests.cached_example\n    def tree(self):\n        return tskit.Tree.generate_balanced(4)\n\n    @tests.cached_example\n    def tree_other(self):\n        return tskit.Tree.generate_comb(5)\n\n    def test_rf_distance(self):\n        assert self.tree().rf_distance(self.tree_other()) == 8\n\n\nclass TestTreeMultiRoots:\n    # Tree1\n    # 4.00┊        15             ┊\n    #     ┊     ┏━━━┻━━━┓         ┊\n    # 3.00┊     ┃      14         ┊\n    #     ┊     ┃     ┏━┻━┓       ┊\n    # 2.00┊    12     ┃  13       ┊\n    #     ┊   ┏━┻━┓   ┃  ┏┻┓      ┊\n    # 1.00┊   9  10   ┃  ┃ 11     ┊\n    #     ┊  ┏┻┓ ┏┻┓ ┏┻┓ ┃ ┏┻┓    ┊\n    # 0.00┊  0 1 2 3 4 5 6 7 8    ┊\n    #     0                       1\n    #\n    # Tree2\n    # 3.00┊              15       ┊\n    #     ┊            ┏━━┻━┓     ┊\n    # 2.00┊     11     ┃   14     ┊\n    #     ┊    ┏━┻━┓   ┃  ┏━┻┓    ┊\n    # 1.00┊    9  10  12  ┃ 13    ┊\n    #     ┊   ┏┻┓ ┏┻┓ ┏┻┓ ┃ ┏┻┓   ┊\n    # 0.00┊   0 1 2 3 4 5 6 7 8   ┊\n    #     0                       1\n\n    @tests.cached_example\n    def tree(self):\n        return tskit.Tree.generate_balanced(9)\n\n    @tests.cached_example\n    def tree_other(self):\n        tables = tskit.Tree.generate_balanced(9, arity=2).tree_sequence.dump_tables()\n        edges = tables.edges.copy()\n        tables.edges.clear()\n        for edge in edges:\n            if edge.parent != 16:\n                tables.edges.append(edge)\n        return tables.tree_sequence().first()\n\n    def test_rf_distance(self):\n        with pytest.raises(ValueError):\n            self.tree().rf_distance(self.tree_other())\n\n\nclass TestEmpty:\n    @tests.cached_example\n    def tree(self):\n        tables = tskit.TableCollection(1)\n        return tables.tree_sequence().first()\n\n    @tests.cached_example\n    def tree_other(self):\n        tables = tskit.TableCollection(1)\n        return tables.tree_sequence().first()\n\n    def test_rf_distance(self):\n        with pytest.raises(ValueError):\n  
          self.tree().rf_distance(self.tree_other())\n\n\nclass TestTreeInNullState:\n    @tests.cached_example\n    def tsk_tree1(self):\n        tree = tskit.Tree.generate_comb(5)\n        tree.clear()\n        return tree\n\n    @tests.cached_example\n    def tree_other(self):\n        tree = tskit.Tree.generate_comb(5)\n        tree.clear()\n        return tree\n\n    def test_rf_distance(self):\n        with pytest.raises(ValueError):\n            self.tsk_tree1().rf_distance(self.tree_other())\n\n\nclass TestAllRootsN5:\n    @tests.cached_example\n    def tree(self):\n        tables = tskit.TableCollection(1)\n        for _ in range(5):\n            tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        return tables.tree_sequence().first()\n\n    def test_rf_distance(self):\n        with pytest.raises(ValueError, match=\"single root\"):\n            self.tree().rf_distance(self.tree())\n\n\nclass TestWithPackages:\n    def to_dendropy(self, newick_data, tns):\n        return dendropy.Tree.get(\n            data=newick_data,\n            schema=\"newick\",\n            rooting=\"force-rooted\",\n            taxon_namespace=tns,\n        )\n\n    def dendropy_rf_distance(self, tree1, tree2, weighted=False):\n        tns = dendropy.TaxonNamespace()\n        tree1 = self.to_dendropy(tree1.as_newick(), tns)\n        tree2 = self.to_dendropy(tree2.as_newick(), tns)\n        tree1.encode_bipartitions()\n        tree2.encode_bipartitions()\n        if weighted:\n            return treecompare.weighted_robinson_foulds_distance(tree1, tree2)\n        else:\n            return treecompare.unweighted_robinson_foulds_distance(tree1, tree2)\n\n    @pytest.mark.parametrize(\"n\", [2, 3, 5, 10, 20])\n    def test_rf_distance_against_dendropy(self, n):\n        trees = []\n        for seed in [42, 43]:\n            ts = msprime.sim_ancestry(n, ploidy=1, random_seed=seed)\n            trees.append(ts.first())\n        rf1 = trees[0].rf_distance(trees[1])\n        
rf2 = self.dendropy_rf_distance(trees[0], trees[1])\n        assert rf1 == rf2\n\n\nclass TestDistanceBetween:\n    @pytest.mark.parametrize(\n        (\"u\", \"v\"),\n        itertools.combinations([0, 1, 2, 3], 2),\n    )\n    def test_distance_between_sample(self, u, v):\n        ts = msprime.sim_ancestry(\n            2, sequence_length=10, recombination_rate=0.1, random_seed=42\n        )\n        test_tree = ts.first()\n        assert test_tree.distance_between(u, v) == pytest.approx(\n            ts.diversity([u, v], mode=\"branch\", windows=\"trees\")[0]\n        )\n\n    def test_distance_between_same_node(self):\n        ts = msprime.sim_ancestry(\n            2, sequence_length=10, recombination_rate=0.1, random_seed=42\n        )\n        test_tree = ts.first()\n        assert test_tree.distance_between(0, 0) == 0\n\n    def test_distance_between_nodes(self):\n        # 4.00┊   8       ┊\n        #     ┊ ┏━┻━┓     ┊\n        # 3.00┊ ┃   7     ┊\n        #     ┊ ┃ ┏━┻━┓   ┊\n        # 2.00┊ ┃ ┃   6   ┊\n        #     ┊ ┃ ┃ ┏━┻┓  ┊\n        # 1.00┊ ┃ ┃ ┃  5  ┊\n        #     ┊ ┃ ┃ ┃ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 3 4 ┊\n        #     0           1\n        ts = tskit.Tree.generate_comb(5)\n        assert ts.distance_between(1, 7) == 3.0\n        assert ts.distance_between(6, 8) == 2.0\n\n    def test_distance_between_invalid_nodes(self):\n        ts = tskit.Tree.generate_comb(5)\n        with pytest.raises(ValueError):\n            ts.distance_between(0, 100)\n"
  },
  {
    "path": "python/tests/test_divmat.py",
    "content": "# MIT License\n#\n# Copyright (c) 2023-2024 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTest cases for divergence matrix based pairwise stats\n\"\"\"\n\nimport array\nimport collections\nimport functools\n\nimport msprime\nimport numpy as np\nimport pytest\n\nimport tskit\nfrom tests import tsutil\nfrom tests.tsutil import get_example_tree_sequences\n\n# ↑ See https://github.com/tskit-dev/tskit/issues/1804 for when\n# we can remove this.\n\nDIVMAT_MODES = [\"branch\", \"site\"]\n\n# NOTE: this implementation of Schieber-Vishkin algorithm is done like\n# this so it's easy to run with numba. It would be more naturally\n# packaged as a class. We don't actually use numba here, but it's\n# handy to have a version of the SV code lying around that can be\n# run directly with numba.\n\n\ndef sv_tables_init(parent_array):\n    n = 1 + parent_array.shape[0]\n\n    LAMBDA = 0\n    # Triply-linked tree. 
FIXME we shouldn't need to build this as it's\n    # available already in tskit\n    child = np.zeros(n, dtype=np.int32)\n    parent = np.zeros(n, dtype=np.int32)\n    sib = np.zeros(n, dtype=np.int32)\n\n    for j in range(n - 1):\n        u = j + 1\n        v = parent_array[j] + 1\n        sib[u] = child[v]\n        child[v] = u\n        parent[u] = v\n\n    lambd = np.zeros(n, dtype=np.int32)\n    pi = np.zeros(n, dtype=np.int32)\n    tau = np.zeros(n, dtype=np.int32)\n    beta = np.zeros(n, dtype=np.int32)\n    alpha = np.zeros(n, dtype=np.int32)\n\n    p = child[LAMBDA]\n    n = 0\n    lambd[0] = -1\n    while p != LAMBDA:\n        while True:\n            n += 1\n            pi[p] = n\n            tau[n] = LAMBDA\n            lambd[n] = 1 + lambd[n >> 1]\n            if child[p] != LAMBDA:\n                p = child[p]\n            else:\n                break\n        beta[p] = n\n        while True:\n            tau[beta[p]] = parent[p]\n            if sib[p] != LAMBDA:\n                p = sib[p]\n                break\n            else:\n                p = parent[p]\n                if p != LAMBDA:\n                    h = lambd[n & -pi[p]]\n                    beta[p] = ((n >> h) | 1) << h\n                else:\n                    break\n\n    # Begin the second traversal\n    lambd[0] = lambd[n]\n    pi[LAMBDA] = 0\n    beta[LAMBDA] = 0\n    alpha[LAMBDA] = 0\n    p = child[LAMBDA]\n    while p != LAMBDA:\n        while True:\n            a = alpha[parent[p]] | (beta[p] & -beta[p])\n            alpha[p] = a\n            if child[p] != LAMBDA:\n                p = child[p]\n            else:\n                break\n        while True:\n            if sib[p] != LAMBDA:\n                p = sib[p]\n                break\n            else:\n                p = parent[p]\n                if p == LAMBDA:\n                    break\n\n    return lambd, pi, tau, beta, alpha\n\n\ndef _sv_mrca(x, y, lambd, pi, tau, beta, alpha):\n    if beta[x] <= beta[y]:\n   
     h = lambd[beta[y] & -beta[x]]\n    else:\n        h = lambd[beta[x] & -beta[y]]\n    k = alpha[x] & alpha[y] & -(1 << h)\n    h = lambd[k & -k]\n    j = ((beta[x] >> h) | 1) << h\n    if j == beta[x]:\n        xhat = x\n    else:\n        ell = lambd[alpha[x] & ((1 << h) - 1)]\n        xhat = tau[((beta[x] >> ell) | 1) << ell]\n    if j == beta[y]:\n        yhat = y\n    else:\n        ell = lambd[alpha[y] & ((1 << h) - 1)]\n        yhat = tau[((beta[y] >> ell) | 1) << ell]\n    if pi[xhat] <= pi[yhat]:\n        z = xhat\n    else:\n        z = yhat\n    return z\n\n\ndef sv_mrca(x, y, lambd, pi, tau, beta, alpha):\n    # Convert to 1-based indexes\n    return _sv_mrca(x + 1, y + 1, lambd, pi, tau, beta, alpha) - 1\n\n\ndef local_root(tree, u):\n    while tree.parent(u) != tskit.NULL:\n        u = tree.parent(u)\n    return u\n\n\ndef span_normalise_windows(D, windows):\n    assert len(D) == len(windows) - 1\n    for j in range(len(windows) - 1):\n        span = windows[j + 1] - windows[j]\n        D[j] /= span\n\n\ndef sample_set_normalisation(sample_sets):\n    n = len(sample_sets)\n    C = np.zeros((n, n))\n    for j in range(n):\n        C[j, j] = len(sample_sets[j]) * (len(sample_sets[j]) - 1)\n        for k in range(j + 1, n):\n            C[j, k] = len(sample_sets[j]) * len(sample_sets[k])\n            C[k, j] = C[j, k]\n    # Avoid division by zero for singleton samplesets\n    C[C == 0] = 1\n    # print(\"C = \", C)\n    return C\n\n\ndef branch_divergence_matrix(ts, sample_sets=None, windows=None, span_normalise=True):\n    windows_specified = windows is not None\n    windows = ts.parse_windows(windows)\n    num_windows = len(windows) - 1\n\n    n = len(sample_sets)\n    D = np.zeros((num_windows, n, n))\n    tree = tskit.Tree(ts)\n    C = sample_set_normalisation(sample_sets)\n    for i in range(num_windows):\n        left = windows[i]\n        right = windows[i + 1]\n        # print(f\"WINDOW {i} [{left}, {right})\")\n        tree.seek(left)\n      
  # Iterate over the trees in this window\n        while tree.interval.left < right and tree.index != -1:\n            span_left = max(tree.interval.left, left)\n            span_right = min(tree.interval.right, right)\n            span = span_right - span_left\n            # print(f\"\\ttree {tree.interval} [{span_left}, {span_right})\")\n            tables = sv_tables_init(tree.parent_array)\n            for j in range(n):\n                for u in sample_sets[j]:\n                    for k in range(j, n):\n                        for v in sample_sets[k]:\n                            # The u=v case here contributes zero, not bothering\n                            # to exclude it.\n                            w = sv_mrca(u, v, *tables)\n                            assert w == tree.mrca(u, v)\n                            if w != tskit.NULL:\n                                tu = ts.nodes_time[w] - ts.nodes_time[u]\n                                tv = ts.nodes_time[w] - ts.nodes_time[v]\n                            else:\n                                tu = (\n                                    ts.nodes_time[local_root(tree, u)] - ts.nodes_time[u]\n                                )\n                                tv = (\n                                    ts.nodes_time[local_root(tree, v)] - ts.nodes_time[v]\n                                )\n                            d = (tu + tv) * span\n                            D[i, j, k] += d\n            tree.next()\n        # Fill out symmetric triangle in the matrix, and get average\n        for j in range(n):\n            D[i, j, j] /= C[j, j]\n            for k in range(j + 1, n):\n                D[i, j, k] /= C[j, k]\n                D[i, k, j] = D[i, j, k]\n    if span_normalise:\n        span_normalise_windows(D, windows)\n    if not windows_specified:\n        D = D[0]\n    return D\n\n\ndef divergence_matrix(\n    ts, windows=None, sample_sets=None, samples=None, mode=\"site\", span_normalise=True\n):\n    
assert mode in [\"site\", \"branch\"]\n    if samples is not None and sample_sets is not None:\n        raise ValueError(\"Cannot specify both\")\n    if samples is None and sample_sets is None:\n        samples = ts.samples()\n    if samples is not None:\n        sample_sets = [[u] for u in samples]\n    else:\n        assert sample_sets is not None\n\n    if mode == \"site\":\n        return site_divergence_matrix(\n            ts, sample_sets, windows=windows, span_normalise=span_normalise\n        )\n    else:\n        return branch_divergence_matrix(\n            ts, sample_sets, windows=windows, span_normalise=span_normalise\n        )\n\n\ndef stats_api_divergence_matrix(ts, *args, **kwargs):\n    return stats_api_matrix_method(ts, ts.divergence, *args, **kwargs)\n\n\ndef stats_api_genetic_relatedness_matrix(ts, *args, **kwargs):\n    method = functools.partial(ts.genetic_relatedness, proportion=False)\n    return stats_api_matrix_method(ts, method, *args, **kwargs)\n\n\ndef stats_api_matrix_method(\n    ts,\n    method,\n    windows=None,\n    samples=None,\n    sample_sets=None,\n    mode=\"site\",\n    span_normalise=True,\n):\n    if samples is not None and sample_sets is not None:\n        raise ValueError(\"Cannot specify both\")\n    if samples is None and sample_sets is None:\n        samples = ts.samples()\n    if samples is not None:\n        sample_sets = [[u] for u in samples]\n    else:\n        assert sample_sets is not None\n\n    windows_specified = windows is not None\n    windows = [0, ts.sequence_length] if windows is None else list(windows)\n    num_windows = len(windows) - 1\n\n    if len(sample_sets) == 0:\n        # FIXME: the code general stat code doesn't seem to handle zero samples\n        # case, need to identify MWE and file issue.\n        if windows_specified:\n            return np.zeros(shape=(num_windows, 0, 0))\n        else:\n            return np.zeros(shape=(0, 0))\n\n    # FIXME We have to go through this annoying 
rigmarole because windows must start and\n    # end with 0 and L. We should relax this requirement to just making the windows\n    # contiguous, so that we just look at specific sections of the genome.\n    drop = []\n    if windows[0] != 0:\n        windows = [0] + windows\n        drop.append(0)\n    if windows[-1] != ts.sequence_length:\n        windows.append(ts.sequence_length)\n        drop.append(-1)\n\n    n = len(sample_sets)\n    indexes = [(i, j) for i in range(n) for j in range(n)]\n    X = method(\n        sample_sets,\n        indexes=indexes,\n        mode=mode,\n        span_normalise=span_normalise,\n        windows=windows,\n    )\n    keep = np.ones(len(windows) - 1, dtype=bool)\n    keep[drop] = False\n    X = X[keep]\n    # Quick hack to get the within singleton sampleset divergence=0\n    X[np.isnan(X)] = 0\n    out = X.reshape((X.shape[0], n, n))\n    if not windows_specified:\n        out = out[0]\n    return out\n\n\ndef group_alleles(genotypes, num_alleles):\n    n = genotypes.shape[0]\n    A = np.zeros(n, dtype=int)\n    offsets = np.zeros(num_alleles + 1, dtype=int)\n    k = 0\n    for a in range(num_alleles):\n        offsets[a + 1] = offsets[a]\n        for j in range(n):\n            if genotypes[j] == a:\n                offsets[a + 1] += 1\n                A[k] = j\n                k += 1\n    return A, offsets\n\n\ndef site_divergence_matrix(ts, sample_sets, *, windows=None, span_normalise=True):\n    windows_specified = windows is not None\n    windows = ts.parse_windows(windows)\n    num_windows = len(windows) - 1\n\n    n = len(sample_sets)\n    samples = []\n    sample_set_index_map = []\n    for j in range(n):\n        for u in sample_sets[j]:\n            samples.append(u)\n            sample_set_index_map.append(j)\n    C = sample_set_normalisation(sample_sets)\n    D = np.zeros((num_windows, n, n))\n\n    site_id = 0\n    while site_id < ts.num_sites and ts.sites_position[site_id] < windows[0]:\n        site_id += 1\n\n    
# Note we have to use isolated_as_missing here because we're working with\n    # non-sample nodes. There are tricky problems here later with missing data.\n    variant = tskit.Variant(ts, samples=samples, isolated_as_missing=False)\n    for i in range(num_windows):\n        left = windows[i]\n        right = windows[i + 1]\n        if site_id < ts.num_sites:\n            assert ts.sites_position[site_id] >= left\n        while site_id < ts.num_sites and ts.sites_position[site_id] < right:\n            variant.decode(site_id)\n            X, offsets = group_alleles(variant.genotypes, variant.num_alleles)\n            for j in range(variant.num_alleles):\n                A = X[offsets[j] : offsets[j + 1]]\n                for k in range(j + 1, variant.num_alleles):\n                    B = X[offsets[k] : offsets[k + 1]]\n                    for a in A:\n                        a_set_index = sample_set_index_map[a]\n                        for b in B:\n                            b_set_index = sample_set_index_map[b]\n                            D[i, a_set_index, b_set_index] += 1\n                            D[i, b_set_index, a_set_index] += 1\n            site_id += 1\n        D[i] /= C\n    if span_normalise:\n        span_normalise_windows(D, windows)\n    if not windows_specified:\n        D = D[0]\n    return D\n\n\ndef check_divmat(\n    ts,\n    *,\n    windows=None,\n    samples=None,\n    sample_sets=None,\n    span_normalise=True,\n    verbosity=0,\n    compare_stats_api=True,\n    compare_lib=True,\n    mode=\"site\",\n):\n    # print(\"samples = \", samples, sample_sets)\n    # print(ts.draw_text())\n    if verbosity > 1:\n        print(ts.draw_text())\n\n    D1 = divergence_matrix(\n        ts,\n        sample_sets=sample_sets,\n        samples=samples,\n        windows=windows,\n        mode=mode,\n        span_normalise=span_normalise,\n    )\n    if compare_stats_api:\n        D2 = stats_api_divergence_matrix(\n            ts,\n            
windows=windows,\n            samples=samples,\n            sample_sets=sample_sets,\n            mode=mode,\n            span_normalise=span_normalise,\n        )\n        # print(\"windows = \", windows)\n        # print(D1)\n        # print(D2)\n        np.testing.assert_allclose(D1, D2)\n        assert D1.shape == D2.shape\n    if compare_lib:\n        ids = None\n        if sample_sets is not None:\n            ids = sample_sets\n        if samples is not None:\n            ids = samples\n        D3 = ts.divergence_matrix(\n            ids,\n            windows=windows,\n            mode=mode,\n            span_normalise=span_normalise,\n        )\n        # print()\n        # np.set_printoptions(linewidth=500, precision=4)\n        # print(D1)\n        # print(D3)\n        assert D1.shape == D3.shape\n        np.testing.assert_allclose(D1, D3)\n\n    return D1\n\n\nclass TestExamplesWithAnswer:\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_single_tree_zero_samples(self, mode):\n        ts = tskit.Tree.generate_balanced(2).tree_sequence\n        D = check_divmat(ts, samples=[], mode=mode)\n        assert D.shape == (0, 0)\n\n    @pytest.mark.parametrize(\"num_windows\", [1, 2, 3, 5])\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_single_tree_zero_samples_windows(self, num_windows, mode):\n        ts = tskit.Tree.generate_balanced(2).tree_sequence\n        windows = np.linspace(0, ts.sequence_length, num=num_windows + 1)\n        D = check_divmat(ts, samples=[], windows=windows, mode=mode)\n        assert D.shape == (num_windows, 0, 0)\n\n    @pytest.mark.parametrize(\"m\", [0, 1, 2, 10])\n    def test_single_tree_sites_per_branch(self, m):\n        # 2.00┊    6    ┊\n        #     ┊  ┏━┻━┓  ┊\n        # 1.00┊  4   5  ┊\n        #     ┊ ┏┻┓ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 3 ┊\n        #     0         1\n        ts = tskit.Tree.generate_balanced(4).tree_sequence\n        ts = tsutil.insert_branch_sites(ts, m)\n        D1 = 
check_divmat(ts, mode=\"site\")\n        D2 = np.array(\n            [\n                [0.0, 2.0, 4.0, 4.0],\n                [2.0, 0.0, 4.0, 4.0],\n                [4.0, 4.0, 0.0, 2.0],\n                [4.0, 4.0, 2.0, 0.0],\n            ]\n        )\n        np.testing.assert_array_equal(D1, m * D2)\n\n    @pytest.mark.parametrize(\"m\", [1, 3])\n    def test_single_tree_mutations_per_branch(self, m):\n        # 3.00┊   6     ┊\n        #     ┊ ┏━┻━┓   ┊\n        # 2.00┊ ┃   5   ┊\n        #     ┊ ┃ ┏━┻┓  ┊\n        # 1.00┊ ┃ ┃  4  ┊\n        #     ┊ ┃ ┃ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 3 ┊\n        #     0         1\n        # state 2 3 4 4\n        ts = tskit.Tree.generate_comb(4).tree_sequence\n        ts = tsutil.insert_branch_mutations(ts, m, num_states=5)\n        D1 = check_divmat(ts, mode=\"site\")\n        D2 = np.array(\n            [\n                [0.0, 1.0, 1.0, 1.0],\n                [1.0, 0.0, 1.0, 1.0],\n                [1.0, 1.0, 0.0, 0.0],\n                [1.0, 1.0, 0.0, 0.0],\n            ]\n        )\n        np.testing.assert_array_equal(D1, D2)\n\n    @pytest.mark.parametrize(\"n\", [2, 3, 5])\n    def test_single_tree_unique_sample_alleles(self, n):\n        tables = tskit.Tree.generate_balanced(n).tree_sequence.dump_tables()\n        tables.sites.add_row(position=0.5, ancestral_state=\"0\")\n        for j in range(n):\n            tables.mutations.add_row(site=0, node=j, derived_state=f\"{j + 1}\")\n        ts = tables.tree_sequence()\n        D1 = check_divmat(ts, mode=\"site\")\n        D2 = np.ones((n, n))\n        np.fill_diagonal(D2, 0)\n        np.testing.assert_array_equal(D1, D2)\n\n    @pytest.mark.parametrize(\"L\", [0.1, 1, 2, 100])\n    def test_single_tree_sequence_length(self, L):\n        # 2.00┊    6    ┊\n        #     ┊  ┏━┻━┓  ┊\n        # 1.00┊  4   5  ┊\n        #     ┊ ┏┻┓ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 3 ┊\n        #     0         1\n        ts = tskit.Tree.generate_balanced(4, span=L).tree_sequence\n        D1 = 
check_divmat(ts, mode=\"branch\", span_normalise=False)\n        D2 = np.array(\n            [\n                [0.0, 2.0, 4.0, 4.0],\n                [2.0, 0.0, 4.0, 4.0],\n                [4.0, 4.0, 0.0, 2.0],\n                [4.0, 4.0, 2.0, 0.0],\n            ]\n        )\n        np.testing.assert_array_equal(D1, L * D2)\n\n    @pytest.mark.parametrize(\"L\", [0.1, 1, 2, 100])\n    def test_single_tree_sequence_length_span_normalise(self, L):\n        # 2.00┊    6    ┊\n        #     ┊  ┏━┻━┓  ┊\n        # 1.00┊  4   5  ┊\n        #     ┊ ┏┻┓ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 3 ┊\n        #     0         1\n        ts = tskit.Tree.generate_balanced(4, span=L).tree_sequence\n        D1 = check_divmat(ts, mode=\"branch\", span_normalise=True)\n        D2 = np.array(\n            [\n                [0.0, 2.0, 4.0, 4.0],\n                [2.0, 0.0, 4.0, 4.0],\n                [4.0, 4.0, 0.0, 2.0],\n                [4.0, 4.0, 2.0, 0.0],\n            ]\n        )\n        np.testing.assert_array_equal(D1, D2)\n\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_single_tree_diploid_individuals(self, mode):\n        # 2.00┊    6    ┊\n        #     ┊  ┏━┻━┓  ┊\n        # 1.00┊  4   5  ┊\n        #     ┊ ┏┻┓ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 3 ┊\n        #     0         1\n        ts = tskit.Tree.generate_balanced(4).tree_sequence\n        ts = tsutil.insert_branch_sites(ts)\n        ts = tsutil.insert_individuals(ts, ploidy=2)\n        D1 = check_divmat(\n            ts,\n            sample_sets=[ind.nodes for ind in ts.individuals()],\n            mode=mode,\n        )\n        D2 = np.array([[2.0, 4.0], [4.0, 2.0]])\n        np.testing.assert_array_equal(D1, D2)\n\n    @pytest.mark.parametrize(\"num_windows\", [1, 2, 3, 5])\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_single_tree_gap_at_end(self, num_windows, mode):\n        # 2.00┊    6    ┊\n        #     ┊  ┏━┻━┓  ┊\n        # 1.00┊  4   5  ┊\n        #     ┊ ┏┻┓ ┏┻┓ ┊\n        # 
0.00┊ 0 1 2 3 ┊ 0 1 2 3\n        #     0         1         2\n        ts = tskit.Tree.generate_balanced(4).tree_sequence\n        ts = tsutil.insert_branch_sites(ts)\n        tables = ts.dump_tables()\n        tables.sequence_length = 2\n        ts = tables.tree_sequence()\n        windows = np.linspace(0, ts.sequence_length, num=num_windows + 1)\n        D1 = check_divmat(ts, windows=windows, mode=mode, span_normalise=False)\n        D1 = np.sum(D1, axis=0)\n        D2 = np.array(\n            [\n                [0.0, 2.0, 4.0, 4.0],\n                [2.0, 0.0, 4.0, 4.0],\n                [4.0, 4.0, 0.0, 2.0],\n                [4.0, 4.0, 2.0, 0.0],\n            ]\n        )\n        np.testing.assert_array_equal(D1, D2)\n\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_single_tree_subset_permuted_samples(self, mode):\n        # 2.00┊    6    ┊\n        #     ┊  ┏━┻━┓  ┊\n        # 1.00┊  4   5  ┊\n        #     ┊ ┏┻┓ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 3 ┊\n        #     0         1\n        ts = tskit.Tree.generate_balanced(4).tree_sequence\n        ts = tsutil.insert_branch_sites(ts)\n        D1 = check_divmat(ts, samples=[1, 2, 0], mode=mode)\n        D2 = np.array(\n            [\n                [0.0, 4.0, 2.0],\n                [4.0, 0.0, 4.0],\n                [2.0, 4.0, 0.0],\n            ]\n        )\n        np.testing.assert_array_equal(D1, D2)\n\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_single_tree_mixed_non_sample_samples(self, mode):\n        # 2.00┊    6    ┊\n        #     ┊  ┏━┻━┓  ┊\n        # 1.00┊  4   5  ┊\n        #     ┊ ┏┻┓ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 3 ┊\n        #     0         1\n        ts = tskit.Tree.generate_balanced(4).tree_sequence\n        ts = tsutil.insert_branch_sites(ts)\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_BAD_SAMPLES\"):\n            ts.divergence_matrix([0, 5], mode=mode)\n\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def 
test_single_tree_duplicate_samples(self, mode):\n        # 2.00┊    6    ┊\n        #     ┊  ┏━┻━┓  ┊\n        # 1.00┊  4   5  ┊\n        #     ┊ ┏┻┓ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 3 ┊\n        #     0         1\n        ts = tskit.Tree.generate_balanced(4).tree_sequence\n        ts = tsutil.insert_branch_sites(ts)\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_DUPLICATE_SAMPLE\"):\n            ts.divergence_matrix([0, 0, 1], mode=mode)\n\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_single_tree_multiroot(self, mode):\n        # 2.00┊         ┊\n        #     ┊         ┊\n        # 1.00┊  4   5  ┊\n        #     ┊ ┏┻┓ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 3 ┊\n        #     0         1\n        ts = tskit.Tree.generate_balanced(4).tree_sequence\n        ts = tsutil.insert_branch_sites(ts)\n        ts = ts.decapitate(1)\n        D1 = check_divmat(ts, mode=mode)\n        D2 = np.array(\n            [\n                [0.0, 2.0, 2.0, 2.0],\n                [2.0, 0.0, 2.0, 2.0],\n                [2.0, 2.0, 0.0, 2.0],\n                [2.0, 2.0, 2.0, 0.0],\n            ]\n        )\n        np.testing.assert_array_equal(D1, D2)\n\n    @pytest.mark.parametrize([\"left\", \"right\"], [(0, 10), (1, 3), (3.25, 3.75), (5, 10)])\n    def test_single_tree_interval(self, left, right):\n        # 2.00┊    6    ┊\n        #     ┊  ┏━┻━┓  ┊\n        # 1.00┊  4   5  ┊\n        #     ┊ ┏┻┓ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 3 ┊\n        #     0         1\n        ts = tskit.Tree.generate_balanced(4, span=10).tree_sequence\n        D1 = check_divmat(ts, windows=[left, right], mode=\"branch\", span_normalise=False)\n        D2 = np.array(\n            [\n                [0.0, 2.0, 4.0, 4.0],\n                [2.0, 0.0, 4.0, 4.0],\n                [4.0, 4.0, 0.0, 2.0],\n                [4.0, 4.0, 2.0, 0.0],\n            ]\n        )\n        np.testing.assert_array_equal(D1[0], (right - left) * D2)\n\n    @pytest.mark.parametrize(\"num_windows\", [1, 2, 3, 5, 
11])\n    def test_single_tree_equal_windows(self, num_windows):\n        # 2.00┊    6    ┊\n        #     ┊  ┏━┻━┓  ┊\n        # 1.00┊  4   5  ┊\n        #     ┊ ┏┻┓ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 3 ┊\n        #     0         1\n        ts = tskit.Tree.generate_balanced(4, span=10).tree_sequence\n        windows = np.linspace(0, ts.sequence_length, num=num_windows + 1)\n        x = ts.sequence_length / num_windows\n        # print(windows)\n        D1 = check_divmat(ts, windows=windows, mode=\"branch\", span_normalise=False)\n        assert D1.shape == (num_windows, 4, 4)\n        D2 = np.array(\n            [\n                [0.0, 2.0, 4.0, 4.0],\n                [2.0, 0.0, 4.0, 4.0],\n                [4.0, 4.0, 0.0, 2.0],\n                [4.0, 4.0, 2.0, 0.0],\n            ]\n        )\n        for D in D1:\n            np.testing.assert_array_almost_equal(D, x * D2)\n\n    @pytest.mark.parametrize(\"n\", [2, 3, 5])\n    def test_single_tree_no_sites(self, n):\n        ts = tskit.Tree.generate_balanced(n, span=10).tree_sequence\n        D = check_divmat(ts, mode=\"site\")\n        np.testing.assert_array_equal(D, np.zeros((n, n)))\n\n\nclass TestExamples:\n    @pytest.mark.parametrize(\n        \"interval\", [(0, 26), (1, 3), (3.25, 13.75), (5, 10), (25.5, 26)]\n    )\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    @pytest.mark.parametrize(\"span_normalise\", [True, False])\n    def test_all_trees_interval(self, interval, mode, span_normalise):\n        ts = tsutil.all_trees_ts(4)\n        ts = tsutil.insert_branch_sites(ts)\n        assert ts.sequence_length == 26\n        check_divmat(ts, windows=interval, mode=mode, span_normalise=span_normalise)\n\n    @pytest.mark.parametrize(\n        \"windows\",\n        [\n            [0, 26],\n            [0, 1, 2],\n            list(range(27)),\n            [5, 7, 9, 20],\n            [5.1, 5.2, 5.3, 5.5, 6],\n            [5.1, 5.2, 6.5],\n        ],\n    )\n    @pytest.mark.parametrize(\"mode\", 
DIVMAT_MODES)\n    @pytest.mark.parametrize(\"span_normalise\", [True, False])\n    def test_all_trees_windows(self, windows, mode, span_normalise):\n        ts = tsutil.all_trees_ts(4)\n        ts = tsutil.insert_branch_sites(ts)\n        assert ts.sequence_length == 26\n        D = check_divmat(ts, windows=windows, mode=mode, span_normalise=span_normalise)\n        assert D.shape == (len(windows) - 1, 4, 4)\n\n    @pytest.mark.parametrize(\"num_windows\", [1, 5, 28])\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    @pytest.mark.parametrize(\"span_normalise\", [True, False])\n    def test_all_trees_windows_gap_at_end(self, num_windows, mode, span_normalise):\n        tables = tsutil.all_trees_ts(4).dump_tables()\n        tables.sequence_length = 30\n        ts = tables.tree_sequence()\n        ts = tsutil.insert_branch_sites(ts)\n        assert ts.last().num_roots == 4\n        windows = np.linspace(0, ts.sequence_length, num=num_windows + 1)\n        check_divmat(ts, windows=windows, mode=mode, span_normalise=span_normalise)\n\n    @pytest.mark.parametrize(\"n\", [2, 3, 5])\n    @pytest.mark.parametrize(\"seed\", range(1, 4))\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_small_sims(self, n, seed, mode):\n        ts = msprime.sim_ancestry(\n            n,\n            ploidy=1,\n            sequence_length=1000,\n            recombination_rate=0.01,\n            random_seed=seed,\n        )\n        assert ts.num_trees >= 2\n        ts = msprime.sim_mutations(ts, rate=0.1, discrete_genome=False, random_seed=seed)\n        assert ts.num_mutations > 1\n        check_divmat(ts, verbosity=0, mode=mode)\n\n    @pytest.mark.parametrize(\"n\", [2, 3, 5, 15])\n    @pytest.mark.parametrize(\"num_windows\", range(1, 5))\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_sims_windows(self, n, num_windows, mode):\n        ts = msprime.sim_ancestry(\n            n,\n            ploidy=1,\n            population_size=20,\n        
    sequence_length=100,\n            recombination_rate=0.01,\n            random_seed=79234,\n        )\n        assert ts.num_trees >= 2\n        ts = msprime.sim_mutations(\n            ts,\n            rate=0.01,\n            discrete_genome=False,\n            random_seed=1234,\n        )\n        assert ts.num_mutations >= 2\n        windows = np.linspace(0, ts.sequence_length, num=num_windows + 1)\n        check_divmat(ts, windows=windows, mode=mode)\n\n    @pytest.mark.parametrize(\"n\", [2, 3, 5, 15])\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_single_balanced_tree(self, n, mode):\n        ts = tskit.Tree.generate_balanced(n).tree_sequence\n        ts = tsutil.insert_branch_sites(ts)\n        # print(ts.draw_text())\n        check_divmat(ts, verbosity=0, mode=mode)\n\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_internal_sample(self, mode):\n        tables = tskit.Tree.generate_balanced(4).tree_sequence.dump_tables()\n        flags = tables.nodes.flags\n        flags[3] = 0\n        flags[5] = tskit.NODE_IS_SAMPLE\n        tables.nodes.flags = flags\n        ts = tables.tree_sequence()\n        ts = tsutil.insert_branch_sites(ts)\n        check_divmat(ts, verbosity=0, mode=mode)\n\n    @pytest.mark.parametrize(\"seed\", range(1, 5))\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_one_internal_sample_sims(self, seed, mode):\n        ts = msprime.sim_ancestry(\n            10,\n            ploidy=1,\n            population_size=20,\n            sequence_length=100,\n            recombination_rate=0.01,\n            random_seed=seed,\n        )\n        t = ts.dump_tables()\n        # Add a new sample directly below another sample\n        u = t.nodes.add_row(time=-1, flags=tskit.NODE_IS_SAMPLE)\n        t.edges.add_row(parent=0, child=u, left=0, right=ts.sequence_length)\n        t.sort()\n        t.build_index()\n        ts = t.tree_sequence()\n        ts = tsutil.insert_branch_sites(ts)\n   
     check_divmat(ts, mode=mode)\n\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_missing_flanks(self, mode):\n        ts = msprime.sim_ancestry(\n            20,\n            ploidy=1,\n            population_size=20,\n            sequence_length=100,\n            recombination_rate=0.01,\n            random_seed=1234,\n        )\n        assert ts.num_trees >= 2\n        ts = ts.keep_intervals([[20, 80]])\n        assert ts.first().interval == (0, 20)\n        ts = tsutil.insert_branch_sites(ts)\n        check_divmat(ts, mode=mode)\n\n    @pytest.mark.parametrize(\"n\", [2, 3, 10])\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_dangling_on_samples(self, n, mode):\n        # Adding non sample branches below the samples does not alter\n        # the overall divergence *between* the samples\n        ts1 = tskit.Tree.generate_balanced(n).tree_sequence\n        ts1 = tsutil.insert_branch_sites(ts1)\n        D1 = check_divmat(ts1, mode=mode)\n        tables = ts1.dump_tables()\n        for u in ts1.samples():\n            v = tables.nodes.add_row(time=-1)\n            tables.edges.add_row(left=0, right=ts1.sequence_length, parent=u, child=v)\n        tables.sort()\n        tables.build_index()\n        ts2 = tables.tree_sequence()\n        D2 = check_divmat(ts2, mode=mode)\n        np.testing.assert_array_almost_equal(D1, D2)\n\n    @pytest.mark.parametrize(\"n\", [2, 3, 10])\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_dangling_on_all(self, n, mode):\n        # Adding non sample branches below the samples does not alter\n        # the overall divergence *between* the samples\n        ts1 = tskit.Tree.generate_balanced(n).tree_sequence\n        ts1 = tsutil.insert_branch_sites(ts1)\n        D1 = check_divmat(ts1, mode=mode)\n        tables = ts1.dump_tables()\n        for u in range(ts1.num_nodes):\n            v = tables.nodes.add_row(time=-1)\n            tables.edges.add_row(left=0, 
right=ts1.sequence_length, parent=u, child=v)\n        tables.sort()\n        tables.build_index()\n        ts2 = tables.tree_sequence()\n        D2 = check_divmat(ts2, mode=mode)\n        np.testing.assert_array_almost_equal(D1, D2)\n\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_disconnected_non_sample_topology(self, mode):\n        # Adding disconnected non-sample topology does not alter\n        # the overall divergence *between* the samples\n        ts1 = tskit.Tree.generate_balanced(5).tree_sequence\n        ts1 = tsutil.insert_branch_sites(ts1)\n        D1 = check_divmat(ts1, mode=mode)\n        tables = ts1.dump_tables()\n        # Add an extra bit of disconnected non-sample topology\n        u = tables.nodes.add_row(time=0)\n        v = tables.nodes.add_row(time=1)\n        tables.edges.add_row(left=0, right=ts1.sequence_length, parent=v, child=u)\n        tables.sort()\n        tables.build_index()\n        ts2 = tables.tree_sequence()\n        D2 = check_divmat(ts2, mode=mode)\n        np.testing.assert_array_almost_equal(D1, D2)\n\n\nclass TestSuiteExamples:\n    \"\"\"\n    Compare the stats API method vs the library implementation for the\n    suite test examples. 
Some of these examples are too large to run the\n    Python code above on.\n    \"\"\"\n\n    def check(\n        self,\n        ts,\n        windows=None,\n        sample_sets=None,\n        num_threads=0,\n        span_normalise=True,\n        mode=\"branch\",\n    ):\n        D1 = ts.divergence_matrix(\n            sample_sets,\n            windows=windows,\n            num_threads=num_threads,\n            mode=mode,\n            span_normalise=span_normalise,\n        )\n        D2 = stats_api_divergence_matrix(\n            ts,\n            windows=windows,\n            sample_sets=sample_sets,\n            mode=mode,\n            span_normalise=span_normalise,\n        )\n        assert D1.shape == D2.shape\n        # np.set_printoptions(linewidth=500, precision=4)\n        # print()\n        # print(D1)\n        # print(D2)\n        if mode == \"branch\":\n            # If we have missing data then parts of the divmat are defined to be zero,\n            # so relative tolerances aren't useful. Because the stats API\n            # method necessarily involves subtracting away all of the previous\n            # values for an empty tree, there is a degree of numerical imprecision\n            # here. 
This value for atol is what is needed to get the tests to\n            # pass in practice.\n            has_missing_data = any(tree._has_isolated_samples() for tree in ts.trees())\n            atol = 1e-11 if has_missing_data else 0\n            np.testing.assert_allclose(D1, D2, atol=atol)\n        else:\n            assert mode == \"site\"\n            np.testing.assert_allclose(D1, D2)\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_defaults(self, ts, mode):\n        self.check(ts, mode=mode)\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_subset_samples(self, ts, mode):\n        n = min(ts.num_samples, 2)\n        self.check(ts, sample_sets=[[u] for u in ts.samples()[:n]], mode=mode)\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    @pytest.mark.parametrize(\"ploidy\", [1, 2, 3])\n    def test_ploidy_sample_sets(self, ts, mode, ploidy):\n        if ts.num_samples >= 2 * ploidy:\n            # Workaround limitations in the stats API\n            sample_sets = np.array_split(ts.samples(), ts.num_samples // ploidy)\n            self.check(ts, sample_sets=sample_sets, mode=mode)\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    @pytest.mark.parametrize(\"span_normalise\", [True, False])\n    def test_windows(self, ts, mode, span_normalise):\n        windows = np.linspace(0, ts.sequence_length, num=13)\n        self.check(ts, windows=windows, mode=mode, span_normalise=span_normalise)\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_threads_no_windows(self, ts, mode):\n        self.check(ts, num_threads=5, mode=mode)\n\n    
@pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_threads_windows(self, ts, mode):\n        windows = np.linspace(0, ts.sequence_length, num=11)\n        self.check(ts, num_threads=5, windows=windows, mode=mode)\n\n\nclass TestThreadsNoWindows:\n    def check(self, ts, num_threads, samples=None, mode=None, span_normalise=True):\n        D1 = ts.divergence_matrix(\n            samples, num_threads=0, mode=mode, span_normalise=span_normalise\n        )\n        D2 = ts.divergence_matrix(\n            samples,\n            num_threads=num_threads,\n            mode=mode,\n            span_normalise=span_normalise,\n        )\n        np.testing.assert_array_almost_equal(D1, D2)\n\n    @pytest.mark.parametrize(\"num_threads\", [1, 2, 3, 5, 26, 27])\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    @pytest.mark.parametrize(\"span_normalise\", [True, False])\n    def test_all_trees(self, num_threads, mode, span_normalise):\n        ts = tsutil.all_trees_ts(4)\n        assert ts.num_trees == 26\n        self.check(ts, num_threads, mode=mode, span_normalise=span_normalise)\n\n    @pytest.mark.parametrize(\"samples\", [None, [0, 1]])\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_all_trees_samples(self, samples, mode):\n        ts = tsutil.all_trees_ts(4)\n        assert ts.num_trees == 26\n        self.check(ts, 2, samples, mode=mode)\n\n    @pytest.mark.parametrize(\"n\", [2, 3, 5, 15])\n    @pytest.mark.parametrize(\"num_threads\", range(1, 5))\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_simple_sims(self, n, num_threads, mode):\n        ts = msprime.sim_ancestry(\n            n,\n            ploidy=1,\n            population_size=20,\n            sequence_length=100,\n            recombination_rate=0.01,\n            random_seed=1234,\n        )\n        assert ts.num_trees >= 2\n        self.check(ts, num_threads, mode=mode)\n\n\nclass 
TestThreadsWindows:\n    def check(self, ts, num_threads, *, windows, samples=None, mode=None):\n        D1 = ts.divergence_matrix(samples, num_threads=0, windows=windows, mode=mode)\n        D2 = ts.divergence_matrix(\n            samples, num_threads=num_threads, windows=windows, mode=mode\n        )\n        np.testing.assert_array_almost_equal(D1, D2)\n\n    @pytest.mark.parametrize(\"num_threads\", [1, 2, 3, 5, 26, 27])\n    @pytest.mark.parametrize(\n        \"windows\",\n        [\n            [0, 26],\n            [0, 1, 2],\n            list(range(27)),\n            [5, 7, 9, 20],\n            [5.1, 5.2, 5.3, 5.5, 6],\n            [5.1, 5.2, 6.5],\n            \"trees\",\n            \"sites\",\n        ],\n    )\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_all_trees(self, num_threads, windows, mode):\n        ts = tsutil.all_trees_ts(4)\n        assert ts.num_trees == 26\n        self.check(ts, num_threads, windows=windows, mode=mode)\n\n    @pytest.mark.parametrize(\"samples\", [None, [0, 1]])\n    @pytest.mark.parametrize(\n        \"windows\",\n        [\n            [0, 26],\n            None,\n            \"trees\",\n            \"sites\",\n        ],\n    )\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_all_trees_samples(self, samples, windows, mode):\n        ts = tsutil.all_trees_ts(4)\n        self.check(ts, 2, windows=windows, samples=samples, mode=mode)\n\n    @pytest.mark.parametrize(\"num_threads\", range(1, 5))\n    @pytest.mark.parametrize(\n        \"windows\",\n        [\n            [0, 100],\n            [0, 50, 75, 95, 100],\n            [50, 75, 95, 100],\n            [0, 50, 75, 95],\n            list(range(100)),\n            \"trees\",\n            \"sites\",\n        ],\n    )\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_simple_sims(self, num_threads, windows, mode):\n        ts = msprime.sim_ancestry(\n            15,\n            ploidy=1,\n            
population_size=20,\n            sequence_length=100,\n            recombination_rate=0.01,\n            random_seed=1234,\n        )\n        assert ts.num_trees >= 2\n        ts = msprime.sim_mutations(ts, rate=0.01, random_seed=1234)\n        assert ts.num_mutations > 10\n        self.check(ts, num_threads, windows=windows, mode=mode)\n\n\n# NOTE these are tests that are for more general functionality that might\n# get applied across many different functions, and so probably should be\n# tested in another file. For now they're only used by divmat, so we can\n# keep them here for simplicity.\nclass TestChunkByTree:\n    # These are based on what we get from np.array_split, there's nothing\n    # particularly critical about exactly how we portion things up.\n    @pytest.mark.parametrize(\n        [\"num_chunks\", \"expected\"],\n        [\n            (1, [[0, 26]]),\n            (2, [[0, 13], [13, 26]]),\n            (3, [[0, 9], [9, 18], [18, 26]]),\n            (4, [[0, 7], [7, 14], [14, 20], [20, 26]]),\n            (5, [[0, 6], [6, 11], [11, 16], [16, 21], [21, 26]]),\n        ],\n    )\n    def test_all_trees_ts_26(self, num_chunks, expected):\n        ts = tsutil.all_trees_ts(4)\n        actual = ts._chunk_sequence_by_tree(num_chunks)\n        np.testing.assert_equal(actual, expected)\n\n    @pytest.mark.parametrize(\n        [\"num_chunks\", \"expected\"],\n        [\n            (1, [[0, 4]]),\n            (2, [[0, 2], [2, 4]]),\n            (3, [[0, 2], [2, 3], [3, 4]]),\n            (4, [[0, 1], [1, 2], [2, 3], [3, 4]]),\n            (5, [[0, 1], [1, 2], [2, 3], [3, 4]]),\n            (100, [[0, 1], [1, 2], [2, 3], [3, 4]]),\n        ],\n    )\n    def test_all_trees_ts_4(self, num_chunks, expected):\n        ts = tsutil.all_trees_ts(3)\n        assert ts.num_trees == 4\n        actual = ts._chunk_sequence_by_tree(num_chunks)\n        np.testing.assert_equal(actual, expected)\n\n    @pytest.mark.parametrize(\"span\", [1, 2, 5, 0.3])\n    
@pytest.mark.parametrize(\n        [\"num_chunks\", \"expected\"],\n        [\n            (1, [[0, 4]]),\n            (2, [[0, 2], [2, 4]]),\n            (3, [[0, 2], [2, 3], [3, 4]]),\n            (4, [[0, 1], [1, 2], [2, 3], [3, 4]]),\n            (5, [[0, 1], [1, 2], [2, 3], [3, 4]]),\n            (100, [[0, 1], [1, 2], [2, 3], [3, 4]]),\n        ],\n    )\n    def test_all_trees_ts_4_trees_span(self, span, num_chunks, expected):\n        tables = tsutil.all_trees_ts(3).dump_tables()\n        tables.edges.left *= span\n        tables.edges.right *= span\n        tables.sequence_length *= span\n        ts = tables.tree_sequence()\n        assert ts.num_trees == 4\n        actual = ts._chunk_sequence_by_tree(num_chunks)\n        np.testing.assert_equal(actual, np.array(expected) * span)\n\n    @pytest.mark.parametrize(\"num_chunks\", range(1, 5))\n    def test_empty_ts(self, num_chunks):\n        tables = tskit.TableCollection(1)\n        ts = tables.tree_sequence()\n        chunks = ts._chunk_sequence_by_tree(num_chunks)\n        np.testing.assert_equal(chunks, [[0, 1]])\n\n    @pytest.mark.parametrize(\"num_chunks\", range(1, 5))\n    def test_single_tree(self, num_chunks):\n        L = 10\n        ts = tskit.Tree.generate_balanced(2, span=L).tree_sequence\n        chunks = ts._chunk_sequence_by_tree(num_chunks)\n        np.testing.assert_equal(chunks, [[0, L]])\n\n    @pytest.mark.parametrize(\"num_chunks\", [0, -1, 0.5])\n    def test_bad_chunks(self, num_chunks):\n        ts = tskit.Tree.generate_balanced(2).tree_sequence\n        with pytest.raises(ValueError, match=\"Number of chunks must be an integer > 0\"):\n            ts._chunk_sequence_by_tree(num_chunks)\n\n\nclass TestChunkWindows:\n    # These are based on what we get from np.array_split, there's nothing\n    # particularly critical about exactly how we portion things up.\n    @pytest.mark.parametrize(\n        [\"windows\", \"num_chunks\", \"expected\"],\n        [\n            ([0, 10], 1, [[0, 
10]]),\n            ([0, 10], 2, [[0, 10]]),\n            ([0, 5, 10], 2, [[0, 5], [5, 10]]),\n            ([0, 5, 6, 10], 2, [[0, 5, 6], [6, 10]]),\n            ([0, 5, 6, 10], 3, [[0, 5], [5, 6], [6, 10]]),\n        ],\n    )\n    def test_examples(self, windows, num_chunks, expected):\n        actual = tskit.TreeSequence._chunk_windows(windows, num_chunks)\n        np.testing.assert_equal(actual, expected)\n\n    @pytest.mark.parametrize(\"num_chunks\", [0, -1, 0.5])\n    def test_bad_chunks(self, num_chunks):\n        with pytest.raises(ValueError, match=\"Number of chunks must be an integer > 0\"):\n            tskit.TreeSequence._chunk_windows([0, 1], num_chunks)\n\n\nclass TestGroupAlleles:\n    @pytest.mark.parametrize(\n        [\"G\", \"num_alleles\", \"A\", \"offsets\"],\n        [\n            ([0, 1], 2, [0, 1], [0, 1, 2]),\n            ([0, 1], 3, [0, 1], [0, 1, 2, 2]),\n            ([0, 2], 3, [0, 1], [0, 1, 1, 2]),\n            ([1, 0], 2, [1, 0], [0, 1, 2]),\n            ([0, 0, 0, 1, 1, 1], 2, [0, 1, 2, 3, 4, 5], [0, 3, 6]),\n            ([0, 0], 1, [0, 1], [0, 2]),\n            ([2, 2], 3, [0, 1], [0, 0, 0, 2]),\n        ],\n    )\n    def test_examples(self, G, num_alleles, A, offsets):\n        A1, offsets1 = group_alleles(np.array(G), num_alleles)\n        assert list(A) == list(A1)\n        assert list(offsets) == list(offsets1)\n\n    def test_simple_simulation(self):\n        ts = msprime.sim_ancestry(\n            15,\n            ploidy=1,\n            population_size=20,\n            sequence_length=100,\n            recombination_rate=0.01,\n            random_seed=1234,\n        )\n        ts = msprime.sim_mutations(ts, rate=0.01, random_seed=1234)\n        assert ts.num_mutations > 10\n        for var in ts.variants():\n            A, offsets = group_alleles(var.genotypes, var.num_alleles)\n            allele_samples = [[] for _ in range(var.num_alleles)]\n            for j, a in enumerate(var.genotypes):\n                
allele_samples[a].append(j)\n\n            assert len(offsets) == var.num_alleles + 1\n            assert offsets[0] == 0\n            assert offsets[-1] == ts.num_samples\n            assert np.all(np.diff(offsets) >= 0)\n            for j in range(var.num_alleles):\n                a = A[offsets[j] : offsets[j + 1]]\n                assert list(a) == list(allele_samples[j])\n\n\nclass TestSampleSetParsing:\n    @pytest.mark.parametrize(\n        [\"arg\", \"flattened\", \"sizes\"],\n        [\n            ([], [], []),\n            ([1], [1], [1]),\n            ([1, 2], [1, 2], [1, 1]),\n            ([[1, 2], [3, 4]], [1, 2, 3, 4], [2, 2]),\n            (((1, 2), (3, 4)), [1, 2, 3, 4], [2, 2]),\n            (np.array([[1, 2], [3, 4]]), [1, 2, 3, 4], [2, 2]),\n            (np.array([1, 2]), [1, 2], [1, 1]),\n            (np.array([1, 2], dtype=np.uint32), [1, 2], [1, 1]),\n            (array.array(\"i\", [1, 2]), [1, 2], [1, 1]),\n            ([[1, 2], [3], [4]], [1, 2, 3, 4], [2, 1, 1]),\n            ([[1], [2]], [1, 2], [1, 1]),\n            ([[1, 1], [2]], [1, 1, 2], [2, 1]),\n        ],\n    )\n    def test_good_args(self, arg, flattened, sizes):\n        f, s = tskit.TreeSequence._parse_stat_matrix_sample_sets(arg)\n        # print(f, s)\n        assert isinstance(f, np.ndarray)\n        assert f.dtype == np.int32\n        assert isinstance(s, np.ndarray)\n        assert s.dtype == np.uint64\n        np.testing.assert_array_equal(f, flattened)\n        np.testing.assert_array_equal(s, sizes)\n\n    @pytest.mark.parametrize(\n        \"arg\",\n        [\n            [\"0\", \"1\"],\n            [\"0\", 1],\n            [0, \"1\"],\n            [0, {\"a\": \"b\"}],\n        ],\n    )\n    def test_nested_bad_types(self, arg):\n        with pytest.raises(TypeError):\n            tskit.TreeSequence._parse_stat_matrix_sample_sets(arg)\n\n    @pytest.mark.parametrize(\n        \"arg\",\n        [\n            [[0], [[0, 0]]],\n            [[[0, 0]], [0]],\n         
   np.array([[[0, 0], [0, 0]]]),\n        ],\n    )\n    def test_nested_arrays(self, arg):\n        with pytest.raises(ValueError):\n            tskit.TreeSequence._parse_stat_matrix_sample_sets(arg)\n\n    @pytest.mark.parametrize(\"arg\", [\"\", \"string\", \"1\", \"[1, 2]\", b\"\", \"1234\"])\n    def test_string_args(self, arg):\n        with pytest.raises(TypeError, match=\"ID specification cannot be\"):\n            tskit.TreeSequence._parse_stat_matrix_sample_sets(arg)\n\n    @pytest.mark.parametrize(\n        \"arg\",\n        [\n            {},\n            {\"a\": \"b\"},\n            collections.Counter(),\n        ],\n    )\n    def test_dict_args(self, arg):\n        with pytest.raises(TypeError, match=\"ID specification cannot be\"):\n            tskit.TreeSequence._parse_stat_matrix_sample_sets(arg)\n\n    @pytest.mark.parametrize(\n        \"arg\",\n        [\n            0,\n            {0: 1},\n            None,\n            {\"a\": \"b\"},\n            np.array([1.1]),\n        ],\n    )\n    def test_bad_arg_types(self, arg):\n        with pytest.raises(TypeError):\n            tskit.TreeSequence._parse_stat_matrix_sample_sets(arg)\n\n\nclass TestGeneticRelatednessMatrix:\n    def check(self, ts, mode, *, sample_sets=None, windows=None, span_normalise=True):\n        # These are *only* expected to be the same\n        # under infinite-sites mutations\n        if mode == \"site\" and np.any([len(s.mutations) > 1 for s in ts.sites()]):\n            ts = msprime.sim_mutations(\n                ts,\n                rate=100 / ts.segregating_sites(mode=\"branch\", span_normalise=False),\n                random_seed=123,\n                discrete_genome=False,\n                keep=False,\n            )\n        G1 = stats_api_genetic_relatedness_matrix(\n            ts,\n            mode=mode,\n            sample_sets=sample_sets,\n            windows=windows,\n            span_normalise=span_normalise,\n        )\n        G2 = 
ts.genetic_relatedness_matrix(\n            mode=mode,\n            sample_sets=sample_sets,\n            windows=windows,\n            span_normalise=span_normalise,\n        )\n        np.testing.assert_array_almost_equal(G1, G2)\n\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_single_tree(self, mode):\n        # 2.00┊    6    ┊\n        #     ┊  ┏━┻━┓  ┊\n        # 1.00┊  4   5  ┊\n        #     ┊ ┏┻┓ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 3 ┊\n        #     0         1\n        ts = tskit.Tree.generate_balanced(4).tree_sequence\n        ts = tsutil.insert_branch_sites(ts)\n        self.check(ts, mode)\n\n    @pytest.mark.parametrize(\"m\", [1, 3])\n    def test_single_tree_mutations_per_branch(self, m):\n        # 3.00┊   6     ┊\n        #     ┊ ┏━┻━┓   ┊\n        # 2.00┊ ┃   5   ┊\n        #     ┊ ┃ ┏━┻┓  ┊\n        # 1.00┊ ┃ ┃  4  ┊\n        #     ┊ ┃ ┃ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 3 ┊\n        #     0         1\n        # state 2 3 4 4\n        ts = tskit.Tree.generate_comb(4).tree_sequence\n        ts = tsutil.insert_branch_mutations(ts, m, num_states=5)\n        D1 = check_divmat(ts, mode=\"site\")\n        D2 = np.array(\n            [\n                [0.0, 1.0, 1.0, 1.0],\n                [1.0, 0.0, 1.0, 1.0],\n                [1.0, 1.0, 0.0, 0.0],\n                [1.0, 1.0, 0.0, 0.0],\n            ]\n        )\n        np.testing.assert_array_equal(D1, D2)\n\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_single_tree_sample_sets(self, mode):\n        # 2.00┊    6    ┊\n        #     ┊  ┏━┻━┓  ┊\n        # 1.00┊  4   5  ┊\n        #     ┊ ┏┻┓ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 3 ┊\n        #     0         1\n        ts = tskit.Tree.generate_balanced(4).tree_sequence\n        ts = tsutil.insert_branch_sites(ts)\n        self.check(ts, mode, sample_sets=[[0, 1], [2, 3]])\n\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_single_tree_single_samples(self, mode):\n        # 2.00┊    6    ┊\n        #     ┊  ┏━┻━┓ 
 ┊\n        # 1.00┊  4   5  ┊\n        #     ┊ ┏┻┓ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 3 ┊\n        #     0         1\n        ts = tskit.Tree.generate_balanced(4).tree_sequence\n        ts = tsutil.insert_branch_sites(ts)\n        self.check(ts, mode, sample_sets=[[0], [1]])\n        self.check(ts, mode, sample_sets=[[0], [2]])\n        self.check(ts, mode, sample_sets=[[0], [1], [2]])\n\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_single_tree_windows(self, mode):\n        # 2.00┊    6    ┊\n        #     ┊  ┏━┻━┓  ┊\n        # 1.00┊  4   5  ┊\n        #     ┊ ┏┻┓ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 3 ┊\n        #     0         1\n        ts = tskit.Tree.generate_balanced(4).tree_sequence\n        ts = tsutil.insert_branch_sites(ts)\n        self.check(ts, mode, windows=[0, 0.5, 1])\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    def test_suite_defaults(self, ts, mode):\n        self.check(ts, mode=mode)\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    @pytest.mark.parametrize(\"span_normalise\", [True, False])\n    def test_suite_span_normalise(self, ts, mode, span_normalise):\n        self.check(ts, mode=mode, span_normalise=span_normalise)\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    @pytest.mark.parametrize(\"mode\", DIVMAT_MODES)\n    @pytest.mark.parametrize(\"num_sets\", [2])  # [[2, 3, 4, 5])\n    def test_suite_sample_sets(self, ts, mode, num_sets):\n        if ts.num_samples >= num_sets:\n            sample_sets = np.array_split(ts.samples(), num_sets)\n            self.check(ts, sample_sets=sample_sets, mode=mode)\n"
  },
  {
    "path": "python/tests/test_drawing.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2025 Tskit Developers\n# Copyright (C) 2017 University of Oxford\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTest cases for visualisation in tskit.\n\"\"\"\n\nimport collections\nimport io\nimport logging\nimport math\nimport os\nimport pathlib\nimport platform\nimport re\nimport xml.etree\n\nimport msprime\nimport numpy as np\nimport pytest\nimport xmlunittest\n\nimport tests.test_wright_fisher as wf\nimport tests.tsutil as tsutil\nimport tskit\nfrom tskit import drawing\n\nIS_WINDOWS = platform.system() == \"Windows\"\n\n\nclass TestTreeDraw:\n    \"\"\"\n    Tests for the tree drawing functionality.\n    TODO - the get_XXX_tree() functions should probably be placed in fixtures\n    \"\"\"\n\n    def get_binary_tree(self):\n        ts = msprime.simulate(10, random_seed=1, mutation_rate=1)\n        return next(ts.trees())\n\n    def get_nonbinary_ts(self):\n        tables = wf.wf_sim(\n 
           8,\n            4,\n            seed=1,\n            deep_history=True,\n            initial_generation_samples=False,\n            num_loci=2,\n        )\n        tables.sort()\n        ts = tables.tree_sequence().simplify()\n        return tsutil.jukes_cantor(ts, 10, 0.025, seed=1)\n\n    def get_nonbinary_tree(self):\n        for t in self.get_nonbinary_ts().trees():\n            for u in t.nodes():\n                if len(t.children(u)) > 2:\n                    return t\n        raise AssertionError()\n\n    def get_zero_edge_tree(self):\n        tables = tskit.TableCollection(sequence_length=2)\n        # These must be samples or we will have zero roots.\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        tables.sites.add_row(position=0, ancestral_state=\"0\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"1\")\n        tables.mutations.add_row(site=0, node=1, derived_state=\"1\")\n        return tables.tree_sequence().first()\n\n    def get_zero_roots_tree(self):\n        tables = tskit.TableCollection(sequence_length=2)\n        # If we have no samples we have zero roots\n        tables.nodes.add_row(time=0)\n        tables.nodes.add_row(time=0)\n        tables.nodes.add_row(time=1)\n        tables.edges.add_row(0, 2, 2, 0)\n        tables.edges.add_row(0, 2, 2, 1)\n        tree = tables.tree_sequence().first()\n        assert tree.num_roots == 0\n        return tree\n\n    def get_multiroot_tree(self):\n        ts = msprime.simulate(15, random_seed=1)\n        # Take off the top quarter of edges\n        tables = ts.dump_tables()\n        edges = tables.edges\n        n = len(edges) - len(edges) // 4\n        edges.set_columns(\n            left=edges.left[:n],\n            right=edges.right[:n],\n            parent=edges.parent[:n],\n            child=edges.child[:n],\n        )\n        ts = tables.tree_sequence()\n        for t in 
ts.trees():\n            if t.num_roots > 1:\n                return t\n        raise AssertionError()\n\n    def get_mutations_over_roots_tree(self):\n        ts = msprime.simulate(15, random_seed=1)\n        ts = ts.decapitate(ts.tables.nodes.time[-1] / 2)\n        tables = ts.dump_tables()\n        delta = 1.0 / (ts.num_nodes + 1)\n        x = 0\n        for node in range(ts.num_nodes):\n            site_id = tables.sites.add_row(x, ancestral_state=\"0\")\n            x += delta\n            tables.mutations.add_row(site_id, node=node, derived_state=\"1\")\n        ts = tables.tree_sequence()\n        tree = ts.first()\n        assert any(tree.parent(mut.node) == tskit.NULL for mut in tree.mutations())\n        return tree\n\n    def get_unary_node_tree(self):\n        ts = msprime.simulate(2, random_seed=1)\n        tables = ts.dump_tables()\n        edges = tables.edges\n        # Take out all the edges except 1\n        n = 1\n        edges.set_columns(\n            left=edges.left[:n],\n            right=edges.right[:n],\n            parent=edges.parent[:n],\n            child=edges.child[:n],\n        )\n        ts = tables.tree_sequence()\n        for t in ts.trees():\n            for u in t.nodes():\n                if len(t.children(u)) == 1:\n                    return t\n        raise AssertionError()\n\n    def get_empty_tree(self):\n        tables = tskit.TableCollection(sequence_length=1)\n        ts = tables.tree_sequence()\n        return next(ts.trees())\n\n    def get_simple_ts(self, use_mutation_times=False):\n        \"\"\"\n        return a simple tree seq that does not depend on msprime\n        \"\"\"\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   population      individual      time    metadata\n        0       1       0       -1      0\n        1       1       0       -1      0\n        2       1       0       -1      0\n        3       1       0       -1      0\n        4       0       0       -1      
0.1145014598813\n        5       0       0       -1      1.11067965364865\n        6       0       0       -1      1.75005250750382\n        7       0       0       -1      5.31067154311640\n        8       0       0       -1      6.57331354884652\n        9       0       0       -1      9.08308317451295\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        id      left            right           parent  child\n        0       0.00000000      1.00000000      4       0\n        1       0.00000000      1.00000000      4       1\n        2       0.00000000      1.00000000      5       2\n        3       0.00000000      1.00000000      5       3\n        4       0.79258618      0.90634460      6       4\n        5       0.79258618      0.90634460      6       5\n        6       0.05975243      0.79258618      7       4\n        7       0.90634460      0.91029435      7       4\n        8       0.05975243      0.79258618      7       5\n        9       0.90634460      0.91029435      7       5\n        10      0.91029435      1.00000000      8       4\n        11      0.91029435      1.00000000      8       5\n        12      0.00000000      0.05975243      9       4\n        13      0.00000000      0.05975243      9       5\n        \"\"\"\n        )\n        sites = io.StringIO(\n            \"\"\"\\\n        position      ancestral_state\n        0.05          A\n        0.06          0\n        0.3           Empty\n        0.5           XXX\n        0.91          T\n        \"\"\"\n        )\n        muts = io.StringIO(\n            \"\"\"\\\n        site   node    derived_state    parent    time\n        0      9       T                -1        15\n        0      9       G                0         9.1\n        0      5       1                1         9\n        1      4       C                -1        1.6\n        1      4       G                3         1.5\n        2      7       G                -1        10\n        2      
3       C                5         1\n        4      3       G                -1        1\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes, edges, sites=sites, mutations=muts, strict=False)\n        if use_mutation_times:\n            return ts\n        tables = ts.dump_tables()\n        tables.mutations.time = np.full_like(tables.mutations.time, tskit.UNKNOWN_TIME)\n        return tables.tree_sequence()\n\n    def get_ts_varying_min_times(self, *args, **kwargs):\n        \"\"\"\n        Like get_simple_ts but return a tree sequence with negative times, and some trees\n        with different min times (i.e. with dangling nonsample nodes at negative times)\n        \"\"\"\n        ts = self.get_simple_ts(*args, **kwargs)\n        tables = ts.dump_tables()\n        time = tables.nodes.time\n        time[time == 0] = 0.1\n        time[3] = -9.99\n        tables.nodes.time = time\n        # set node 3 to be non-sample node lower than the rest\n        flags = tables.nodes.flags\n        flags[3] = 0\n        tables.nodes.flags = flags\n        edges = tables.edges\n        assert edges[3].child == 3 and edges[3].parent == 5\n        edges[3] = edges[3].replace(left=ts.breakpoints(True)[1])\n        tables.sort()\n        tables.nodes.flags = flags\n        return tables.tree_sequence()\n\n    def fail(self, *args, **kwargs):\n        \"\"\"\n        Required for xmlunittest.XmlTestMixin to work with pytest not unittest\n        \"\"\"\n        pytest.fail(*args, **kwargs)  # noqa: PT016\n\n\ndef closest_left_node(tree, u):\n    \"\"\"\n    Returns the node that is closest to u in a left-to-right sense.\n    \"\"\"\n    ret = tskit.NULL\n    while u != tskit.NULL and ret == tskit.NULL:\n        ret = tree.left_sib(u)\n        u = tree.parent(u)\n    return ret\n\n\ndef get_left_neighbour(tree, traversal_order):\n    \"\"\"\n    This is a less efficient version of the get_left_neighbour function in\n    drawing.py.\n    \"\"\"\n    # Note: roots are the 
children of -1 here.\n    children = collections.defaultdict(list)\n    for u in tree.nodes(order=traversal_order):\n        parent = tree.parent(u)\n        children[parent].append(u)\n\n    left_neighbour = np.full(tree.tree_sequence.num_nodes, tskit.NULL, dtype=int)\n    for u in tree.nodes():\n        next_left = tskit.NULL\n        child = u\n        while child != tskit.NULL and next_left == tskit.NULL:\n            parent = tree.parent(child)\n            child_index = children[parent].index(child)\n            if child_index > 0:\n                next_left = children[parent][child_index - 1]\n            child = parent\n        left_neighbour[u] = next_left\n    return left_neighbour\n\n\nclass TestClosestLeftNode(TestTreeDraw):\n    \"\"\"\n    Tests the code for finding the closest left node in a tree.\n    \"\"\"\n\n    def verify(self, tree):\n        m1 = drawing.get_left_neighbour(tree, \"postorder\")\n        m2 = get_left_neighbour(tree, \"postorder\")\n        np.testing.assert_array_equal(m1, m2)\n        for u in tree.nodes():\n            assert m1[u] == closest_left_node(tree, u)\n\n        m1 = drawing.get_left_neighbour(tree, \"minlex_postorder\")\n        m2 = get_left_neighbour(tree, \"minlex_postorder\")\n        np.testing.assert_array_equal(m1, m2)\n\n    def test_2_binary(self):\n        ts = msprime.simulate(2, random_seed=2)\n        self.verify(ts.first())\n\n    def test_5_binary(self):\n        ts = msprime.simulate(5, random_seed=2)\n        self.verify(ts.first())\n\n    def test_10_binary(self):\n        ts = msprime.simulate(10, random_seed=2)\n        self.verify(ts.first())\n\n    def test_20_binary(self):\n        ts = msprime.simulate(20, random_seed=3)\n        self.verify(ts.first())\n\n    def test_nonbinary(self):\n        self.verify(self.get_nonbinary_tree())\n\n    def test_zero_edge(self):\n        self.verify(self.get_zero_edge_tree())\n\n    def test_zero_roots(self):\n        
self.verify(self.get_zero_roots_tree())\n\n    def test_multiroot(self):\n        self.verify(self.get_multiroot_tree())\n\n    def test_left_child(self):\n        t = self.get_nonbinary_tree()\n        left_child = drawing.get_left_child(t, t.postorder())\n        for u in t.nodes(order=\"postorder\"):\n            if t.num_children(u) > 0:\n                assert left_child[u] == t.children(u)[0]\n\n    def test_null_node_left_child(self):\n        t = self.get_nonbinary_tree()\n        arr = list(t.nodes(order=\"minlex_postorder\"))\n        left_child = drawing.get_left_child(t, arr)\n        assert left_child[tskit.NULL] == tskit.NULL\n\n    def test_leaf_node_left_child(self):\n        t = self.get_nonbinary_tree()\n        arr = list(t.nodes(order=\"minlex_postorder\"))\n        left_child = drawing.get_left_child(t, arr)\n        for u in t.samples():\n            assert left_child[u] == tskit.NULL\n\n\nclass TestOrder(TestTreeDraw):\n    \"\"\"\n    Tests for using the different node orderings.\n    \"\"\"\n\n    def test_bad_order(self):\n        for bad_order in [(\"sdf\"), \"sdf\", 1234, \"\"]:\n            with pytest.raises(ValueError):\n                drawing.check_order(bad_order)\n\n    def test_default_order(self):\n        traversal_order = drawing.check_order(None)\n        assert traversal_order == \"minlex_postorder\"\n\n    def test_order_mapping(self):\n        assert drawing.check_order(\"tree\") == \"postorder\"\n        assert drawing.check_order(\"minlex\") == \"minlex_postorder\"\n\n    def test_tree_svg_variants(self):\n        t = self.get_binary_tree()\n        output1 = t.draw(format=\"svg\")\n        output2 = t.draw(format=\"svg\", order=\"minlex\")\n        output3 = t.draw(format=\"svg\", order=\"tree\")\n        # Default is minlex\n        assert output1 == output2\n        # tree is at least different to minlex\n        assert output1 != output3\n        # draw_svg gets the same results\n        assert t.draw_svg() == 
output1\n        assert t.draw_svg(order=\"minlex\") == output1\n        assert t.draw_svg(order=\"tree\") == output3\n\n    def test_tree_text_variants(self):\n        t = self.get_binary_tree()\n        output1 = t.draw(format=\"unicode\")\n        output2 = t.draw(format=\"unicode\", order=\"minlex\")\n        output3 = t.draw(format=\"unicode\", order=\"tree\")\n        # Default is minlex\n        assert output1 == output2\n        # tree is at least different to minlex\n        assert output1 != output3\n        # draw_text gets the same results\n        assert t.draw_text() == output1\n        assert t.draw_text(order=\"minlex\") == output1\n        assert t.draw_text(order=\"tree\") == output3\n\n    def test_tree_sequence_text_variants(self):\n        ts = msprime.simulate(10, random_seed=2)\n        output1 = ts.draw_text()\n        output2 = ts.draw_text(order=\"minlex\")\n        output3 = ts.draw_text(order=\"tree\")\n\n        # Default is minlex\n        assert output1 == output2\n        # tree is at least different to minlex\n        assert output1 != output3\n\n    def test_tree_sequence_svg_variants(self):\n        ts = msprime.simulate(10, random_seed=2)\n        output1 = ts.draw_svg()\n        output2 = ts.draw_svg(order=\"minlex\")\n        output3 = ts.draw_svg(order=\"tree\")\n\n        # Default is minlex\n        assert output1 == output2\n        # tree is at least different to minlex\n        assert output1 != output3\n\n\nclass TestFormats(TestTreeDraw):\n    \"\"\"\n    Tests that formats are recognised correctly.\n    \"\"\"\n\n    def test_svg_variants(self):\n        t = self.get_binary_tree()\n        for svg in [\"svg\", \"SVG\", \"sVg\"]:\n            output = t.draw(format=svg)\n            root = xml.etree.ElementTree.fromstring(output)\n            assert root.tag == \"{http://www.w3.org/2000/svg}svg\"\n\n    def test_default(self):\n        # Default is SVG\n        t = self.get_binary_tree()\n        output = 
t.draw(format=None)\n        root = xml.etree.ElementTree.fromstring(output)\n        assert root.tag == \"{http://www.w3.org/2000/svg}svg\"\n        output = t.draw()\n        root = xml.etree.ElementTree.fromstring(output)\n        assert root.tag == \"{http://www.w3.org/2000/svg}svg\"\n\n    def test_ascii_variants(self):\n        t = self.get_binary_tree()\n        for fmt in [\"ascii\", \"ASCII\", \"AScii\"]:\n            output = t.draw(format=fmt)\n            with pytest.raises(xml.etree.ElementTree.ParseError):\n                xml.etree.ElementTree.fromstring(\n                    output,\n                )\n\n    def test_unicode_variants(self):\n        t = self.get_binary_tree()\n        for fmt in [\"unicode\", \"UNICODE\", \"uniCODE\"]:\n            output = t.draw(format=fmt)\n            with pytest.raises(xml.etree.ElementTree.ParseError):\n                xml.etree.ElementTree.fromstring(\n                    output,\n                )\n\n    def test_bad_formats(self):\n        t = self.get_binary_tree()\n        for bad_format in [\"\", \"ASC\", \"SV\", \"jpeg\"]:\n            with pytest.raises(ValueError):\n                t.draw(format=bad_format)\n\n\nclass TestDrawText(TestTreeDraw):\n    \"\"\"\n    Tests the ASCII tree drawing method.\n    \"\"\"\n\n    drawing_format = \"ascii\"\n    example_label = \"XXX\"\n\n    def verify_basic_text(self, text):\n        assert isinstance(text, str)\n        # TODO surely something else we can verify about this...\n\n    def test_draw_defaults(self):\n        t = self.get_binary_tree()\n        text = t.draw(format=self.drawing_format)\n        self.verify_basic_text(text)\n\n    def test_draw_nonbinary(self):\n        t = self.get_nonbinary_tree()\n        text = t.draw(format=self.drawing_format)\n        self.verify_basic_text(text)\n\n    def test_draw_multiroot(self):\n        t = self.get_multiroot_tree()\n        text = t.draw(format=self.drawing_format)\n        
self.verify_basic_text(text)\n\n    def test_draw_mutations_over_roots(self):\n        t = self.get_mutations_over_roots_tree()\n        text = t.draw(format=self.drawing_format)\n        self.verify_basic_text(text)\n\n    def test_draw_unary(self):\n        t = self.get_unary_node_tree()\n        text = t.draw(format=self.drawing_format)\n        self.verify_basic_text(text)\n\n    def test_draw_empty_tree(self):\n        t = self.get_empty_tree()\n        with pytest.raises(ValueError):\n            t.draw(format=self.drawing_format)\n\n    def test_draw_zero_roots_tree(self):\n        t = self.get_zero_roots_tree()\n        with pytest.raises(ValueError):\n            t.draw(format=self.drawing_format)\n\n    def test_draw_zero_edge_tree(self):\n        t = self.get_zero_edge_tree()\n        text = t.draw(format=self.drawing_format)\n        self.verify_basic_text(text)\n\n    def test_even_num_children_tree(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time\n        0   1           0\n        1   1           1\n        2   1           2\n        3   1           1\n        4   1           4\n        5   1           5\n        6   1           7\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       1       6       0\n        0       1       6       1\n        0       1       6       2\n        0       1       6       3\n        0       1       6       4\n        0       1       6       5\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes, edges, strict=False)\n        t = next(ts.trees())\n        text = t.draw(format=self.drawing_format)\n        self.verify_basic_text(text)\n\n    def test_odd_num_children_tree(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time\n        0   1           0\n        1   1           1\n        2   1           2\n        3   1           1\n        4   1           4\n 
       5   1           5\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       1       5       0\n        0       1       5       1\n        0       1       5       2\n        0       1       5       3\n        0       1       5       4\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes, edges, strict=False)\n        t = next(ts.trees())\n        text = t.draw(format=self.drawing_format)\n        self.verify_basic_text(text)\n\n    def test_node_labels(self):\n        t = self.get_binary_tree()\n        labels = {u: self.example_label for u in t.nodes()}\n        text = t.draw(format=self.drawing_format, node_labels=labels)\n        self.verify_basic_text(text)\n        j = 0\n        for _ in t.nodes():\n            j = text[j:].find(self.example_label)\n            assert j != -1\n\n    def test_long_internal_labels(self):\n        t = self.get_binary_tree()\n        labels = {u: \"X\" * 10 for u in t.nodes() if t.is_internal(u)}\n        text = t.draw(format=self.drawing_format, node_labels=labels)\n        self.verify_basic_text(text)\n\n    def test_no_node_labels(self):\n        t = self.get_binary_tree()\n        labels = {}\n        text = t.draw(format=self.drawing_format, node_labels=labels)\n        self.verify_basic_text(text)\n        for u in t.nodes():\n            assert text.find(str(u)) == -1\n\n    def test_unused_args(self):\n        t = self.get_binary_tree()\n        with pytest.raises(ValueError):\n            t.draw(format=self.drawing_format, width=300)\n        with pytest.raises(ValueError):\n            t.draw(format=self.drawing_format, height=300)\n        with pytest.raises(ValueError):\n            t.draw(format=self.drawing_format, mutation_labels={})\n        with pytest.raises(ValueError):\n            t.draw(format=self.drawing_format, mutation_colours={})\n        with pytest.raises(ValueError):\n            
t.draw(format=self.drawing_format, edge_colours={})\n        with pytest.raises(ValueError):\n            t.draw(format=self.drawing_format, node_colours={})\n        with pytest.raises(ValueError):\n            t.draw(format=self.drawing_format, max_time=1234)\n        with pytest.raises(ValueError):\n            t.draw(format=self.drawing_format, min_time=1234)\n        with pytest.raises(ValueError):\n            with pytest.warns(FutureWarning):\n                t.draw(format=self.drawing_format, max_tree_height=1234)\n        with pytest.raises(ValueError):\n            t.draw(format=self.drawing_format, time_scale=\"time\")\n        with pytest.raises(ValueError):\n            with pytest.warns(FutureWarning):\n                t.draw(format=self.drawing_format, tree_height_scale=\"time\")\n\n\nclass TestDrawUnicode(TestDrawText):\n    \"\"\"\n    Tests the Unicode tree drawing method\n    \"\"\"\n\n    drawing_format = \"unicode\"\n    example_label = \"\\u20ac\" * 10  # euro symbol\n\n\nclass TestDrawTextErrors:\n    \"\"\"\n    Tests for errors occurring in tree drawing code.\n    \"\"\"\n\n    def test_bad_orientation(self):\n        t = msprime.simulate(5, mutation_rate=0.1, random_seed=2).first()\n        for bad_orientation in [\"\", \"leftright\", \"sdf\"]:\n            with pytest.raises(ValueError):\n                t.draw_text(orientation=bad_orientation)\n\n\nclass TestDrawTextExamples(TestTreeDraw):\n    \"\"\"\n    Verify that we get the correct rendering for some examples.\n    \"\"\"\n\n    def verify_text_rendering(self, drawn, drawn_tree, debug=False):\n        if debug:\n            print(\"Drawn:\")\n            print(drawn)\n            print(\"Expected:\")\n            print(drawn_tree)\n        tree_lines = drawn_tree.splitlines()\n        drawn_lines = drawn.splitlines()\n        assert len(tree_lines) == len(drawn_lines)\n        for l1, l2 in zip(tree_lines, drawn_lines):\n            # Trailing white space isn't significant.\n         
   assert l1.rstrip() == l2.rstrip()\n\n    def test_simple_tree(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time\n        0   1           0\n        1   1           0\n        2   1           2\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       1       2       0\n        0       1       2       1\n        \"\"\"\n        )\n        # fmt: off\n        tree = (\n            \" 2 \\n\"\n            \"┏┻┓\\n\"\n            \"0 1\"\n        )\n        # fmt: on\n        ts = tskit.load_text(nodes, edges, strict=False)\n        t = next(ts.trees())\n        drawn = t.draw(format=\"unicode\", order=\"tree\")\n        self.verify_text_rendering(drawn, tree)\n        drawn = t.draw_text()\n        self.verify_text_rendering(drawn, tree)\n\n        # fmt: off\n        tree = (\n            \" 2 \\n\"\n            \"+++\\n\"\n            \"0 1\\n\"\n        )\n        # fmt: on\n        drawn = t.draw_text(use_ascii=True, order=\"tree\")\n        self.verify_text_rendering(drawn, tree)\n\n        # fmt: off\n        tree = (\n            \" ┏0\\n\"\n            \"2┫  \\n\"\n            \" ┗1\\n\"\n        )\n        # fmt: on\n        drawn = t.draw_text(orientation=\"left\", order=\"tree\")\n        self.verify_text_rendering(drawn, tree)\n        # fmt: off\n        tree = (\n            \" +0\\n\"\n            \"2+  \\n\"\n            \" +1\\n\"\n        )\n        # fmt: on\n        drawn = t.draw_text(orientation=\"left\", use_ascii=True, order=\"tree\")\n        self.verify_text_rendering(drawn, tree)\n\n    def test_simple_tree_long_label(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time\n        0   1           0\n        1   1           0\n        2   1           2\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       1       
2       0\n        0       1       2       1\n        \"\"\"\n        )\n        # fmt: off\n        tree = (\n            \"ABCDEF\\n\"\n            \"┏┻┓   \\n\"\n            \"0 1   \\n\"\n        )\n        # fmt: on\n        ts = tskit.load_text(nodes, edges, strict=False)\n        t = next(ts.trees())\n        drawn = t.draw_text(node_labels={0: \"0\", 1: \"1\", 2: \"ABCDEF\"}, order=\"tree\")\n        self.verify_text_rendering(drawn, tree)\n\n        # fmt: off\n        tree = (\n            \"0┓      \\n\"\n            \" ┣ABCDEF\\n\"\n            \"1┛      \\n\"\n        )\n        # fmt: on\n        drawn = t.draw_text(\n            node_labels={0: \"0\", 1: \"1\", 2: \"ABCDEF\"}, orientation=\"right\", order=\"tree\"\n        )\n        self.verify_text_rendering(drawn, tree)\n\n        drawn = t.draw_text(\n            node_labels={0: \"ABCDEF\", 1: \"1\", 2: \"2\"}, orientation=\"right\", order=\"tree\"\n        )\n        # fmt: off\n        tree = (\n            \"ABCDEF┓ \\n\"\n            \"      ┣2\\n\"\n            \"1━━━━━┛ \\n\"\n        )\n        # fmt: on\n        self.verify_text_rendering(drawn, tree)\n\n        # fmt: off\n        tree = (\n            \"      ┏0\\n\"\n            \"ABCDEF┫ \\n\"\n            \"      ┗1\\n\"\n        )\n        # fmt: on\n        drawn = t.draw_text(\n            node_labels={0: \"0\", 1: \"1\", 2: \"ABCDEF\"}, orientation=\"left\", order=\"tree\"\n        )\n        self.verify_text_rendering(drawn, tree)\n\n    def test_four_leaves(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   population      individual      time    metadata\n        0       1       0       -1      0.00000000000000\n        1       1       0       -1      0.00000000000000\n        2       1       0       -1      0.00000000000000\n        3       1       0       -1      0.00000000000000\n        4       0       0       -1      0.26676079696421\n        5       0       0       -1      
1.48826948286480\n        6       0       0       -1      2.91835007758007\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left            right           parent  child\n        0.00000000      1.00000000      4       0\n        0.00000000      1.00000000      4       3\n        0.00000000      1.00000000      5       2\n        0.00000000      1.00000000      5       4\n        0.00000000      1.00000000      6       1\n        0.00000000      1.00000000      6       5\n        \"\"\"\n        )\n        # fmt: off\n        tree = (\n            \"  6     \\n\"\n            \"┏━┻━┓   \\n\"\n            \"┃   5   \\n\"\n            \"┃ ┏━┻┓  \\n\"\n            \"┃ ┃  4  \\n\"\n            \"┃ ┃ ┏┻┓ \\n\"\n            \"1 2 0 3 \\n\"\n        )\n        # fmt: on\n        ts = tskit.load_text(nodes, edges, strict=False)\n        t = ts.first()\n        drawn = t.draw(format=\"unicode\", order=\"tree\")\n        self.verify_text_rendering(drawn, tree)\n        self.verify_text_rendering(t.draw_text(order=\"tree\"), tree)\n\n        drawn = t.draw_text(orientation=\"bottom\", order=\"tree\")\n        # fmt: off\n        tree = (\n            \"1 2 0 3\\n\"\n            \"┃ ┃ ┗┳┛\\n\"\n            \"┃ ┃  4 \\n\"\n            \"┃ ┗━┳┛ \\n\"\n            \"┃   5  \\n\"\n            \"┗━┳━┛  \\n\"\n            \"  6    \\n\"\n        )\n        # fmt: on\n        self.verify_text_rendering(drawn, tree)\n\n        # fmt: off\n        tree = (\n            \" ┏━━━━1\\n\"\n            \" ┃     \\n\"\n            \"6┫ ┏━━2\\n\"\n            \" ┃ ┃   \\n\"\n            \" ┗5┫ ┏0\\n\"\n            \"   ┗4┫  \\n\"\n            \"     ┗3\\n\"\n        )\n        # fmt: on\n        self.verify_text_rendering(t.draw_text(orientation=\"left\", order=\"tree\"), tree)\n\n        # fmt: off\n        tree = (\n            \"2.92┊   6     ┊\\n\"\n            \"    ┊ ┏━┻━┓   ┊\\n\"\n            \"1.49┊ ┃   5   ┊\\n\"\n            \"    ┊ ┃ ┏━┻┓  
┊\\n\"\n            \"0.27┊ ┃ ┃  4  ┊\\n\"\n            \"    ┊ ┃ ┃ ┏┻┓ ┊\\n\"\n            \"0.00┊ 1 2 0 3 ┊\\n\"\n            \"    0         1\\n\"\n        )\n        # fmt: on\n        self.verify_text_rendering(ts.draw_text(order=\"tree\"), tree)\n\n        # fmt: off\n        tree = (\n            \"  6    \\n\"\n            \"+-+-+  \\n\"\n            \"|   5  \\n\"\n            \"| +-++ \\n\"\n            \"| |  4 \\n\"\n            \"| | +++\\n\"\n            \"1 2 0 3\\n\"\n        )\n        # fmt: on\n        drawn = t.draw(format=\"ascii\", order=\"tree\")\n        self.verify_text_rendering(drawn, tree)\n\n        # fmt: off\n        tree = (\n            \"  6     \\n\"\n            \"┏━┻━┓   \\n\"\n            \"┃xxxxxxxxxx\\n\"\n            \"┃ ┏━┻┓  \\n\"\n            \"┃ ┃  4  \\n\"\n            \"┃ ┃ ┏┻┓ \\n\"\n            \"1 2 0 3 \\n\"\n        )\n        # fmt: on\n        labels = {u: str(u) for u in t.nodes()}\n        labels[5] = \"xxxxxxxxxx\"\n        drawn = t.draw_text(node_labels=labels, order=\"tree\")\n        self.verify_text_rendering(drawn, tree)\n\n        # fmt: off\n        tree = (\n            \" ┏━━━━━━━━━━━━━1\\n\"\n            \" ┃              \\n\"\n            \"6┫          ┏━━2\\n\"\n            \" ┃          ┃   \\n\"\n            \" ┗xxxxxxxxxx┫ ┏0\\n\"\n            \"            ┗4┫ \\n\"\n            \"              ┗3\\n\"\n        )\n        # fmt: on\n        drawn = t.draw_text(node_labels=labels, orientation=\"left\", order=\"tree\")\n        self.verify_text_rendering(drawn, tree)\n\n        # fmt: off\n        tree = (\n            \"2.92┊   6         ┊\\n\"\n            \"    ┊ ┏━┻━┓       ┊\\n\"\n            \"1.49┊ ┃xxxxxxxxxx ┊\\n\"\n            \"    ┊ ┃ ┏━┻┓      ┊\\n\"\n            \"0.27┊ ┃ ┃  4      ┊\\n\"\n            \"    ┊ ┃ ┃ ┏┻┓     ┊\\n\"\n            \"0.00┊ 1 2 0 3     ┊\\n\"\n            \"    0             1\\n\"\n        )\n        # fmt: on\n        drawn = 
ts.draw_text(node_labels=labels, order=\"tree\")\n        self.verify_text_rendering(drawn, tree)\n\n    def test_trident_tree(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time\n        0   1           0\n        1   1           0\n        2   1           0\n        3   1           2\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       1       3       0\n        0       1       3       1\n        0       1       3       2\n        \"\"\"\n        )\n        # fmt: off\n        tree = (\n            \"  3  \\n\"\n            \"┏━╋━┓\\n\"\n            \"0 1 2\\n\"\n        )\n        # fmt: on\n        ts = tskit.load_text(nodes, edges, strict=False)\n        t = next(ts.trees())\n        drawn = t.draw(format=\"unicode\", order=\"tree\")\n        self.verify_text_rendering(drawn, tree)\n        self.verify_text_rendering(t.draw_text(), tree)\n\n        # fmt: off\n        tree = (\n            \" ┏0\\n\"\n            \" ┃\\n\"\n            \"3╋1\\n\"\n            \" ┃\\n\"\n            \" ┗2\\n\"\n        )\n        # fmt: on\n        drawn = t.draw_text(orientation=\"left\")\n        self.verify_text_rendering(drawn, tree)\n\n        # fmt: off\n        tree = (\n            \"0┓\\n\"\n            \" ┃\\n\"\n            \"1╋3\\n\"\n            \" ┃\\n\"\n            \"2┛\\n\"\n        )\n        # fmt: on\n        drawn = t.draw_text(orientation=\"right\")\n        self.verify_text_rendering(drawn, tree)\n\n    def test_pitchfork_tree(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time\n        0   1           0\n        1   1           0\n        2   1           0\n        3   1           0\n        4   1           2\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       1       4       0\n        0       1       4       1\n       
 0       1       4       2\n        0       1       4       3\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes, edges, strict=False)\n        t = next(ts.trees())\n        # fmt: off\n        tree = (\n            \"   4   \\n\"\n            \"┏━┳┻┳━┓\\n\"\n            \"0 1 2 3\\n\"\n        )\n        # fmt: on\n        drawn = t.draw(format=\"unicode\", order=\"tree\")\n        self.verify_text_rendering(drawn, tree)\n        self.verify_text_rendering(t.draw_text(), tree)\n\n        # No labels\n        # fmt: off\n        tree = (\n            \"   ┃   \\n\"\n            \"┏━┳┻┳━┓\\n\"\n            \"┃ ┃ ┃ ┃\\n\"\n        )\n        # fmt: on\n        drawn = t.draw(format=\"unicode\", node_labels={}, order=\"tree\")\n        self.verify_text_rendering(drawn, tree)\n        self.verify_text_rendering(t.draw_text(node_labels={}), tree)\n        # Some labels\n        # fmt: off\n        tree = (\n            \"   ┃   \\n\"\n            \"┏━┳┻┳━┓\\n\"\n            \"0 ┃ ┃ 3\\n\"\n        )\n        # fmt: on\n        labels = {0: \"0\", 3: \"3\"}\n        drawn = t.draw(format=\"unicode\", node_labels=labels, order=\"tree\")\n        self.verify_text_rendering(drawn, tree)\n        self.verify_text_rendering(t.draw_text(node_labels=labels), tree)\n\n        # fmt: off\n        tree = (\n            \" ┏0\\n\"\n            \" ┃\\n\"\n            \" ┣1\\n\"\n            \"4┫\\n\"\n            \" ┣2\\n\"\n            \" ┃\\n\"\n            \" ┗3\\n\"\n        )\n        # fmt: on\n        drawn = t.draw_text(orientation=\"left\")\n        self.verify_text_rendering(drawn, tree)\n\n        # fmt: off\n        tree = (\n            \"0┓\\n\"\n            \" ┃\\n\"\n            \"1┫\\n\"\n            \" ┣4\\n\"\n            \"2┫\\n\"\n            \" ┃\\n\"\n            \"3┛\\n\"\n        )\n        # fmt: on\n        drawn = t.draw_text(orientation=\"right\")\n        self.verify_text_rendering(drawn, tree)\n\n    def test_stick_tree(self):\n        
nodes = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time\n        0   1           0\n        1   1           1\n        2   1           2\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       1       1       0\n        0       1       2       1\n        \"\"\"\n        )\n        # fmt: off\n        tree = (\n            \"2\\n\"\n            \"┃\\n\"\n            \"1\\n\"\n            \"┃\\n\"\n            \"0\\n\"\n        )\n        # fmt: on\n        ts = tskit.load_text(nodes, edges, strict=False)\n        t = next(ts.trees())\n        drawn = t.draw(format=\"unicode\", order=\"tree\")\n        self.verify_text_rendering(drawn, tree)\n        self.verify_text_rendering(t.draw_text(), tree)\n\n        # fmt: off\n        tree = (\n            \"0\\n\"\n            \"┃\\n\"\n            \"1\\n\"\n            \"┃\\n\"\n            \"2\\n\"\n        )\n        # fmt: on\n        drawn = t.draw_text(orientation=\"bottom\")\n        self.verify_text_rendering(drawn, tree)\n\n        tree = \"2━1━0\\n\"\n        drawn = t.draw_text(orientation=\"left\")\n        self.verify_text_rendering(drawn, tree)\n\n        tree = \"0━1━2\\n\"\n        drawn = t.draw_text(orientation=\"right\")\n        self.verify_text_rendering(drawn, tree)\n\n    def test_draw_forky_tree(self):\n        tree = (\n            \"      14            \\n\"\n            \"  ┏━━━━┻━━━━┓       \\n\"\n            \"  ┃        13       \\n\"\n            \"  ┃   ┏━┳━┳━╋━┳━━┓  \\n\"\n            \"  ┃   ┃ ┃ ┃ ┃ ┃ 12  \\n\"\n            \"  ┃   ┃ ┃ ┃ ┃ ┃ ┏┻┓ \\n\"\n            \" 11   ┃ ┃ ┃ ┃ ┃ ┃ ┃ \\n\"\n            \"┏━┻┓  ┃ ┃ ┃ ┃ ┃ ┃ ┃ \\n\"\n            \"┃ 10  ┃ ┃ ┃ ┃ ┃ ┃ ┃ \\n\"\n            \"┃ ┏┻┓ ┃ ┃ ┃ ┃ ┃ ┃ ┃ \\n\"\n            \"8 0 3 2 4 5 6 9 1 7 \\n\"\n        )\n\n        nodes = io.StringIO(\n            \"\"\"\\\n            id      is_sample   population      individual      time    
metadata\n            0       1       0       -1      0.00000000000000\n            1       1       0       -1      0.00000000000000\n            2       1       0       -1      0.00000000000000\n            3       1       0       -1      0.00000000000000\n            4       1       0       -1      0.00000000000000\n            5       1       0       -1      0.00000000000000\n            6       1       0       -1      0.00000000000000\n            7       1       0       -1      0.00000000000000\n            8       1       0       -1      0.00000000000000\n            9       1       0       -1      0.00000000000000\n            10      0       0       -1      0.02398248117831\n            11      0       0       -1      0.17378680550869\n            12      0       0       -1      0.19950200178411\n            13      0       0       -1      0.20000000000000\n            14      0       0       -1      5.68339203134457\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n            left            right           parent  child\n            0.00000000      1.00000000      10      0\n            0.00000000      1.00000000      10      3\n            0.00000000      1.00000000      11      8\n            0.00000000      1.00000000      11      10\n            0.00000000      1.00000000      12      1\n            0.00000000      1.00000000      12      7\n            0.00000000      1.00000000      13      2\n            0.00000000      1.00000000      13      4\n            0.00000000      1.00000000      13      5\n            0.00000000      1.00000000      13      6\n            0.00000000      1.00000000      13      9\n            0.00000000      1.00000000      13      12\n            0.00000000      1.00000000      14      11\n            0.00000000      1.00000000      14      13\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes, edges, strict=False)\n        t = next(ts.trees())\n        drawn = 
t.draw(format=\"unicode\", order=\"tree\")\n        self.verify_text_rendering(drawn, tree)\n        self.verify_text_rendering(t.draw_text(order=\"tree\"), tree)\n\n        tree = (\n            \"        14              \\n\"\n            \"  ┏━━━━━━┻━━━━━━┓       \\n\"\n            \"  ┃            13       \\n\"\n            \"  ┃        ┏━┳━┳┻┳━┳━━┓ \\n\"\n            \"  ┃        ┃ ┃ ┃ ┃ ┃ 12 \\n\"\n            \"  ┃        ┃ ┃ ┃ ┃ ┃ ┏┻┓\\n\"\n            \"x11xxxxxxx ┃ ┃ ┃ ┃ ┃ ┃ ┃\\n\"\n            \"┏━┻┓       ┃ ┃ ┃ ┃ ┃ ┃ ┃\\n\"\n            \"┃ 10       ┃ ┃ ┃ ┃ ┃ ┃ ┃\\n\"\n            \"┃ ┏┻┓      ┃ ┃ ┃ ┃ ┃ ┃ ┃\\n\"\n            \"8 0 3      2 4 5 6 9 1 7\\n\"\n        )\n        labels = {u: str(u) for u in t.nodes()}\n        labels[11] = \"x11xxxxxxx\"\n        self.verify_text_rendering(t.draw_text(node_labels=labels, order=\"tree\"), tree)\n\n        tree = (\n            \"      14           \\n\"\n            \"  ┏━━━━┻━━━━┓      \\n\"\n            \"  ┃        13      \\n\"\n            \"  ┃    ┏━━┳━╋━┳━┳━┓\\n\"\n            \"  ┃   12  ┃ ┃ ┃ ┃ ┃\\n\"\n            \"  ┃   ┏┻┓ ┃ ┃ ┃ ┃ ┃\\n\"\n            \" 11   ┃ ┃ ┃ ┃ ┃ ┃ ┃\\n\"\n            \" ┏┻━┓ ┃ ┃ ┃ ┃ ┃ ┃ ┃\\n\"\n            \"10  ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃\\n\"\n            \"┏┻┓ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃\\n\"\n            \"0 3 8 1 7 2 4 5 6 9\\n\"\n        )\n        self.verify_text_rendering(t.draw_text(order=\"minlex\"), tree)\n\n    def test_draw_multiroot_forky_tree(self):\n        tree = (\n            \"           13      \\n\"\n            \"      ┏━┳━┳━╋━┳━━┓ \\n\"\n            \"      ┃ ┃ ┃ ┃ ┃ 12 \\n\"\n            \"      ┃ ┃ ┃ ┃ ┃ ┏┻┓\\n\"\n            \" 11   ┃ ┃ ┃ ┃ ┃ ┃ ┃\\n\"\n            \"┏━┻┓  ┃ ┃ ┃ ┃ ┃ ┃ ┃\\n\"\n            \"┃ 10  ┃ ┃ ┃ ┃ ┃ ┃ ┃\\n\"\n            \"┃ ┏┻┓ ┃ ┃ ┃ ┃ ┃ ┃ ┃\\n\"\n            \"8 0 3 2 4 5 6 9 1 7\\n\"\n        )\n        nodes = io.StringIO(\n            \"\"\"\\\n            id      is_sample   population      individual      time    metadata\n            0    
   1       0       -1      0.00000000000000\n            1       1       0       -1      0.00000000000000\n            2       1       0       -1      0.00000000000000\n            3       1       0       -1      0.00000000000000\n            4       1       0       -1      0.00000000000000\n            5       1       0       -1      0.00000000000000\n            6       1       0       -1      0.00000000000000\n            7       1       0       -1      0.00000000000000\n            8       1       0       -1      0.00000000000000\n            9       1       0       -1      0.00000000000000\n            10      0       0       -1      0.02398248117831\n            11      0       0       -1      0.17378680550869\n            12      0       0       -1      0.19950200178411\n            13      0       0       -1      0.20000000000000\n            14      0       0       -1      5.68339203134457\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n            left            right           parent  child\n            0.00000000      1.00000000      10      0\n            0.00000000      1.00000000      10      3\n            0.00000000      1.00000000      11      8\n            0.00000000      1.00000000      11      10\n            0.00000000      1.00000000      12      1\n            0.00000000      1.00000000      12      7\n            0.00000000      1.00000000      13      2\n            0.00000000      1.00000000      13      4\n            0.00000000      1.00000000      13      5\n            0.00000000      1.00000000      13      6\n            0.00000000      1.00000000      13      9\n            0.00000000      1.00000000      13      12\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes, edges, strict=False)\n        t = next(ts.trees())\n        drawn = t.draw(format=\"unicode\", order=\"tree\")\n        self.verify_text_rendering(drawn, tree)\n        
self.verify_text_rendering(t.draw_text(order=\"tree\"), tree)\n\n        tree = (\n            \"           13      \\n\"\n            \"       ┏━━┳━╋━┳━┳━┓\\n\"\n            \"      12  ┃ ┃ ┃ ┃ ┃\\n\"\n            \"      ┏┻┓ ┃ ┃ ┃ ┃ ┃\\n\"\n            \" 11   ┃ ┃ ┃ ┃ ┃ ┃ ┃\\n\"\n            \" ┏┻━┓ ┃ ┃ ┃ ┃ ┃ ┃ ┃\\n\"\n            \"10  ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃\\n\"\n            \"┏┻┓ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃\\n\"\n            \"0 3 8 1 7 2 4 5 6 9\\n\"\n        )\n        drawn = t.draw(format=\"unicode\")\n        self.verify_text_rendering(drawn, tree)\n        self.verify_text_rendering(t.draw_text(), tree)\n        self.verify_text_rendering(t.draw_text(order=\"minlex\"), tree)\n\n    def test_simple_tree_sequence(self):\n        ts = self.get_simple_ts()\n        ts_drawing = (\n            \"9.08┊    9    ┊         ┊         ┊         ┊         ┊\\n\"\n            \"    ┊  ┏━┻━┓  ┊         ┊         ┊         ┊         ┊\\n\"\n            \"6.57┊  ┃   ┃  ┊         ┊         ┊         ┊    8    ┊\\n\"\n            \"    ┊  ┃   ┃  ┊         ┊         ┊         ┊  ┏━┻━┓  ┊\\n\"\n            \"5.31┊  ┃   ┃  ┊    7    ┊         ┊    7    ┊  ┃   ┃  ┊\\n\"\n            \"    ┊  ┃   ┃  ┊  ┏━┻━┓  ┊         ┊  ┏━┻━┓  ┊  ┃   ┃  ┊\\n\"\n            \"1.75┊  ┃   ┃  ┊  ┃   ┃  ┊    6    ┊  ┃   ┃  ┊  ┃   ┃  ┊\\n\"\n            \"    ┊  ┃   ┃  ┊  ┃   ┃  ┊  ┏━┻━┓  ┊  ┃   ┃  ┊  ┃   ┃  ┊\\n\"\n            \"1.11┊  ┃   5  ┊  ┃   5  ┊  ┃   5  ┊  ┃   5  ┊  ┃   5  ┊\\n\"\n            \"    ┊  ┃  ┏┻┓ ┊  ┃  ┏┻┓ ┊  ┃  ┏┻┓ ┊  ┃  ┏┻┓ ┊  ┃  ┏┻┓ ┊\\n\"\n            \"0.11┊  4  ┃ ┃ ┊  4  ┃ ┃ ┊  4  ┃ ┃ ┊  4  ┃ ┃ ┊  4  ┃ ┃ ┊\\n\"\n            \"    ┊ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┊\\n\"\n            \"0.00┊ 0 1 2 3 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊\\n\"\n            \"  0.00      0.06      0.79      0.91      0.91      1.00\\n\"\n        )\n        self.verify_text_rendering(ts.draw_text(), ts_drawing)\n\n        ts_drawing = (\n            \"9.08|    9    |         |         |     
    |         |\\n\"\n            \"    |  +-+-+  |         |         |         |         |\\n\"\n            \"6.57|  |   |  |         |         |         |    8    |\\n\"\n            \"    |  |   |  |         |         |         |  +-+-+  |\\n\"\n            \"5.31|  |   |  |    7    |         |    7    |  |   |  |\\n\"\n            \"    |  |   |  |  +-+-+  |         |  +-+-+  |  |   |  |\\n\"\n            \"1.75|  |   |  |  |   |  |    6    |  |   |  |  |   |  |\\n\"\n            \"    |  |   |  |  |   |  |  +-+-+  |  |   |  |  |   |  |\\n\"\n            \"1.11|  |   5  |  |   5  |  |   5  |  |   5  |  |   5  |\\n\"\n            \"    |  |  +++ |  |  +++ |  |  +++ |  |  +++ |  |  +++ |\\n\"\n            \"0.11|  4  | | |  4  | | |  4  | | |  4  | | |  4  | | |\\n\"\n            \"    | +++ | | | +++ | | | +++ | | | +++ | | | +++ | | |\\n\"\n            \"0.00| 0 1 2 3 | 0 1 2 3 | 0 1 2 3 | 0 1 2 3 | 0 1 2 3 |\\n\"\n            \"  0.00      0.06      0.79      0.91      0.91      1.00\\n\"\n        )\n        self.verify_text_rendering(ts.draw_text(use_ascii=True), ts_drawing)\n\n        ts_drawing = (\n            \"┊    9    ┊         ┊         ┊         ┊         ┊\\n\"\n            \"┊  ┏━┻━┓  ┊         ┊         ┊         ┊         ┊\\n\"\n            \"┊  ┃   ┃  ┊         ┊         ┊         ┊    8    ┊\\n\"\n            \"┊  ┃   ┃  ┊         ┊         ┊         ┊  ┏━┻━┓  ┊\\n\"\n            \"┊  ┃   ┃  ┊    7    ┊         ┊    7    ┊  ┃   ┃  ┊\\n\"\n            \"┊  ┃   ┃  ┊  ┏━┻━┓  ┊         ┊  ┏━┻━┓  ┊  ┃   ┃  ┊\\n\"\n            \"┊  ┃   ┃  ┊  ┃   ┃  ┊    6    ┊  ┃   ┃  ┊  ┃   ┃  ┊\\n\"\n            \"┊  ┃   ┃  ┊  ┃   ┃  ┊  ┏━┻━┓  ┊  ┃   ┃  ┊  ┃   ┃  ┊\\n\"\n            \"┊  ┃   5  ┊  ┃   5  ┊  ┃   5  ┊  ┃   5  ┊  ┃   5  ┊\\n\"\n            \"┊  ┃  ┏┻┓ ┊  ┃  ┏┻┓ ┊  ┃  ┏┻┓ ┊  ┃  ┏┻┓ ┊  ┃  ┏┻┓ ┊\\n\"\n            \"┊  4  ┃ ┃ ┊  4  ┃ ┃ ┊  4  ┃ ┃ ┊  4  ┃ ┃ ┊  4  ┃ ┃ ┊\\n\"\n            \"┊ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┊\\n\"\n          
  \"┊ 0 1 2 3 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊\\n\"\n            \"0.00    0.06      0.79      0.91      0.91      1.00\\n\"\n        )\n        self.verify_text_rendering(ts.draw_text(time_label_format=\"\"), ts_drawing)\n\n        ts_drawing = (\n            \"┊    9    ┊         ┊         ┊         ┊         ┊\\n\"\n            \"┊  ┏━┻━┓  ┊         ┊         ┊         ┊         ┊\\n\"\n            \"┊  ┃   ┃  ┊         ┊         ┊         ┊    8    ┊\\n\"\n            \"┊  ┃   ┃  ┊         ┊         ┊         ┊  ┏━┻━┓  ┊\\n\"\n            \"┊  ┃   ┃  ┊    7    ┊         ┊    7    ┊  ┃   ┃  ┊\\n\"\n            \"┊  ┃   ┃  ┊  ┏━┻━┓  ┊         ┊  ┏━┻━┓  ┊  ┃   ┃  ┊\\n\"\n            \"┊  ┃   ┃  ┊  ┃   ┃  ┊    6    ┊  ┃   ┃  ┊  ┃   ┃  ┊\\n\"\n            \"┊  ┃   ┃  ┊  ┃   ┃  ┊  ┏━┻━┓  ┊  ┃   ┃  ┊  ┃   ┃  ┊\\n\"\n            \"┊  ┃   5  ┊  ┃   5  ┊  ┃   5  ┊  ┃   5  ┊  ┃   5  ┊\\n\"\n            \"┊  ┃  ┏┻┓ ┊  ┃  ┏┻┓ ┊  ┃  ┏┻┓ ┊  ┃  ┏┻┓ ┊  ┃  ┏┻┓ ┊\\n\"\n            \"┊  4  ┃ ┃ ┊  4  ┃ ┃ ┊  4  ┃ ┃ ┊  4  ┃ ┃ ┊  4  ┃ ┃ ┊\\n\"\n            \"┊ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┊\\n\"\n            \"┊ 0 1 2 3 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊\\n\"\n            \"┊         ┊         ┊         ┊         ┊         ┊\\n\"\n        )\n        self.verify_text_rendering(\n            ts.draw_text(time_label_format=\"\", position_label_format=\"\"), ts_drawing\n        )\n\n    def test_tree_sequence_non_minlex(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n            id      is_sample       time    population      individual      metadata\n            0       1       0.000000        0       -1\n            1       1       0.000000        0       -1\n            2       1       0.000000        0       -1\n            3       1       0.000000        0       -1\n            4       1       0.000000        0       -1\n            5       0       1.174545        0       -1\n            6       0       1.207717        0       -1\n       
     7       0       1.276422        0       -1\n            8       0       1.613390        0       -1\n            9       0       2.700069        0       -1\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n            left    right   parent  child\n            0.000000        1.000000        5       0\n            0.000000        1.000000        5       1\n            0.000000        0.209330        6       4\n            0.000000        0.209330        6       5\n            0.000000        1.000000        7       2\n            0.209330        1.000000        7       5\n            0.000000        0.209330        7       6\n            0.209330        1.000000        8       3\n            0.209330        1.000000        8       4\n            0.000000        0.209330        9       3\n            0.000000        1.000000        9       7\n            0.209330        1.000000        9       8\n        \"\"\"\n        )\n\n        ts = tskit.load_text(nodes, edges, strict=False)\n\n        drawn_minlex = (\n            \"2.70┊       9   ┊     9     ┊\\n\"\n            \"    ┊     ┏━┻━┓ ┊   ┏━┻━━┓  ┊\\n\"\n            \"1.61┊     ┃   ┃ ┊   ┃    8  ┊\\n\"\n            \"    ┊     ┃   ┃ ┊   ┃   ┏┻┓ ┊\\n\"\n            \"1.28┊     7   ┃ ┊   7   ┃ ┃ ┊\\n\"\n            \"    ┊   ┏━┻━┓ ┃ ┊  ┏┻━┓ ┃ ┃ ┊\\n\"\n            \"1.21┊   6   ┃ ┃ ┊  ┃  ┃ ┃ ┃ ┊\\n\"\n            \"    ┊  ┏┻━┓ ┃ ┃ ┊  ┃  ┃ ┃ ┃ ┊\\n\"\n            \"1.17┊  5  ┃ ┃ ┃ ┊  5  ┃ ┃ ┃ ┊\\n\"\n            \"    ┊ ┏┻┓ ┃ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┃ ┊\\n\"\n            \"0.00┊ 0 1 4 2 3 ┊ 0 1 2 3 4 ┊\\n\"\n            \"  0.00        0.21        1.00\\n\"\n        )\n        self.verify_text_rendering(ts.draw_text(order=\"minlex\"), drawn_minlex)\n        self.verify_text_rendering(ts.draw_text(), drawn_minlex)\n\n        drawn_tree = (\n            \"2.70┊   9       ┊     9     ┊\\n\"\n            \"    ┊ ┏━┻━┓     ┊   ┏━┻━━┓  ┊\\n\"\n            \"1.61┊ ┃   ┃     ┊   ┃    8  ┊\\n\"\n      
      \"    ┊ ┃   ┃     ┊   ┃   ┏┻┓ ┊\\n\"\n            \"1.28┊ ┃   7     ┊   7   ┃ ┃ ┊\\n\"\n            \"    ┊ ┃ ┏━┻━┓   ┊ ┏━┻┓  ┃ ┃ ┊\\n\"\n            \"1.21┊ ┃ ┃   6   ┊ ┃  ┃  ┃ ┃ ┊\\n\"\n            \"    ┊ ┃ ┃ ┏━┻┓  ┊ ┃  ┃  ┃ ┃ ┊\\n\"\n            \"1.17┊ ┃ ┃ ┃  5  ┊ ┃  5  ┃ ┃ ┊\\n\"\n            \"    ┊ ┃ ┃ ┃ ┏┻┓ ┊ ┃ ┏┻┓ ┃ ┃ ┊\\n\"\n            \"0.00┊ 3 2 4 0 1 ┊ 2 0 1 3 4 ┊\\n\"\n            \"  0.00        0.21        1.00\\n\"\n        )\n        self.verify_text_rendering(ts.draw_text(order=\"tree\"), drawn_tree)\n\n    def test_max_time(self):\n        ts = self.get_simple_ts()\n        tree = (\n            \"   9   \\n\"\n            \" ┏━┻━┓ \\n\"\n            \" ┃   ┃ \\n\"\n            \" ┃   ┃ \\n\"\n            \" ┃   ┃ \\n\"\n            \" ┃   ┃ \\n\"\n            \" ┃   ┃ \\n\"\n            \" ┃   ┃ \\n\"\n            \" ┃   5 \\n\"\n            \" ┃  ┏┻┓\\n\"\n            \" 4  ┃ ┃\\n\"\n            \"┏┻┓ ┃ ┃\\n\"\n            \"0 1 2 3\\n\"\n        )\n        t = ts.first()\n        self.verify_text_rendering(t.draw_text(max_time=\"ts\"), tree)\n\n        # fmt: off\n        tree = (\n            \"   9   \\n\"\n            \" ┏━┻━┓ \\n\"\n            \" ┃   5 \\n\"\n            \" ┃  ┏┻┓\\n\"\n            \" 4  ┃ ┃\\n\"\n            \"┏┻┓ ┃ ┃\\n\"\n            \"0 1 2 3\\n\"\n        )\n        # fmt: on\n        t = ts.first()\n        self.verify_text_rendering(t.draw_text(max_time=\"tree\"), tree)\n        for bad_max_time in [1, \"sdfr\", \"\"]:\n            with pytest.raises(ValueError):\n                t.draw_text(max_time=bad_max_time)\n\n    def test_no_repr_svg(self):\n        tree = self.get_simple_ts().first()\n        output = tree.draw(format=\"unicode\")\n        with pytest.raises(AttributeError, match=\"no attribute\"):\n            output._repr_svg_()\n\n\nclass TestDrawSvgBase(TestTreeDraw, xmlunittest.XmlTestMixin):\n    \"\"\"\n    Base class for testing the SVG tree drawing method\n    \"\"\"\n\n    def 
verify_basic_svg(self, svg, width=200, height=200, num_trees=1, has_root=True):\n        prefix = \"{http://www.w3.org/2000/svg}\"\n        root = xml.etree.ElementTree.fromstring(svg)\n        assert root.tag == prefix + \"svg\"\n        assert width * num_trees == int(root.attrib[\"width\"])\n        assert height == int(root.attrib[\"height\"])\n\n        # Verify the class structure of the svg\n        root_group = root.find(prefix + \"g\")\n        assert \"class\" in root_group.attrib\n        assert re.search(r\"\\b(tree|tree-sequence)\\b\", root_group.attrib[\"class\"])\n        first_plotbox = None\n        if \"tree-sequence\" in root_group.attrib[\"class\"]:\n            trees = None\n            for g in root_group.findall(prefix + \"g\"):\n                if \"trees\" in g.attrib.get(\"class\", \"\"):\n                    trees = g\n                    break\n            assert trees is not None  # Must have found a trees group\n            first_tree = trees.find(prefix + \"g\")\n            assert \"class\" in first_tree.attrib\n            assert re.search(r\"\\btree\\b\", first_tree.attrib[\"class\"])\n            for g in first_tree.findall(prefix + \"g\"):\n                if \"class\" in g.attrib and re.search(r\"\\bplotbox\\b\", g.attrib[\"class\"]):\n                    first_plotbox = g\n        else:\n            for g in root_group.findall(prefix + \"g\"):\n                if \"class\" in g.attrib and re.search(r\"\\bplotbox\\b\", g.attrib[\"class\"]):\n                    first_plotbox = g\n        assert first_plotbox is not None\n        # Check that we have edges, symbols, and labels groups\n        groups = first_plotbox.findall(prefix + \"g\")\n        assert len(groups) > 0\n        for group in groups:\n            assert \"class\" in group.attrib\n            cls = group.attrib[\"class\"]\n            # if a subtree plot, the top of the displayed topology is not a local root\n            if has_root:\n                assert 
re.search(r\"\\broot\\b\", cls)\n            else:\n                assert not re.search(r\"\\broot\\b\", cls)\n\n\nclass TestDrawSvg(TestDrawSvgBase):\n    \"\"\"\n    Simple testing for the draw_svg method\n    \"\"\"\n\n    def test_repr_svg(self):\n        ts = self.get_simple_ts()\n        svg = ts.draw_svg()\n        assert str(svg) == svg._repr_svg_()\n        svg = ts.first().draw_svg()\n        assert str(svg) == svg._repr_svg_()\n        svg = ts.first().draw(format=\"svg\")\n        assert str(svg) == svg._repr_svg_()\n\n    def test_draw_to_file(self, tmp_path):\n        # NB: to view output files for testing changes to drawing code, it is possible\n        # to save to a fixed directory using e.g. `pytest --basetemp=/tmp/svgtest ...`\n        t = self.get_binary_tree()\n        filename = tmp_path / \"tree-draw.svg\"\n        svg = t.draw(path=filename)\n        assert os.path.getsize(filename) > 0\n        with open(filename) as tmp:\n            other_svg = tmp.read()\n        assert svg == other_svg\n\n        filename = tmp_path / \"tree-draw_svg.svg\"\n        svg = t.draw_svg(path=filename)\n        assert os.path.getsize(filename) > 0\n        with open(filename) as tmp:\n            other_svg = tmp.read()\n        self.verify_basic_svg(svg)\n        self.verify_basic_svg(other_svg)\n\n        filename = tmp_path / \"ts-draw_svg.svg\"\n        ts = self.get_simple_ts()\n        svg = ts.draw_svg(path=filename)\n        assert os.path.getsize(filename) > 0\n        with open(filename) as tmp:\n            other_svg = tmp.read()\n        self.verify_basic_svg(svg, num_trees=ts.num_trees)\n        self.verify_basic_svg(other_svg, num_trees=ts.num_trees)\n\n    def test_nonimplemented_base_class(self):\n        ts = self.get_simple_ts()\n        plot = drawing.SvgAxisPlot(\n            ts, (100, 100), {}, \"\", \"dummy-class\", None, True, True\n        )\n        plot.set_spacing()\n        with pytest.raises(NotImplementedError):\n            
plot.draw_x_axis(tick_positions=ts.breakpoints(as_array=True))\n\n    def test_bad_tick_spacing(self):\n        # Integer y_ticks to give auto-generated tick locs is not currently implemented\n        t = self.get_binary_tree()\n        with pytest.raises(TypeError):\n            t.draw_svg(y_axis=True, y_ticks=6)\n        ts = self.get_simple_ts()\n        with pytest.raises(TypeError):\n            ts.draw_svg(y_axis=True, y_ticks=6)\n\n    def test_no_mixed_yscales(self):\n        ts = self.get_simple_ts()\n        with pytest.raises(ValueError, match=\"vary in timescale\"):\n            ts.draw_svg(y_axis=True, max_time=\"tree\")\n\n    def test_draw_defaults(self):\n        t = self.get_binary_tree()\n        svg = t.draw()\n        self.verify_basic_svg(svg)\n        svg = t.draw_svg()\n        self.verify_basic_svg(svg)\n\n    @pytest.mark.parametrize(\"y_axis\", (\"left\", \"right\", True, False))\n    @pytest.mark.parametrize(\"y_label\", (True, False))\n    @pytest.mark.parametrize(\n        \"time_scale\",\n        (\n            \"rank\",\n            \"time\",\n        ),\n    )\n    @pytest.mark.parametrize(\"y_ticks\", ([], [0, 1], None))\n    @pytest.mark.parametrize(\"y_gridlines\", (True, False))\n    def test_draw_svg_y_axis_parameter_combos(\n        self, y_axis, y_label, time_scale, y_ticks, y_gridlines\n    ):\n        t = self.get_binary_tree()\n        svg = t.draw_svg(\n            y_axis=y_axis,\n            y_label=y_label,\n            y_ticks=y_ticks,\n            y_gridlines=y_gridlines,\n            time_scale=time_scale,\n        )\n        self.verify_basic_svg(svg)\n        ts = self.get_simple_ts()\n        svg = ts.draw_svg(\n            y_axis=y_axis,\n            y_label=y_label,\n            y_ticks=y_ticks,\n            y_gridlines=y_gridlines,\n            time_scale=time_scale,\n        )\n        self.verify_basic_svg(svg, width=200 * ts.num_trees)\n\n    def test_draw_multiroot(self):\n        t = 
self.get_multiroot_tree()\n        svg = t.draw()\n        self.verify_basic_svg(svg)\n        svg = t.draw_svg()\n        self.verify_basic_svg(svg)\n\n    def test_draw_mutations_over_roots(self):\n        t = self.get_mutations_over_roots_tree()\n        with pytest.warns(UserWarning, match=\"nodes which are not present\"):\n            svg = t.draw()\n            self.verify_basic_svg(svg)\n        with pytest.warns(UserWarning, match=\"nodes which are not present\"):\n            svg = t.draw_svg()\n            self.verify_basic_svg(svg)\n\n    def test_draw_unary(self):\n        t = self.get_unary_node_tree()\n        svg = t.draw()\n        self.verify_basic_svg(svg)\n        svg = t.draw_svg()\n        self.verify_basic_svg(svg)\n\n    def test_draw_empty(self):\n        t = self.get_empty_tree()\n        with pytest.raises(ValueError):\n            t.draw()\n        with pytest.raises(ValueError):\n            t.draw_svg()\n\n    def test_draw_zero_roots(self):\n        t = self.get_zero_roots_tree()\n        with pytest.raises(ValueError):\n            t.draw()\n        with pytest.raises(ValueError):\n            t.draw_svg()\n\n    def test_draw_zero_edge(self):\n        t = self.get_zero_edge_tree()\n        svg = t.draw()\n        self.verify_basic_svg(svg)\n        svg = t.draw_svg()\n        self.verify_basic_svg(svg)\n\n    def test_mutations_present(self):\n        t = self.get_binary_tree()\n        assert t.tree_sequence.num_mutations > 0\n        svg = t.draw()\n        self.verify_basic_svg(svg)\n        assert svg.count('class=\"mut') == t.tree_sequence.num_mutations\n        svg = t.draw_svg()\n        self.verify_basic_svg(svg)\n        assert svg.count('class=\"mut') == t.tree_sequence.num_mutations\n        svg = t.tree_sequence.draw_svg()\n        self.verify_basic_svg(svg)\n        assert 'class=\"site' in svg\n        assert svg.count('class=\"site') == t.tree_sequence.num_sites\n\n    def test_sites_omitted(self):\n        t = 
self.get_binary_tree()\n        assert t.tree_sequence.num_mutations > 0\n        svg = t.draw(omit_sites=True)\n        self.verify_basic_svg(svg)\n        assert svg.count('class=\"mut') == 0\n        svg = t.draw_svg(omit_sites=True)\n        self.verify_basic_svg(svg)\n        assert svg.count('class=\"mut') == 0\n        svg = t.tree_sequence.draw_svg(omit_sites=True)\n        self.verify_basic_svg(svg)\n        assert svg.count('class=\"mut') == 0\n        assert svg.count('class=\"site') == 0\n\n    def test_width_height(self):\n        t = self.get_binary_tree()\n        w = 123\n        h = 456\n        svg = t.draw(width=w, height=h)\n        self.verify_basic_svg(svg, w, h)\n        svg = t.draw_svg(size=(w, h))\n        self.verify_basic_svg(svg, w, h)\n\n    def test_node_labels(self):\n        t = self.get_binary_tree()\n        labels = {u: \"XXX\" for u in t.nodes()}\n        svg = t.draw(format=\"svg\", node_labels=labels)\n        self.verify_basic_svg(svg)\n        assert svg.count(\"XXX\") == t.tree_sequence.num_nodes\n        svg = t.draw_svg(node_label_attrs={u: {\"text\": labels[u]} for u in t.nodes()})\n        self.verify_basic_svg(svg)\n        assert svg.count(\"XXX\") == t.tree_sequence.num_nodes\n\n    def test_one_node_label(self):\n        t = self.get_binary_tree()\n        labels = {0: \"XXX\"}\n        svg = t.draw(format=\"svg\", node_labels=labels)\n        self.verify_basic_svg(svg)\n        assert svg.count(\"XXX\") == 1\n        svg = t.draw_svg(node_label_attrs={0: {\"text\": \"XXX\"}})\n        self.verify_basic_svg(svg)\n        assert svg.count(\"XXX\") == 1\n\n    def test_no_node_labels(self):\n        t = self.get_binary_tree()\n        labels = {}\n        svg = t.draw(format=\"svg\", node_labels=labels)\n        self.verify_basic_svg(svg)\n        # Can't really test for much here if we don't understand the SVG\n\n    def test_one_node_colour(self):\n        t = self.get_binary_tree()\n        colour = \"rgb(0, 1, 
2)\"\n        colours = {0: colour}\n        svg = t.draw(format=\"svg\", node_colours=colours)\n        self.verify_basic_svg(svg)\n        assert svg.count(f\"fill:{colour}\") == 1\n        svg = t.draw_svg(node_attrs={0: {\"fill\": colour}})\n        self.verify_basic_svg(svg)\n        assert svg.count(f'fill=\"{colour}\"') == 1\n\n    def test_all_nodes_colour(self):\n        t = self.get_binary_tree()\n        colours = {u: f\"rgb({u}, {u}, {u})\" for u in t.nodes()}\n        svg = t.draw(format=\"svg\", node_colours=colours)\n        self.verify_basic_svg(svg)\n        for colour in colours.values():\n            assert svg.count(f\"fill:{colour}\") == 1\n\n        svg = t.draw_svg(node_attrs={u: {\"fill\": colours[u]} for u in t.nodes()})\n        self.verify_basic_svg(svg)\n        assert svg.count(f'fill=\"{colour}\"') == 1\n        for colour in colours.values():\n            assert svg.count(f'fill=\"{colour}\"') == 1\n\n    def test_unplotted_node(self):\n        t = self.get_binary_tree()\n        colour = None\n        colours = {0: colour}\n        svg = t.draw(format=\"svg\", node_colours=colours)\n        svg_no_css = svg[svg.find(\"</style>\") :]\n        assert svg_no_css.count(\"opacity:0\") == 1\n\n    def test_one_edge_colour(self):\n        t = self.get_binary_tree()\n        colour = \"rgb(0, 1, 2)\"\n        colours = {0: colour}\n        svg = t.draw(format=\"svg\", edge_colours=colours)\n        self.verify_basic_svg(svg)\n        assert svg.count(f\"stroke:{colour}\") > 0\n        svg = t.draw_svg(edge_attrs={0: {\"stroke\": colour}})\n        self.verify_basic_svg(svg)\n        assert svg.count(f'stroke=\"{colour}\"') == 1\n\n    def test_one_mutation_label_colour(self):\n        t = self.get_binary_tree()\n        colour = \"rgb(0, 1, 2)\"\n        svg = t.draw_svg(mutation_label_attrs={0: {\"stroke\": colour}})\n        self.verify_basic_svg(svg)\n        assert svg.count(f'stroke=\"{colour}\"') == 1\n\n    def 
test_bad_y_axis(self):\n        t = self.get_binary_tree()\n        for bad_axis in [\"te\", \"asdf\", \"\", [], b\"23\"]:\n            with pytest.raises(ValueError):\n                t.draw_svg(y_axis=bad_axis)\n\n    def test_bad_time_scale(self):\n        t = self.get_binary_tree()\n        for bad_scale in [\"te\", \"asdf\", \"\", [], b\"23\"]:\n            with pytest.raises(ValueError):\n                t.draw_svg(time_scale=bad_scale)\n            with pytest.raises(ValueError):\n                with pytest.warns(FutureWarning):\n                    t.draw_svg(tree_height_scale=bad_scale)\n\n    def test_bad_max_time(self):\n        t = self.get_binary_tree()\n        for bad_height in [\"te\", \"asdf\", \"\", [], b\"23\"]:\n            with pytest.raises(ValueError):\n                t.draw_svg(max_time=bad_height)\n            with pytest.raises(ValueError):\n                with pytest.warns(FutureWarning):\n                    t.draw_svg(max_tree_height=bad_height)\n\n    def test_bad_min_time(self):\n        t = self.get_binary_tree()\n        for bad_min in [\"te\", \"asdf\", \"\", [], b\"23\"]:\n            with pytest.raises(ValueError):\n                t.draw_svg(min_time=bad_min)\n            with pytest.raises(ValueError):\n                with pytest.warns(FutureWarning):\n                    t.draw_svg(max_tree_height=bad_min)\n\n    def test_bad_neg_log_time(self):\n        t = self.get_ts_varying_min_times().at_index(1)\n        assert min(t.time(u) for u in t.nodes()) < 0\n        with pytest.raises(ValueError, match=\"negative times\"):\n            with np.errstate(invalid=\"ignore\"):\n                t.draw_svg(time_scale=\"log_time\")\n\n    def test_time_scale_time_and_max_time(self):\n        ts = msprime.simulate(5, recombination_rate=2, random_seed=2)\n        t = ts.first()\n        # The default should be the same as tree.\n        svg1 = t.draw_svg(max_time=\"tree\")\n        self.verify_basic_svg(svg1)\n        svg2 
= t.draw_svg()\n        assert svg1 == svg2\n        svg3 = t.draw_svg(max_time=\"ts\")\n        assert svg1 != svg3\n        svg4 = t.draw_svg(max_time=max(ts.tables.nodes.time))\n        assert svg3 == svg4\n        with pytest.warns(FutureWarning):\n            svg5 = t.draw_svg(max_tree_height=\"tree\")\n        assert svg5 == svg1\n        svg6 = t.draw_svg(max_time=\"tree\", max_tree_height=\"i should be ignored\")\n        assert svg6 == svg1\n\n    def test_time_scale_rank_and_max_time(self):\n        # Make sure the rank height scale and max_time interact properly.\n        ts = msprime.simulate(5, recombination_rate=2, random_seed=2)\n        t = ts.first()\n        # The default should be the same as tree.\n        svg1 = t.draw_svg(max_time=\"tree\", time_scale=\"rank\", y_axis=True)\n        self.verify_basic_svg(svg1)\n        svg2 = t.draw_svg(time_scale=\"rank\", y_axis=True)\n        assert svg1 == svg2\n        svg3 = t.draw_svg(max_time=\"ts\", time_scale=\"rank\", y_axis=True)\n        assert svg1 != svg3\n        self.verify_basic_svg(svg3)\n        # Numeric max time not supported for rank scale.\n        with pytest.raises(ValueError):\n            t.draw_svg(max_time=2, time_scale=\"rank\", y_axis=True)\n\n    def test_min_tree_time(self):\n        ts = self.get_ts_varying_min_times()\n        t = ts.first()\n        # The default should be the same as tree.\n        svg1 = t.draw_svg(min_time=\"tree\", y_axis=True)\n        self.verify_basic_svg(svg1)\n        svg2 = t.draw_svg(y_axis=True)\n        assert svg1 == svg2\n        svg3 = t.draw_svg(min_time=\"ts\", y_axis=True)\n        assert svg1 != svg3\n        svg4 = t.draw_svg(min_time=min(ts.tables.nodes.time), y_axis=True)\n        assert svg3 == svg4\n\n    def test_min_ts_time(self):\n        ts = self.get_ts_varying_min_times()\n        svg1 = ts.draw_svg(y_axis=True)\n        self.verify_basic_svg(svg1, width=200 * ts.num_trees)\n        svg2 = ts.draw_svg(min_time=\"ts\", 
y_axis=True)\n        assert svg1 == svg2\n        with pytest.raises(ValueError, match=\"vary in timescale\"):\n            ts.draw_svg(min_time=\"tree\", y_axis=True)\n        svg3 = ts.draw_svg(min_time=min(ts.tables.nodes.time), y_axis=True)\n        assert svg2 == svg3\n\n    def test_numeric_max_time_with_mutations_over_roots(self):\n        max_time_value = 0.1  # Use a numeric max_time value\n        params = {\"y_ticks\": [1.23], \"y_axis\": True}\n        test_draw = {\n            \"svg_nomin\": {},\n            \"svg_min\": {\"max_time\": max_time_value},\n            \"svg_log_min\": {\"max_time\": max_time_value, \"time_scale\": \"log_time\"},\n        }\n\n        t = self.get_mutations_over_roots_tree()\n        assert t.tree_sequence.max_time > max_time_value\n\n        for name, extra in test_draw.items():\n            with pytest.warns(\n                UserWarning, match=\"Mutations .* are above nodes which are not present\"\n            ):\n                svg = t.draw_svg(**{**params, **extra})\n            assert svg.count('class=\"tick\"') == 1\n            m = re.search(r'<g class=\"tick\" transform=\"translate\\((.*?)\\)\">', svg)\n            assert m is not None\n            translate_coords = [float(x) for x in m.group(1).split()]\n            if name == \"svg_nomin\":\n                # single tick within the plot region\n                assert translate_coords[1] > 0\n            else:\n                assert translate_coords[1] < 0\n\n    #\n    # TODO: update the tests below here to check the new SVG based interface.\n    #\n    def test_all_edges_colour(self):\n        t = self.get_binary_tree()\n        colours = {u: \"rgb({u},255,{u})\".format(u=u) for u in t.nodes() if u != t.root}\n        svg = t.draw(format=\"svg\", edge_colours=colours)\n        self.verify_basic_svg(svg)\n        for colour in colours.values():\n            assert svg.count(f\"stroke:{colour}\") > 0\n\n    def test_unplotted_edge(self):\n        t = 
self.get_binary_tree()\n        colour = None\n        colours = {0: colour}\n        svg = t.draw(format=\"svg\", edge_colours=colours)\n        self.verify_basic_svg(svg)\n        svg_no_css = svg[svg.find(\"</style>\") :]\n        assert svg_no_css.count(\"opacity:0\") == 1\n\n    def test_mutations_unknown_time(self):\n        ts = self.get_simple_ts(use_mutation_times=True)\n        svg = ts.draw_svg()\n        self.verify_basic_svg(svg, width=200 * ts.num_trees)\n        assert \"unknown_time\" not in svg\n        ts = self.get_simple_ts(use_mutation_times=False)\n        svg = ts.draw_svg()\n        self.verify_basic_svg(svg, width=200 * ts.num_trees)\n        assert svg.count(\"unknown_time\") == ts.num_mutations\n\n    def test_mutation_labels(self):\n        t = self.get_binary_tree()\n        labels = {u.id: \"XXX\" for u in t.mutations()}\n        svg = t.draw(format=\"svg\", mutation_labels=labels)\n        self.verify_basic_svg(svg)\n        assert svg.count(\"XXX\") == t.num_mutations\n\n    def test_one_mutation_label(self):\n        t = self.get_binary_tree()\n        labels = {0: \"XXX\"}\n        svg = t.draw(format=\"svg\", mutation_labels=labels)\n        self.verify_basic_svg(svg)\n        assert svg.count(\"XXX\") == 1\n\n    def test_no_mutation_labels(self):\n        t = self.get_binary_tree()\n        labels = {}\n        svg = t.draw(format=\"svg\", mutation_labels=labels)\n        self.verify_basic_svg(svg)\n        # Can't really test for much here if we don't understand the SVG\n\n    def test_one_mutation_colour(self):\n        t = self.get_binary_tree()\n        colour = \"rgb(0, 1, 2)\"\n        colours = {0: colour}\n        svg = t.draw(format=\"svg\", mutation_colours=colours)\n        self.verify_basic_svg(svg)\n        assert svg.count(f\"fill:{colour}\") == 1\n\n    def test_all_mutations_colour(self):\n        t = self.get_binary_tree()\n        colours = {mut.id: f\"rgb({mut.id}, {mut.id}, {mut.id})\" for mut in 
t.mutations()}\n        svg = t.draw(format=\"svg\", mutation_colours=colours)\n        self.verify_basic_svg(svg)\n        for colour in colours.values():\n            assert svg.count(f\"fill:{colour}\") == 1\n\n    def test_unplotted_mutation(self):\n        t = self.get_binary_tree()\n        colour = None\n        colours = {0: colour}\n        svg = t.draw(format=\"svg\", mutation_colours=colours)\n        self.verify_basic_svg(svg)\n        svg_no_css = svg[svg.find(\"</style>\") :]\n        assert svg_no_css.count(\"fill-opacity:0\") == 1\n\n    @pytest.mark.parametrize(\"all_muts\", [False, True])\n    @pytest.mark.parametrize(\"x_axis\", [False, True])\n    def test_extra_mutations(self, all_muts, x_axis):\n        # The simple_ts has 2 mutations on an edge which spans the whole ts\n        # One mut is within tree 1, the other within tree 3\n        ts = self.get_simple_ts()\n        extra_mut_copies = 0\n        if all_muts:\n            extra_mut_copies = 2 if x_axis else 1\n        extra_right = ts.at_index(1)\n        svg = extra_right.draw_svg(all_edge_mutations=all_muts, x_axis=x_axis)\n        self.verify_basic_svg(svg)\n        svg_no_css = svg[svg.find(\"</style>\") :]\n        assert svg_no_css.count(\"extra\") == 1 * extra_mut_copies\n\n        extra_right_and_left = ts.at_index(2)\n        svg = extra_right_and_left.draw_svg(all_edge_mutations=all_muts, x_axis=x_axis)\n        self.verify_basic_svg(svg)\n        svg_no_css = svg[svg.find(\"</style>\") :]\n        assert svg_no_css.count(\"extra\") == 2 * extra_mut_copies\n\n        extra_left = ts.at_index(3)\n        svg = extra_left.draw_svg(all_edge_mutations=all_muts, x_axis=x_axis)\n        self.verify_basic_svg(svg)\n        svg_no_css = svg[svg.find(\"</style>\") :]\n        assert svg_no_css.count(\"extra\") == 1 * extra_mut_copies\n\n    def test_max_time(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time\n        0   1           0\n        1   1 
          0\n        2   1           0\n        3   0           1\n        4   0           2\n        5   0           3\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       1       5       2\n        0       1       5       3\n        1       2       4       2\n        1       2       4       3\n        0       2       3       0\n        0       2       3       1\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes, edges, strict=False)\n\n        svg1 = ts.at_index(0).draw()\n        svg2 = ts.at_index(1).draw()\n        # if not scaled to ts, the edge above node 0 is of a different length in both\n        # trees, because the root is at a different height. We expect a group like\n        # <path class=\"edge\" d=\"M 0 0 V -46 H 22.5\" /><text>0</text>\n        str_pos = svg1.find(\">0<\")\n        snippet1 = svg1[svg1.rfind(\"edge\", 0, str_pos) : str_pos]\n        str_pos = svg2.find(\">0<\")\n        snippet2 = svg2[svg2.rfind(\"edge\", 0, str_pos) : str_pos]\n        assert snippet1 != snippet2\n\n        svg1 = ts.at_index(0).draw(max_time=\"ts\")\n        svg2 = ts.at_index(1).draw(max_time=\"ts\")\n        with pytest.warns(FutureWarning):\n            svg3 = ts.at_index(1).draw(max_tree_height=\"ts\")\n        assert svg3 == svg2\n        # when scaled, node 3 should be at the *same* height in both trees, so the edge\n        # definition should be the same\n        self.verify_basic_svg(svg1)\n        self.verify_basic_svg(svg2)\n        str_pos = svg1.find(\">0<\")\n        snippet1 = svg1[svg1.rfind(\"edge\", 0, str_pos) : str_pos]\n        str_pos = svg2.find(\">0<\")\n        snippet2 = svg2[svg2.rfind(\"edge\", 0, str_pos) : str_pos]\n        assert snippet1 == snippet2\n\n    def test_min_time(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time\n        0   0           -1.11\n        1   1           2.22\n        2   1     
      2.22\n        3   0           3.33\n        4   0           4.44\n        5   0           5.55\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       1       5       2\n        0       1       5       3\n        1       2       4       2\n        1       2       4       3\n        0       1       3       0\n        0       2       3       1\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes, edges, strict=False)\n        svg1a = ts.at_index(0).draw_svg(y_axis=True)\n        svg1b = ts.at_index(0).draw_svg(y_axis=True, min_time=\"ts\")\n        svg2a = ts.at_index(1).draw_svg(y_axis=True)\n        svg2b = ts.at_index(1).draw_svg(y_axis=True, min_time=\"ts\")\n        # axis should start at -1.11\n        assert svg1a == svg1b\n        assert \">-1.11<\" in svg1a\n        # 2nd tree should be different depending on whether min_time is \"tree\" or \"ts\"\n        assert svg2a != svg2b\n        assert \">-1.11<\" not in svg2a\n        assert \">-1.11<\" not in svg2b\n\n    def test_draw_sized_tree(self):\n        tree = self.get_binary_tree()\n        svg = tree.draw_svg(size=(600, 400))\n        self.verify_basic_svg(svg, width=600, height=400)\n\n    def test_canvas_size_tree(self):\n        tree = self.get_binary_tree()\n        svg1 = tree.draw_svg(size=(200, 200))\n        svg2 = tree.draw_svg(size=(200, 200), canvas_size=(700, 500))\n        self.verify_basic_svg(svg1, width=200, height=200)\n        self.verify_basic_svg(svg2, width=700, height=500)\n        # height and width are specified in the starting <svg> tag\n        assert svg1.startswith(\"<svg\")\n        assert svg2.startswith(\"<svg\")\n        # after the close of the tag, the two strings should be the same\n        assert svg1[svg1.find(\">\") :] == svg2[svg2.find(\">\") :]\n\n    def test_draw_bad_sized_treebox(self):\n        tree = self.get_binary_tree()\n        with pytest.raises(ValueError, 
match=\"too small to fit\"):\n            # Too small for plotbox\n            tree.draw_svg(size=(20, 20))\n\n    def test_draw_bad_sized_tree(self):\n        tree = self.get_binary_tree()\n        with pytest.raises(ValueError, match=\"too small to allow space\"):\n            # Too small for standard-sized labels on tree\n            tree.draw_svg(size=(50, 50))\n\n    def test_draw_simple_ts(self):\n        ts = msprime.simulate(5, recombination_rate=1, random_seed=1)\n        svg = ts.draw_svg()\n        self.verify_basic_svg(svg, width=200 * ts.num_trees)\n\n    def test_draw_integer_breaks_ts(self):\n        ts = msprime.sim_ancestry(\n            5, sequence_length=10, recombination_rate=0.05, random_seed=1\n        )\n        assert ts.num_trees > 2\n        svg = ts.draw_svg()\n        self.verify_basic_svg(svg, width=200 * ts.num_trees)\n        axis_pos = svg.find('class=\"x-axis\"')\n        for b in ts.breakpoints():\n            assert b == round(b)\n            assert svg.find(f\">{b:.0f}<\", axis_pos) != -1\n\n    def test_draw_integer_times_ts(self):\n        ts = msprime.sim_ancestry(\n            5, population_size=5, sequence_length=10, model=\"dtwf\", random_seed=1\n        )\n        svg = ts.draw_svg(y_axis=True)\n        self.verify_basic_svg(svg, width=200 * ts.num_trees)\n        axis_pos = svg.find('class=\"y-axis\"')\n        for t in ts.tables.nodes.time:\n            assert t == round(t)\n            assert svg.find(f\">{t:.0f}<\", axis_pos) != -1\n\n    def test_draw_integer_times_tree(self):\n        ts = msprime.sim_ancestry(\n            5, population_size=5, sequence_length=10, model=\"dtwf\", random_seed=1\n        )\n        svg = ts.first().draw_svg(y_axis=True)\n        self.verify_basic_svg(svg, width=200 * ts.num_trees)\n        axis_pos = svg.find('class=\"y-axis\"')\n        for t in ts.tables.nodes.time:\n            assert t == round(t)\n            assert svg.find(f\">{t:.0f}<\", axis_pos) != -1\n\n    def 
test_draw_even_height_ts(self):\n        ts = msprime.simulate(5, recombination_rate=1, random_seed=1)\n        svg = ts.draw_svg(max_time=\"tree\")\n        self.verify_basic_svg(svg, width=200 * ts.num_trees)\n        with pytest.warns(FutureWarning):\n            svg = ts.draw_svg(max_tree_height=\"tree\")\n        self.verify_basic_svg(svg, width=200 * ts.num_trees)\n\n    def test_draw_sized_ts(self):\n        ts = msprime.simulate(5, recombination_rate=1, random_seed=1)\n        svg = ts.draw_svg(size=(600, 400))\n        self.verify_basic_svg(svg, width=600, height=400)\n\n    def test_canvas_size_ts(self):\n        ts = msprime.simulate(5, recombination_rate=1, random_seed=1)\n        svg1 = ts.draw_svg(size=(600, 400))\n        svg2 = ts.draw_svg(size=(600, 400), canvas_size=(1000, 500))\n        self.verify_basic_svg(svg1, width=600, height=400)\n        self.verify_basic_svg(svg2, width=1000, height=500)\n        # height and width are specified in the starting <svg> tag\n        assert svg1.startswith(\"<svg\")\n        assert svg2.startswith(\"<svg\")\n        # after the close of the tag, the two strings should be the same\n        assert svg1[svg1.find(\">\") :] == svg2[svg2.find(\">\") :]\n\n    def test_time_scale(self):\n        ts = msprime.simulate(4, random_seed=2)\n        svg = ts.draw_svg(time_scale=\"time\")\n        self.verify_basic_svg(svg)\n        svg = ts.draw_svg(time_scale=\"log_time\")\n        self.verify_basic_svg(svg)\n        with pytest.warns(FutureWarning):\n            svg2 = ts.draw_svg(tree_height_scale=\"log_time\")\n        assert svg2 == svg\n        svg = ts.draw_svg(time_scale=\"rank\")\n        self.verify_basic_svg(svg)\n        svg3 = ts.draw_svg(time_scale=\"rank\", tree_height_scale=\"ignore me please\")\n        assert svg3 == svg\n        for bad_scale in [0, \"\", \"NOT A SCALE\"]:\n            with pytest.raises(ValueError):\n                ts.draw_svg(time_scale=bad_scale)\n            with 
pytest.raises(ValueError):\n                with pytest.warns(FutureWarning):\n                    ts.draw_svg(tree_height_scale=bad_scale)\n\n    def test_x_scale(self):\n        ts = msprime.simulate(4, random_seed=2)\n        svg = ts.draw_svg(x_scale=\"physical\")\n        self.verify_basic_svg(svg)\n        svg = ts.draw_svg(x_scale=\"treewise\")\n        self.verify_basic_svg(svg)\n\n    def test_bad_x_scale(self):\n        ts = msprime.simulate(4, random_seed=2)\n        for bad_x_scale in [\"te\", \"asdf\", \"\", [], b\"23\"]:\n            with pytest.raises(ValueError):\n                ts.draw_svg(x_scale=bad_x_scale)\n\n    def test_x_axis(self):\n        tree = msprime.simulate(4, random_seed=2).first()\n        svg = tree.draw_svg(x_axis=True)\n        svg_no_css = svg[svg.find(\"</style>\") :]\n        assert \"Genome position\" in svg_no_css\n        assert svg_no_css.count(\"axes\") == 1\n        assert svg_no_css.count(\"x-axis\") == 1\n        assert svg_no_css.count(\"y-axis\") == 0\n\n    def test_y_axis(self):\n        tree = self.get_simple_ts().first()\n        for hscale, label in [\n            (None, \"Time\"),\n            (\"time\", \"Time\"),\n            (\"log_time\", \"Time\"),\n            (\"rank\", \"Node time\"),\n        ]:\n            svg = tree.draw_svg(y_axis=True, time_scale=hscale)\n            if hscale is not None:\n                with pytest.warns(FutureWarning):\n                    svg2 = tree.draw_svg(y_axis=True, tree_height_scale=hscale)\n                assert svg2 == svg\n                svg3 = tree.draw_svg(\n                    y_axis=True, time_scale=hscale, tree_height_scale=\"ignore me please\"\n                )\n                assert svg3 == svg\n            svg_no_css = svg[svg.find(\"</style>\") :]\n            assert label in svg_no_css\n            assert svg_no_css.count(\"axes\") == 1\n            assert svg_no_css.count(\"x-axis\") == 0\n            assert svg_no_css.count(\"y-axis\") == 1\n       
     assert svg_no_css.count(\"ticks\") == 1\n            assert svg_no_css.count('class=\"tick\"') == len(\n                {tree.time(u) for u in tree.nodes()}\n            )\n\n    def test_y_axis_noticks(self):\n        tree = msprime.simulate(4, random_seed=2).first()\n        svg = tree.draw_svg(y_axis=True, y_label=\"Time\", y_ticks=[])\n        svg_no_css = svg[svg.find(\"</style>\") :]\n        assert svg_no_css.count(\"axes\") == 1\n        assert svg_no_css.count(\"x-axis\") == 0\n        assert svg_no_css.count(\"y-axis\") == 1\n        assert svg_no_css.count('\"tick\"') == 0\n\n    def test_y_axis_tick_warning(self, caplog):\n        tree = msprime.simulate(4, random_seed=2).first()\n        upper = int(tree.time(tree.root))\n        with caplog.at_level(logging.WARNING):\n            tree.draw_svg(\n                y_axis=True,\n                y_label=\"Time\",\n                y_ticks={upper + 100: \"above\", upper / 3: \"inside\"},\n            )\n            assert (\n                f\"Ticks {{{upper + 100}: 'above'}} lie outside the plotted axis\"\n                in caplog.text\n            )\n        with caplog.at_level(logging.WARNING):\n            tree.draw_svg(\n                y_axis=True, y_label=\"Time\", y_ticks={upper / 2: \"inside\", -1: \"below\"}\n            )\n            assert \"Ticks {-1: 'below'} lie outside the plotted axis\" in caplog.text\n\n    def test_symbol_size(self):\n        tree = msprime.simulate(4, random_seed=2, mutation_rate=8).first()\n        sz = 24\n        svg = tree.draw_svg(symbol_size=sz)\n        svg_no_css = svg[svg.find(\"</style>\") :]\n        num_mutations = len([_ for _ in tree.mutations()])\n        num_nodes = len([_ for _ in tree.nodes()])\n        # Squares have 'height=\"sz\" width=\"sz\"'\n        assert svg_no_css.count(f'\"{sz}\"') == tree.num_samples() * 2\n        # Circles define a radius like 'r=\"sz/2\"'\n        assert svg_no_css.count(f'r=\"{sz / 2:g}\"') == num_nodes - 
tree.num_samples()\n        # Mutations draw a line on the cross using 'l sz,sz'\n        assert svg_no_css.count(f\"l {sz},{sz} \") == num_mutations\n\n    def test_no_edges_invalid(self):\n        full_ts = msprime.simulate(10, random_seed=2)\n        tables = full_ts.dump_tables()\n        tables.edges.clear()\n        ts = tables.tree_sequence()\n        with pytest.raises(ValueError, match=\"To plot an empty tree sequence\"):\n            ts.draw_svg()\n        with pytest.raises(ValueError, match=\"To plot an empty tree sequence\"):\n            ts.draw_svg(x_lim=[None, 1])\n        with pytest.raises(ValueError, match=\"To plot an empty tree sequence\"):\n            ts.draw_svg(x_lim=[0, None])\n\n    def test_no_edges_show_empty(self):\n        # Should be possible to print empty trees if xlim=[0, seq_len]\n        full_ts = msprime.simulate(10, random_seed=2)\n        tables = full_ts.dump_tables()\n        tables.edges.clear()\n        ts = tables.tree_sequence()\n        for time_scale in (\"time\", \"log_time\", \"rank\"):\n            # SVG should just be a row of 10 sample nodes\n            svg = ts.draw_svg(time_scale=time_scale, x_lim=[0, ts.sequence_length])\n            self.verify_basic_svg(svg)\n            assert svg.count(\"<rect\") == 10  # Sample nodes are rectangles\n            assert svg.count('<path class=\"edge') == 0\n        svg = ts.draw_svg(force_root_branch=True, x_lim=[0, ts.sequence_length])\n        self.verify_basic_svg(svg)\n        assert svg.count(\"<rect\") == 10\n        assert svg.count('<path class=\"edge') == 10\n\n    def test_no_edges_with_muts(self):\n        # If there is a mutation above a sample, the root branches should be there too\n        # And we should be able to plot the \"empty\" tree because the region still has\n        # mutations\n        full_ts = msprime.simulate(10, mutation_rate=1, random_seed=2)\n        tables = full_ts.dump_tables()\n        tables.edges.clear()\n        ts = 
tables.tree_sequence().simplify()\n        assert ts.num_mutations > 0  # Should have some singletons\n        svg = ts.draw_svg()\n        self.verify_basic_svg(svg)\n        assert svg.count(\"<rect\") == 10\n        assert svg.count('<path class=\"edge') == 10\n        assert svg.count('<path class=\"sym\"') == ts.num_mutations\n        assert svg.count('<line class=\"sym\"') == ts.num_sites\n\n    def test_empty_flanks(self):\n        ts = msprime.simulate(10, random_seed=2, recombination_rate=0.1)\n        assert ts.num_trees == 2\n        assert 0.2 < ts.first().interval.right < 0.8\n        degree_2_ts = ts.keep_intervals([[0.2, 0.8]])\n        svg = degree_2_ts.draw_svg(y_axis=False)\n        assert svg.count('class=\"tick\"') == 3\n        assert svg.count('<text class=\"lab\">0.2') == 1\n        assert svg.count('<text class=\"lab\">0.8') == 1\n        degree_1_ts = ts.keep_intervals([[0.05, 0.15]])\n        svg = degree_1_ts.draw_svg(y_axis=False)\n        assert svg.count('class=\"tick\"') == 2\n        assert svg.count('<text class=\"lab\">0.05') == 1\n        assert svg.count('<text class=\"lab\">0.15') == 1\n\n    def test_bad_xlim(self):\n        ts = msprime.simulate(10, random_seed=2)\n        svg = ts.draw_svg(x_lim=[None, None])\n        self.verify_basic_svg(svg)\n        with pytest.raises(ValueError, match=\"must be a list of length 2\"):\n            ts.draw_svg(x_lim=[0])\n        with pytest.raises(TypeError, match=\"must be numeric\"):\n            ts.draw_svg(x_lim=[0, \"a\"])\n        with pytest.raises(ValueError, match=\"must be less than\"):\n            ts.draw_svg(x_lim=[0.5, 0.5])\n        with pytest.raises(ValueError, match=\"cannot be negative\"):\n            ts.draw_svg(x_lim=[-1, 0])\n        with pytest.raises(ValueError, match=\"cannot be greater than\"):\n            ts.draw_svg(x_lim=[0, ts.sequence_length * 2])\n\n    def test_xlim_on_empty(self):\n        full_ts = msprime.simulate(10, random_seed=2)\n        tables = 
full_ts.dump_tables()\n        tables.edges.clear()\n        ts = tables.tree_sequence()\n        ts.draw_svg(x_lim=[0, ts.sequence_length])\n        with pytest.raises(ValueError, match=\"whole region is empty\"):\n            ts.draw_svg(x_lim=[0, 0.9])\n\n    def test_xlim_edge_cases(self):\n        tables = msprime.simulate(10, random_seed=2, mutation_rate=10).dump_tables()\n        # Delete edges but keep mutations\n        old_sites = tables.sites.copy()\n        tables.keep_intervals([[0.4, 0.6]], simplify=False)\n        tables.sites.set_columns(**old_sites.asdict())\n        ts = tables.tree_sequence().simplify(filter_sites=False)\n        assert np.any(ts.tables.sites.position < 0.4)\n        assert np.any(ts.tables.sites.position > 0.6)\n        for x_lim in [None, (0, 1), (None, 1), (0, None)]:\n            # All have sites in the deleted region, so should have all trees\n            svg = ts.draw_svg(x_lim=x_lim)\n            self.verify_basic_svg(svg, width=200 * 3)\n            assert svg.count('class=\"tree ') == 3\n        tables.sites.clear()\n        tables.mutations.clear()\n        ts = tables.tree_sequence().simplify()\n        for x_lim, n_trees in {None: 1, (0, 1): 3, (None, 1): 2, (0, None): 2}.items():\n            # No sites in the deleted region, so x_lim determines # plotted trees\n            svg = ts.draw_svg(x_lim=x_lim)\n            self.verify_basic_svg(svg, width=200 * n_trees)\n            assert svg.count('class=\"tree ') == n_trees\n\n    def test_xlim_maintains_tree_ids(self):\n        ts = self.get_simple_ts()\n        breaks = ts.breakpoints(as_array=True)\n        svg = ts.draw_svg(x_lim=[breaks[1], breaks[4]])\n        assert \"t0\" not in svg\n        assert \"t4\" not in svg\n        svg = ts.draw_svg(x_lim=[np.nextafter(breaks[1], 0), np.nextafter(breaks[4], 1)])\n        assert \"t0\" in svg\n        assert \"t4\" in svg\n\n    def test_xlim_maintains_site_and_mutation_ids(self):\n        ts = self.get_simple_ts()\n    
    breaks = ts.breakpoints(as_array=True)\n        tree_svg = ts.at_index(1).draw_svg(x_axis=True)\n\n        ts_svg = ts.draw_svg(x_lim=[breaks[1], breaks[2]])\n        assert re.findall(r\">\\d+<\", tree_svg) == re.findall(r\">\\d+<\", ts_svg)  # labels\n        for identifier in [\"s\", \"m\"]:\n            tree_ids = re.findall(rf\"{identifier}\\d+\", tree_svg)\n            assert len(tree_ids) > 0\n            ts_ids = re.findall(rf\"{identifier}\\d+\", ts_svg)\n            assert tree_ids == ts_ids\n\n        site_pos0_in_tree1 = next(ts.at_index(1).sites()).position\n        ts_svg = ts.draw_svg(x_lim=[site_pos0_in_tree1, breaks[2]])\n        assert re.findall(r\">\\d+<\", tree_svg) == re.findall(r\">\\d+<\", ts_svg)  # labels\n        for identifier in [\"s\", \"m\"]:\n            tree_ids = re.findall(rf\"{identifier}\\d+\", tree_svg)\n            ts_ids = re.findall(rf\"{identifier}\\d+\", ts_svg)\n            assert tree_ids == ts_ids\n\n        ts_svg = ts.draw_svg(x_lim=[np.nextafter(site_pos0_in_tree1, 1), breaks[2]])\n        assert re.findall(r\">\\d+<\", tree_svg) != re.findall(r\">\\d+<\", ts_svg)  # labels\n        for identifier in [\"s\", \"m\"]:\n            tree_ids = re.findall(rf\"{identifier}\\d+\", tree_svg)\n            ts_ids = re.findall(rf\"{identifier}\\d+\", ts_svg)\n            assert tree_ids != ts_ids\n\n    def test_xlim_with_ranks(self):\n        ts = self.get_simple_ts()\n        xlim = ts.breakpoints(as_array=True)[:2]  # plot first tree only\n        svg = ts.draw_svg(x_lim=xlim, time_scale=\"rank\", y_axis=True, y_gridlines=True)\n        # excluding \".grid\" in the stylesheet, there should be only 4 y-axis steps\n        # for a 4 tip tree with all samples at 0: simplest check is to count gridlines\n        assert len(re.findall(r\"[^.]grid\", svg)) == 4\n\n    def test_half_truncated(self):\n        ts = msprime.simulate(10, random_seed=2)\n        ts = ts.delete_intervals([[0.4, 0.6]])\n        svg = 
ts.draw_svg(x_lim=(0.5, 0.7), y_axis=False)\n        # Only one tree and one tick shown (leftmost is an empty region)\n        assert svg.count('class=\"tree ') == 1\n        assert svg.count('class=\"tick\"') == 1\n\n    def test_tree_root_branch(self):\n        # in the simple_ts, there are root mutations in the first tree but not the last\n        ts = self.get_simple_ts()\n        tree_with_root_mutations = ts.at_index(0)\n        root1 = tree_with_root_mutations.root\n        tree_without_root_mutations = ts.at_index(-1)\n        root2 = tree_without_root_mutations.root\n        svg1 = tree_with_root_mutations.draw_svg()\n        svg2a = tree_without_root_mutations.draw_svg()\n        svg2b = tree_without_root_mutations.draw_svg(force_root_branch=True)\n        self.verify_basic_svg(svg1)\n        self.verify_basic_svg(svg2a)\n        self.verify_basic_svg(svg2b)\n        # Last <path> should be the root branch, if it exists\n        edge_str = '<path class=\"edge root\" d='\n        str_pos1 = svg1.rfind(edge_str, 0, svg1.find(f\">{root1}<\"))\n        assert edge_str not in svg2a\n        str_pos2b = svg2b.rfind(edge_str, 0, svg2b.find(f\">{root2}<\"))\n        snippet1 = svg1[str_pos1 + len(edge_str) : svg1.find(\">\", str_pos1)]\n        snippet2b = svg2b[str_pos2b + len(edge_str) : svg2b.find(\">\", str_pos2b)]\n        assert snippet1.startswith('\"M 0 0')\n        assert snippet2b.startswith('\"M 0 0')\n        assert \"H 0\" in snippet1\n        assert \"H 0\" in snippet2b\n\n    def test_debug_box(self):\n        ts = self.get_simple_ts()\n        svg = ts.first().draw_svg(debug_box=True)\n        self.verify_basic_svg(svg)\n        assert svg.count(\"outer_plotbox\") == 1\n        assert svg.count(\"inner_plotbox\") == 1\n        svg = ts.draw_svg(debug_box=True)\n        self.verify_basic_svg(svg, width=200 * ts.num_trees)\n        assert svg.count(\"outer_plotbox\") == ts.num_trees + 1\n        assert svg.count(\"inner_plotbox\") == ts.num_trees + 
1\n\n    @pytest.mark.parametrize(\"max_trees\", [-1, 0, 1])\n    def test_bad_max_num_trees(self, max_trees):\n        ts = self.get_simple_ts()\n        with pytest.raises(ValueError, match=\"at least 2\"):\n            ts.draw_svg(max_num_trees=max_trees)\n\n    @pytest.mark.parametrize(\"max_trees\", [2, 4, 9])\n    def test_max_num_trees(self, max_trees):\n        ts = msprime.sim_ancestry(\n            3, sequence_length=100, recombination_rate=0.1, random_seed=1\n        )\n        ts = msprime.sim_mutations(ts, rate=0.1, random_seed=1)\n        assert ts.num_trees > 10\n        num_sites = 0\n        num_unplotted_sites = 0\n        svg = ts.draw_svg(max_num_trees=max_trees)\n        for tree in ts.trees():\n            if (\n                tree.index < (max_trees + 1) // 2\n                or ts.num_trees - tree.index <= max_trees // 2\n            ):\n                num_sites += tree.num_sites\n                assert re.search(rf\"t{tree.index}[^\\d]\", svg) is not None\n            else:\n                assert re.search(rf\"t{tree.index}[^\\d]\", svg) is None\n                num_unplotted_sites += tree.num_sites\n        assert num_unplotted_sites > 0\n        site_strings_in_stylesheet = svg.count(\".site\")\n        assert svg.count(\"site\") - site_strings_in_stylesheet == num_sites\n        self.verify_basic_svg(svg, width=200 * (max_trees + 1))\n\n    def test_edge_ids(self):\n        ts = self.get_simple_ts()\n        for tree in ts.trees():\n            svg = tree.draw_svg()\n            mut_nodes = {m.node for m in tree.mutations()}\n            assert svg.count('\"edge root\"') == (1 if tree.root in mut_nodes else 0)\n            edges = {tree.edge(u) for u in tree.nodes()}\n            for e in range(tree.num_edges):\n                assert svg.count(f'\"edge e{e}\"') == (1 if e in edges else 0)\n\n    def test_draw_tree_symbol_titles(self):\n        tree = self.get_binary_tree()\n        assert tree.tree_sequence.num_mutations > 0\n        
svg = tree.draw_svg(\n            node_titles={u: f\"NODE{u}$\" for u in tree.nodes()},\n            mutation_titles={m.id: f\"MUT{m.id}$\" for m in tree.mutations()},\n        )\n        for u in tree.nodes():\n            assert svg.count(f\"<title>NODE{u}$</title>\") == 1\n        for m in tree.mutations():\n            assert svg.count(f\"<title>MUT{m.id}$</title>\") == 1\n        self.verify_basic_svg(svg)\n\n    def test_nodraw_x_axis(self):\n        ts = msprime.sim_ancestry(\n            1, sequence_length=100, recombination_rate=0.1, random_seed=1\n        )\n        svg = ts.first().draw_svg(x_axis=False, y_axis=False)\n        assert 'class=\"x-axis\"' not in svg\n\n    def test_x_regions_ts(self):\n        ts = msprime.sim_ancestry(\n            3, sequence_length=100, recombination_rate=0.1, random_seed=1\n        )\n        regions = [(0, 10), (9, 20), (50, 90)]\n        svg = ts.draw_svg(x_regions={r: f\"reg{'ABC'[i]}\" for i, r in enumerate(regions)})\n        self.verify_basic_svg(svg, width=200 * ts.num_trees)\n        assert svg.count(\"x-regions\") == 2  # one in stylesheet, one in svg\n        assert svg.count(\"r0\") == 1\n        assert svg.count(\"r1\") == 1\n        assert svg.count(\"r2\") == 1\n        assert svg.count(\"r3\") == 0\n        assert svg.count(\"regA\") == 1\n        assert svg.count(\"regB\") == 1\n        assert svg.count(\"regC\") == 1\n        # \"rect\" string present for 6 samples in each tree + 3 regions + 1 in stylesheet\n        assert svg.count(\"rect\") == 6 * ts.num_trees + 3 + 1\n\n    def test_x_regions_tree(self):\n        ts = msprime.sim_ancestry(\n            3, sequence_length=100, recombination_rate=0.1, random_seed=1\n        )\n        svg = ts.first().draw_svg(x_regions={(0, 10): \"💩\"})\n        assert svg.count(\"💩\") == 0\n        svg = ts.first().draw_svg(x_axis=True, x_regions={(0, 10): \"💩\"})\n        assert svg.count(\"💩\") == 1\n\n    def test_unsupported_x_regions(self):\n        ts = 
msprime.sim_ancestry(\n            1, sequence_length=100, recombination_rate=0.1, random_seed=1\n        )\n        ts.draw_svg(x_scale=\"treewise\")\n        with pytest.raises(ValueError, match=\"not supported for treewise\"):\n            ts.draw_svg(x_scale=\"treewise\", x_regions={(0, 1): \"bad\"})\n\n    def test_bad_x_regions(self):\n        ts = msprime.sim_ancestry(\n            1, sequence_length=100, recombination_rate=0.1, random_seed=1\n        )\n        with pytest.raises(ValueError, match=\"Invalid coordinates\"):\n            ts.draw_svg(x_regions={(-1, 1): \"bad\"})\n        with pytest.raises(ValueError, match=\"Invalid coordinates\"):\n            ts.draw_svg(x_regions={(0, ts.sequence_length + 1): \"bad\"})\n        with pytest.raises(ValueError, match=\"Invalid coordinates\"):\n            ts.draw_svg(x_regions={(1, 0): \"bad\"})\n\n    def test_title(self):\n        ts = msprime.sim_ancestry(1, sequence_length=100, random_seed=1)\n        svg = ts.draw_svg(title=\"This is a title\")\n        assert \"This is a title\" in svg\n        svg = ts.first().draw_svg(title=\"This is another title\")\n        assert \"This is another title\" in svg\n\n    def test_bad_ts_order(self):\n        ts = msprime.sim_ancestry(1, sequence_length=100, random_seed=1)\n        with pytest.raises(ValueError, match=\"Unknown display order\"):\n            ts.draw_svg(order=(ts.first().nodes(order=\"minlex_postorder\")))\n\n    def test_good_tree_order(self):\n        ts = msprime.sim_ancestry(1, sequence_length=100, random_seed=1)\n        ts.first().draw_svg(order=(ts.first().nodes(order=\"minlex_postorder\")))\n\n    def test_nonpostorder_tree_order(self):\n        tree = tskit.Tree.generate_balanced(10)\n        with pytest.raises(ValueError, match=\"must be passed in postorder\"):\n            tree.draw_svg(order=(tree.nodes(order=\"preorder\")))\n\n    def test_only_subset_nodes_in_rank(self, caplog):\n        tree = tskit.Tree.generate_comb(100)\n        # 
Only show the last few tips of the comb. We should only use the ranks\n        # from those tip times, so ticks > 5 should raise a warning\n        with caplog.at_level(logging.WARNING):\n            tree.draw_svg(\n                order=tree.nodes(root=105, order=\"minlex_postorder\"),\n                time_scale=\"rank\",\n                y_axis=True,\n                y_ticks=[0, 1, 6],\n            )\n            assert \"lie outside the plotted axis\" not in caplog.text\n        with caplog.at_level(logging.WARNING):\n            tree.draw_svg(\n                order=tree.nodes(root=105, order=\"minlex_postorder\"),\n                time_scale=\"rank\",\n                y_axis=True,\n                y_ticks=[0, 1, 10],\n            )\n            assert \"Ticks {10: '10'} lie outside the plotted axis\" in caplog.text\n\n    def test_polytomy_collapsing(self):\n        tree = tskit.Tree.generate_balanced(20, arity=4, tracked_samples=np.arange(2, 8))\n        svg = tree.draw_svg(pack_untracked_polytomies=True)\n        # Should have one collapsed node (untracked samples 8 and 9)\n        # and two \"polytomy lines\" (from nodes 21 and 28 (the root))\n        assert svg.count('class=\"polytomy\"') == 2  # polytomy lines\n        collapsed_symbol = re.search(\"<polygon[^>]*>\", svg)\n        assert collapsed_symbol is not None\n        assert collapsed_symbol.group(0).count(\"sym\") == 1\n        assert collapsed_symbol.group(0).count(\"multi\") == 1\n\n    @pytest.mark.parametrize(\n        \"tree_or_ts\",\n        [tskit.Tree.generate_comb(3), tskit.Tree.generate_comb(3).tree_sequence],\n    )\n    def test_preamble(self, tree_or_ts):\n        embed = tskit.Tree.generate_comb(4)  # svg string to embed\n        svg = tree_or_ts.draw_svg(\n            size=(200, 200),\n            canvas_size=(400, 200),\n            preamble=embed.draw_svg(root_svg_attributes={\"x\": 200, \"class\": \"embedded\"}),\n        )\n        self.verify_basic_svg(svg, width=400, 
height=200)\n        assert svg.count(\"<svg\") == 2\n        assert svg.count('class=\"embedded\"') == 1\n\n    @pytest.mark.parametrize(\n        \"tree_or_ts\",\n        [tskit.Tree.generate_comb(3), tskit.Tree.generate_comb(3).tree_sequence],\n    )\n    def test_non_svg_preamble(self, tree_or_ts):\n        svg = tree_or_ts.draw_svg(\n            size=(200, 200), canvas_size=(400, 200), preamble=\"<UnbalancedTag>\"\n        )\n        with pytest.raises(xml.etree.ElementTree.ParseError):\n            self.verify_basic_svg(svg, width=400, height=200)\n\n\nclass TestDrawKnownSvg(TestDrawSvgBase):\n    \"\"\"\n    Compare against known files\n    \"\"\"\n\n    def verify_known_svg(self, svg, filename, save=False, **kwargs):\n        # expected SVG files can be inspected in tests/data/svg/*.svg\n        svg = xml.dom.minidom.parseString(svg).toprettyxml()  # Prettify for easy viewing\n        self.verify_basic_svg(svg, **kwargs)\n        svg_fn = pathlib.Path(__file__).parent / \"data\" / \"svg\" / filename\n        if save:\n            logging.warning(f\"Overwriting SVG file `{svg_fn}` with new version\")\n            with open(svg_fn, \"w\") as file:\n                file.write(svg)\n        with open(svg_fn, \"rb\") as file:\n            expected_svg = file.read()\n        self.assertXmlEquivalentOutputs(svg, expected_svg)\n\n    def test_known_svg_tree_no_mut(self, overwrite_viz, draw_plotbox):\n        tree = self.get_simple_ts().at_index(-1)\n        svg = tree.draw_svg(\n            root_svg_attributes={\"id\": \"XYZ\"},\n            style=\".edge {stroke: blue}\",\n            debug_box=draw_plotbox,\n        )\n        svg_no_css = svg[svg.find(\"</style>\") :]\n        assert svg_no_css.count(\"axes\") == 0\n        assert svg_no_css.count(\"x-axis\") == 0\n        assert svg_no_css.count(\"y-axis\") == 0\n        self.verify_known_svg(svg, \"tree.svg\", overwrite_viz)\n\n    def test_known_svg_tree_x_axis(self, overwrite_viz, draw_plotbox):\n        
tree = self.get_simple_ts().at_index(1)\n        svg = tree.draw_svg(\n            x_axis=True,\n            x_label=\"pos on genome\",\n            size=(400, 200),\n            debug_box=draw_plotbox,\n        )\n        svg_no_css = svg[svg.find(\"</style>\") :]\n        assert svg_no_css.count(\"axes\") == 1\n        assert svg_no_css.count(\"x-axis\") == 1\n        assert svg_no_css.count(\"title\") == 1\n        assert svg_no_css.count(\"y-axis\") == 0\n        self.verify_known_svg(svg, \"tree_x_axis.svg\", overwrite_viz, width=400)\n\n    def test_known_svg_tree_y_axis_rank(self, overwrite_viz, draw_plotbox):\n        tree = self.get_simple_ts().at_index(1)\n        label = \"Time (relative steps)\"\n        svg = tree.draw_svg(\n            y_axis=True,\n            y_label=label,\n            y_gridlines=True,\n            time_scale=\"rank\",\n            style=\".y-axis line.grid {stroke: #CCCCCC}\",\n            debug_box=draw_plotbox,\n        )\n        svg_no_css = svg[svg.find(\"</style>\") :]\n        node_times = [tree.time(u) for u in tree.nodes()]\n        assert label in svg_no_css\n        assert svg_no_css.count('class=\"grid\"') == len(set(node_times))\n        assert svg_no_css.count(\"axes\") == 1\n        assert svg_no_css.count(\"x-axis\") == 0\n        assert svg_no_css.count(\"y-axis\") == 1\n        assert svg_no_css.count(\"title\") == 1\n        self.verify_known_svg(svg, \"tree_y_axis_rank.svg\", overwrite_viz)\n\n    def test_known_svg_tree_both_axes(self, overwrite_viz, draw_plotbox):\n        tree = self.get_simple_ts().at_index(-1)\n        svg = tree.draw_svg(x_axis=True, y_axis=True, debug_box=draw_plotbox)\n        svg_no_css = svg[svg.find(\"</style>\") :]\n        assert svg_no_css.count(\"axes\") == 1\n        assert svg_no_css.count(\"x-axis\") == 1\n        assert svg_no_css.count(\"y-axis\") == 1\n        assert svg_no_css.count(\"title\") == 2\n        self.verify_known_svg(svg, \"tree_both_axes.svg\", 
overwrite_viz)\n\n    def test_known_svg_tree_root_mut(self, overwrite_viz, draw_plotbox):\n        tree = self.get_simple_ts().at_index(0)  # Tree 0 has a few mutations above root\n        svg = tree.draw_svg(debug_box=draw_plotbox)\n        self.verify_known_svg(svg, \"tree_muts.svg\", overwrite_viz)\n\n    def test_known_svg_tree_mut_all_edge(self, overwrite_viz, draw_plotbox):\n        tree = self.get_simple_ts().at_index(1)\n        size = (300, 400)\n        svg = tree.draw_svg(\n            size=size,\n            debug_box=draw_plotbox,\n            all_edge_mutations=True,\n            x_axis=True,\n            title=\"All mutations tree: background shading shown\",\n        )\n        self.verify_known_svg(\n            svg, \"tree_muts_all_edge.svg\", overwrite_viz, width=size[0], height=size[1]\n        )\n\n    def test_known_svg_tree_timed_root_mut(self, overwrite_viz, draw_plotbox):\n        tree = self.get_simple_ts(use_mutation_times=True).at_index(0)\n        # Also look at y_axis=right\n        svg = tree.draw_svg(debug_box=draw_plotbox, y_axis=\"right\")\n        self.verify_known_svg(svg, \"tree_timed_muts.svg\", overwrite_viz)\n\n    def test_known_svg_ts(self, overwrite_viz, draw_plotbox):\n        ts = self.get_simple_ts()\n        svg = ts.draw_svg(debug_box=draw_plotbox)\n        svg_no_css = svg[svg.find(\"</style>\") :]\n        assert svg_no_css.count(\"axes\") == 1\n        assert svg_no_css.count(\"x-axis\") == 1\n        assert svg_no_css.count(\"y-axis\") == 0\n        assert svg_no_css.count('class=\"site ') == ts.num_sites\n        assert svg_no_css.count('class=\"mut ') == ts.num_mutations * 2\n        self.verify_known_svg(svg, \"ts.svg\", overwrite_viz, width=200 * ts.num_trees)\n\n    def test_known_svg_ts_title(self, overwrite_viz, draw_plotbox):\n        ts = self.get_simple_ts()\n        svg = ts.draw_svg(title=\"The main plot title\", debug_box=draw_plotbox)\n        self.verify_known_svg(\n            svg, 
\"ts_title.svg\", overwrite_viz, width=200 * ts.num_trees\n        )\n\n    def test_known_svg_ts_no_axes(self, overwrite_viz, draw_plotbox):\n        ts = self.get_simple_ts()\n        svg = ts.draw_svg(x_axis=False, debug_box=draw_plotbox)\n        svg_no_css = svg[svg.find(\"</style>\") :]\n        assert svg_no_css.count(\"axes\") == 0\n        assert svg_no_css.count(\"x-axis\") == 0\n        assert svg_no_css.count(\"y-axis\") == 0\n        assert 'class=\"site ' not in svg_no_css\n        assert svg_no_css.count('class=\"mut ') == ts.num_mutations\n        self.verify_known_svg(\n            svg, \"ts_no_axes.svg\", overwrite_viz, width=200 * ts.num_trees\n        )\n\n    def test_known_svg_ts_internal_sample(self, overwrite_viz, draw_plotbox):\n        ts = tsutil.jiggle_samples(self.get_simple_ts())\n        svg = ts.draw_svg(\n            root_svg_attributes={\"id\": \"XYZ\"},\n            style=\"#XYZ .leaf .sym {fill: magenta} #XYZ .sample > .sym {fill: cyan}\",\n            debug_box=draw_plotbox,\n        )\n        self.verify_known_svg(\n            svg, \"internal_sample_ts.svg\", overwrite_viz, width=200 * ts.num_trees\n        )\n\n    def test_known_svg_ts_highlighted_mut(self, overwrite_viz, draw_plotbox):\n        ts = self.get_simple_ts()\n        style = (\n            \".edge {stroke: grey}\"\n            \".mut .sym{stroke:pink} .mut text{fill:pink}\"\n            \".mut.m2 .sym,.m2>line, .m2>.node .edge{stroke:red} .mut.m2 text{fill:red}\"\n            \".mut.m3 .sym,.m3>line, .m3>.node .edge{stroke:cyan} .mut.m3 text{fill:cyan}\"\n            \".mut.m4 .sym,.m4>line, .m4>.node .edge{stroke:blue} .mut.m4 text{fill:blue}\"\n        )\n        svg = ts.draw_svg(style=style, debug_box=draw_plotbox)\n        self.verify_known_svg(\n            svg, \"ts_mut_highlight.svg\", overwrite_viz, width=200 * ts.num_trees\n        )\n\n    def test_known_svg_ts_rank(self, overwrite_viz, draw_plotbox):\n        ts = self.get_simple_ts()\n        svg1 
= ts.draw_svg(time_scale=\"rank\", y_axis=True, debug_box=draw_plotbox)\n        ts = self.get_simple_ts(use_mutation_times=True)\n        svg2 = ts.draw_svg(time_scale=\"rank\", y_axis=True, debug_box=draw_plotbox)\n        assert svg1.count('class=\"site ') == ts.num_sites\n        assert svg1.count('class=\"mut ') == ts.num_mutations * 2\n        assert svg1.replace(\" unknown_time\", \"\") == svg2  # Trim the unknown_time class\n        self.verify_known_svg(\n            svg1, \"ts_rank.svg\", overwrite_viz, width=200 * ts.num_trees\n        )\n\n    @pytest.mark.skip(reason=\"Fails on CI as OSX gives different random numbers\")\n    def test_known_svg_nonbinary_ts(self, overwrite_viz, draw_plotbox):\n        ts = self.get_nonbinary_ts()\n        svg = ts.draw_svg(time_scale=\"log_time\", debug_box=draw_plotbox)\n        assert svg.count('class=\"site ') == ts.num_sites\n        assert svg.count('class=\"mut ') == ts.num_mutations * 2\n        self.verify_known_svg(\n            svg, \"ts_nonbinary.svg\", overwrite_viz, width=200 * ts.num_trees\n        )\n\n    def test_known_svg_ts_plain(self, overwrite_viz, draw_plotbox):\n        \"\"\"\n        Plain style: no background shading and a variable scale X axis with no sites\n        \"\"\"\n        ts = self.get_simple_ts()\n        svg = ts.draw_svg(x_scale=\"treewise\", debug_box=draw_plotbox)\n        assert svg.count('class=\"site ') == 0\n        assert svg.count('class=\"mut ') == ts.num_mutations\n        self.verify_known_svg(\n            svg, \"ts_plain.svg\", overwrite_viz, width=200 * ts.num_trees\n        )\n\n    def test_known_svg_ts_plain_no_xlab(self, overwrite_viz, draw_plotbox):\n        \"\"\"\n        Plain style: no background shading and a variable scale X axis with no sites\n        \"\"\"\n        ts = self.get_simple_ts()\n        svg = ts.draw_svg(x_scale=\"treewise\", x_label=\"\", debug_box=draw_plotbox)\n        assert \"Genome position\" not in svg\n        
self.verify_known_svg(\n            svg, \"ts_plain_no_xlab.svg\", overwrite_viz, width=200 * ts.num_trees\n        )\n\n    def test_known_svg_ts_plain_y(self, overwrite_viz, draw_plotbox):\n        \"\"\"\n        Plain style: no background shading and a variable scale X axis with no sites\n        \"\"\"\n        ts = self.get_simple_ts()\n        ticks = [0, 5, 10]\n        svg = ts.draw_svg(\n            x_scale=\"treewise\",\n            y_axis=True,\n            y_ticks=ticks,\n            y_gridlines=True,\n            style=\".y-axis line.grid {stroke: #CCCCCC}\",\n            debug_box=draw_plotbox,\n        )\n        self.verify_known_svg(\n            svg, \"ts_plain_y.svg\", overwrite_viz, width=200 * ts.num_trees\n        )\n\n    def test_known_svg_ts_with_xlabel(self, overwrite_viz, draw_plotbox):\n        \"\"\"\n        Style with X axis label\n        \"\"\"\n        ts = self.get_simple_ts()\n        x_label = \"genomic position (bp)\"\n        svg = ts.draw_svg(x_label=x_label, debug_box=draw_plotbox)\n        assert x_label in svg\n        self.verify_known_svg(\n            svg, \"ts_xlabel.svg\", overwrite_viz, width=200 * ts.num_trees\n        )\n\n    def test_known_svg_ts_y_axis(self, overwrite_viz, draw_plotbox):\n        tables = self.get_simple_ts().dump_tables()\n        # set units\n        tables.time_units = \"generations\"\n        ts = tables.tree_sequence()\n        svg = ts.draw_svg(y_axis=True, title=\"Y axis test\", debug_box=draw_plotbox)\n        assert \"Time ago (generations)\" in svg\n        self.verify_known_svg(svg, \"ts_y_axis.svg\", True, width=200 * ts.num_trees)\n\n    def test_known_svg_ts_y_axis_regular(self, overwrite_viz, draw_plotbox):\n        # This should have gridlines\n        ts = self.get_simple_ts()\n        ticks = np.arange(0, max(ts.tables.nodes.time), 1)\n        svg = ts.draw_svg(\n            y_axis=True, y_ticks=ticks, y_gridlines=True, debug_box=draw_plotbox\n        )\n        assert 
svg.count('class=\"grid\"') == len(ticks)\n        self.verify_known_svg(\n            svg, \"ts_y_axis_regular.svg\", overwrite_viz, width=200 * ts.num_trees\n        )\n\n    def test_known_svg_ts_y_axis_log(self, overwrite_viz, draw_plotbox):\n        ts = self.get_simple_ts()\n        svg = ts.draw_svg(\n            y_axis=True,\n            y_label=\"Time (log scale)\",\n            time_scale=\"log_time\",\n            debug_box=draw_plotbox,\n        )\n        self.verify_known_svg(\n            svg, \"ts_y_axis_log.svg\", overwrite_viz, width=200 * ts.num_trees\n        )\n\n    def test_known_svg_ts_mutation_times(self, overwrite_viz, draw_plotbox):\n        ts = self.get_simple_ts(use_mutation_times=True)\n        # also look at y_axis=\"right\"\n        svg = ts.draw_svg(debug_box=draw_plotbox, y_axis=\"right\")\n        assert svg.count('class=\"site ') == ts.num_sites\n        assert svg.count('class=\"mut ') == ts.num_mutations * 2\n        self.verify_known_svg(\n            svg, \"ts_mut_times.svg\", overwrite_viz, width=200 * ts.num_trees\n        )\n\n    def test_known_svg_ts_titles(self, overwrite_viz, draw_plotbox):\n        ts = self.get_simple_ts(use_mutation_times=True)\n        svg = ts.draw_svg(\n            node_titles={nd.id: f\"NoDe{nd.id}!\" for nd in ts.nodes()},\n            mutation_titles={m.id: f\"MuT{m.id}!\" for m in ts.mutations()},\n            debug_box=draw_plotbox,\n        )\n        for nd in ts.nodes():\n            if nd.is_sample():\n                assert svg.count(f\"<title>NoDe{nd.id}!</title>\") == ts.num_trees\n            else:\n                assert f\"<title>NoDe{nd.id}!</title>\" in svg\n        for m in ts.mutations():\n            assert svg.count(f\"<title>MuT{m.id}!</title>\") == 2  # 1 on tree, 1 on x-axis\n        self.verify_known_svg(\n            svg, \"ts_mut_times_titles.svg\", overwrite_viz, width=200 * ts.num_trees\n        )\n\n    def test_known_svg_ts_mutation_times_logscale(self, 
overwrite_viz, draw_plotbox):\n        ts = self.get_simple_ts(use_mutation_times=True)\n        svg = ts.draw_svg(time_scale=\"log_time\", debug_box=draw_plotbox)\n        assert svg.count('class=\"site ') == ts.num_sites\n        assert svg.count('class=\"mut ') == ts.num_mutations * 2\n        self.verify_known_svg(\n            svg, \"ts_mut_times_logscale.svg\", overwrite_viz, width=200 * ts.num_trees\n        )\n\n    def test_known_svg_ts_mut_no_edges(self, overwrite_viz, draw_plotbox):\n        # An example with some muts on axis but not on a visible node\n        ts = msprime.simulate(10, random_seed=2, mutation_rate=1)\n        tables = ts.dump_tables()\n        tables.edges.clear()\n        tables.mutations.time = np.full_like(tables.mutations.time, tskit.UNKNOWN_TIME)\n        ts_no_edges = tables.tree_sequence()\n        with pytest.warns(UserWarning, match=\"nodes which are not present\"):\n            svg = ts_no_edges.draw_svg(debug_box=draw_plotbox)\n            self.verify_known_svg(\n                svg,\n                \"ts_mutations_no_edges.svg\",\n                overwrite_viz,\n                width=200 * ts.num_trees,\n            )\n\n    def test_known_svg_ts_timed_mut_no_edges(self, overwrite_viz, draw_plotbox):\n        # An example with some muts on axis but not on a visible node\n        ts = msprime.simulate(10, random_seed=2, mutation_rate=1)\n        tables = ts.dump_tables()\n        tables.edges.clear()\n        tables.mutations.time = np.arange(\n            ts.num_mutations, dtype=tables.mutations.time.dtype\n        )\n        ts_no_edges = tables.tree_sequence()\n\n        with pytest.warns(UserWarning, match=\"nodes which are not present\"):\n            svg = ts_no_edges.draw_svg(debug_box=draw_plotbox)\n            self.verify_known_svg(\n                svg,\n                \"ts_mutations_timed_no_edges.svg\",\n                overwrite_viz,\n                width=200 * ts.num_trees,\n            )\n\n    def 
test_known_svg_ts_multiroot(self, overwrite_viz, draw_plotbox):\n        tables = wf.wf_sim(\n            6,\n            5,\n            seed=1,\n            deep_history=False,\n            initial_generation_samples=False,\n            num_loci=8,\n        )\n        tables.sort()\n        ts = tables.tree_sequence().simplify()\n        tables = tsutil.jukes_cantor(ts, 10, mu=0.1, seed=123).dump_tables()\n        # Set unknown times, so we are msprime 0.7.4 and 1.0.0 compatible\n        tables.mutations.time = np.full(tables.mutations.num_rows, tskit.UNKNOWN_TIME)\n        svg = tables.tree_sequence().draw_svg(\n            y_axis=True, y_gridlines=True, debug_box=draw_plotbox\n        )\n        self.verify_known_svg(\n            svg, \"ts_multiroot.svg\", overwrite_viz, width=200 * ts.num_trees\n        )\n        assert \"Time ago (generations)\" in svg\n\n    def test_known_svg_ts_xlim(self, overwrite_viz, draw_plotbox):\n        ts = self.get_simple_ts()\n        svg = ts.draw_svg(x_lim=[0.051, 0.9], debug_box=draw_plotbox)\n        num_trees = sum(1 for b in ts.breakpoints() if 0.051 <= b < 0.9) + 1\n        self.verify_known_svg(svg, \"ts_x_lim.svg\", overwrite_viz, width=200 * num_trees)\n\n    @pytest.mark.skipif(IS_WINDOWS, reason=\"Msprime gives different result on Windows\")\n    def test_known_max_num_trees(self, overwrite_viz, draw_plotbox):\n        max_trees = 5\n        ts = msprime.sim_ancestry(\n            3, sequence_length=100, recombination_rate=0.1, random_seed=1\n        )\n        ts = msprime.sim_mutations(ts, rate=0.01, random_seed=1)\n        assert ts.num_trees > 10\n        first_break = next(ts.trees()).interval.right\n        # limit to just past the first tree\n        svg = ts.draw_svg(\n            max_num_trees=max_trees,\n            x_lim=(first_break + 0.1, ts.sequence_length - 0.1),\n            y_axis=True,\n            time_scale=\"log_time\",\n            debug_box=draw_plotbox,\n        )\n        
self.verify_known_svg(\n            svg, \"ts_max_trees.svg\", overwrite_viz, width=200 * (max_trees + 1)\n        )\n\n    @pytest.mark.skipif(IS_WINDOWS, reason=\"Msprime gives different result on Windows\")\n    def test_known_max_num_trees_treewise(self, overwrite_viz, draw_plotbox):\n        max_trees = 5\n        ts = msprime.sim_ancestry(\n            3, sequence_length=100, recombination_rate=0.1, random_seed=1\n        )\n        ts = msprime.sim_mutations(ts, rate=0.01, random_seed=1)\n        assert ts.num_trees > 10\n        first_break = next(ts.trees()).interval.right\n        svg = ts.draw_svg(\n            max_num_trees=max_trees,\n            x_lim=(first_break + 0.1, ts.sequence_length - 0.1),\n            y_axis=True,\n            x_scale=\"treewise\",\n            debug_box=draw_plotbox,\n        )\n        self.verify_known_svg(\n            svg, \"ts_max_trees_treewise.svg\", overwrite_viz, width=200 * (max_trees + 1)\n        )\n\n    def test_known_svg_tree_collapsed(self, overwrite_viz, draw_plotbox):\n        tree = tskit.Tree.generate_balanced(8)\n        remove_nodes = set()\n        remove_nodes_below = {8, 13}\n        for u in remove_nodes_below:\n            subtree_nodes = set(tree.nodes(root=u)) - {u}\n            remove_nodes.update(subtree_nodes)\n        order = [\n            u for u in tree.nodes(order=\"minlex_postorder\") if u not in remove_nodes\n        ]\n        svg = tree.draw_svg(order=order, debug_box=draw_plotbox)\n        assert svg.count(\"multi\") == len(remove_nodes_below)\n        assert svg.count(\">+2<\") == 1  # One tip has 2 samples below it\n        assert svg.count(\">+4<\") == 1  # Another tip has 4 samples below it\n        for u in order:\n            assert f'n{u}\"' in svg or f\"n{u} \" in svg\n        for u in remove_nodes:\n            assert f'n{u}\"' not in svg and f\"n{u} \" not in svg\n        self.verify_known_svg(svg, \"tree_simple_collapsed.svg\", overwrite_viz)\n\n    def 
test_known_svg_tree_subtree(self, overwrite_viz, draw_plotbox):\n        tree = tskit.Tree.generate_balanced(8)\n        order = [u for u in tree.nodes(root=10, order=\"minlex_postorder\")]\n        # The balanced tree has all descendants of node 10 with IDs < 10\n        assert np.all(np.array(order) <= 10)\n        svg = tree.draw_svg(order=order, debug_box=draw_plotbox)\n        for u in order:\n            assert f'n{u}\"' in svg or f\"n{u} \" in svg\n        for u in set(tree.nodes()) - set(order):\n            assert f'n{u}\"' not in svg and f\"n{u} \" not in svg\n        self.verify_known_svg(svg, \"tree_subtree.svg\", overwrite_viz, has_root=False)\n\n    def test_known_svg_tree_subtrees_with_collapsed(self, overwrite_viz, draw_plotbox):\n        # Two subtrees, one with a collapsed node below node 16\n        tree = tskit.Tree.generate_balanced(16)\n        roots = [22, 25]\n        order = []\n        remove_nodes_below = 16\n        remove_nodes = set(tree.nodes(root=remove_nodes_below)) - {remove_nodes_below}\n        for root in roots:\n            order += [\n                u\n                for u in tree.nodes(root=root, order=\"minlex_postorder\")\n                if u not in remove_nodes\n            ]\n        svg = tree.draw_svg(order=order, debug_box=draw_plotbox)\n        assert svg.count(\"multi\") == 1  # One tip representing multiple nodes\n        for u in order:\n            assert f'n{u}\"' in svg or f\"n{u} \" in svg\n        for u in remove_nodes:\n            assert f'n{u}\"' not in svg and f\"n{u} \" not in svg\n        self.verify_known_svg(\n            svg, \"tree_subtrees_with_collapsed.svg\", overwrite_viz, has_root=False\n        )\n\n    def test_known_svg_tree_polytomy(self, overwrite_viz, draw_plotbox):\n        tracked_nodes = [20, 24, 25, 27, 28, 29]\n        tree = tskit.Tree.generate_balanced(30, arity=4)\n        svg = tree.draw_svg(\n            time_scale=\"rank\",\n            debug_box=draw_plotbox,\n            
size=(600, 200),\n            style=\"\".join(f\".n{u} > .sym {{fill: cyan}}\" for u in tracked_nodes + [39]),\n        )\n        self.verify_known_svg(svg, \"tree_poly.svg\", overwrite_viz, width=600, height=200)\n\n    def test_known_svg_tree_polytomy_tracked(self, overwrite_viz, draw_plotbox):\n        tracked_nodes = [20, 24, 25, 27, 28, 29]\n        tree = tskit.Tree.generate_balanced(30, arity=4, tracked_samples=tracked_nodes)\n        svg = tree.draw_svg(\n            time_scale=\"rank\",\n            order=drawing._postorder_tracked_minlex_traversal(tree),\n            debug_box=draw_plotbox,\n            pack_untracked_polytomies=True,\n            size=(600, 200),\n            style=\"\".join(f\".n{u} > .sym {{fill: cyan}}\" for u in tracked_nodes + [39]),\n        )\n        self.verify_known_svg(\n            svg, \"tree_poly_tracked.svg\", overwrite_viz, width=600, height=200\n        )\n\n    def test_known_svg_tree_polytomy_tracked_collapse(self, overwrite_viz, draw_plotbox):\n        tracked_nodes = [20, 24, 25, 27, 28, 29]\n        tree = tskit.Tree.generate_balanced(30, arity=4, tracked_samples=tracked_nodes)\n        svg = tree.draw_svg(\n            time_scale=\"rank\",\n            order=drawing._postorder_tracked_minlex_traversal(\n                tree, collapse_tracked=True\n            ),\n            debug_box=draw_plotbox,\n            size=(600, 200),\n            pack_untracked_polytomies=True,\n            style=\"\".join(f\".n{u} > .sym {{fill: cyan}}\" for u in tracked_nodes + [39]),\n        )\n        self.verify_known_svg(\n            svg, \"tree_poly_tracked_collapse.svg\", overwrite_viz, width=600, height=200\n        )\n\n\nclass TestRounding:\n    def test_rnd(self):\n        assert 0 == drawing.rnd(0)\n        assert math.inf == drawing.rnd(math.inf)\n        assert 1 == drawing.rnd(1)\n        assert 1.1 == drawing.rnd(1.1)\n        assert 1.11111 == drawing.rnd(1.111111)\n        assert 1111110 == drawing.rnd(1111111)\n    
    assert 123.457 == drawing.rnd(123.4567)\n        assert 123.456 == drawing.rnd(123.4564)\n\n\nclass TestDrawingTraversals:\n    # TODO: test drawing._postorder_tracked_minlex_traversal and\n    # drawing._postorder_tracked_node_traversal\n    pass\n"
  },
  {
    "path": "python/tests/test_extend_haplotypes.py",
    "content": "import msprime\nimport numpy as np\nimport pytest\n\nimport _tskit\nimport tests.test_wright_fisher as wf\nimport tskit\nfrom tests import tsutil\nfrom tests.tsutil import get_example_tree_sequences\n\n# ↑ See https://github.com/tskit-dev/tskit/issues/1804 for when\n# we can remove this.\n\n\ndef _slide_mutation_nodes_up(ts, mutations):\n    # adjusts mutations' nodes to place each mutation on the correct edge given\n    # their time; requires mutation times be nonmissing and the mutation times\n    # be >= their nodes' times.\n\n    assert np.all(~tskit.is_unknown_time(mutations.time)), \"times must be known\"\n    new_nodes = mutations.node.copy()\n\n    mut = 0\n    for tree in ts.trees():\n        _, right = tree.interval\n        while (\n            mut < mutations.num_rows and ts.sites_position[mutations.site[mut]] < right\n        ):\n            t = mutations.time[mut]\n            c = mutations.node[mut]\n            p = tree.parent(c)\n            assert ts.nodes_time[c] <= t\n            while p != -1 and ts.nodes_time[p] <= t:\n                c = p\n                p = tree.parent(c)\n            assert ts.nodes_time[c] <= t\n            if p != -1:\n                assert t < ts.nodes_time[p]\n            new_nodes[mut] = c\n            mut += 1\n\n    # in C the node column can be edited in place\n    new_mutations = mutations.copy()\n    new_mutations.clear()\n    for mut, n in zip(mutations, new_nodes):\n        new_mutations.append(mut.replace(node=n))\n\n    return new_mutations\n\n\ndef print_edge_list(head, edges, left, right):\n    print(\"Edge list:\")\n    for j, (e, x) in enumerate(head):\n        print(\n            f\"  {j}: {e} ({x}); \"\n            + (\n                f\"{edges.child[e]}->{edges.parent[e]} on [{left[e]}, {right[e]})\"\n                if e >= 0\n                else \"(null)\"\n            )\n        )\n    print(f\"length = {len(head)}\")\n\n\nclass HaplotypeExtender:\n    def __init__(self, ts, 
forwards):\n        \"\"\"\n        Below we will iterate through the trees, either to the left or the right,\n        keeping the following state consistent:\n        - we are moving from a previous tree, last_tree, to new one, next_tree\n        - here: the position that separates the last_tree from the next_tree\n        - (here, there): the segment covered by next_tree\n        - edges_out: edges to be removed from last_tree to get next_tree\n        - parent_out: the forest induced by edges_out, a subset of last_tree\n        - edges_in: edges to be added to last_tree to get next_tree\n        - parent_in: the forest induced by edges_in, a subset of next_tree\n        - next_degree: the degree of each node in next_tree\n        - next_nodes_edge: for each node, the edge above it in next_tree\n        - last_degree: the degree of each node in last_tree\n        - last_nodes_edge: for each node, the edge above it in last_tree\n        Except: each of edges_in and edges_out is of the form e, x, and the\n        label x>0 if the edge is postponed to the next segment.\n        The label is x=1 for postponed edges, and x=2 for new edges.\n        In other words:\n        - elements e, x of edges_out with x=0 are in last_tree but not next_tree\n        - elements e, x of edges_in with x=0 are in next_tree but not last_tree\n        - elements e, x of edges_out with x=1 are in both trees,\n            and hence don't count for parent_out\n        - elements e, x of edges_in with x=1 are in neither,\n            and hence don't count for parent_in\n        - elements e, x for edges_out with x=2 have just been added, and so ought\n            to count towards the next tree, but we have to put them in edges out\n            because they'll be removed next time.\n        Notes:\n        - things having to do with last_tree do not change,\n          but things having to do with next_tree might change as we go along\n        - parent_out and parent_in do not refer to the 
*entire* last/next_tree,\n          but rather to *only* the edges_in/edges_out\n        Edges in can have one of three things happen to them:\n        1. they get added to the next tree, as usual;\n        2. they get postponed to the tree after the next tree,\n            and are thus part of edges_in again next time;\n        3. they get postponed but run out of span so they disappear entirely.\n        Edges out are similarly of four varieties:\n        0. they are also in case (3) of edges_in, i.e., their extent was modified\n            when they were in edges_in so that they now have left=right;\n        1. they get removed from the last tree, as usual;\n        2. they get extended to the next tree,\n            and are thus part of edges_out again next time;\n        3. they are in fact a newly added edge, and so are part of edges_out next time.\n        \"\"\"\n        self.ts = ts\n        self.edges = ts.tables.edges.copy()\n        self.new_left = ts.edges_left.copy()\n        self.new_right = ts.edges_right.copy()\n        self.last_degree = np.full(ts.num_nodes, 0, dtype=\"int\")\n        self.next_degree = np.full(ts.num_nodes, 0, dtype=\"int\")\n        self.parent_out = np.full(ts.num_nodes, -1, dtype=\"int\")\n        self.parent_in = np.full(ts.num_nodes, -1, dtype=\"int\")\n        self.not_sample = [not n.is_sample() for n in ts.nodes()]\n        self.next_nodes_edge = np.full(ts.num_nodes, -1, dtype=\"int\")\n        self.last_nodes_edge = np.full(ts.num_nodes, -1, dtype=\"int\")\n\n        if forwards:\n            self.direction = 1\n            # in C we can just modify these in place, but in\n            # python they are (silently) immutable\n            self.near_side = list(self.new_left)\n            self.far_side = list(self.new_right)\n        else:\n            self.direction = -1\n            self.near_side = list(self.new_right)\n            self.far_side = list(self.new_left)\n\n        self.edges_out = []\n        self.edges_in 
= []\n\n    def print_state(self):\n        print(\"~~~~~~~~~~~~~~~~~~~~~~~~\")\n        print(\"edges in:\", self.edges_in)\n        print(\"parent out:\")\n        for j, pj in enumerate(self.parent_out):\n            print(f\"   {j}: {pj}\")\n        print(\"parent in:\")\n        for j, pj in enumerate(self.parent_in):\n            print(f\"   {j}: {pj}\")\n        print(\"edges out:\", self.edges_out)\n        print(\"parent out:\", self.parent_out)\n        print(\"last nodes edge:\")\n        for j, ej in enumerate(self.last_nodes_edge):\n            print(\n                f\"   {j}: {ej}, \"\n                + (\n                    \"(null)\"\n                    if ej == -1\n                    else (\n                        f\"({self.edges.child[ej]}->{self.edges.parent[ej]}, \"\n                        \"{self.near_side[ej]}-{self.far_side[ej]}\"\n                    )\n                )\n            )\n        for e, _ in self.edges_out:\n            print(\n                \"edge out:   \",\n                \"e =\",\n                e,\n                \"c =\",\n                self.edges.child[e],\n                \"p =\",\n                self.edges.parent[e],\n                self.near_side[e],\n                self.far_side[e],\n            )\n\n    def next_tree(self, tree_pos):\n        # Clear out non-extended or postponed edges:\n        # Note: maintaining parent_out is a bit tricky, because\n        # if an edge from p->c has been extended, entirely replacing\n        # another edge from p'->c, then both edges may be in edges_out,\n        # and we only want to include the *first* one.\n\n        for e, x in self.edges_out:\n            self.parent_out[self.edges.child[e]] = tskit.NULL\n            if x > 1:\n                # this is needed to catch newly-created edges\n                self.last_nodes_edge[self.edges.child[e]] = e\n                self.last_degree[self.edges.child[e]] += 1\n                
self.last_degree[self.edges.parent[e]] += 1\n            elif x == 0 and self.near_side[e] != self.far_side[e]:\n                self.last_nodes_edge[self.edges.child[e]] = tskit.NULL\n                self.last_degree[self.edges.child[e]] -= 1\n                self.last_degree[self.edges.parent[e]] -= 1\n        tmp = []\n        for e, x in self.edges_out:\n            if x > 0:\n                tmp.append([e, 0])\n        self.edges_out = tmp\n        for e, x in self.edges_in:\n            self.parent_in[self.edges.child[e]] = tskit.NULL\n            if x == 0 and self.near_side[e] != self.far_side[e]:\n                assert self.last_nodes_edge[self.edges.child[e]] == tskit.NULL\n                self.last_nodes_edge[self.edges.child[e]] = e\n                self.last_degree[self.edges.child[e]] += 1\n                self.last_degree[self.edges.parent[e]] += 1\n        tmp = []\n        for e, x in self.edges_in:\n            if x > 0:\n                tmp.append([e, 0])\n        self.edges_in = tmp\n\n        # done cleanup from last tree transition;\n        # now we update the state to reflect the current tree transition\n        for j in range(\n            tree_pos.out_range.start, tree_pos.out_range.stop, self.direction\n        ):\n            e = tree_pos.out_range.order[j]\n            if (self.parent_out[self.edges.child[e]] == tskit.NULL) and (\n                self.near_side[e] != self.far_side[e]\n            ):\n                self.edges_out.append([e, 0])\n\n        for e, _ in self.edges_out:\n            self.parent_out[self.edges.child[e]] = self.edges.parent[e]\n            self.next_nodes_edge[self.edges.child[e]] = tskit.NULL\n            self.next_degree[self.edges.child[e]] -= 1\n            self.next_degree[self.edges.parent[e]] -= 1\n\n        for j in range(tree_pos.in_range.start, tree_pos.in_range.stop, self.direction):\n            e = tree_pos.in_range.order[j]\n            self.edges_in.append([e, 0])\n\n        for e, _ in 
self.edges_in:\n            self.parent_in[self.edges.child[e]] = self.edges.parent[e]\n            assert self.next_nodes_edge[self.edges.child[e]] == tskit.NULL\n            self.next_nodes_edge[self.edges.child[e]] = e\n            self.next_degree[self.edges.child[e]] += 1\n            self.next_degree[self.edges.parent[e]] += 1\n\n    def check_state_at(self, pos, before, degree, nodes_edge):\n        # if before=True then we construct the state at epsilon-on-near-side-of `pos`,\n        # otherwise, at epsilon-on-far-side-of `pos`.\n        check_degree = np.zeros(self.ts.num_nodes, dtype=\"int\")\n        check_nodes_edge = np.full(self.ts.num_nodes, -1, dtype=\"int\")\n        assert len(self.near_side) == self.edges.num_rows\n        assert len(self.far_side) == self.edges.num_rows\n        for j, (e, l, r) in enumerate(zip(self.edges, self.near_side, self.far_side)):\n            overlaps = (l != r) and (\n                ((pos - l) * (r - pos) > 0)\n                or (r == pos and before)\n                or (l == pos and not before)\n            )\n            if overlaps:\n                check_degree[e.child] += 1\n                check_degree[e.parent] += 1\n                assert check_nodes_edge[e.child] == tskit.NULL\n                check_nodes_edge[e.child] = j\n        np.testing.assert_equal(check_nodes_edge, nodes_edge)\n        np.testing.assert_equal(check_degree, degree)\n\n    def check_parent(self, parent, edge_ids):\n        temp_parent = np.full(self.ts.num_nodes, -1, dtype=\"int\")\n        for j in edge_ids:\n            c = self.edges.child[j]\n            p = self.edges.parent[j]\n            temp_parent[c] = p\n        np.testing.assert_equal(temp_parent, parent)\n\n    def check_state(self, here):\n        for e, x in self.edges_in:\n            assert x == 0\n            assert self.near_side[e] != self.far_side[e]\n        for e, x in self.edges_out:\n            assert x == 0\n            assert self.near_side[e] != 
self.far_side[e]\n        self.check_state_at(here, False, self.next_degree, self.next_nodes_edge)\n        self.check_state_at(here, True, self.last_degree, self.last_nodes_edge)\n        self.check_parent(self.parent_in, [j for j, x in self.edges_in if x == 0])\n        self.check_parent(self.parent_out, [j for j, x in self.edges_out if x == 0])\n\n    def add_or_extend_edge(self, new_parent, child, left, right):\n        there = right if (self.direction == 1) else left\n        old_edge = self.next_nodes_edge[child]\n        if old_edge != tskit.NULL:\n            old_parent = self.edges.parent[old_edge]\n        else:\n            old_parent = tskit.NULL\n        if new_parent != old_parent:\n            # if our new edge is in edges_out, it should be extended\n            if self.parent_out[child] == new_parent:\n                e_out = self.last_nodes_edge[child]\n                assert e_out >= 0\n                assert self.edges.child[e_out] == child\n                assert self.edges.parent[e_out] == new_parent\n                self.far_side[e_out] = there\n                assert self.near_side[e_out] != self.far_side[e_out]\n                for ex_out in self.edges_out:\n                    if ex_out[0] == e_out:\n                        break\n                assert ex_out[0] == e_out\n                ex_out[1] = 1\n            else:\n                e_out = self.add_edge(new_parent, child, left, right)\n                self.edges_out.append([e_out, 2])\n            # If we're replacing the edge above this node, it must be in edges_in;\n            # note that this assertion excludes the case that we're interrupting\n            # an existing edge.\n            assert (self.next_nodes_edge[child] == tskit.NULL) or (\n                self.next_nodes_edge[child] in [e for e, _ in self.edges_in]\n            )\n            self.next_nodes_edge[child] = e_out\n            self.next_degree[child] += 1\n            self.next_degree[new_parent] += 1\n          
  self.parent_out[child] = tskit.NULL\n            if old_edge != tskit.NULL:\n                for ex_in in self.edges_in:\n                    e_in = ex_in[0]\n                    if e_in == old_edge and (ex_in[1] == 0):\n                        self.near_side[e_in] = there\n                        if self.far_side[e_in] != there:\n                            ex_in[1] = 1\n                        self.next_nodes_edge[child] = tskit.NULL\n                        self.next_degree[child] -= 1\n                        self.next_degree[self.parent_in[child]] -= 1\n                        self.parent_in[child] = tskit.NULL\n\n    def add_edge(self, parent, child, left, right):\n        new_id = self.edges.add_row(parent=parent, child=child, left=left, right=right)\n        # this appending should not be necessary in C\n        if self.direction == 1:\n            self.near_side.append(left)\n            self.far_side.append(right)\n        else:\n            self.near_side.append(right)\n            self.far_side.append(left)\n        return new_id\n\n    def mergeable(self, c):\n        # returns a finite number of new edges needed\n        # if the paths in parent_in and parent_out\n        # up through nodes that aren't in the other tree\n        # end at the same place and don't have conflicting times;\n        # otherwise, returns Inf\n        p_out = self.parent_out[c]\n        p_in = self.parent_in[c]\n        t_out = np.inf if p_out == tskit.NULL else self.ts.nodes_time[p_out]\n        t_in = np.inf if p_in == tskit.NULL else self.ts.nodes_time[p_in]\n        child = c\n        num_new_edges = 0\n        num_extended = 0\n        while True:\n            climb_in = (\n                p_in != tskit.NULL\n                and self.last_degree[p_in] == 0\n                and self.not_sample[p_in]\n                and t_in < t_out\n            )\n            climb_out = (\n                p_out != tskit.NULL\n                and self.next_degree[p_out] == 0\n         
       and self.not_sample[p_out]\n                and t_out < t_in\n            )\n            if climb_in:\n                if self.parent_in[child] != p_in and self.parent_out[child] != p_in:\n                    num_new_edges += 1\n                child = p_in\n                p_in = self.parent_in[p_in]\n                t_in = np.inf if p_in == tskit.NULL else self.ts.nodes_time[p_in]\n            elif climb_out:\n                if self.parent_in[child] != p_out and self.parent_out[child] != p_out:\n                    num_new_edges += 1\n                child = p_out\n                p_out = self.parent_out[p_out]\n                t_out = np.inf if p_out == tskit.NULL else self.ts.nodes_time[p_out]\n                num_extended += 1\n            else:\n                break\n        if num_extended == 0 or p_in != p_out or p_in == tskit.NULL:\n            num_new_edges = np.inf\n        return num_new_edges\n\n    def merge_paths(self, c, left, right):\n        p_out = self.parent_out[c]\n        p_in = self.parent_in[c]\n        t_out = self.ts.nodes_time[p_out]\n        t_in = self.ts.nodes_time[p_in]\n        child = c\n        while True:\n            climb_in = (\n                p_in != tskit.NULL\n                and self.last_degree[p_in] == 0\n                and self.not_sample[p_in]\n                and t_in < t_out\n            )\n            climb_out = (\n                p_out != tskit.NULL\n                and self.next_degree[p_out] == 0\n                and self.not_sample[p_out]\n                and t_out < t_in\n            )\n            if climb_in:\n                self.add_or_extend_edge(p_in, child, left, right)\n                child = p_in\n                p_in = self.parent_in[p_in]\n                t_in = np.inf if p_in == tskit.NULL else self.ts.nodes_time[p_in]\n            elif climb_out:\n                self.add_or_extend_edge(p_out, child, left, right)\n                child = p_out\n                p_out = 
self.parent_out[p_out]\n                t_out = np.inf if p_out == tskit.NULL else self.ts.nodes_time[p_out]\n            else:\n                break\n        assert p_out == p_in\n        self.add_or_extend_edge(p_out, child, left, right)\n\n    def extend_haplotypes(self):\n        tree_pos = tsutil.TreeIndexes(self.ts)\n        if self.direction == 1:\n            valid = tree_pos.next()\n        else:\n            valid = tree_pos.prev()\n        while valid:\n            left, right = tree_pos.interval\n            # there = right if self.direction == 1 else left\n            here = left if self.direction == 1 else right\n            self.next_tree(tree_pos)\n            self.check_state(here)\n            max_new_edges = 0\n            next_max_new_edges = np.inf\n            while max_new_edges < np.inf:\n                for e_in, x in self.edges_in:\n                    if x == 0:\n                        c = self.edges.child[e_in]\n                        assert self.next_degree[c] > 0\n                        if self.last_degree[c] > 0:\n                            ne = self.mergeable(c)\n                            if ne <= max_new_edges:\n                                self.merge_paths(c, left, right)\n                            else:\n                                next_max_new_edges = min(ne, next_max_new_edges)\n                max_new_edges = next_max_new_edges\n                next_max_new_edges = np.inf\n            # end of loop, next tree\n            if self.direction == 1:\n                valid = tree_pos.next()\n            else:\n                valid = tree_pos.prev()\n        if self.direction == 1:\n            self.new_left = np.array(self.near_side)\n            self.new_right = np.array(self.far_side)\n        else:\n            self.new_right = np.array(self.near_side)\n            self.new_left = np.array(self.far_side)\n        # Get rid of adjacent, identical edges\n        keep = np.full(self.edges.num_rows, True, 
dtype=bool)\n        for j in range(self.edges.num_rows - 1):\n            if (\n                self.edges.parent[j] == self.edges.parent[j + 1]\n                and self.edges.child[j] == self.edges.child[j + 1]\n                and self.new_right[j] == self.new_left[j + 1]\n            ):\n                self.new_right[j] = self.new_right[j + 1]\n                self.new_left[j + 1] = self.new_right[j + 1]\n        for j in range(self.edges.num_rows):\n            left = self.new_left[j]\n            right = self.new_right[j]\n            if left < right:\n                self.edges[j] = self.edges[j].replace(left=left, right=right)\n            else:\n                keep[j] = False\n        self.edges.keep_rows(keep)\n\n\ndef extend_haplotypes(ts, max_iter=10):\n    tables = ts.dump_tables()\n    mutations = tables.mutations.copy()\n    tables.mutations.clear()\n\n    last_num_edges = ts.num_edges\n    for _ in range(max_iter):\n        for forwards in [True, False]:\n            extender = HaplotypeExtender(ts, forwards=forwards)\n            extender.extend_haplotypes()\n            tables.edges.replace_with(extender.edges)\n            tables.sort(mutation_start=tables.mutations.num_rows)\n            tables.build_index()\n            ts = tables.tree_sequence()\n        if ts.num_edges == last_num_edges:\n            break\n        else:\n            last_num_edges = ts.num_edges\n\n    tables = ts.dump_tables()\n    mutations = _slide_mutation_nodes_up(ts, mutations)\n    tables.mutations.replace_with(mutations)\n    ts = tables.tree_sequence()\n    return ts\n\n\ndef _path_pairs(tree):\n    for c in tree.postorder():\n        p = tree.parent(c)\n        while p != tskit.NULL:\n            yield (c, p)\n            p = tree.parent(p)\n\n\ndef _path_up(c, p, tree, include_parent=False):\n    # path from c up to p in tree, not including c or p\n    c = tree.parent(c)\n    while c != p and c != tskit.NULL:\n        yield c\n        c = tree.parent(c)\n    
assert c == p\n    if include_parent:\n        yield p\n\n\ndef _path_up_pairs(c, p, tree, others):\n    # others should be a list of nodes\n    otherdict = {tree.time(n): n for n in others}\n    ot = min(otherdict)\n    for n in _path_up(c, p, tree, include_parent=True):\n        nt = tree.time(n)\n        while ot < nt:\n            on = otherdict.pop(ot)\n            yield c, on\n            c = on\n            if len(otherdict) > 0:\n                ot = min(otherdict)\n            else:\n                ot = np.inf\n        yield c, n\n        c = n\n    assert n == p\n    assert len(otherdict) == 0\n\n\ndef _path_overlaps(c, p, tree1, tree2):\n    for n in _path_up(c, p, tree1):\n        if n in tree2.nodes():\n            return True\n    return False\n\n\ndef _paths_mergeable(c, p, tree1, tree2):\n    # checks that the nodes between c and p in each tree\n    # are not present in the other tree\n    # and their sets of times are disjoint\n    nodes1 = set(tree1.nodes())\n    nodes2 = set(tree2.nodes())\n    assert c in nodes1, f\"child node {c} not in tree1\"\n    assert p in nodes1, f\"parent node {p} not in tree1\"\n    assert c in nodes2, f\"child node {c} not in tree2\"\n    assert p in nodes2, f\"parent node {p} not in tree2\"\n    path1 = set(_path_up(c, p, tree1))\n    path2 = set(_path_up(c, p, tree2))\n    times1 = {tree1.time(n) for n in path1}\n    times2 = {tree2.time(n) for n in path2}\n    return (\n        (not _path_overlaps(c, p, tree1, tree2))\n        and (not _path_overlaps(c, p, tree2, tree1))\n        and len(times1.intersection(times2)) == 0\n    )\n\n\ndef _extend_nodes(ts, interval, extendable):\n    tables = ts.dump_tables()\n    tables.edges.clear()\n    mutations = tables.mutations.copy()\n    tables.mutations.clear()\n    left, right = interval\n    # print(\"=================\")\n    # print(\"extending\", left, right)\n    extend_above = {}  # gives the new child->parent mapping\n    todo_edges = np.repeat(True, ts.num_edges)\n 
   tree = ts.at(left)\n    for c, p, others in extendable:\n        # print(\"c:\", c, \"p:\", p, \"others:\", others)\n        others_not_done_yet = set(others) - set(extend_above)\n        if len(others_not_done_yet) > 0:\n            for cn, pn in _path_up_pairs(c, p, tree, others_not_done_yet):\n                if cn not in extend_above:\n                    assert cn not in extend_above\n                    extend_above[cn] = pn\n    for c, p in extend_above.items():\n        e = tree.edge(c)\n        if e == tskit.NULL or ts.edge(e).parent != p:\n            # print(\"adding\", c, p)\n            tables.edges.add_row(child=c, parent=p, left=left, right=right)\n            if e != tskit.NULL:\n                edge = ts.edge(e)\n                # adjust endpoints on existing edge\n                for el, er in [\n                    (max(edge.left, right), edge.right),\n                    (edge.left, min(edge.right, left)),\n                ]:\n                    if el < er:\n                        # print(\"replacing\", edge, el, er)\n                        tables.edges.append(edge.replace(left=el, right=er))\n                todo_edges[e] = False\n    for todo, edge in zip(todo_edges, ts.edges()):\n        if todo:\n            # print(\"retaining\", edge)\n            tables.edges.append(edge)\n    tables.sort()\n    ts = tables.tree_sequence()\n    mutations = _slide_mutation_nodes_up(ts, mutations)\n    tables.mutations.replace_with(mutations)\n    tables.sort()\n    return tables.tree_sequence()\n\n\ndef _naive_pass(ts, direction):\n    assert direction in (-1, +1)\n    num_trees = ts.num_trees\n    if direction == +1:\n        indexes = range(0, num_trees - 1, 1)\n    else:\n        indexes = range(num_trees - 1, 0, -1)\n    for tj in indexes:\n        extendable = []\n        this_tree = ts.at_index(tj)\n        next_tree = ts.at_index(tj + direction)\n        # print(\"-----------\", this_tree.index)\n        # print(this_tree.draw_text())\n        
# print(next_tree.draw_text())\n        for c, p in _path_pairs(this_tree):\n            if (\n                p != this_tree.parent(c)\n                and p in next_tree.nodes()\n                and c in next_tree.nodes(p)\n            ):\n                # print(c, p, \" and \", list(next_tree.nodes(p)))\n                if _paths_mergeable(c, p, this_tree, next_tree):\n                    extendable.append((c, p, list(_path_up(c, p, this_tree))))\n        # print(\"extending to\", extendable)\n        ts = _extend_nodes(ts, next_tree.interval, extendable)\n        assert num_trees == ts.num_trees\n    return ts\n\n\ndef naive_extend_haplotypes(ts, max_iter=20):\n    for _ in range(max_iter):\n        ets = _naive_pass(ts, +1)\n        ets = _naive_pass(ets, -1)\n        if ets == ts:\n            break\n        ts = ets\n    return ts\n\n\nclass TestExtendThings:\n    \"\"\"\n    Common utilities in the two classes below.\n    \"\"\"\n\n    def verify_simplify_equality(self, ts, ets):\n        assert ts.num_nodes == ets.num_nodes\n        assert ts.num_samples == ets.num_samples\n        t = ts.simplify().tables\n        et = ets.simplify().tables\n        et.assert_equals(t, ignore_provenance=True)\n        assert np.all(ts.genotype_matrix() == ets.genotype_matrix())\n\n    def naive_verify(self, ts):\n        ets = naive_extend_haplotypes(ts)\n        self.verify_simplify_equality(ts, ets)\n\n\nclass TestExtendHaplotypes(TestExtendThings):\n    \"\"\"\n    Test the 'extend_haplotypes' method.\n    \"\"\"\n\n    def get_example1(self):\n        # 15.00|         |   13    |         |\n        #      |         |    |    |         |\n        # 12.00|   10    |   10    |    10   |\n        #      |  +-+-+  |  +-+-+  |   +-+-+ |\n        # 10.00|  8   |  |  |   |  |   8   | |\n        #      |  |   |  |  |   |  |  ++-+ | |\n        # 8.00 |  |   |  | 11  12  |  |  | | |\n        #      |  |   |  |  |   |  |  |  | | |\n        # 6.00 |  |   |  |  7   |  |  |  | | 
|\n        #      |  |   |  |  |   |  |  |  | | |\n        # 4.00 |  6   9  |  |   |  |  |  | | |\n        #      |  |   |  |  |   |  |  |  | | |\n        # 1.00 |  4   5  |  4   5  |  4  | 5 |\n        #      | +++ +++ | +++ +++ | +++ | | |\n        # 0.00 | 0 1 2 3 | 0 1 2 3 | 0 1 2 3 |\n        #      0         3         6         9\n        node_times = {\n            0: 0,\n            1: 0,\n            2: 0,\n            3: 0,\n            4: 1,\n            5: 1,\n            6: 4,\n            7: 6,\n            8: 10,\n            9: 4,\n            10: 12,\n            11: 8,\n            12: 8,\n            13: 15,\n        }\n        # (p,c,l,r)\n        edges = [\n            (4, 0, 0, 9),\n            (4, 1, 0, 9),\n            (5, 2, 0, 6),\n            (5, 3, 0, 9),\n            (6, 4, 0, 3),\n            (9, 5, 0, 3),\n            (7, 4, 3, 6),\n            (11, 7, 3, 6),\n            (12, 5, 3, 6),\n            (8, 2, 6, 9),\n            (8, 4, 6, 9),\n            (8, 6, 0, 3),\n            (10, 5, 6, 9),\n            (10, 8, 0, 3),\n            (10, 8, 6, 9),\n            (10, 9, 0, 3),\n            (10, 11, 3, 6),\n            (10, 12, 3, 6),\n            (13, 10, 3, 6),\n        ]\n        extended_edges = [\n            (4, 0, 0.0, 9.0),\n            (4, 1, 0.0, 9.0),\n            (5, 2, 0.0, 6.0),\n            (5, 3, 0.0, 9.0),\n            (6, 4, 0.0, 9.0),\n            (9, 5, 0.0, 9.0),\n            (7, 6, 0.0, 9.0),\n            (11, 7, 0.0, 9.0),\n            (12, 9, 0.0, 9.0),\n            (8, 2, 6.0, 9.0),\n            (8, 11, 0.0, 9.0),\n            (10, 8, 0.0, 9.0),\n            (10, 12, 0.0, 9.0),\n            (13, 10, 3.0, 6.0),\n        ]\n        samples = list(np.arange(4))\n        tables = tskit.TableCollection(sequence_length=9)\n        for (\n            n,\n            t,\n        ) in node_times.items():\n            flags = tskit.NODE_IS_SAMPLE if n in samples else 0\n            tables.nodes.add_row(time=t, 
flags=flags)\n        for p, c, l, r in edges:\n            tables.edges.add_row(parent=p, child=c, left=l, right=r)\n        ts = tables.tree_sequence()\n        tables.edges.clear()\n        for p, c, l, r in extended_edges:\n            tables.edges.add_row(parent=p, child=c, left=l, right=r)\n        ets = tables.tree_sequence()\n        assert ts.num_edges == 19\n        assert ets.num_edges == 14\n        return ts, ets\n\n    def get_example2(self):\n        # 12.00|                     |          21         |                     |\n        #      |                     |      +----+-----+   |                     |\n        # 11.00|            20       |      |          |   |            20       |\n        #      |        +----+---+   |      |          |   |        +----+---+   |\n        # 10.00|        |       19   |      |         19   |        |       19   |\n        #      |        |       ++-+ |      |        +-+-+ |        |       ++-+ |\n        # 9.00 |       18       |  | |     18        |   | |       18       |  | |\n        #      |     +--+--+    |  | |   +--+--+     |   | |     +--+--+    |  | |\n        # 8.00 |     |     |    |  | |   |     |     |   | |    17     |    |  | |\n        #      |     |     |    |  | |   |     |     |   | |   +-+-+   |    |  | |\n        # 7.00 |     |     |   16  | |   |     |    16   | |   |   |   |    |  | |\n        #      |     |     |   +++ | |   |     |   +-++  | |   |   |   |    |  | |\n        # 6.00 |    15     |   | | | |   |     |   |  |  | |   |   |   |    |  | |\n        #      |   +-+-+   |   | | | |   |     |   |  |  | |   |   |   |    |  | |\n        # 5.00 |   |   |  14   | | | |   |    14   |  |  | |   |   |  14    |  | |\n        #      |   |   |  ++-+ | | | |   |    ++-+ |  |  | |   |   |  ++-+  |  | |\n        # 4.00 |  13   |  |  | | | | |  13    |  | |  |  | |  13   |  |  |  |  | |\n        #      |  ++-+ |  |  | | | | |  ++-+  |  | |  |  | |  ++-+ |  |  |  |  | |\n        # 3.00 |  |  | |  
|  | | | | |  |  |  |  | | 12  | |  |  | |  |  | 12  | |\n        #      |  |  | |  |  | | | | |  |  |  |  | | +++ | |  |  | |  |  | +++ | |\n        # 2.00 | 11  | |  |  | | | | | 11  |  |  | | | | | | 11  | |  |  | | | | |\n        #      | +++ | |  |  | | | | | +++ |  |  | | | | | | +++ | |  |  | | | | |\n        # 1.00 | | | | | 10  | | | | | | | | 10  | | | | | | | | | | 10  | | | | |\n        #      | | | | | +++ | | | | | | | | +++ | | | | | | | | | | +++ | | | | |\n        # 0.00 | 0 7 4 9 2 5 6 1 3 8 | 0 7 4 2 5 6 1 3 9 8 | 0 7 4 1 2 5 6 3 9 8 |\n        #      0                     3                     6                     9\n        node_times = {\n            0: 0,\n            1: 0,\n            2: 0,\n            3: 0,\n            4: 0,\n            5: 0,\n            6: 0,\n            7: 0,\n            8: 0,\n            9: 0,\n            10: 1,\n            11: 2,\n            12: 3,\n            13: 4,\n            14: 5,\n            15: 6,\n            16: 7,\n            17: 8,\n            18: 9,\n            19: 10,\n            20: 11,\n            21: 12,\n        }\n        # (p,c,l,r)\n        edges = [\n            (10, 2, 0, 9),\n            (10, 5, 0, 9),\n            (11, 0, 0, 9),\n            (11, 7, 0, 9),\n            (12, 3, 3, 9),\n            (12, 9, 3, 9),\n            (13, 4, 0, 9),\n            (13, 11, 0, 9),\n            (14, 6, 0, 9),\n            (14, 10, 0, 9),\n            (15, 9, 0, 3),\n            (15, 13, 0, 3),\n            (16, 1, 0, 6),\n            (16, 3, 0, 3),\n            (16, 12, 3, 6),\n            (17, 1, 6, 9),\n            (17, 13, 6, 9),\n            (18, 13, 3, 6),\n            (18, 14, 0, 9),\n            (18, 15, 0, 3),\n            (18, 17, 6, 9),\n            (19, 8, 0, 9),\n            (19, 12, 6, 9),\n            (19, 16, 0, 6),\n            (20, 18, 0, 3),\n            (20, 18, 6, 9),\n            (20, 19, 0, 3),\n            (20, 19, 6, 9),\n            (21, 18, 3, 6),\n            (21, 
19, 3, 6),\n        ]\n        extended_edges = [\n            (10, 2, 0.0, 9.0),\n            (10, 5, 0.0, 9.0),\n            (11, 0, 0.0, 9.0),\n            (11, 7, 0.0, 9.0),\n            (12, 3, 0.0, 9.0),\n            (12, 9, 3.0, 9.0),\n            (13, 4, 0.0, 9.0),\n            (13, 11, 0.0, 9.0),\n            (14, 6, 0.0, 9.0),\n            (14, 10, 0.0, 9.0),\n            (15, 9, 0.0, 3.0),\n            (15, 13, 0.0, 9.0),\n            (16, 1, 0.0, 6.0),\n            (16, 12, 0.0, 9.0),\n            (17, 1, 6.0, 9.0),\n            (17, 15, 0.0, 9.0),\n            (18, 14, 0.0, 9.0),\n            (18, 17, 0.0, 9.0),\n            (19, 8, 0.0, 9.0),\n            (19, 16, 0.0, 9.0),\n            (20, 18, 0.0, 3.0),\n            (20, 18, 6.0, 9.0),\n            (20, 19, 0.0, 3.0),\n            (20, 19, 6.0, 9.0),\n            (21, 18, 3.0, 6.0),\n            (21, 19, 3.0, 6.0),\n        ]\n        samples = list(np.arange(10))\n        tables = tskit.TableCollection(sequence_length=9)\n        for (\n            n,\n            t,\n        ) in node_times.items():\n            flags = tskit.NODE_IS_SAMPLE if n in samples else 0\n            tables.nodes.add_row(time=t, flags=flags)\n        for p, c, l, r in edges:\n            tables.edges.add_row(parent=p, child=c, left=l, right=r)\n        ts = tables.tree_sequence()\n        tables.edges.clear()\n        for p, c, l, r in extended_edges:\n            tables.edges.add_row(parent=p, child=c, left=l, right=r)\n        ets = tables.tree_sequence()\n        assert ts.num_edges == 30\n        assert ets.num_edges == 26\n        return ts, ets\n\n    def get_example3(self):\n        # Here is the full tree; extend edges should be able to\n        # recover all unary nodes after simplification:\n        #\n        #       9         9         9          9\n        #     +-+-+    +--+--+  +---+---+  +-+-+--+\n        #     8   |    8     |  8   |   |  8 | |  |\n        #     |   |  +-+-+   |  |   |   |  | | |  |\n   
     #     7   |  |   7   |  |   7   |  | | |  7\n        #   +-+-+ |  | +-++  |  | +-++  |  | | |  |\n        #   6   | |  | |  6  |  | |  6  |  | | |  6\n        # +-++  | |  | |  |  |  | |  |  |  | | |  |\n        # 1  0  2 3  1 2  0  3  1 2  0  3  1 2 3  0\n        #   +++          +++        +++          +++\n        #   4 5          4 5        4 5          4 5\n        #\n        samples = [0, 1, 2, 3, 4, 5]\n        node_times = [1, 1, 1, 1, 0, 0, 2, 3, 4, 5]\n        # (p, c, l, r)\n        edges = [\n            (0, 4, 0, 10),\n            (0, 5, 0, 10),\n            (6, 0, 0, 10),\n            (6, 1, 0, 3),\n            (7, 2, 0, 7),\n            (7, 6, 0, 10),\n            (8, 1, 3, 10),\n            (8, 7, 0, 5),\n            (9, 2, 7, 10),\n            (9, 3, 0, 10),\n            (9, 7, 5, 10),\n            (9, 8, 0, 10),\n        ]\n        tables = tskit.TableCollection(sequence_length=10)\n        for n, t in enumerate(node_times):\n            flags = tskit.NODE_IS_SAMPLE if n in samples else 0\n            tables.nodes.add_row(time=t, flags=flags)\n        for p, c, l, r in edges:\n            tables.edges.add_row(parent=p, child=c, left=l, right=r)\n        ets = tables.tree_sequence()\n        ts = ets.simplify()\n        assert ts.num_edges == 16\n        assert ets.num_edges == 12\n        return ts, ets\n\n    def get_example4(self):\n        # 7 and 8 should be extended to the whole sequence;\n        # and also 5 to the second tree\n        #\n        #    6          6      6         6\n        #  +-+-+      +-+-+  +-+-+     +-+-+\n        #  |   |      7   |  |   8     |   |\n        #  |   |     ++-+ |  | +-++    |   |\n        #  4   5     4  | |  4 |  5    4   5\n        # +++ +++   +++ | |  | | +++  +++ +++\n        # 0 1 2 3   0 1 2 3  0 1 2 3  0 1 2 3\n        node_times = (0, 0, 0, 0, 1, 1, 3, 2, 2)\n        samples = (0, 1, 2, 3)\n        # (p, c, l, r)\n        extended_edges = [\n            (4, 0, 0, 10),\n            (4, 1, 0, 
5),\n            (4, 1, 7, 10),\n            (5, 2, 0, 2),\n            (5, 2, 5, 10),\n            (5, 3, 0, 10),\n            (7, 2, 2, 5),\n            (7, 4, 0, 10),\n            (8, 1, 5, 7),\n            (8, 5, 0, 10),\n            (6, 7, 0, 10),\n            (6, 8, 0, 10),\n        ]\n        edges = [\n            (4, 0, 0, 10),\n            (4, 1, 0, 5),\n            (4, 1, 7, 10),\n            (5, 2, 0, 2),\n            (5, 2, 5, 10),\n            (5, 3, 0, 2),\n            (5, 3, 5, 10),\n            (7, 2, 2, 5),\n            (7, 4, 2, 5),\n            (8, 1, 5, 7),\n            (8, 5, 5, 7),\n            (6, 3, 2, 5),\n            (6, 4, 0, 2),\n            (6, 4, 5, 10),\n            (6, 5, 0, 2),\n            (6, 5, 7, 10),\n            (6, 7, 2, 5),\n            (6, 8, 5, 7),\n        ]\n        tables = tskit.TableCollection(sequence_length=10)\n        tables.sort()\n        for n, t in enumerate(node_times):\n            flags = tskit.NODE_IS_SAMPLE if n in samples else 0\n            tables.nodes.add_row(time=t, flags=flags)\n        for p, c, l, r in edges:\n            tables.edges.add_row(parent=p, child=c, left=l, right=r)\n        ts = tables.tree_sequence()\n        tables.edges.clear()\n        for p, c, l, r in extended_edges:\n            tables.edges.add_row(parent=p, child=c, left=l, right=r)\n        ets = tables.tree_sequence()\n        assert ts.num_edges == 18\n        assert ets.num_edges == 12\n        return ts, ets\n\n    def get_example5(self):\n        # This is an example where new edges are added\n        # on both forwards and back passes\n        # 4.00┊   ┊ 4 ┊ 4 ┊\n        #     ┊   ┊ ┃ ┊ ┃ ┊\n        # 3.00┊ 2 ┊ ┃ ┊ 2 ┊\n        #     ┊ ┃ ┊ ┃ ┊ ┃ ┊\n        # 2.00┊ ┃ ┊ 3 ┊ ┃ ┊\n        #     ┊ ┃ ┊ ┃ ┊ ┃ ┊\n        # 1.00┊ 1 ┊ ┃ ┊ ┃ ┊\n        #     ┊ ┃ ┊ ┃ ┊ ┃ ┊\n        # 0.00┊ 0 ┊ 0 ┊ 0 ┊\n        #     0   2   4   6\n        node_times = (0, 1, 3, 2, 4)\n        samples = (0,)\n        # (p, c, l, r)\n        edges 
= [\n            (1, 0, 0, 2),\n            (2, 1, 0, 2),\n            (3, 0, 2, 4),\n            (4, 3, 2, 4),\n            (4, 2, 4, 6),\n            (2, 0, 4, 6),\n        ]\n        extended_edges = [\n            (1, 0, 0, 6),\n            (3, 1, 0, 6),\n            (2, 3, 0, 6),\n            (4, 2, 2, 6),\n        ]\n        site_positions = (3,)\n        # site, node, derived_state, time\n        mutations = [\n            (0, 4, 5, 4.5),\n            (0, 3, 4, 3.5),\n            (0, 3, 3, 2.5),\n            (0, 0, 2, 1.5),\n            (0, 0, 1, 0.5),\n        ]\n        extended_mutations_node = [4, 2, 3, 1, 0]\n        tables = tskit.TableCollection(sequence_length=6)\n        for n, t in enumerate(node_times):\n            flags = tskit.NODE_IS_SAMPLE if n in samples else 0\n            tables.nodes.add_row(time=t, flags=flags)\n        for p, c, l, r in edges:\n            tables.edges.add_row(parent=p, child=c, left=l, right=r)\n        for x in site_positions:\n            tables.sites.add_row(ancestral_state=\"0\", position=x)\n        for s, n, d, t in mutations:\n            tables.mutations.add_row(site=s, node=n, derived_state=str(d), time=t)\n        tables.sort()\n        tables.build_index()\n        tables.compute_mutation_parents()\n        ts = tables.tree_sequence()\n        tables.edges.clear()\n        for p, c, l, r in extended_edges:\n            tables.edges.add_row(parent=p, child=c, left=l, right=r)\n        tables.sort()\n        tables.mutations.clear()\n        for (s, _, d, t), n in zip(mutations, extended_mutations_node):\n            tables.mutations.add_row(site=s, node=n, derived_state=str(d), time=t)\n        tables.build_index()\n        tables.compute_mutation_parents()\n        ets = tables.tree_sequence()\n        return ts, ets\n\n    def get_example(self, j):\n        if j == 1:\n            ts, ets = self.get_example1()\n        elif j == 2:\n            ts, ets = self.get_example2()\n        elif j == 3:\n           
 ts, ets = self.get_example3()\n        elif j == 4:\n            ts, ets = self.get_example4()\n        elif j == 5:\n            ts, ets = self.get_example5()\n        else:\n            raise ValueError\n        return ts, ets\n\n    def verify_extend_haplotypes(self, ts, max_iter=10):\n        ets = ts.extend_haplotypes(max_iter=max_iter)\n        py_ets = extend_haplotypes(ts, max_iter=max_iter)\n        ets.tables.assert_equals(py_ets.tables, ignore_provenance=True)\n        self.verify_simplify_equality(ts, ets)\n\n    def test_runs(self):\n        ts = msprime.simulate(5, mutation_rate=1.0, random_seed=126)\n        self.verify_extend_haplotypes(ts)\n        self.naive_verify(ts)\n\n    @pytest.mark.parametrize(\"j\", [1, 2, 3, 4, 5])\n    def test_example(self, j):\n        ts, correct_ets = self.get_example(j)\n        test_ets = ts.extend_haplotypes()\n        test_ets.tables.assert_equals(correct_ets.tables, ignore_provenance=True)\n        self.verify_extend_haplotypes(ts)\n        self.naive_verify(ts)\n\n    @pytest.mark.parametrize(\"j\", [1, 2, 3, 4, 5])\n    def test_redundant_breakpoitns(self, j):\n        ts, correct_ets = self.get_example(j)\n        ts = tsutil.insert_redundant_breakpoints(ts)\n        test_ets = ts.extend_haplotypes()\n        test_ets.tables.assert_equals(correct_ets.tables, ignore_provenance=True)\n        self.verify_extend_haplotypes(ts)\n        self.naive_verify(ts)\n\n    def test_migrations_disallowed(self):\n        ts = msprime.simulate(5, mutation_rate=1.0, random_seed=126)\n        tables = ts.dump_tables()\n        tables.populations.add_row()\n        tables.populations.add_row()\n        tables.migrations.add_row(0, 1, 0, 0, 1, 0)\n        ts = tables.tree_sequence()\n        with pytest.raises(\n            _tskit.LibraryError, match=\"TSK_ERR_MIGRATIONS_NOT_SUPPORTED\"\n        ):\n            _ = ts.extend_haplotypes()\n\n    def test_unknown_times(self):\n        ts = msprime.simulate(5, mutation_rate=1.0, 
random_seed=126)\n        tables = ts.dump_tables()\n        tables.mutations.clear()\n        for mut in ts.mutations():\n            tables.mutations.append(mut.replace(time=tskit.UNKNOWN_TIME))\n        ts = tables.tree_sequence()\n        with pytest.raises(\n            _tskit.LibraryError, match=\"TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME\"\n        ):\n            _ = ts.extend_haplotypes()\n\n    def test_max_iter(self):\n        ts = msprime.simulate(5, random_seed=126)\n        with pytest.raises(_tskit.LibraryError, match=\"positive\"):\n            ets = ts.extend_haplotypes(max_iter=0)\n        with pytest.raises(_tskit.LibraryError, match=\"positive\"):\n            ets = ts.extend_haplotypes(max_iter=-1)\n        ets = ts.extend_haplotypes(max_iter=1)\n        et = ets.extend_haplotypes(max_iter=1).dump_tables()\n        eet = ets.extend_haplotypes(max_iter=2).dump_tables()\n        eet.assert_equals(et)\n\n    def test_very_simple(self):\n        samples = [0]\n        node_times = [0, 1, 2, 3]\n        # (p, c, l, r)\n        edges = [\n            (1, 0, 0, 1),\n            (2, 0, 1, 2),\n            (2, 1, 0, 1),\n            (3, 0, 2, 3),\n            (3, 2, 0, 2),\n        ]\n        correct_edges = [\n            (1, 0, 0, 3),\n            (2, 1, 0, 3),\n            (3, 2, 0, 3),\n        ]\n        tables = tskit.TableCollection(sequence_length=3)\n        for n, t in enumerate(node_times):\n            flags = tskit.NODE_IS_SAMPLE if n in samples else 0\n            tables.nodes.add_row(time=t, flags=flags)\n        for p, c, l, r in edges:\n            tables.edges.add_row(parent=p, child=c, left=l, right=r)\n        ts = tables.tree_sequence()\n        ets = extend_haplotypes(ts)\n        etables = ets.tables\n        correct_tables = etables.copy()\n        correct_tables.edges.clear()\n        for p, c, l, r in correct_edges:\n            correct_tables.edges.add_row(parent=p, child=c, left=l, right=r)\n        
etables.assert_equals(correct_tables, ignore_provenance=True)\n        self.naive_verify(ts)\n\n    def test_internal_samples(self):\n        # Now we should have the same but not extend 5 (where * is),\n        # since 5 is a sample; nor 8 because its extension depends on 5\n        #\n        #    6         6      6         6\n        #  +-+-+     +-+-+  +-+-+     +-+-+\n        #  7   *     7   *  7   8     7   8\n        #  |   |    ++-+ |  | +-++    |   |\n        #  4   5    4  | *  4 |  5    4   5\n        # +++ +++  +++ | |  | | +++  +++ +++\n        # 0 1 2 3  0 1 2 3  0 1 2 3  0 1 2 3\n        #\n        node_times = {\n            0: 0,\n            1: 0,\n            2: 0,\n            3: 0,\n            4: 1.0,\n            5: 1.0,\n            6: 3.0,\n            7: 2.0,\n            8: 2.0,\n        }\n        # (p, c, l, r)\n        edges = [\n            (4, 0, 0, 10),\n            (4, 1, 0, 5),\n            (4, 1, 7, 10),\n            (5, 2, 0, 2),\n            (5, 2, 5, 10),\n            (5, 3, 0, 2),\n            (5, 3, 5, 10),\n            (7, 2, 2, 5),\n            (7, 4, 0, 10),\n            (8, 1, 5, 7),\n            (8, 5, 5, 10),\n            (6, 3, 2, 5),\n            (6, 5, 0, 2),\n            (6, 7, 0, 10),\n            (6, 8, 5, 10),\n        ]\n        tables = tskit.TableCollection(sequence_length=10)\n        samples = [0, 1, 2, 3, 5]\n        for n, t in node_times.items():\n            flags = tskit.NODE_IS_SAMPLE if n in samples else 0\n            tables.nodes.add_row(time=t, flags=flags)\n        for p, c, l, r in edges:\n            tables.edges.add_row(parent=p, child=c, left=l, right=r)\n        ts = tables.tree_sequence()\n        ets = extend_haplotypes(ts)\n        # nothing should have happened\n        ets.tables.assert_equals(tables)\n        self.verify_extend_haplotypes(ts)\n        self.naive_verify(ts)\n\n    @pytest.mark.parametrize(\"seed\", [3, 4, 5, 6])\n    def test_wf(self, seed):\n        tables = 
wf.wf_sim(N=6, ngens=9, num_loci=100, deep_history=False, seed=seed)\n        tables.sort()\n        ts = tables.tree_sequence().simplify()\n        self.verify_extend_haplotypes(ts)\n        self.naive_verify(ts)\n\n\nclass TestExamples(TestExtendThings):\n    \"\"\"\n    Compare the ts method with local implementation.\n    \"\"\"\n\n    def check(self, ts):\n        if np.any(tskit.is_unknown_time(ts.mutations_time)):\n            tables = ts.dump_tables()\n            tables.compute_mutation_times()\n            ts = tables.tree_sequence()\n        py_ets = extend_haplotypes(ts)\n        self.verify_simplify_equality(ts, py_ets)\n        lib_ts = ts.extend_haplotypes()\n        lib_ts.tables.assert_equals(py_ets.tables)\n        assert np.all(ts.genotype_matrix() == lib_ts.genotype_matrix())\n        sts = ts.simplify()\n        lib_sts = lib_ts.simplify()\n        lib_sts.tables.assert_equals(sts.tables, ignore_provenance=True)\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_suite_examples_defaults(self, ts):\n        if ts.num_migrations == 0:\n            self.check(ts)\n        else:\n            pass\n            with pytest.raises(\n                _tskit.LibraryError, match=\"TSK_ERR_MIGRATIONS_NOT_SUPPORTED\"\n            ):\n                _ = ts.extend_haplotypes()\n\n    @pytest.mark.parametrize(\"n\", [3, 4, 5])\n    def test_all_trees_ts(self, n):\n        ts = tsutil.all_trees_ts(n)\n        self.check(ts)\n"
  },
  {
    "path": "python/tests/test_file_format.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2025 Tskit Developers\n# Copyright (c) 2016-2018 University of Oxford\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTest cases for tskit's file format.\n\"\"\"\n\nimport os\nimport tempfile\nimport unittest\nimport uuid as _uuid\n\nimport kastore\nimport msprime\nimport numpy as np\nimport pytest\nimport tszip as tszip\n\nimport tests.tsutil as tsutil\nimport tskit\nimport tskit.exceptions as exceptions\n\nCURRENT_FILE_MAJOR = 12\nCURRENT_FILE_MINOR = 7\n\ntest_data_dir = os.path.join(os.path.dirname(__file__), \"data\")\n\n\ndef single_locus_no_mutation_example():\n    return msprime.simulate(10, random_seed=10)\n\n\ndef single_locus_with_mutation_example():\n    return msprime.simulate(10, mutation_rate=10, random_seed=11)\n\n\ndef multi_locus_with_mutation_example():\n    return msprime.simulate(\n        10, recombination_rate=1, length=10, mutation_rate=10, random_seed=2\n    
)\n\n\ndef recurrent_mutation_example():\n    ts = msprime.simulate(10, recombination_rate=1, length=10, random_seed=2)\n    return tsutil.insert_branch_mutations(ts)\n\n\ndef general_mutation_example():\n    ts = msprime.simulate(10, recombination_rate=1, length=10, random_seed=2)\n    tables = ts.dump_tables()\n    tables.sites.add_row(position=0, ancestral_state=\"A\", metadata=b\"{}\")\n    tables.sites.add_row(position=1, ancestral_state=\"C\", metadata=b\"{'id':1}\")\n    tables.mutations.add_row(site=0, node=0, derived_state=\"T\")\n    tables.mutations.add_row(site=1, node=0, derived_state=\"G\")\n    return tables.tree_sequence()\n\n\ndef multichar_mutation_example():\n    ts = msprime.simulate(10, recombination_rate=1, length=10, random_seed=2)\n    return tsutil.insert_multichar_mutations(ts)\n\n\ndef migration_example():\n    n = 10\n    t = 1\n    population_configurations = [\n        msprime.PopulationConfiguration(n // 2),\n        msprime.PopulationConfiguration(n // 2),\n        msprime.PopulationConfiguration(0),\n    ]\n    demographic_events = [\n        msprime.MassMigration(time=t, source=0, destination=2),\n        msprime.MassMigration(time=t, source=1, destination=2),\n    ]\n    ts = msprime.simulate(\n        population_configurations=population_configurations,\n        demographic_events=demographic_events,\n        random_seed=1,\n        mutation_rate=1,\n        record_migrations=True,\n    )\n    tables = ts.dump_tables()\n    for j in range(n):\n        tables.individuals.add_row(flags=j, location=(j, j), parents=(j - 1, j - 1))\n    return tables.tree_sequence()\n\n\ndef bottleneck_example():\n    return msprime.simulate(\n        random_seed=1,\n        sample_size=100,\n        recombination_rate=0.1,\n        length=10,\n        demographic_events=[\n            msprime.SimpleBottleneck(time=0.01, population=0, proportion=0.75)\n        ],\n    )\n\n\ndef historical_sample_example():\n    return msprime.simulate(\n        
recombination_rate=0.1,\n        length=10,\n        random_seed=1,\n        samples=[(0, j) for j in range(10)],\n    )\n\n\ndef no_provenance_example():\n    ts = msprime.simulate(10, random_seed=1)\n    tables = ts.dump_tables()\n    tables.provenances.clear()\n    return tables.tree_sequence()\n\n\ndef provenance_timestamp_only_example():\n    ts = msprime.simulate(10, random_seed=1)\n    tables = ts.dump_tables()\n    provenances = tskit.ProvenanceTable()\n    provenances.add_row(timestamp=\"12345\", record=\"\")\n    return tables.tree_sequence()\n\n\ndef node_metadata_example():\n    ts = msprime.simulate(\n        sample_size=100, recombination_rate=0.1, length=10, random_seed=1\n    )\n    tables = ts.dump_tables()\n    metadatas = [f\"n_{u}\" for u in range(ts.num_nodes)]\n    packed, offset = tskit.pack_strings(metadatas)\n    tables.nodes.set_columns(\n        metadata=packed,\n        metadata_offset=offset,\n        flags=tables.nodes.flags,\n        time=tables.nodes.time,\n    )\n    return tables.tree_sequence()\n\n\ndef site_metadata_example():\n    ts = msprime.simulate(10, length=10, random_seed=2)\n    tables = ts.dump_tables()\n    for j in range(10):\n        tables.sites.add_row(j, ancestral_state=\"a\", metadata=b\"1234\")\n    return tables.tree_sequence()\n\n\ndef mutation_metadata_example():\n    ts = msprime.simulate(10, length=10, random_seed=2)\n    tables = ts.dump_tables()\n    tables.sites.add_row(0, ancestral_state=\"a\")\n    for j in range(10):\n        tables.mutations.add_row(site=0, node=j, derived_state=\"t\", metadata=b\"1234\")\n    return tables.tree_sequence()\n\n\ndef migration_metadata_example():\n    ts = migration_example()\n    tables = ts.dump_tables()\n    metadatas = [f\"n_{u}\" for u in range(ts.num_migrations)]\n    packed, offset = tskit.pack_strings(metadatas)\n    tables.migrations.set_columns(\n        metadata=packed,\n        metadata_offset=offset,\n        left=tables.migrations.left,\n        
right=tables.migrations.right,\n        source=tables.migrations.source,\n        dest=tables.migrations.dest,\n        node=tables.migrations.node,\n        time=tables.migrations.time,\n    )\n    return tables.tree_sequence()\n\n\ndef edge_metadata_example():\n    ts = msprime.simulate(\n        sample_size=100, recombination_rate=0.1, length=10, random_seed=1\n    )\n    tables = ts.dump_tables()\n    metadatas = [f\"edge_{u}\" for u in range(ts.num_edges)]\n    packed, offset = tskit.pack_strings(metadatas)\n    tables.edges.set_columns(\n        metadata=packed,\n        metadata_offset=offset,\n        left=tables.edges.left,\n        right=tables.edges.right,\n        child=tables.edges.child,\n        parent=tables.edges.parent,\n    )\n    return tables.tree_sequence()\n\n\nclass TestFileFormat(unittest.TestCase):\n    \"\"\"\n    Superclass of file format tests.\n    \"\"\"\n\n    def setUp(self):\n        fd, self.temp_file = tempfile.mkstemp(prefix=\"msp_file_test_\")\n        os.close(fd)\n\n    def tearDown(self):\n        os.unlink(self.temp_file)\n\n\nclass TestLoadLegacyExamples(TestFileFormat):\n    \"\"\"\n    Tests using the saved legacy file examples to ensure we can load them.\n    \"\"\"\n\n    def verify_tree_sequence(self, ts):\n        # Just some quick checks to make sure the tree sequence makes sense.\n        assert ts.sample_size > 0\n        assert ts.num_edges > 0\n        assert ts.num_sites > 0\n        assert ts.num_mutations > 0\n        assert ts.sequence_length > 0\n        for t in ts.trees():\n            left, right = t.interval\n            assert right > left\n            for site in t.sites():\n                assert left <= site.position < right\n                for mut in site.mutations:\n                    assert mut.site == site.id\n\n    def verify_0_3_3(self, ts):\n        for table in tskit.TABLE_NAMES:\n            t = getattr(ts.tables, table)\n            assert t.num_rows > 0\n            if hasattr(t, 
\"metadata_schema\"):\n                assert t.metadata_schema == tskit.MetadataSchema({\"codec\": \"json\"})\n                assert t[2].metadata == f\"n_{table}_2\"\n        assert ts.tables.has_index()\n\n    def test_format_too_old_raised_for_hdf5(self):\n        files = [\n            \"msprime-0.3.0_v2.0.hdf5\",\n            \"msprime-0.4.0_v3.1.hdf5\",\n            \"msprime-0.5.0_v10.0.hdf5\",\n        ]\n        for filename in files:\n            path = os.path.join(test_data_dir, \"hdf5-formats\", filename)\n\n            with pytest.raises(\n                exceptions.FileFormatError,\n                match=\"appears to be in HDF5 format\",\n            ):\n                tskit.load(path)\n            with pytest.raises(\n                exceptions.FileFormatError,\n                match=\"appears to be in HDF5 format\",\n            ):\n                tskit.TableCollection.load(path)\n\n    def test_tskit_v_0_3_3(self):\n        path = os.path.join(test_data_dir, \"old-formats\", \"tskit-0.3.3.trees\")\n        ts = tskit.load(path)\n        self.verify_tree_sequence(ts)\n\n\nclass TestErrors(TestFileFormat):\n    \"\"\"\n    Test various API errors.\n    \"\"\"\n\n    def test_tszip_file(self):\n        ts = msprime.simulate(5)\n        tszip.compress(ts, self.temp_file)\n        with pytest.raises(tskit.FileFormatError, match=\"appears to be in zip format\"):\n            tskit.load(self.temp_file)\n        with pytest.raises(tskit.FileFormatError, match=\"appears to be in zip format\"):\n            tskit.TableCollection.load(self.temp_file)\n\n\nclass TestDumpFormat(TestFileFormat):\n    \"\"\"\n    Tests on the on-disk file format.\n    \"\"\"\n\n    def verify_keys(self, ts):\n        keys = [\n            \"edges/child\",\n            \"edges/left\",\n            \"edges/metadata\",\n            \"edges/metadata_offset\",\n            \"edges/metadata_schema\",\n            \"edges/parent\",\n            \"edges/right\",\n            
\"format/name\",\n            \"format/version\",\n            \"indexes/edge_insertion_order\",\n            \"indexes/edge_removal_order\",\n            \"individuals/flags\",\n            \"individuals/location\",\n            \"individuals/location_offset\",\n            \"individuals/metadata\",\n            \"individuals/metadata_offset\",\n            \"individuals/metadata_schema\",\n            \"individuals/parents\",\n            \"individuals/parents_offset\",\n            \"metadata\",\n            \"metadata_schema\",\n            \"migrations/dest\",\n            \"migrations/left\",\n            \"migrations/metadata\",\n            \"migrations/metadata_offset\",\n            \"migrations/metadata_schema\",\n            \"migrations/node\",\n            \"migrations/right\",\n            \"migrations/source\",\n            \"migrations/time\",\n            \"mutations/derived_state\",\n            \"mutations/derived_state_offset\",\n            \"mutations/metadata\",\n            \"mutations/metadata_offset\",\n            \"mutations/metadata_schema\",\n            \"mutations/node\",\n            \"mutations/parent\",\n            \"mutations/site\",\n            \"mutations/time\",\n            \"nodes/flags\",\n            \"nodes/individual\",\n            \"nodes/metadata\",\n            \"nodes/metadata_offset\",\n            \"nodes/metadata_schema\",\n            \"nodes/population\",\n            \"nodes/time\",\n            \"populations/metadata\",\n            \"populations/metadata_offset\",\n            \"populations/metadata_schema\",\n            \"provenances/record\",\n            \"provenances/record_offset\",\n            \"provenances/timestamp\",\n            \"provenances/timestamp_offset\",\n            \"sequence_length\",\n            \"sites/ancestral_state\",\n            \"sites/ancestral_state_offset\",\n            \"sites/metadata\",\n            \"sites/metadata_offset\",\n            \"sites/metadata_schema\",\n 
           \"sites/position\",\n            \"time_units\",\n            \"uuid\",\n        ]\n        ts.dump(self.temp_file)\n        store = kastore.load(self.temp_file)\n        assert sorted(list(store.keys())) == keys\n\n    def verify_uuid(self, ts, uuid):\n        assert len(uuid) == 36\n        # Check that the UUID is well-formed.\n        parsed = _uuid.UUID(\"{\" + uuid + \"}\")\n        assert str(parsed) == uuid\n        assert uuid == ts.file_uuid\n\n    def verify_dump_format(self, ts):\n        ts.dump(self.temp_file)\n        assert os.path.exists(self.temp_file)\n        assert os.path.getsize(self.temp_file) > 0\n        self.verify_keys(ts)\n\n        store = kastore.load(self.temp_file)\n        # Check the basic root attributes\n        format_name = store[\"format/name\"]\n        assert np.array_equal(\n            np.array(bytearray(b\"tskit.trees\"), dtype=np.int8), format_name\n        )\n        format_version = store[\"format/version\"]\n        assert format_version[0] == CURRENT_FILE_MAJOR\n        assert format_version[1] == CURRENT_FILE_MINOR\n        assert ts.sequence_length == store[\"sequence_length\"][0]\n        assert repr(ts.metadata_schema) == \"\".join(store[\"metadata_schema\"].astype(\"U\"))\n\n        # Load another copy from file so we can check the uuid.\n        other_ts = tskit.load(self.temp_file)\n        self.verify_uuid(other_ts, store[\"uuid\"].tobytes().decode())\n\n        tables = ts.tables\n\n        assert np.array_equal(tables.metadata, b\"\".join(store[\"metadata\"]))\n        assert np.array_equal(tables.individuals.flags, store[\"individuals/flags\"])\n        assert np.array_equal(tables.individuals.location, store[\"individuals/location\"])\n        assert np.array_equal(\n            tables.individuals.location_offset, store[\"individuals/location_offset\"]\n        )\n        assert np.array_equal(tables.individuals.parents, store[\"individuals/parents\"])\n        assert np.array_equal(\n         
   tables.individuals.parents_offset, store[\"individuals/parents_offset\"]\n        )\n        assert np.array_equal(tables.individuals.metadata, store[\"individuals/metadata\"])\n        assert np.array_equal(\n            tables.individuals.metadata_offset, store[\"individuals/metadata_offset\"]\n        )\n        assert repr(tables.individuals.metadata_schema) == \"\".join(\n            store[\"individuals/metadata_schema\"].astype(\"U\")\n        )\n\n        assert np.array_equal(tables.nodes.flags, store[\"nodes/flags\"])\n        assert np.array_equal(tables.nodes.time, store[\"nodes/time\"])\n        assert np.array_equal(tables.nodes.population, store[\"nodes/population\"])\n        assert np.array_equal(tables.nodes.individual, store[\"nodes/individual\"])\n        assert np.array_equal(tables.nodes.metadata, store[\"nodes/metadata\"])\n        assert np.array_equal(\n            tables.nodes.metadata_offset, store[\"nodes/metadata_offset\"]\n        )\n        assert repr(tables.nodes.metadata_schema) == \"\".join(\n            store[\"nodes/metadata_schema\"].astype(\"U\")\n        )\n\n        assert np.array_equal(tables.edges.left, store[\"edges/left\"])\n        assert np.array_equal(tables.edges.right, store[\"edges/right\"])\n        assert np.array_equal(tables.edges.parent, store[\"edges/parent\"])\n        assert np.array_equal(tables.edges.child, store[\"edges/child\"])\n        assert np.array_equal(tables.edges.metadata, store[\"edges/metadata\"])\n        assert np.array_equal(\n            tables.edges.metadata_offset, store[\"edges/metadata_offset\"]\n        )\n        assert repr(tables.edges.metadata_schema) == \"\".join(\n            store[\"edges/metadata_schema\"].astype(\"U\")\n        )\n\n        left = tables.edges.left\n        right = tables.edges.right\n        parent = tables.edges.parent\n        child = tables.edges.child\n        time = tables.nodes.time\n        in_order = sorted(\n            range(ts.num_edges),\n    
        key=lambda j: (left[j], time[parent[j]], parent[j], child[j]),\n        )\n        out_order = sorted(\n            range(ts.num_edges),\n            key=lambda j: (right[j], -time[parent[j]], -parent[j], -child[j]),\n        )\n        assert np.array_equal(\n            np.array(in_order, dtype=np.int32),\n            store[\"indexes/edge_insertion_order\"],\n        )\n        assert np.array_equal(\n            np.array(out_order, dtype=np.int32), store[\"indexes/edge_removal_order\"]\n        )\n\n        assert np.array_equal(tables.migrations.left, store[\"migrations/left\"])\n        assert np.array_equal(tables.migrations.right, store[\"migrations/right\"])\n        assert np.array_equal(tables.migrations.node, store[\"migrations/node\"])\n        assert np.array_equal(tables.migrations.source, store[\"migrations/source\"])\n        assert np.array_equal(tables.migrations.dest, store[\"migrations/dest\"])\n        assert np.array_equal(tables.migrations.time, store[\"migrations/time\"])\n        assert np.array_equal(tables.migrations.metadata, store[\"migrations/metadata\"])\n        assert np.array_equal(\n            tables.migrations.metadata_offset, store[\"migrations/metadata_offset\"]\n        )\n        assert repr(tables.migrations.metadata_schema) == \"\".join(\n            store[\"migrations/metadata_schema\"].astype(\"U\")\n        )\n\n        assert np.array_equal(tables.sites.position, store[\"sites/position\"])\n        assert np.array_equal(\n            tables.sites.ancestral_state, store[\"sites/ancestral_state\"]\n        )\n        assert np.array_equal(\n            tables.sites.ancestral_state_offset,\n            store[\"sites/ancestral_state_offset\"],\n        )\n        assert np.array_equal(tables.sites.metadata, store[\"sites/metadata\"])\n        assert np.array_equal(\n            tables.sites.metadata_offset, store[\"sites/metadata_offset\"]\n        )\n        assert repr(tables.sites.metadata_schema) == 
\"\".join(\n            store[\"sites/metadata_schema\"].astype(\"U\")\n        )\n\n        assert np.array_equal(tables.mutations.site, store[\"mutations/site\"])\n        assert np.array_equal(tables.mutations.node, store[\"mutations/node\"])\n        # Default mutation time is a NaN value so we want to check for\n        # bit equality, not numeric equality\n        assert tables.mutations.time.tobytes() == store[\"mutations/time\"].tobytes()\n        assert np.array_equal(tables.mutations.parent, store[\"mutations/parent\"])\n        assert np.array_equal(\n            tables.mutations.derived_state, store[\"mutations/derived_state\"]\n        )\n        assert np.array_equal(\n            tables.mutations.derived_state_offset,\n            store[\"mutations/derived_state_offset\"],\n        )\n        assert np.array_equal(tables.mutations.metadata, store[\"mutations/metadata\"])\n        assert np.array_equal(\n            tables.mutations.metadata_offset, store[\"mutations/metadata_offset\"]\n        )\n        assert repr(tables.mutations.metadata_schema) == \"\".join(\n            store[\"mutations/metadata_schema\"].astype(\"U\")\n        )\n\n        assert np.array_equal(tables.populations.metadata, store[\"populations/metadata\"])\n        assert np.array_equal(\n            tables.populations.metadata_offset, store[\"populations/metadata_offset\"]\n        )\n        assert repr(tables.populations.metadata_schema) == \"\".join(\n            store[\"populations/metadata_schema\"].astype(\"U\")\n        )\n\n        assert np.array_equal(tables.provenances.record, store[\"provenances/record\"])\n        assert np.array_equal(\n            tables.provenances.record_offset, store[\"provenances/record_offset\"]\n        )\n        assert np.array_equal(\n            tables.provenances.timestamp, store[\"provenances/timestamp\"]\n        )\n        assert np.array_equal(\n            tables.provenances.timestamp_offset,\n            
store[\"provenances/timestamp_offset\"],\n        )\n\n        store.close()\n\n    def test_single_locus_no_mutation(self):\n        self.verify_dump_format(single_locus_no_mutation_example())\n\n    def test_single_locus_with_mutation(self):\n        self.verify_dump_format(single_locus_with_mutation_example())\n\n    def test_multi_locus_with_mutation(self):\n        self.verify_dump_format(multi_locus_with_mutation_example())\n\n    def test_migration_example(self):\n        self.verify_dump_format(migration_example())\n\n    def test_bottleneck_example(self):\n        self.verify_dump_format(bottleneck_example())\n\n    def test_historical_sample_example(self):\n        self.verify_dump_format(historical_sample_example())\n\n    def test_node_metadata_example(self):\n        self.verify_dump_format(node_metadata_example())\n\n    def test_edge_metadata_example(self):\n        self.verify_dump_format(edge_metadata_example())\n\n    def test_site_metadata_example(self):\n        self.verify_dump_format(site_metadata_example())\n\n    def test_mutation_metadata_example(self):\n        self.verify_dump_format(mutation_metadata_example())\n\n    def test_migration_metadata_example(self):\n        self.verify_dump_format(migration_metadata_example())\n\n    def test_general_mutation_example(self):\n        self.verify_dump_format(general_mutation_example())\n\n    def test_multichar_mutation_example(self):\n        self.verify_dump_format(multichar_mutation_example())\n\n\nclass TestUuid(TestFileFormat):\n    \"\"\"\n    Basic tests for the UUID generation.\n    \"\"\"\n\n    def test_different_files_same_ts(self):\n        ts = msprime.simulate(10)\n        uuids = []\n        for _ in range(10):\n            ts.dump(self.temp_file)\n            with kastore.load(self.temp_file) as store:\n                uuids.append(store[\"uuid\"].tobytes().decode())\n        assert len(uuids) == len(set(uuids))\n\n\nclass TestOptionalColumns(TestFileFormat):\n    \"\"\"\n    
Checks that optional columns in the file format are correctly handled.\n    \"\"\"\n\n    def test_empty_edge_metadata(self):\n        ts1 = migration_example()\n        ts1.dump(self.temp_file)\n        ts2 = tskit.load(self.temp_file)\n        assert ts1.tables == ts2.tables\n        assert len(ts1.tables.edges.metadata) == 0\n\n        with kastore.load(self.temp_file) as store:\n            all_data = dict(store)\n        del all_data[\"edges/metadata\"]\n        del all_data[\"edges/metadata_offset\"]\n        kastore.dump(all_data, self.temp_file)\n        ts3 = tskit.load(self.temp_file)\n        assert ts1.tables == ts3.tables\n\n    def test_empty_migration_metadata(self):\n        ts1 = migration_example()\n        ts1.dump(self.temp_file)\n        ts2 = tskit.load(self.temp_file)\n        assert ts1.tables == ts2.tables\n        assert len(ts1.tables.migrations.metadata) == 0\n\n        with kastore.load(self.temp_file) as store:\n            all_data = dict(store)\n        del all_data[\"migrations/metadata\"]\n        del all_data[\"migrations/metadata_offset\"]\n        kastore.dump(all_data, self.temp_file)\n        ts3 = tskit.load(self.temp_file)\n        assert ts1.tables == ts3.tables\n\n    def test_empty_mutation_time(self):\n        ts1 = migration_example()\n        ts1.dump(self.temp_file)\n        ts2 = tskit.load(self.temp_file)\n        assert ts1.tables == ts2.tables\n        assert len(ts1.tables.mutations.metadata) == 0\n        with kastore.load(self.temp_file) as store:\n            all_data = dict(store)\n        del all_data[\"mutations/time\"]\n        kastore.dump(all_data, self.temp_file)\n        ts3 = tskit.load(self.temp_file)\n        # Null out the time column\n        t1 = ts1.dump_tables()\n        t1.mutations.time = np.full_like(t1.mutations.time, tskit.UNKNOWN_TIME)\n        t1.assert_equals(ts3.tables)\n\n    def test_empty_individual_parents(self):\n        ts1 = migration_example()\n        
ts1.dump(self.temp_file)\n        ts2 = tskit.load(self.temp_file)\n        assert ts1.tables == ts2.tables\n        assert len(ts1.tables.individuals.parents) > 0\n        with kastore.load(self.temp_file) as store:\n            all_data = dict(store)\n        del all_data[\"individuals/parents\"]\n        del all_data[\"individuals/parents_offset\"]\n        kastore.dump(all_data, self.temp_file)\n        ts3 = tskit.load(self.temp_file)\n        tables = ts1.dump_tables()\n        tables.individuals.packset_parents(\n            [\n                [],\n            ]\n            * tables.individuals.num_rows\n        )\n        tables.assert_equals(ts3.tables)\n\n\nclass TestReferenceSequence:\n    def test_fixture_has_reference_sequence(self, ts_fixture):\n        assert ts_fixture.has_reference_sequence()\n\n    def test_round_trip(self, ts_fixture, tmp_path):\n        ts1 = ts_fixture\n        temp_file = tmp_path / \"tmp.trees\"\n        ts1.dump(temp_file)\n        ts2 = tskit.load(temp_file)\n        ts1.tables.assert_equals(ts2.tables)\n\n    def test_no_reference_sequence(self, ts_fixture, tmp_path):\n        ts1 = ts_fixture\n        temp_file = tmp_path / \"tmp.trees\"\n        ts1.dump(temp_file)\n        with kastore.load(temp_file) as store:\n            all_data = dict(store)\n        del all_data[\"reference_sequence/metadata_schema\"]\n        del all_data[\"reference_sequence/metadata\"]\n        del all_data[\"reference_sequence/data\"]\n        del all_data[\"reference_sequence/url\"]\n        for key in all_data.keys():\n            assert not key.startswith(\"reference_sequence\")\n        kastore.dump(all_data, temp_file)\n        ts2 = tskit.load(temp_file)\n        assert not ts2.has_reference_sequence()\n        tables = ts2.dump_tables()\n        tables.reference_sequence = ts1.reference_sequence\n        tables.assert_equals(ts1.tables)\n\n    @pytest.mark.parametrize(\"attr\", [\"data\", \"url\"])\n    def test_missing_attr(self, 
ts_fixture, tmp_path, attr):\n        ts1 = ts_fixture\n        temp_file = tmp_path / \"tmp.trees\"\n        ts1.dump(temp_file)\n        with kastore.load(temp_file) as store:\n            all_data = dict(store)\n        del all_data[f\"reference_sequence/{attr}\"]\n        kastore.dump(all_data, temp_file)\n        ts2 = tskit.load(temp_file)\n        assert ts2.has_reference_sequence\n        assert getattr(ts2.reference_sequence, attr) == \"\"\n\n    def test_missing_metadata(self, ts_fixture, tmp_path):\n        ts1 = ts_fixture\n        temp_file = tmp_path / \"tmp.trees\"\n        ts1.dump(temp_file)\n        with kastore.load(temp_file) as store:\n            all_data = dict(store)\n        del all_data[\"reference_sequence/metadata\"]\n        kastore.dump(all_data, temp_file)\n        ts2 = tskit.load(temp_file)\n        assert ts2.has_reference_sequence\n        assert ts2.reference_sequence.metadata_bytes == b\"\"\n\n    def test_missing_metadata_schema(self, ts_fixture, tmp_path):\n        ts1 = ts_fixture\n        temp_file = tmp_path / \"tmp.trees\"\n        ts1.dump(temp_file)\n        with kastore.load(temp_file) as store:\n            all_data = dict(store)\n        del all_data[\"reference_sequence/metadata_schema\"]\n        kastore.dump(all_data, temp_file)\n        ts2 = tskit.load(temp_file)\n        assert ts2.has_reference_sequence\n        assert repr(ts2.reference_sequence.metadata_schema) == \"\"\n\n\nclass TestFileFormatErrors(TestFileFormat):\n    \"\"\"\n    Tests for errors in the kastore file format.\n    \"\"\"\n\n    def verify_missing_fields(self, ts):\n        ts.dump(self.temp_file)\n        with kastore.load(self.temp_file) as store:\n            all_data = dict(store)\n        for key in all_data.keys():\n            # We skip these keys as they are optional\n            if \"metadata_schema\" not in key and key not in [\n                \"metadata\",\n                \"time_units\",\n                \"mutations/time\",\n            
]:\n                data = dict(all_data)\n                del data[key]\n                kastore.dump(data, self.temp_file)\n                with pytest.raises(\n                    (exceptions.FileFormatError, exceptions.LibraryError)\n                ):\n                    tskit.load(self.temp_file)\n\n    def test_missing_fields(self):\n        self.verify_missing_fields(migration_example())\n\n    def verify_equal_length_columns(self, ts, table):\n        ts.dump(self.temp_file)\n        with kastore.load(self.temp_file) as store:\n            all_data = dict(store)\n        table_cols = [\n            colname for colname in all_data.keys() if colname.startswith(table)\n        ]\n        # Remove all the 'offset' columns\n        for col in list(table_cols):\n            if col.endswith(\"_offset\"):\n                main_col = col[: col.index(\"_offset\")]\n                table_cols.remove(main_col)\n                table_cols.remove(col)\n            if \"metadata_schema\" in col:\n                table_cols.remove(col)\n        # Remaining columns should all be the same length\n        for col in table_cols:\n            for bad_val in [[], all_data[col][:-1]]:\n                data = dict(all_data)\n                data[col] = bad_val\n                kastore.dump(data, self.temp_file)\n                with pytest.raises(exceptions.FileFormatError):\n                    tskit.load(self.temp_file)\n\n    def test_equal_length_columns(self):\n        ts = migration_example()\n        for table in [\"nodes\", \"edges\", \"migrations\", \"sites\", \"mutations\"]:\n            self.verify_equal_length_columns(ts, table)\n\n    def verify_offset_columns(self, ts):\n        ts.dump(self.temp_file)\n        with kastore.load(self.temp_file) as store:\n            all_data = dict(store)\n        offset_col_pairs = []\n        for col in all_data.keys():\n            if col.endswith(\"_offset\"):\n                main_col = col[: col.index(\"_offset\")]\n         
       offset_col_pairs.append((main_col, col))\n        for col, offset_col in offset_col_pairs:\n            num_rows = len(all_data[offset_col]) - 1\n            data = dict(all_data)\n            # Check bad lengths of the offset col\n            for bad_col_length in [[], range(2 * num_rows)]:\n                data[offset_col] = bad_col_length\n                kastore.dump(data, self.temp_file)\n                with pytest.raises(exceptions.FileFormatError):\n                    tskit.load(self.temp_file)\n\n            # Check for a bad offset\n            data = dict(all_data)\n            original_offset = data[offset_col]\n            original_col = data[col]\n            data[offset_col] = np.zeros_like(original_offset)\n            data[col] = np.zeros(10, dtype=original_col.dtype)\n            kastore.dump(data, self.temp_file)\n            with pytest.raises(exceptions.LibraryError):\n                tskit.load(self.temp_file)\n\n    def test_offset_columns(self):\n        ts = migration_example()\n        self.verify_offset_columns(ts)\n\n    def test_index_columns(self):\n        ts = migration_example()\n        ts.dump(self.temp_file)\n        with kastore.load(self.temp_file) as store:\n            all_data = dict(store)\n\n        edge_removal_order = \"indexes/edge_removal_order\"\n        edge_insertion_order = \"indexes/edge_insertion_order\"\n\n        data = dict(all_data)\n        del data[edge_removal_order]\n        del data[edge_insertion_order]\n        kastore.dump(data, self.temp_file)\n        with pytest.raises(exceptions.LibraryError):\n            tskit.load(self.temp_file)\n\n        data = dict(all_data)\n        del data[edge_removal_order]\n        kastore.dump(data, self.temp_file)\n        with pytest.raises(exceptions.LibraryError):\n            tskit.load(self.temp_file)\n\n        data = dict(all_data)\n        del data[edge_insertion_order]\n        kastore.dump(data, self.temp_file)\n        with 
pytest.raises(exceptions.LibraryError):\n            tskit.load(self.temp_file)\n\n        data = dict(all_data)\n        data[edge_insertion_order] = data[edge_insertion_order][:1]\n        kastore.dump(data, self.temp_file)\n        with pytest.raises(exceptions.FileFormatError):\n            tskit.load(self.temp_file)\n\n        data = dict(all_data)\n        data[edge_removal_order] = data[edge_removal_order][:1]\n        kastore.dump(data, self.temp_file)\n        with pytest.raises(exceptions.FileFormatError):\n            tskit.load(self.temp_file)\n\n    def test_load_empty_kastore(self):\n        kastore.dump({}, self.temp_file)\n        with pytest.raises(exceptions.LibraryError):\n            tskit.load(self.temp_file)\n\n    def test_old_version_load_error(self):\n        ts = msprime.simulate(10, random_seed=1)\n        for bad_version in [(0, 1), (0, 8), (2, 0), (CURRENT_FILE_MAJOR - 1, 0)]:\n            ts.dump(self.temp_file)\n            with kastore.load(self.temp_file) as store:\n                data = dict(store)\n            data[\"format/version\"] = np.array(bad_version, dtype=np.uint32)\n            kastore.dump(data, self.temp_file)\n            with pytest.raises(tskit.VersionTooOldError):\n                tskit.load(self.temp_file)\n\n    def test_new_version_load_error(self):\n        ts = msprime.simulate(10, random_seed=1)\n        for bad_version in [(CURRENT_FILE_MAJOR + j, 0) for j in range(1, 5)]:\n            ts.dump(self.temp_file)\n            with kastore.load(self.temp_file) as store:\n                data = dict(store)\n            data[\"format/version\"] = np.array(bad_version, dtype=np.uint32)\n            kastore.dump(data, self.temp_file)\n            with pytest.raises(tskit.VersionTooNewError):\n                tskit.load(self.temp_file)\n\n    def test_format_name_error(self):\n        ts = msprime.simulate(10)\n        for bad_name in [\"tskit.tree\", \"tskit.treesAndOther\", \"\", \"x\" * 100]:\n            
ts.dump(self.temp_file)\n            with kastore.load(self.temp_file) as store:\n                data = dict(store)\n            data[\"format/name\"] = np.array(bytearray(bad_name.encode()), dtype=np.int8)\n            kastore.dump(data, self.temp_file)\n            with pytest.raises(exceptions.FileFormatError):\n                tskit.load(self.temp_file)\n\n    def test_load_bad_formats(self):\n        # try loading a bunch of files in various formats.\n        # First, check the empty file.\n        with pytest.raises(EOFError):\n            tskit.load(self.temp_file)\n        # Now some ascii text\n        with open(self.temp_file, \"wb\") as f:\n            f.write(b\"Some ASCII text\")\n        with pytest.raises(exceptions.FileFormatError):\n            tskit.load(self.temp_file)\n        # Now write 8k of random bytes\n        with open(self.temp_file, \"wb\") as f:\n            f.write(os.urandom(8192))\n        with pytest.raises(exceptions.FileFormatError):\n            tskit.load(self.temp_file)\n\n    def test_load_bad_formats_fileobj(self):\n        def load():\n            with open(self.temp_file, \"rb\") as f:\n                tskit.load(f)\n\n        with pytest.raises(EOFError):\n            load()\n        with open(self.temp_file, \"wb\") as f:\n            f.write(b\"Some ASCII text\")\n        with pytest.raises(exceptions.FileFormatError):\n            load()\n\n\ndef assert_tables_empty(tables):\n    for table in tables.table_name_map.values():\n        assert len(table) == 0\n\n\nclass TestSkipTables:\n    \"\"\"\n    Test `skip_tables` flag to TreeSequence.load() and TableCollection.load().\n    \"\"\"\n\n    def test_ts_read_path_interface(self, tmp_path, ts_fixture):\n        # Check the fixture has metadata and a schema\n        assert ts_fixture.metadata_schema is not None\n        assert len(ts_fixture.metadata) > 0\n        save_path = tmp_path / \"tmp.trees\"\n        ts_fixture.dump(save_path)\n        ts_no_tables = 
tskit.load(save_path, skip_tables=True)\n        assert not ts_no_tables.equals(ts_fixture)\n        assert ts_no_tables.equals(ts_fixture, ignore_tables=True)\n        assert_tables_empty(ts_no_tables.tables)\n\n    def test_ts_read_one_stream(self, tmp_path, ts_fixture):\n        save_path = tmp_path / \"tmp.trees\"\n        ts_fixture.dump(save_path)\n        with open(save_path, \"rb\") as f:\n            ts_no_tables = tskit.load(f, skip_tables=True)\n        assert not ts_no_tables.equals(ts_fixture)\n        assert ts_no_tables.equals(ts_fixture, ignore_tables=True)\n        assert_tables_empty(ts_no_tables.tables)\n\n    def test_ts_twofile_stream_noskip(self, tmp_path, ts_fixture):\n        save_path = tmp_path / \"tmp.trees\"\n        with open(save_path, \"wb\") as f:\n            ts_fixture.dump(f)\n            ts_fixture.dump(f)\n        with open(save_path, \"rb\") as f:\n            ts1 = tskit.load(f)\n            ts2 = tskit.load(f)\n        assert ts_fixture.equals(ts1)\n        assert ts_fixture.equals(ts2)\n\n    def test_ts_twofile_stream_fails(self, tmp_path, ts_fixture):\n        # We can't skip_tables while reading from a stream\n        save_path = tmp_path / \"tmp.trees\"\n        with open(save_path, \"wb\") as f:\n            ts_fixture.dump(f)\n            ts_fixture.dump(f)\n        with open(save_path, \"rb\") as f:\n            tskit.load(f, skip_tables=True)\n            with pytest.raises(exceptions.FileFormatError):\n                tskit.load(f)\n\n    def test_table_collection_load_path(self, tmp_path, ts_fixture):\n        save_path = tmp_path / \"tmp.trees\"\n        ts_fixture.dump(save_path)\n        tables_skipped = tskit.TableCollection.load(save_path, skip_tables=True)\n        tables = ts_fixture.tables\n        assert not tables_skipped.equals(tables)\n        assert tables_skipped.equals(tables, ignore_tables=True)\n        assert_tables_empty(tables_skipped)\n\n    def test_table_collection_load_stream(self, tmp_path, 
ts_fixture):\n        save_path = tmp_path / \"tmp.trees\"\n        ts_fixture.dump(save_path)\n        with open(save_path, \"rb\") as f:\n            tables_skipped = tskit.TableCollection.load(f, skip_tables=True)\n        tables = ts_fixture.tables\n        assert not tables_skipped.equals(tables)\n        assert tables_skipped.equals(tables, ignore_tables=True)\n        assert_tables_empty(tables_skipped)\n\n\nclass TestSkipReferenceSequence:\n    \"\"\"\n    Test `skip_reference_sequence` flag to TreeSequence.load() and\n    TableCollection.load().\n    \"\"\"\n\n    def test_ts_load_path(self, tmp_path, ts_fixture):\n        assert ts_fixture.has_reference_sequence()\n        save_path = tmp_path / \"tmp.trees\"\n        ts_fixture.dump(save_path)\n        ts_no_refseq = tskit.load(save_path, skip_reference_sequence=True)\n        assert not ts_no_refseq.equals(ts_fixture)\n        assert ts_no_refseq.equals(ts_fixture, ignore_reference_sequence=True)\n        assert not ts_no_refseq.has_reference_sequence()\n\n    def test_ts_load_stream(self, tmp_path, ts_fixture):\n        save_path = tmp_path / \"tmp.trees\"\n        ts_fixture.dump(save_path)\n        with open(save_path, \"rb\") as f:\n            ts_no_refseq = tskit.load(f, skip_reference_sequence=True)\n        assert not ts_no_refseq.equals(ts_fixture)\n        assert ts_no_refseq.equals(ts_fixture, ignore_reference_sequence=True)\n        assert not ts_no_refseq.has_reference_sequence()\n\n    def test_ts_twofile_stream_fails(self, tmp_path, ts_fixture):\n        # We can't skip_reference_sequence while reading from a stream\n        save_path = tmp_path / \"tmp.trees\"\n        with open(save_path, \"wb\") as f:\n            ts_fixture.dump(f)\n            ts_fixture.dump(f)\n        with open(save_path, \"rb\") as f:\n            tskit.load(f, skip_reference_sequence=True)\n            with pytest.raises(exceptions.FileFormatError):\n                tskit.load(f)\n\n    def 
test_table_collection_load_path(self, tmp_path, ts_fixture):\n        save_path = tmp_path / \"tmp.trees\"\n        ts_fixture.dump(save_path)\n        tables_no_refseq = tskit.TableCollection.load(\n            save_path, skip_reference_sequence=True\n        )\n        tables = ts_fixture.tables\n        assert not tables_no_refseq.equals(tables)\n        assert tables_no_refseq.equals(tables, ignore_reference_sequence=True)\n        assert not tables_no_refseq.has_reference_sequence()\n\n    def test_table_collection_load_stream(self, tmp_path, ts_fixture):\n        save_path = tmp_path / \"tmp.trees\"\n        ts_fixture.dump(save_path)\n        with open(save_path, \"rb\") as f:\n            tables_no_refseq = tskit.TableCollection.load(\n                f, skip_reference_sequence=True\n            )\n        tables = ts_fixture.tables\n        assert not tables_no_refseq.equals(tables)\n        assert tables_no_refseq.equals(tables, ignore_reference_sequence=True)\n        assert not tables_no_refseq.has_reference_sequence()\n"
  },
  {
    "path": "python/tests/test_fileobj.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2023 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTest cases for loading and dumping different types of files and streams\n\"\"\"\n\nimport io\nimport multiprocessing\nimport os\nimport pathlib\nimport platform\nimport queue\nimport shutil\nimport socket\nimport socketserver\nimport tempfile\nimport traceback\n\nimport pytest\nimport tszip\n\nimport tskit\n\nIS_WINDOWS = platform.system() == \"Windows\"\nIS_OSX = platform.system() == \"Darwin\"\n\n\nclass TestPath:\n    @pytest.fixture\n    def tempfile_name(self):\n        with tempfile.TemporaryDirectory() as tmp_dir:\n            yield f\"{tmp_dir}/plain_path\"\n\n    def test_pathlib(self, ts_fixture, tempfile_name):\n        ts_fixture.dump(tempfile_name)\n        ts2 = tskit.load(tempfile_name)\n        assert ts_fixture.tables == ts2.tables\n\n\nclass TestPathLib:\n    @pytest.fixture\n    def 
pathlib_tempfile(self):\n        fd, path = tempfile.mkstemp(prefix=\"tskit_test_pathlib\")\n        os.close(fd)\n        temp_file = pathlib.Path(path)\n        yield temp_file\n        temp_file.unlink()\n\n    def test_pathlib(self, ts_fixture, pathlib_tempfile):\n        ts_fixture.dump(pathlib_tempfile)\n        ts2 = tskit.load(pathlib_tempfile)\n        assert ts_fixture.tables == ts2.tables\n\n\nclass TestFileObj:\n    @pytest.fixture\n    def fileobj(self):\n        with tempfile.TemporaryDirectory() as tmp_dir:\n            with open(f\"{tmp_dir}/fileobj\", \"wb\") as f:\n                yield f\n\n    def test_fileobj(self, ts_fixture, fileobj):\n        ts_fixture.dump(fileobj)\n        fileobj.close()\n        ts2 = tskit.load(fileobj.name)\n        assert ts_fixture.tables == ts2.tables\n\n    def test_fileobj_multi(self, replicate_ts_fixture, fileobj):\n        file_offsets = []\n        for ts in replicate_ts_fixture:\n            ts.dump(fileobj)\n            file_offsets.append(fileobj.tell())\n        fileobj.close()\n        with open(fileobj.name, \"rb\") as f:\n            for ts, file_offset in zip(replicate_ts_fixture, file_offsets):\n                ts2 = tskit.load(f)\n                file_offset2 = f.tell()\n                assert ts.tables == ts2.tables\n                assert file_offset == file_offset2\n\n\nclass TestFileObjRW:\n    @pytest.fixture\n    def fileobj(self):\n        with tempfile.TemporaryDirectory() as tmp_dir:\n            pathlib.Path(f\"{tmp_dir}/fileobj\").touch()\n            with open(f\"{tmp_dir}/fileobj\", \"r+b\") as f:\n                yield f\n\n    def test_fileobj(self, ts_fixture, fileobj):\n        ts_fixture.dump(fileobj)\n        fileobj.seek(0)\n        ts2 = tskit.load(fileobj)\n        assert ts_fixture.tables == ts2.tables\n\n    def test_fileobj_multi(self, replicate_ts_fixture, fileobj):\n        file_offsets = []\n        for ts in replicate_ts_fixture:\n            ts.dump(fileobj)\n            
file_offsets.append(fileobj.tell())\n        fileobj.seek(0)\n        for ts, file_offset in zip(replicate_ts_fixture, file_offsets):\n            ts2 = tskit.load(fileobj)\n            file_offset2 = fileobj.tell()\n            assert ts.tables == ts2.tables\n            assert file_offset == file_offset2\n\n\nclass TestFD:\n    @pytest.fixture\n    def fd(self):\n        with tempfile.TemporaryDirectory() as tmp_dir:\n            pathlib.Path(f\"{tmp_dir}/fd\").touch()\n            with open(f\"{tmp_dir}/fd\", \"r+b\") as f:\n                yield f.fileno()\n\n    def test_fd(self, ts_fixture, fd):\n        ts_fixture.dump(fd)\n        os.lseek(fd, 0, os.SEEK_SET)\n        ts2 = tskit.load(fd)\n        assert ts_fixture.tables == ts2.tables\n\n    def test_fd_multi(self, replicate_ts_fixture, fd):\n        for ts in replicate_ts_fixture:\n            ts.dump(fd)\n        os.lseek(fd, 0, os.SEEK_SET)\n        for ts in replicate_ts_fixture:\n            ts2 = tskit.load(fd)\n            assert ts.tables == ts2.tables\n\n\nclass TestUnsupportedObjects:\n    def test_string_io(self, ts_fixture):\n        with pytest.raises(io.UnsupportedOperation, match=r\"fileno\"):\n            ts_fixture.dump(io.StringIO())\n        with pytest.raises(io.UnsupportedOperation, match=r\"fileno\"):\n            tskit.load(io.StringIO())\n        with pytest.raises(io.UnsupportedOperation, match=r\"fileno\"):\n            ts_fixture.dump(io.BytesIO())\n        with pytest.raises(io.UnsupportedOperation, match=r\"fileno\"):\n            tskit.load(io.BytesIO())\n\n\ndef dump_to_stream(q_err, q_in, file_out):\n    \"\"\"\n    Get tree sequences from `q_in` and ts.dump() them to `file_out`.\n    Uncaught exceptions are placed onto the `q_err` queue.\n    \"\"\"\n    try:\n        with open(file_out, \"wb\") as f:\n            while True:\n                ts = q_in.get()\n                if ts is None:\n                    break\n                ts.dump(f)\n    except Exception as 
exc:\n        tb = traceback.format_exc()\n        q_err.put((exc, tb))\n\n\ndef load_from_stream(q_err, q_out, file_in):\n    \"\"\"\n    tskit.load() tree sequences from `file_in` and put them onto `q_out`.\n    Uncaught exceptions are placed onto the `q_err` queue.\n    \"\"\"\n    try:\n        with open(file_in, \"rb\") as f:\n            while True:\n                try:\n                    ts = tskit.load(f)\n                except EOFError:\n                    break\n                q_out.put(ts)\n    except Exception as exc:\n        tb = traceback.format_exc()\n        q_err.put((exc, tb))\n\n\ndef stream(fifo, ts_list):\n    \"\"\"\n    data -> q_in -> ts.dump(fifo) -> tskit.load(fifo) -> q_out -> data_out\n    \"\"\"\n    q_err = multiprocessing.Queue()\n    q_in = multiprocessing.Queue()\n    q_out = multiprocessing.Queue()\n    proc1 = multiprocessing.Process(target=dump_to_stream, args=(q_err, q_in, fifo))\n    proc2 = multiprocessing.Process(target=load_from_stream, args=(q_err, q_out, fifo))\n    proc1.start()\n    proc2.start()\n    for data in ts_list:\n        q_in.put(data)\n\n    q_in.put(None)  # signal the process that we're done\n    proc1.join(timeout=3)\n    if not q_err.empty():\n        # re-raise the first child exception\n        exc, tb = q_err.get()\n        print(tb)\n        raise exc\n    if proc1.is_alive():\n        # prevent hang if proc1 failed to join\n        proc1.terminate()\n        proc2.terminate()\n        raise RuntimeError(\"proc1 (ts.dump) failed to join\")\n    ts_list_out = []\n    for _ in ts_list:\n        try:\n            data_out = q_out.get(timeout=3)\n        except queue.Empty:\n            # terminate proc2 so we don't hang\n            proc2.terminate()\n            raise\n        ts_list_out.append(data_out)\n    proc2.join(timeout=3)\n    if proc2.is_alive():\n        # prevent hang if proc2 failed to join\n        proc2.terminate()\n        raise RuntimeError(\"proc2 (tskit.load) failed to 
join\")\n\n    assert len(ts_list) == len(ts_list_out)\n    for ts, ts_out in zip(ts_list, ts_list_out):\n        assert ts.tables == ts_out.tables\n\n\n@pytest.mark.network\n@pytest.mark.skipif(IS_WINDOWS, reason=\"No FIFOs on Windows\")\n@pytest.mark.skipif(IS_OSX, reason=\"FIFO flakey on OS X, issue #1170\")\nclass TestFIFO:\n    @pytest.fixture\n    def fifo(self):\n        temp_dir = tempfile.mkdtemp(prefix=\"tsk_test_streaming\")\n        temp_fifo = os.path.join(temp_dir, \"fifo\")\n        os.mkfifo(temp_fifo)\n        yield temp_fifo\n        shutil.rmtree(temp_dir)\n\n    def test_single_stream(self, fifo, ts_fixture):\n        stream(fifo, [ts_fixture])\n\n    def test_multi_stream(self, fifo, replicate_ts_fixture):\n        stream(fifo, replicate_ts_fixture)\n\n\nADDRESS = (\"localhost\", 10009)\n\n\nclass Server(socketserver.ThreadingTCPServer):\n    allow_reuse_address = True\n\n\nclass StoreEchoHandler(socketserver.BaseRequestHandler):\n    def handle(self):\n        while True:\n            try:\n                ts = tskit.load(self.request.fileno())\n            except EOFError:\n                break\n            ts.dump(self.request.fileno())\n        self.server.shutdown()\n\n\ndef server_process(q):\n    server = Server(ADDRESS, StoreEchoHandler)\n    # Tell the client (on the other end of the queue) that it's OK to open\n    # a connection\n    q.put(None)\n    server.serve_forever()\n\n\n@pytest.mark.network\n@pytest.mark.skipif(IS_WINDOWS or IS_OSX, reason=\"Errors on systems without proper fds\")\nclass TestSocket:\n    @pytest.fixture\n    def client_fd(self):\n        # Use a queue to synchronise the startup of the server and the client.\n        q = multiprocessing.Queue()\n        _server_process = multiprocessing.Process(target=server_process, args=(q,))\n        _server_process.start()\n        q.get(timeout=3)\n        client = socket.create_connection(ADDRESS)\n        yield client.fileno()\n        client.close()\n        
_server_process.join(timeout=3)\n\n    def verify_stream(self, ts_list, client_fd):\n        for ts in ts_list:\n            ts.dump(client_fd)\n            echo_ts = tskit.load(client_fd)\n            assert ts.tables == echo_ts.tables\n\n    def test_single_then_multi(self, ts_fixture, replicate_ts_fixture, client_fd):\n        self.verify_stream([ts_fixture], client_fd)\n        self.verify_stream(replicate_ts_fixture, client_fd)\n\n\ndef write_to_fifo(path, file_path):\n    with open(path, \"wb\") as fifo:\n        with open(file_path, \"rb\") as file:\n            fifo.write(file.read())\n\n\ndef read_from_fifo(path, expected_exception, error_text, read_func):\n    with open(path) as fifo:\n        with pytest.raises(expected_exception, match=error_text):\n            read_func(fifo)\n\n\ndef write_and_read_from_fifo(fifo_path, file_path, expected_exception, error_text):\n    os.mkfifo(fifo_path)\n    for read_func in [tskit.load, tskit.TableCollection.load]:\n        read_process = multiprocessing.Process(\n            target=read_from_fifo,\n            args=(fifo_path, expected_exception, error_text, read_func),\n        )\n        read_process.start()\n        write_process = multiprocessing.Process(\n            target=write_to_fifo, args=(fifo_path, file_path)\n        )\n        write_process.start()\n        write_process.join(timeout=3)\n        read_process.join(timeout=3)\n\n\n@pytest.mark.network\n@pytest.mark.skipif(IS_WINDOWS, reason=\"No FIFOs on Windows\")\nclass TestBadStream:\n    def test_bad_stream(self, tmp_path):\n        fifo_path = tmp_path / \"fifo\"\n        bad_file_path = tmp_path / \"bad_file\"\n        bad_file_path.write_bytes(b\"bad data\")\n        write_and_read_from_fifo(\n            fifo_path, bad_file_path, tskit.FileFormatError, \"not in kastore format\"\n        )\n\n    def test_legacy_stream(self, tmp_path):\n        fifo_path = tmp_path / \"fifo\"\n        legacy_file_path = os.path.join(\n            
os.path.dirname(__file__), \"data\", \"hdf5-formats\", \"msprime-0.3.0_v2.0.hdf5\"\n        )\n        write_and_read_from_fifo(\n            fifo_path, legacy_file_path, tskit.FileFormatError, \"not in kastore format\"\n        )\n\n    def test_tszip_stream(self, tmp_path, ts_fixture):\n        fifo_path = tmp_path / \"fifo\"\n        zip_file_path = tmp_path / \"tszip_file\"\n        tszip.compress(ts_fixture, zip_file_path)\n        write_and_read_from_fifo(\n            fifo_path, zip_file_path, tskit.FileFormatError, \"not in kastore format\"\n        )\n"
  },
  {
    "path": "python/tests/test_genotype_matching.py",
    "content": "import copy\nimport itertools\n\nimport lshmm as ls\nimport msprime\nimport numpy as np\nimport pytest\n\nimport tskit\n\nEQUAL_BOTH_HOM = 4\nUNEQUAL_BOTH_HOM = 0\nBOTH_HET = 7\nREF_HOM_OBS_HET = 1\nREF_HET_OBS_HOM = 2\n\nMISSING = -1\n\n\ndef mirror_coordinates(ts):\n    \"\"\"\n    Returns a copy of the specified tree sequence in which all\n    coordinates x are transformed into L - x.\n    \"\"\"\n    L = ts.sequence_length\n    tables = ts.dump_tables()\n    left = tables.edges.left\n    right = tables.edges.right\n    tables.edges.left = L - right\n    tables.edges.right = L - left\n    tables.sites.position = L - tables.sites.position  # + 1\n    # TODO migrations.\n    tables.sort()\n    return tables.tree_sequence()\n\n\nclass ValueTransition:\n    \"\"\"Simple struct holding value transition values.\"\"\"\n\n    def __init__(self, tree_node=-1, inner_summation=-1, value_list=-1, value_index=-1):\n        self.tree_node = tree_node\n        self.value_list = value_list\n        self.inner_summation = inner_summation\n        self.value_index = value_index\n\n    def copy(self):\n        return ValueTransition(\n            self.tree_node,\n            self.inner_summation,\n            self.value_list,\n            self.value_index,\n        )\n\n    def __repr__(self):\n        return repr(self.__dict__)\n\n    def __str__(self):\n        return repr(self)\n\n\nclass InternalValueTransition:\n    \"\"\"Simple struct holding the internal value transition values.\"\"\"\n\n    def __init__(self, tree_node=-1, value=-1, inner_summation=-1):\n        self.tree_node = tree_node\n        self.value = value\n        self.inner_summation = inner_summation\n\n    def __repr__(self):\n        return repr(self.__dict__)\n\n    def __str__(self):\n        return repr(self)\n\n\nclass LsHmmAlgorithm:\n    \"\"\"\n    Abstract superclass of Li and Stephens HMM algorithm.\n    \"\"\"\n\n    def __init__(self, ts, rho, mu, precision=30):\n        self.ts = 
ts\n        self.mu = mu\n        self.rho = rho\n        self.precision = precision\n        # The array of ValueTransitions.\n        self.T = []\n        # indexes into the T array for each node.\n        self.T_index = np.zeros(ts.num_nodes, dtype=int) - 1\n        # The number of nodes underneath each element in the T array.\n        self.N = np.zeros(ts.num_nodes, dtype=int)\n        # Efficiently compute the allelic state at a site\n        self.allelic_state = np.zeros(ts.num_nodes, dtype=int) - 1\n        # Diffs so we can update T and T_index between trees.\n        self.edge_diffs = self.ts.edge_diffs()\n        self.parent = np.zeros(self.ts.num_nodes, dtype=int) - 1\n        self.tree = tskit.Tree(self.ts)\n        self.output = None\n\n    def decode_site_dict(self):\n        \"\"\"\n        Decodes the tree encoding of the values into an explicit\n        matrix.\n        \"\"\"\n        A = np.zeros((self.ts.num_samples, self.ts.num_samples))\n        # To look at the inner summations too.\n        B = np.zeros((self.ts.num_samples, self.ts.num_samples))\n        f = {st.tree_node: st for st in self.T}\n\n        for j1, u1 in enumerate(self.ts.samples()):\n            while u1 not in f:\n                u1 = self.tree.parent(u1)\n            f1 = {st.tree_node: st for st in f[u1].value_list}\n            for j2, u2 in enumerate(self.ts.samples()):\n                while u2 not in f1:\n                    u2 = self.tree.parent(u2)\n                A[j1, j2] = f1[u2].value\n                B[j1, j2] = f1[u2].inner_summation\n        return A, B\n\n    def check_integrity(self):\n        M = [st.tree_node for st in self.T if st.tree_node != -1]\n        assert np.all(self.T_index[M] >= 0)\n        index = np.ones_like(self.T_index, dtype=bool)\n        index[M] = 0\n        assert np.all(self.T_index[index] == -1)\n        for j, st in enumerate(self.T):\n            if st.tree_node != -1:\n                assert j == 
self.T_index[st.tree_node]\n\n    def stupid_compress_dict(self):\n        \"\"\"\n        Duncan created a compression that just runs parsimony so is\n        guaranteed to work.\n        \"\"\"\n        tree = self.tree\n        T = self.T\n        alleles_string_vec = np.zeros(tree.num_samples()).astype(\"object\")\n        genotypes = np.zeros(tree.num_samples(), dtype=int)\n        genotype_index = 0\n        mapping_back = {}\n\n        node_map = {st.tree_node: st for st in self.T}\n\n        for st1 in T:\n            if st1.tree_node != -1:\n                alleles_string_tmp = [\n                    f\"{st2.tree_node}:{st2.value:.16f}\" for st2 in st1.value_list\n                ]\n                alleles_string = \",\".join(alleles_string_tmp)\n                # Add an extra element that tells me the alleles_string there.\n                st1.alleles_string = alleles_string\n                st1.genotype_index = genotype_index\n                # assert alleles_string not in mapping_back\n                if alleles_string not in mapping_back:\n                    mapping_back[alleles_string] = {\n                        \"tree_node\": st1.tree_node,\n                        \"value_list\": st1.value_list,\n                        \"inner_summation\": st1.inner_summation,\n                    }\n                genotype_index += 1\n\n        for leaf in tree.samples():\n            u = leaf\n            while u not in node_map:\n                u = tree.parent(u)\n            genotypes[leaf] = node_map[u].genotype_index\n\n        alleles_string_vec = []\n        for st in T:\n            if st.tree_node != -1:\n                alleles_string_vec.append(st.alleles_string)\n\n        ancestral_allele, mutations = tree.map_mutations(genotypes, alleles_string_vec)\n\n        # Retain the old T_index, because the internal T that's passed up the tree will\n        # retain this ordering.\n        old_T_index = copy.deepcopy(self.T_index)\n        self.T_index = 
np.zeros(tree.tree_sequence.num_nodes, dtype=int) - 1\n        self.N = np.zeros(tree.tree_sequence.num_nodes, dtype=int)\n        self.T.clear()\n\n        # First, create T root.\n        self.T_index[tree.root] = 0\n        self.T.append(\n            ValueTransition(\n                tree_node=tree.root,\n                value_list=[\n                    InternalValueTransition(\n                        tree_node=tree.root,\n                        value=mapping_back[ancestral_allele][\"value_list\"][\n                            old_T_index[mapping_back[ancestral_allele][\"tree_node\"]]\n                        ].value,\n                    )\n                ],\n            )\n        )\n\n        # Then create the rest of T, adding the root each time to value_list\n        for i, mut in enumerate(mutations):\n            self.T_index[mut.node] = i + 1\n            self.T.append(\n                ValueTransition(\n                    tree_node=mut.node,\n                    value_list=[\n                        InternalValueTransition(\n                            tree_node=tree.root,\n                            value=mapping_back[mut.derived_state][\"value_list\"][\n                                old_T_index[mapping_back[ancestral_allele][\"tree_node\"]]\n                            ].value,\n                        )\n                    ],\n                )\n            )\n\n        # First add to the root\n        for mut in mutations:\n            self.T[self.T_index[tree.root]].value_list.append(\n                InternalValueTransition(\n                    tree_node=mut.node,\n                    value=mapping_back[ancestral_allele][\"value_list\"][\n                        old_T_index[mapping_back[mut.derived_state][\"tree_node\"]]\n                    ].value,\n                )\n            )\n\n        # Then add the rest of T_internal to each internal T.\n        for mut1 in mutations:\n            for mut2 in mutations:\n                
self.T[self.T_index[mut1.node]].value_list.append(\n                    InternalValueTransition(\n                        tree_node=mut2.node,\n                        value=mapping_back[mut1.derived_state][\"value_list\"][\n                            old_T_index[mapping_back[mut2.derived_state][\"tree_node\"]]\n                        ].value,\n                    )\n                )\n\n        # General approach here is to use\n        # mapping_back[mut.derived_state]['value_list'][\n        #   old_T_index[mapping_back[mut2.derived_state][\"tree_node\"]\n        # ] and append this to the T_inner.\n\n        node_map = {st.tree_node: st for st in self.T}\n\n        for u in tree.samples():\n            while u not in node_map:\n                u = tree.parent(u)\n            self.N[self.T_index[u]] += 1\n\n    def update_tree(self):\n        \"\"\"\n        Update the internal data structures to move on to the next tree.\n        \"\"\"\n        parent = self.parent\n        T_index = self.T_index\n        T = self.T\n        _, edges_out, edges_in = next(self.edge_diffs)\n\n        for edge in edges_out:\n            u = edge.child\n            if T_index[u] == -1:\n                # Make sure the subtree we're detaching has an T_index-value at the root.\n                while T_index[u] == -1:\n                    u = parent[u]\n                    assert u != -1\n                T_index[edge.child] = len(T)\n                T.append(\n                    ValueTransition(\n                        tree_node=edge.child,\n                        value_list=copy.deepcopy(T[T_index[u]].value_list),\n                    )\n                )\n                # Add on this extra node to each of the internal lists\n                for st in T:\n                    if not (st.value_list == tskit.NULL):\n                        st.value_list.append(\n                            InternalValueTransition(\n                                tree_node=edge.child,\n           
                     value=st.value_list.copy()[T_index[u]].value,\n                            )\n                        )\n            parent[edge.child] = -1\n\n        for edge in edges_in:\n            parent[edge.child] = edge.parent\n            u = edge.parent\n            if parent[edge.parent] == -1:\n                # Grafting onto a new root.\n                if T_index[edge.parent] == -1:\n                    T_index[edge.parent] = len(T)\n                    T.append(\n                        ValueTransition(\n                            tree_node=edge.parent,\n                            value_list=copy.deepcopy(T[T_index[edge.child]].value_list),\n                        )\n                    )\n                    # Add on this extra node to each of the internal lists\n                    for st in T:\n                        if not (st.value_list == tskit.NULL):\n                            st.value_list.append(\n                                InternalValueTransition(\n                                    tree_node=edge.parent,\n                                    value=st.value_list.copy()[\n                                        T_index[edge.child]\n                                    ].value,\n                                )\n                            )\n            else:\n                # Grafting into an existing subtree.\n                while T_index[u] == -1:\n                    u = parent[u]\n                    assert u != -1\n            assert T_index[u] != -1 and T_index[edge.child] != -1\n            if (\n                T[T_index[u]].value_list == T[T_index[edge.child]].value_list\n            ):  # DEV: is this fine?\n                st = T[T_index[edge.child]]\n                # Mark the lower ValueTransition as unused.\n                st.value_list = -1\n                # Also need to mark the corresponding InternalValueTransition as\n                # unused for the remaining states\n                for st2 in T:\n    
                if not (st2.value_list == tskit.NULL):\n                        st2.value_list[T_index[edge.child]].value = -1\n                        st2.value_list[T_index[edge.child]].tree_node = -1\n\n                st.tree_node = -1\n                T_index[edge.child] = -1\n\n        # We can have values left over still pointing to old roots. Remove\n        for root in self.tree.roots:\n            if T_index[root] != -1:\n                # Use a special marker here to designate the real roots.\n                T[T_index[root]].value_index = -2\n\n        for vt in T:\n            if vt.tree_node != -1:\n                if parent[vt.tree_node] == -1 and vt.value_index != -2:\n                    # Also need to mark the corresponding InternalValueTransition as\n                    # unused for the remaining states\n                    for st2 in T:\n                        if not (st2.value_list == tskit.NULL):\n                            st2.value_list[T_index[vt.tree_node]].value = -1\n                            st2.value_list[T_index[vt.tree_node]].tree_node = -1\n                    T_index[vt.tree_node] = -1\n                    vt.tree_node = -1\n                vt.value_index = -1\n\n        self.N = np.zeros(self.tree.tree_sequence.num_nodes, dtype=int)\n        node_map = {st.tree_node: st for st in self.T}\n\n        for u in self.tree.samples():\n            while u not in node_map:\n                u = self.tree.parent(u)\n            self.N[self.T_index[u]] += 1\n\n    def update_probabilities(self, site, genotype_state):\n        tree = self.tree\n        T_index = self.T_index\n        T = self.T\n        alleles = [\"0\", \"1\"]\n        allelic_state = self.allelic_state\n        # Set the allelic_state for this site.\n        allelic_state[self.tree.root] = alleles.index(site.ancestral_state)\n        normalisation_factor_inner = {}\n\n        for st1 in T:\n            if st1.tree_node != -1:\n                
normalisation_factor_inner[st1.tree_node] = (\n                    self.compute_normalisation_factor_inner_dict(st1.tree_node)\n                )\n\n        for st1 in T:\n            if st1.tree_node != -1:\n                for st2 in st1.value_list:\n                    if st2.tree_node != -1:\n                        self.T[self.T_index[st1.tree_node]].value_list[\n                            self.T_index[st2.tree_node]\n                        ].inner_summation = self.inner_summation_evaluation(\n                            normalisation_factor_inner,\n                            st1.tree_node,\n                            st2.tree_node,\n                        )\n                        # (\n                        #     normalisation_factor_inner[st1.tree_node]\n                        #     + normalisation_factor_inner[st2.tree_node]\n                        # )\n\n        for mutation in site.mutations:\n            u = mutation.node\n            allelic_state[u] = alleles.index(mutation.derived_state)\n            if T_index[u] == -1:\n                while T_index[u] == tskit.NULL:\n                    u = tree.parent(u)\n                T_index[mutation.node] = len(T)\n                T.append(\n                    ValueTransition(\n                        tree_node=mutation.node,\n                        value_list=copy.deepcopy(T[T_index[u]].value_list),\n                    )  # DEV: is it possible to not use deepcopies?\n                )\n                for st in T:\n                    if not (st.value_list == tskit.NULL):\n                        st.value_list.append(\n                            InternalValueTransition(\n                                tree_node=mutation.node,\n                                value=st.value_list.copy()[T_index[u]].value,\n                                inner_summation=st.value_list.copy()[\n                                    T_index[u]\n                                ].inner_summation,\n                      
      )\n                        )\n\n        # Get the allelic state at the leaves.\n        allelic_state[: tree.num_samples()] = tree.tree_sequence.genotype_matrix()[\n            site.id, :\n        ]\n\n        query_is_het = genotype_state == 1\n        query_is_missing = genotype_state == MISSING\n\n        for st1 in T:\n            u1 = st1.tree_node\n\n            if u1 != -1:\n                # Get the allelic_state at u. TODO we can cache these states to\n                # avoid some upward traversals.\n                v1 = u1\n                while allelic_state[v1] == -1:\n                    v1 = tree.parent(v1)\n                    assert v1 != -1\n\n                for st2 in st1.value_list:\n                    u2 = st2.tree_node\n                    if u2 != -1:\n                        # Get the allelic_state at u. TODO we can cache these states to\n                        # avoid some upward traversals.\n                        v2 = u2\n                        while allelic_state[v2] == -1:\n                            v2 = tree.parent(v2)\n                            assert v2 != -1\n\n                        genotype_template_state = allelic_state[v1] + allelic_state[v2]\n                        match = genotype_state == genotype_template_state\n                        template_is_het = genotype_template_state == 1\n                        # Fill in the value at the combination of states: (s1, s2)\n                        st2.value = self.compute_next_probability_dict(\n                            site.id,\n                            st2.value,\n                            st2.inner_summation,\n                            match,\n                            template_is_het,\n                            query_is_het,\n                            query_is_missing,\n                            u1,\n                            u2,\n                            # Last two are not used by forward-backward\n                            # but required 
by Viterbi\n                        )\n\n                # This will ensure that allelic_state[:n] is filled\n                genotype_template_state = (\n                    allelic_state[v1] + allelic_state[: tree.num_samples()]\n                )\n                # These are vectors of length n (at internal nodes).\n                match = genotype_state == genotype_template_state\n                template_is_het = genotype_template_state == 1\n\n        # Unset the states\n        allelic_state[tree.root] = -1\n        for mutation in site.mutations:\n            allelic_state[mutation.node] = -1\n\n    def process_site(\n        self, site, genotype_state, forwards=True\n    ):  # Note: forwards turned on for Viterbi\n        if forwards:\n            # Forwards algorithm\n            self.update_probabilities(site, genotype_state)\n            self.stupid_compress_dict()\n            s1 = self.compute_normalisation_factor_dict()\n            T = self.T\n\n            for st in T:\n                if st.tree_node != tskit.NULL:\n                    # Need to loop through value copy, and normalise\n                    for st2 in st.value_list:\n                        st2.value /= s1\n                        st2.value = np.round(st2.value, self.precision)\n\n            self.output.store_site(\n                site.id, s1, [(st.tree_node, st.value_list) for st in self.T]\n            )\n        else:\n            # Backwards algorithm\n            self.output.store_site(\n                site.id,\n                self.output.normalisation_factor[site.id],\n                [(st.tree_node, st.value_list) for st in self.T],\n            )\n            self.update_probabilities(site, genotype_state)\n            self.stupid_compress_dict()\n            b_last_sum = self.compute_normalisation_factor_dict()  # (Double sum)\n\n            normalisation_factor_inner = {}\n\n            for st1 in self.T:\n                if st1.tree_node != -1:\n                    
normalisation_factor_inner[st1.tree_node] = (\n                        self.compute_normalisation_factor_inner_dict(st1.tree_node)\n                    )\n\n            for st1 in self.T:\n                if st1.tree_node != -1:\n                    for st2 in st1.value_list:\n                        if st2.tree_node != -1:\n                            self.T[self.T_index[st1.tree_node]].value_list[\n                                self.T_index[st2.tree_node]\n                            ].inner_summation = (\n                                normalisation_factor_inner[st1.tree_node]\n                                + normalisation_factor_inner[st2.tree_node]\n                            )\n\n            s = self.output.normalisation_factor[site.id]\n            for st1 in self.T:\n                if st1.tree_node != tskit.NULL:\n                    for st2 in st1.value_list:\n                        st2.value = (\n                            ((self.rho[site.id] / self.ts.num_samples) ** 2) * b_last_sum\n                            + (1 - self.rho[site.id])\n                            * (self.rho[site.id] / self.ts.num_samples)\n                            * st2.inner_summation\n                            + (1 - self.rho[site.id]) ** 2 * st2.value\n                        )\n                        st2.value /= s\n                        st2.value = np.round(st2.value, self.precision)\n\n    def run_forward(self, g):\n        n = self.ts.num_samples\n        self.tree.clear()\n        for u in self.ts.samples():\n            self.T_index[u] = len(self.T)\n            self.T.append(ValueTransition(tree_node=u, value_list=[]))\n            for v in self.ts.samples():\n                self.T[self.T_index[u]].value_list.append(\n                    InternalValueTransition(tree_node=v, value=(1 / n) ** 2)\n                )\n\n        while self.tree.next():\n            self.update_tree()\n            for site in self.tree.sites():\n                
self.process_site(site, g[site.id])\n\n        return self.output\n\n    def run_backward(self, g):\n        self.tree.clear()\n        for u in self.ts.samples():\n            self.T_index[u] = len(self.T)\n            self.T.append(ValueTransition(tree_node=u, value_list=[]))\n            for v in self.ts.samples():\n                self.T[self.T_index[u]].value_list.append(\n                    InternalValueTransition(tree_node=v, value=1)\n                )\n\n        while self.tree.next():\n            self.update_tree()\n            for site in self.tree.sites():\n                self.process_site(site, g[site.id], forwards=False)\n        return self.output\n\n    def compute_normalisation_factor_dict(self):\n        raise NotImplementedError()\n\n    def compute_next_probability_dict(\n        self,\n        site_id,\n        p_last,\n        inner_summation,\n        is_match,\n        template_is_het,\n        query_is_het,\n        query_is_missing,\n        node_1,  # Note: these are only used in Viterbi (node_1 and node_2)\n        node_2,\n    ):\n        raise NotImplementedError()\n\n\nclass CompressedMatrix:\n    \"\"\"\n    Class representing a num_samples x num_sites matrix compressed by a\n    tree sequence. 
Each site is represented by a set of (node, value)\n    pairs, which act as \"mutations\", i.e., any sample that descends\n    from a particular node will inherit that value (unless any other\n    values are on the path).\n    \"\"\"\n\n    def __init__(self, ts, normalisation_factor=None):\n        self.ts = ts\n        self.num_sites = ts.num_sites\n        self.num_samples = ts.num_samples\n        self.value_transitions = [None for _ in range(self.num_sites)]\n        if normalisation_factor is None:\n            self.normalisation_factor = np.zeros(self.num_sites)\n        else:\n            self.normalisation_factor = normalisation_factor\n            assert len(self.normalisation_factor) == self.num_sites\n\n    def store_site(self, site, normalisation_factor, value_transitions):\n        self.normalisation_factor[site] = normalisation_factor\n        self.value_transitions[site] = copy.deepcopy(value_transitions)\n\n    # Expose the same API as the low-level classes\n\n    @property\n    def num_transitions(self):\n        a = [len(self.value_transitions[j]) for j in range(self.num_sites)]\n        return np.array(a, dtype=np.int32)\n\n    def get_site(self, site):\n        return self.value_transitions[site]\n\n    def decode_site_dict(self, tree, site_id):\n        \"\"\"\n        Decodes the tree encoding of the values into an explicit\n        matrix.\n        \"\"\"\n        A = np.zeros((self.num_samples, self.num_samples))\n        f = dict(self.value_transitions[site_id])\n\n        for j1, u1 in enumerate(self.ts.samples()):\n            while u1 not in f:\n                u1 = tree.parent(u1)\n            f1 = {st.tree_node: st for st in f[u1]}\n            for j2, u2 in enumerate(self.ts.samples()):\n                while u2 not in f1:\n                    u2 = tree.parent(u2)\n                A[j1, j2] = f1[u2].value\n        return A\n\n    def decode(self):\n        \"\"\"\n        Decodes the tree encoding of the values into an explicit\n     
   matrix.\n        \"\"\"\n        A = np.zeros((self.num_sites, self.num_samples, self.num_samples))\n        for tree in self.ts.trees():\n            for site in tree.sites():\n                A[site.id] = self.decode_site_dict(tree, site.id)\n        return A\n\n\nclass ForwardMatrix(CompressedMatrix):\n    \"\"\"Class representing a compressed forward matrix.\"\"\"\n\n\nclass BackwardMatrix(CompressedMatrix):\n    \"\"\"Class representing a compressed forward matrix.\"\"\"\n\n\nclass ViterbiMatrix(CompressedMatrix):\n    \"\"\"\n    Class representing the compressed Viterbi matrix.\n    \"\"\"\n\n    def __init__(self, ts):\n        super().__init__(ts)\n        # Tuples containing the site, the pair of nodes in the tree,\n        # and whether recombination is required\n        self.double_recombination_required = [(-1, 0, 0, False)]\n        self.single_recombination_required = [(-1, 0, 0, False)]\n\n    def add_single_recombination_required(self, site, node_s1, node_s2, required):\n        self.single_recombination_required.append((site, node_s1, node_s2, required))\n\n    def add_double_recombination_required(self, site, node_s1, node_s2, required):\n        self.double_recombination_required.append((site, node_s1, node_s2, required))\n\n    def choose_sample_double(self, site_id, tree):\n        max_value = -1\n        u1 = -1\n        u2 = -1\n\n        for node_s1, value_outer in self.value_transitions[site_id]:\n            for value_list in value_outer:\n                value_tmp = value_list\n                if value_tmp.value > max_value:\n                    max_value = value_tmp.value\n                    u1 = node_s1\n                    u2 = value_tmp.tree_node\n\n        assert u1 != -1\n        assert u2 != -1\n\n        transition_nodes = [u_tmp for (u_tmp, _) in self.value_transitions[site_id]]\n\n        while not tree.is_sample(u1):\n            for v in tree.children(u1):\n                if v not in transition_nodes:\n                   
 u1 = v\n                    break\n            else:\n                raise AssertionError(\"could not find path\")\n\n        while not tree.is_sample(u2):\n            for v in tree.children(u2):\n                if v not in transition_nodes:\n                    u2 = v\n                    break\n            else:\n                raise AssertionError(\"could not find path\")\n\n        return (u1, u2)\n\n    def choose_sample_single(self, site_id, tree, current_nodes):\n        # I want to find which is the max between any choice if I switch just u1,\n        # and any choice if I switch just u2.\n        node_map = {st[0]: st for st in self.value_transitions[site_id]}\n        to_compute = (\n            np.zeros(2, dtype=int) - 1\n        )  # We have two to compute - one for each single switch set of possibilities.\n\n        for i, v in enumerate(current_nodes):  # (u1, u2)\n            while v not in node_map:\n                v = tree.parent(v)\n            to_compute[i] = v\n\n        # Need to go to the (j1 :)th entries, and the (:,j2)the entries,\n        # and pick the best.\n        T_index = np.zeros(self.ts.num_nodes, dtype=int) - 1\n        for j, st in enumerate(self.value_transitions[site_id]):\n            T_index[st[0]] = j\n\n        node_single_switch_maxes = np.zeros(2, dtype=int) - 1\n        single_switch = np.zeros(2) - 1\n\n        for i, node in enumerate(to_compute):\n            value_list = self.value_transitions[site_id][T_index[node]][1]\n            s_inner = 0\n            for st in value_list:\n                j = st.tree_node\n                if j != -1:\n                    max_st = st.value\n                    max_arg = st.tree_node\n                    if max_st > s_inner:\n                        s_inner = max_st\n                        s_arg = max_arg\n            node_single_switch_maxes[i] = s_arg\n            single_switch[i] = s_inner\n\n        if np.argmax(single_switch) == 0:\n            # u1 is fixed, and we 
switch u2\n            u1 = current_nodes[0]\n            current_nodes = (u1, node_single_switch_maxes[0])\n        else:\n            # u2 is fixed, and we switch u1.\n            u2 = current_nodes[1]\n            current_nodes = (node_single_switch_maxes[1], u2)\n\n        u1 = current_nodes[0]\n        u2 = current_nodes[1]\n\n        # Find the collection of transition nodes to use to descend down the tree\n        transition_nodes = [u for (u, _) in self.value_transitions[site_id]]\n\n        # Traverse down to find a leaves.\n        while not tree.is_sample(u1):\n            for v in tree.children(u1):\n                if v not in transition_nodes:\n                    u1 = v\n                    break\n            else:\n                raise AssertionError(\"could not find path\")\n\n        while not tree.is_sample(u2):\n            for v in tree.children(u2):\n                if v not in transition_nodes:\n                    u2 = v\n                    break\n            else:\n                raise AssertionError(\"could not find path\")\n\n        current_nodes = (u1, u2)\n\n        return current_nodes\n\n    def traceback(self):\n        # Run the traceback.\n        m = self.ts.num_sites\n        match = np.zeros((m, 2), dtype=int)\n\n        single_recombination_tree = (\n            np.zeros((self.ts.num_nodes, self.ts.num_nodes), dtype=int) - 1\n        )\n        double_recombination_tree = (\n            np.zeros((self.ts.num_nodes, self.ts.num_nodes), dtype=int) - 1\n        )\n\n        tree = tskit.Tree(self.ts)\n        tree.last()\n        double_switch = True\n        current_nodes = (-1, -1)\n        current_node_outer = current_nodes[0]\n\n        rr_single_index = len(self.single_recombination_required) - 1\n        rr_double_index = len(self.double_recombination_required) - 1\n\n        for site in reversed(self.ts.sites()):\n            while tree.interval.left > site.position:\n                tree.prev()\n            assert 
tree.interval.left <= site.position < tree.interval.right\n\n            # Fill in the recombination single tree\n            j_single = rr_single_index\n            # The above starts from the end of all the recombination required\n            # information, and includes all the information for the current site.\n            while self.single_recombination_required[j_single][0] == site.id:\n                u1, u2, required = self.single_recombination_required[j_single][1:]\n                single_recombination_tree[u1, u2] = required\n                j_single -= 1\n\n            # Fill in the recombination double tree\n            j_double = rr_double_index\n            # The above starts from the end of all the recombination required\n            # information, and includes all the information for the current site.\n            while self.double_recombination_required[j_double][0] == site.id:\n                u1, u2, required = self.double_recombination_required[j_double][1:]\n                double_recombination_tree[u1, u2] = required\n                j_double -= 1\n\n            # Note - current nodes are the leaf nodes.\n            if current_node_outer == -1:\n                if double_switch:\n                    current_nodes = self.choose_sample_double(site.id, tree)\n                else:\n                    current_nodes = self.choose_sample_single(\n                        site.id, tree, current_nodes\n                    )\n\n            match[site.id, :] = current_nodes\n\n            # Now traverse up the tree from the current node. 
The first marked node\n            # we meet tells us whether we need to recombine.\n            current_node_outer = current_nodes[0]\n            u1 = current_node_outer\n            u2 = current_nodes[1]\n\n            # Just need to move up the tree to evaluate u1 and u2.\n            if double_switch:\n                while u1 != -1 and double_recombination_tree[u1, u1] == -1:\n                    u1 = tree.parent(u1)\n\n                while u2 != -1 and double_recombination_tree[u1, u2] == -1:\n                    u2 = tree.parent(u2)\n            else:\n                while u1 != -1 and single_recombination_tree[u1, u1] == -1:\n                    u1 = tree.parent(u1)\n\n                while u2 != -1 and single_recombination_tree[u1, u2] == -1:\n                    u2 = tree.parent(u2)\n\n            assert u1 != -1\n            assert u2 != -1\n\n            if double_recombination_tree[u1, u2] == 1:\n                # Need to double switch at the next site.\n                current_node_outer = -1\n                double_switch = True\n            elif single_recombination_tree[u1, u2] == 1:\n                # Need to single switch at the next site\n                current_node_outer = -1\n                double_switch = False\n\n            # Reset the nodes in the single recombination tree.\n            j = rr_single_index\n            while self.single_recombination_required[j][0] == site.id:\n                u1_tmp, u2_tmp, _ = self.single_recombination_required[j][1:]\n                single_recombination_tree[u1_tmp, u2_tmp] = -1\n                j -= 1\n            rr_single_index = j\n\n            # Reset the nodes in the double recombination tree.\n            j = rr_double_index\n            while self.double_recombination_required[j][0] == site.id:\n                u1_tmp, u2_tmp, _ = self.double_recombination_required[j][1:]\n                double_recombination_tree[u1_tmp, u2_tmp] = -1\n                j -= 1\n            rr_double_index 
= j\n\n        return match\n\n\nclass ForwardAlgorithm(LsHmmAlgorithm):\n    \"\"\"Runs the Li and Stephens forward algorithm.\"\"\"\n\n    def __init__(self, ts, rho, mu, precision=30):\n        super().__init__(ts, rho, mu, precision)\n        self.output = ForwardMatrix(ts)\n\n    def inner_summation_evaluation(\n        self, normalisation_factor_inner, st1_tree_node, st2_tree_node\n    ):\n        return (\n            normalisation_factor_inner[st1_tree_node]\n            + normalisation_factor_inner[st2_tree_node]\n        )\n\n    def compute_normalisation_factor_dict(self):\n        s = 0\n        for j, st in enumerate(self.T):\n            assert st.tree_node != tskit.NULL\n            assert self.N[j] > 0\n            s += self.N[j] * self.compute_normalisation_factor_inner_dict(st.tree_node)\n        return s\n\n    def compute_normalisation_factor_inner_dict(self, node):\n        s_inner = 0\n        F_previous = self.T[self.T_index[node]].value_list\n        for st in F_previous:\n            j = st.tree_node\n            if j != -1:\n                s_inner += self.N[self.T_index[j]] * st.value\n        return s_inner\n\n    def compute_next_probability_dict(\n        self,\n        site_id,\n        p_last,\n        inner_normalisation_factor,\n        is_match,\n        template_is_het,\n        query_is_het,\n        query_is_missing,\n        node_1,\n        node_2,\n    ):\n        rho = self.rho[site_id]\n        mu = self.mu[site_id]\n        n = self.ts.num_samples\n\n        p_t = (\n            (rho / n) ** 2\n            + ((1 - rho) * (rho / n)) * inner_normalisation_factor\n            + (1 - rho) ** 2 * p_last\n        )\n\n        if query_is_missing:\n            p_e = 1\n        else:\n            query_is_hom = np.logical_not(query_is_het)\n            template_is_hom = np.logical_not(template_is_het)\n\n            equal_both_hom = np.logical_and(\n                np.logical_and(is_match, template_is_hom), query_is_hom\n         
   )\n            unequal_both_hom = np.logical_and(\n                np.logical_and(np.logical_not(is_match), template_is_hom), query_is_hom\n            )\n            both_het = np.logical_and(template_is_het, query_is_het)\n            ref_hom_obs_het = np.logical_and(template_is_hom, query_is_het)\n            ref_het_obs_hom = np.logical_and(template_is_het, query_is_hom)\n\n            p_e = (\n                equal_both_hom * (1 - mu) ** 2\n                + unequal_both_hom * (mu**2)\n                + ref_hom_obs_het * (2 * mu * (1 - mu))\n                + ref_het_obs_hom * (mu * (1 - mu))\n                + both_het * ((1 - mu) ** 2 + mu**2)\n            )\n\n        return p_t * p_e\n\n\nclass BackwardAlgorithm(LsHmmAlgorithm):\n    \"\"\"Runs the Li and Stephens backward algorithm.\"\"\"\n\n    def __init__(self, ts, rho, mu, normalisation_factor, precision=10):\n        super().__init__(ts, rho, mu, precision)\n        self.output = BackwardMatrix(ts, normalisation_factor)\n\n    def inner_summation_evaluation(\n        self, normalisation_factor_inner, st1_tree_node, st2_tree_node\n    ):\n        return (\n            normalisation_factor_inner[st1_tree_node]\n            + normalisation_factor_inner[st2_tree_node]\n        )\n\n    def compute_normalisation_factor_dict(self):\n        s = 0\n        for j, st in enumerate(self.T):\n            assert st.tree_node != tskit.NULL\n            assert self.N[j] > 0\n            s += self.N[j] * self.compute_normalisation_factor_inner_dict(st.tree_node)\n        return s\n\n    def compute_normalisation_factor_inner_dict(self, node):\n        s_inner = 0\n        F_previous = self.T[self.T_index[node]].value_list\n        for st in F_previous:\n            j = st.tree_node\n            if j != -1:\n                s_inner += self.N[self.T_index[j]] * st.value\n        return s_inner\n\n    def compute_next_probability_dict(\n        self,\n        site_id,\n        p_next,\n        
inner_normalisation_factor,\n        is_match,\n        template_is_het,\n        query_is_het,\n        query_is_missing,\n        node_1,\n        node_2,\n    ):\n        mu = self.mu[site_id]\n        template_is_hom = np.logical_not(template_is_het)\n\n        if query_is_missing:\n            p_e = 1\n        else:\n            query_is_hom = np.logical_not(query_is_het)\n\n            equal_both_hom = np.logical_and(\n                np.logical_and(is_match, template_is_hom), query_is_hom\n            )\n            unequal_both_hom = np.logical_and(\n                np.logical_and(np.logical_not(is_match), template_is_hom), query_is_hom\n            )\n            both_het = np.logical_and(template_is_het, query_is_het)\n            ref_hom_obs_het = np.logical_and(template_is_hom, query_is_het)\n            ref_het_obs_hom = np.logical_and(template_is_het, query_is_hom)\n\n            p_e = (\n                equal_both_hom * (1 - mu) ** 2\n                + unequal_both_hom * (mu**2)\n                + ref_hom_obs_het * (2 * mu * (1 - mu))\n                + ref_het_obs_hom * (mu * (1 - mu))\n                + both_het * ((1 - mu) ** 2 + mu**2)\n            )\n\n        return p_next * p_e\n\n\nclass ViterbiAlgorithm(LsHmmAlgorithm):\n    \"\"\"\n    Runs the Li and Stephens Viterbi algorithm.\n    \"\"\"\n\n    def __init__(self, ts, rho, mu, precision=10):\n        super().__init__(ts, rho, mu, precision)\n        self.output = ViterbiMatrix(ts)\n\n    def inner_summation_evaluation(\n        self, normalisation_factor_inner, st1_tree_node, st2_tree_node\n    ):\n        return max(\n            normalisation_factor_inner[st1_tree_node],\n            normalisation_factor_inner[st2_tree_node],\n        )\n\n    def compute_normalisation_factor_dict(self):\n        s = 0\n        for st in self.T:\n            assert st.tree_node != tskit.NULL\n            max_st = self.compute_normalisation_factor_inner_dict(st.tree_node)\n            if max_st > s:\n    
            s = max_st\n        if s == 0:\n            raise ValueError(\n                \"Trying to match non-existent allele with zero mutation rate\"\n            )\n        return s\n\n    def compute_normalisation_factor_inner_dict(self, node):\n        s_inner = 0\n        V_previous = self.T[self.T_index[node]].value_list\n        for st in V_previous:\n            j = st.tree_node\n            if j != -1:\n                max_st = st.value\n                if max_st > s_inner:\n                    s_inner = max_st\n\n        return s_inner\n\n    def compute_next_probability_dict(\n        self,\n        site_id,\n        p_last,\n        inner_normalisation_factor,\n        is_match,\n        template_is_het,\n        query_is_het,\n        query_is_missing,\n        node_1,\n        node_2,\n    ):\n        r = self.rho[site_id]\n        mu = self.mu[site_id]\n        n = self.ts.num_samples\n        r_n = r / n\n\n        double_recombination_required = False\n        single_recombination_required = False\n\n        if query_is_missing:\n            p_e = 1\n        else:\n            template_is_hom = np.logical_not(template_is_het)\n            query_is_hom = np.logical_not(query_is_het)\n            equal_both_hom = np.logical_and(\n                np.logical_and(is_match, template_is_hom), query_is_hom\n            )\n            unequal_both_hom = np.logical_and(\n                np.logical_and(np.logical_not(is_match), template_is_hom), query_is_hom\n            )\n            both_het = np.logical_and(template_is_het, query_is_het)\n            ref_hom_obs_het = np.logical_and(template_is_hom, query_is_het)\n            ref_het_obs_hom = np.logical_and(template_is_het, query_is_hom)\n\n            p_e = (\n                equal_both_hom * (1 - mu) ** 2\n                + unequal_both_hom * (mu**2)\n                + ref_hom_obs_het * (2 * mu * (1 - mu))\n                + ref_het_obs_hom * (mu * (1 - mu))\n                + both_het * ((1 - mu) 
** 2 + mu**2)\n            )\n\n        no_switch = (1 - r) ** 2 + 2 * (r_n * (1 - r)) + r_n**2\n        single_switch = r_n * (1 - r) + r_n**2\n        double_switch = r_n**2\n\n        V_single_switch = inner_normalisation_factor\n        p_t = p_last * no_switch\n        single_switch_tmp = single_switch * V_single_switch\n\n        if single_switch_tmp > double_switch:\n            # Then single switch is the alternative\n            if p_t < single_switch * V_single_switch:\n                p_t = single_switch * V_single_switch\n                single_recombination_required = True\n        else:\n            # Double switch is the alternative\n            if p_t < double_switch:\n                p_t = double_switch\n                double_recombination_required = True\n\n        self.output.add_single_recombination_required(\n            site_id, node_1, node_2, single_recombination_required\n        )\n        self.output.add_double_recombination_required(\n            site_id, node_1, node_2, double_recombination_required\n        )\n\n        return p_t * p_e\n\n\ndef ls_forward_tree(g, ts, rho, mu, precision=30):\n    \"\"\"Forward matrix computation based on a tree sequence.\"\"\"\n    fa = ForwardAlgorithm(ts, rho, mu, precision=precision)\n    return fa.run_forward(g)\n\n\ndef ls_backward_tree(g, ts_mirror, rho, mu, normalisation_factor, precision=30):\n    \"\"\"Backward matrix computation based on a tree sequence.\"\"\"\n    ba = BackwardAlgorithm(ts_mirror, rho, mu, normalisation_factor, precision=precision)\n    return ba.run_backward(g)\n\n\ndef ls_viterbi_tree(g, ts, rho, mu, precision=30):\n    \"\"\"\n    Viterbi path computation based on a tree sequence.\n    \"\"\"\n    va = ViterbiAlgorithm(ts, rho, mu, precision=precision)\n    return va.run_forward(g)\n\n\nclass LSBase:\n    \"\"\"Superclass of Li and Stephens tests.\"\"\"\n\n    def genotype_emission(self, mu, m):\n        # Define the emission probability matrix\n        e = np.zeros((m, 
8))\n        e[:, EQUAL_BOTH_HOM] = (1 - mu) ** 2\n        e[:, UNEQUAL_BOTH_HOM] = mu**2\n        e[:, BOTH_HET] = (1 - mu) ** 2 + mu**2\n        e[:, REF_HOM_OBS_HET] = 2 * mu * (1 - mu)\n        e[:, REF_HET_OBS_HOM] = mu * (1 - mu)\n\n        return e\n\n    def example_genotypes(self, ts):\n        H = ts.genotype_matrix()\n        s = H[:, 0].reshape(1, H.shape[0]) + H[:, 1].reshape(1, H.shape[0])\n        H = H[:, 2:]\n\n        genotypes = [\n            s,\n            H[:, -1].reshape(1, H.shape[0]) + H[:, -2].reshape(1, H.shape[0]),\n        ]\n\n        s_tmp = s.copy()\n        s_tmp[0, -1] = MISSING\n        genotypes.append(s_tmp)\n        s_tmp = s.copy()\n        s_tmp[0, ts.num_sites // 2] = MISSING\n        genotypes.append(s_tmp)\n        s_tmp = s.copy()\n        s_tmp[0, :] = MISSING\n        genotypes.append(s_tmp)\n\n        m = ts.get_num_sites()\n        n = H.shape[1]\n\n        G = np.zeros((m, n, n))\n        for i in range(m):\n            G[i, :, :] = np.add.outer(H[i, :], H[i, :])\n\n        return H, G, genotypes\n\n    def example_parameters_genotypes(self, ts, seed=42):\n        np.random.seed(seed)\n        H, G, genotypes = self.example_genotypes(ts)\n        n = H.shape[1]\n        m = ts.get_num_sites()\n\n        # Here we have equal mutation and recombination\n        r = np.zeros(m) + 0.01\n        mu = np.zeros(m) + 0.01\n        r[0] = 0\n\n        e = self.genotype_emission(mu, m)\n\n        for s in genotypes:\n            yield n, m, G, s, e, r, mu\n\n        # Mixture of random and extremes\n        rs = [np.zeros(m) + 0.999, np.zeros(m) + 1e-6, np.random.rand(m)]\n        mus = [np.zeros(m) + 0.33, np.zeros(m) + 1e-6, np.random.rand(m) * 0.33]\n\n        s = genotypes[0]\n        for r, mu in itertools.product(rs, mus):\n            r[0] = 0\n            e = self.genotype_emission(mu, m)\n            yield n, m, G, s, e, r, mu\n\n    def assertAllClose(self, A, B):\n        \"\"\"Assert that all entries of two 
matrices are 'close'\"\"\"\n        assert np.allclose(A, B, rtol=1e-5, atol=1e-8)\n\n    # Define a bunch of very small tree-sequences for testing a collection of\n    # parameters on\n    @pytest.mark.skip(reason=\"No plans to implement diploid LS HMM yet.\")\n    def test_simple_n_10_no_recombination(self):\n        ts = msprime.simulate(\n            10, recombination_rate=0, mutation_rate=0.5, random_seed=42\n        )\n        assert ts.num_sites > 3\n        self.verify(ts)\n\n    @pytest.mark.skip(reason=\"No plans to implement diploid LS HMM yet.\")\n    def test_simple_n_6(self):\n        ts = msprime.simulate(6, recombination_rate=2, mutation_rate=7, random_seed=42)\n        assert ts.num_sites > 5\n        self.verify(ts)\n\n    @pytest.mark.skip(reason=\"No plans to implement diploid LS HMM yet.\")\n    def test_simple_n_8_high_recombination(self):\n        ts = msprime.simulate(8, recombination_rate=20, mutation_rate=5, random_seed=42)\n        assert ts.num_trees > 15\n        assert ts.num_sites > 5\n        self.verify(ts)\n\n    # FIXME Reducing the number of test cases here as they take a long time to run,\n    # and we will want to refactor the test infrastructure when implementing these\n    # diploid methods in the library.\n\n    # def test_simple_n_10_no_recombination_high_mut(self):\n    #     ts = msprime.simulate(\n    #         10, recombination_rate=0, mutation_rate=3, random_seed=42)\n    #     assert ts.num_sites > 3\n    #     self.verify(ts)\n\n    # def test_simple_n_10_no_recombination_higher_mut(self):\n    #     ts = msprime.simulate(\n    #         20, recombination_rate=0, mutation_rate=3, random_seed=42)\n    #     assert ts.num_sites > 3\n    #     self.verify(ts)\n\n    # def test_simple_n_8(self):\n    #     ts = msprime.simulate(\n    #         8, recombination_rate=2, mutation_rate=5, random_seed=42)\n    #     assert ts.num_sites > 5\n    #     self.verify(ts)\n\n    # def test_simple_n_16(self):\n    #     ts = 
msprime.simulate(\n    #         16, recombination_rate=2, mutation_rate=5, random_seed=42)\n    #     assert ts.num_sites > 5\n    #     self.verify(ts)\n\n    def verify(self, ts):\n        raise NotImplementedError()\n\n\nclass FBAlgorithmBase(LSBase):\n    \"\"\"Base for forwards backwards algorithm tests.\"\"\"\n\n\nclass VitAlgorithmBase(LSBase):\n    \"\"\"Base for Viterbi algorithm tests.\"\"\"\n\n\nclass TestMirroringDipdict(FBAlgorithmBase):\n    \"\"\"Tests that mirroring the tree sequence and running forwards and backwards\n    algorithms give the same log-likelihood of observing the data.\"\"\"\n\n    def verify(self, ts):\n        for n, m, _, s, _, r, mu in self.example_parameters_genotypes(ts):\n            # Note, need to remove the first sample from the ts, and ensure that\n            # invariant sites aren't removed.\n            ts_check, mapping = ts.simplify(\n                range(1, n + 1), filter_sites=False, map_nodes=True\n            )\n            H_check = ts_check.genotype_matrix()\n            G_check = np.zeros((m, n, n))\n            for i in range(m):\n                G_check[i, :, :] = np.add.outer(H_check[i, :], H_check[i, :])\n\n            cm_d = ls_forward_tree(s[0, :], ts_check, r, mu)\n            ll_tree = np.sum(np.log10(cm_d.normalisation_factor))\n\n            ts_check_mirror = mirror_coordinates(ts_check)\n            r_flip = np.insert(np.flip(r)[:-1], 0, 0)\n            cm_mirror = ls_forward_tree(\n                np.flip(s[0, :]), ts_check_mirror, r_flip, np.flip(mu)\n            )\n            ll_mirror_tree_dict = np.sum(np.log10(cm_mirror.normalisation_factor))\n\n            self.assertAllClose(ll_tree, ll_mirror_tree_dict)\n\n            # Ensure that the decoded matrices are the same\n            flipped_H_check = np.flip(H_check, axis=0)\n            flipped_s = np.flip(s, axis=1)\n\n            F_mirror_matrix, c, ll = ls.forwards(\n                flipped_H_check,\n                flipped_s,\n             
   ploidy=2,\n                prob_recombination=r_flip,\n                prob_mutation=np.flip(mu),\n                scale_mutation_rate=False,\n            )\n\n            self.assertAllClose(F_mirror_matrix, cm_mirror.decode())\n\n\nclass TestForwardDipTree(FBAlgorithmBase):\n    \"\"\"Tests that the tree algorithm computes the same forward matrix as the simple\n    method.\"\"\"\n\n    def verify(self, ts):\n        for n, m, _, s, _, r, mu in self.example_parameters_genotypes(ts):\n            # Note, need to remove the first sample from the ts, and ensure that\n            # invariant sites aren't removed.\n            ts_check, mapping = ts.simplify(\n                range(1, n + 1), filter_sites=False, map_nodes=True\n            )\n            H_check = ts_check.genotype_matrix()\n            G_check = np.zeros((m, n, n))\n            for i in range(m):\n                G_check[i, :, :] = np.add.outer(H_check[i, :], H_check[i, :])\n\n            F, c, ll = ls.forwards(\n                reference_panel=H_check,\n                query=s,\n                ploidy=2,\n                prob_recombination=r,\n                prob_mutation=mu,\n                scale_mutation_rate=False,\n            )\n            cm_d = ls_forward_tree(s[0, :], ts_check, r, mu)\n            self.assertAllClose(cm_d.decode(), F)\n            ll_tree = np.sum(np.log10(cm_d.normalisation_factor))\n            self.assertAllClose(ll, ll_tree)\n\n\nclass TestForwardBackwardTree(FBAlgorithmBase):\n    \"\"\"Tests that the tree algorithms compute the same forward and backward matrices\n    as the simple method.\"\"\"\n\n    def verify(self, ts):\n        for n, m, _, s, _, r, mu in self.example_parameters_genotypes(ts):\n            # Note, need to remove the first sample from the ts, and ensure that\n            # invariant sites aren't removed.\n            ts_check, mapping = ts.simplify(\n                range(1, n + 1), filter_sites=False, map_nodes=True\n            )\n            H_check = 
ts_check.genotype_matrix()\n            G_check = np.zeros((m, n, n))\n            for i in range(m):\n                G_check[i, :, :] = np.add.outer(H_check[i, :], H_check[i, :])\n\n            F, c, ll = ls.forwards(\n                reference_panel=H_check,\n                query=s,\n                ploidy=2,\n                prob_recombination=r,\n                prob_mutation=mu,\n                scale_mutation_rate=False,\n            )\n            B = ls.backwards(\n                reference_panel=H_check,\n                query=s,\n                ploidy=2,\n                normalisation_factor_from_forward=c,\n                prob_recombination=r,\n                prob_mutation=mu,\n                scale_mutation_rate=False,\n            )\n\n            # Note, need to remove the first sample from the ts, and ensure that\n            # invariant sites aren't removed.\n            ts_check = ts.simplify(range(1, n + 1), filter_sites=False)\n            c_f = ls_forward_tree(s[0, :], ts_check, r, mu)\n            ll_tree = np.sum(np.log10(c_f.normalisation_factor))\n\n            ts_check_mirror = mirror_coordinates(ts_check)\n            r_flip = np.flip(r)\n            c_b = ls_backward_tree(\n                np.flip(s[0, :]),\n                ts_check_mirror,\n                r_flip,\n                np.flip(mu),\n                np.flip(c_f.normalisation_factor),\n            )\n            B_tree = np.flip(c_b.decode(), axis=0)\n            F_tree = c_f.decode()\n\n            self.assertAllClose(B, B_tree)\n            self.assertAllClose(F, F_tree)\n            self.assertAllClose(ll, ll_tree)\n\n\nclass TestTreeViterbiDip(VitAlgorithmBase):\n    \"\"\"\n    Test that we have the same log-likelihood between tree and matrix\n    implementations\n    \"\"\"\n\n    def verify(self, ts):\n        for n, m, _, s, _, r, mu in self.example_parameters_genotypes(ts):\n            # Note, need to remove the first sample from the ts, and ensure that\n         
   # invariant sites aren't removed.\n            ts_check, mapping = ts.simplify(\n                range(1, n + 1), filter_sites=False, map_nodes=True\n            )\n            H_check = ts_check.genotype_matrix()\n            G_check = np.zeros((m, n, n))\n            for i in range(m):\n                G_check[i, :, :] = np.add.outer(H_check[i, :], H_check[i, :])\n            ts_check = ts.simplify(range(1, n + 1), filter_sites=False)\n\n            phased_path, ll = ls.viterbi(\n                reference_panel=H_check,\n                query=s,\n                ploidy=2,\n                prob_recombination=r,\n                prob_mutation=mu,\n                scale_mutation_rate=False,\n            )\n            path_ll_matrix = ls.path_loglik(\n                reference_panel=H_check,\n                query=s,\n                ploidy=2,\n                path=phased_path,\n                prob_recombination=r,\n                prob_mutation=mu,\n                scale_mutation_rate=False,\n            )\n\n            c_v = ls_viterbi_tree(s[0, :], ts_check, r, mu)\n            ll_tree = np.sum(np.log10(c_v.normalisation_factor))\n\n            # Attempt to get the path\n            path_tree_dict = c_v.traceback()\n            # Work out the likelihood of the proposed path\n            path_ll_tree = ls.path_loglik(\n                reference_panel=H_check,\n                query=s,\n                ploidy=2,\n                path=np.transpose(path_tree_dict),\n                prob_recombination=r,\n                prob_mutation=mu,\n                scale_mutation_rate=False,\n            )\n\n            self.assertAllClose(ll, ll_tree)\n            self.assertAllClose(path_ll_tree, path_ll_matrix)\n"
  },
  {
    "path": "python/tests/test_genotypes.py",
    "content": "# MIT License\n#\n# Copyright (c) 2019-2023 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTest cases for generating genotypes/haplotypes.\n\"\"\"\n\nimport itertools\nimport logging\nimport random\nimport re\nimport textwrap\nfrom xml.etree import ElementTree\n\nimport msprime\nimport numpy as np\nimport pytest\n\nimport tests\nimport tests.test_wright_fisher as wf\nimport tests.tsutil as tsutil\nimport tskit\nfrom tests.tsutil import get_example_tree_sequences\nfrom tskit import exceptions\nfrom tskit.genotypes import allele_remap\n\n# ↑ See https://github.com/tskit-dev/tskit/issues/1804 for when\n# we can remove this.\n\n# TODO replace this with a call to\n# example_tree_sequences(discrete_genome=True, snps_only=True)\n\n\n@tests.cached_example\ndef get_example_discrete_genome_tree_sequences():\n    ret = []\n    for ts in get_example_tree_sequences(pytest_params=False):\n        if ts.discrete_genome:\n      
      snps = all(len(site.ancestral_state) == 1 for site in ts.sites()) and all(\n                len(mut.derived_state) == 1 for mut in ts.mutations()\n            )\n            if snps:\n                ret.append(ts)\n    return ret\n\n\ndef naive_get_ancestral_haplotypes(ts):\n    \"\"\"\n    Simple implementation using tree traversals. Note that this definition\n    won't work when we have topology that's not reachable from a root,\n    but this seems more trouble than it's worth dealing with.\n    \"\"\"\n    A = np.zeros((ts.num_nodes, ts.num_sites), dtype=np.int8)\n    A[:] = tskit.MISSING_DATA\n    for t in ts.trees():\n        for site in t.sites():\n            alleles = {site.ancestral_state: 0}\n            for u in t.nodes():\n                A[u, site.id] = 0\n            j = 1\n            for mutation in site.mutations:\n                if mutation.derived_state not in alleles:\n                    alleles[mutation.derived_state] = j\n                    j += 1\n                for u in t.nodes(mutation.node):\n                    A[u, site.id] = alleles[mutation.derived_state]\n    return A\n\n\nclass TestGetAncestralHaplotypes:\n    \"\"\"\n    Tests for the engine to the actual ancestors from a simulation.\n    \"\"\"\n\n    def verify(self, ts):\n        A = naive_get_ancestral_haplotypes(ts)\n        # To detect missing data in ancestors we must set all nodes\n        # to be samples\n        tables = ts.dump_tables()\n        nodes = tables.nodes\n        flags = nodes.flags[:]\n        flags[:] = 1\n        nodes.set_columns(time=nodes.time, flags=flags)\n        ts = tables.tree_sequence()\n        B = ts.genotype_matrix().T\n        assert np.array_equal(A, B)\n\n    def test_single_tree(self):\n        ts = msprime.simulate(5, mutation_rate=1, random_seed=234)\n        self.verify(ts)\n\n    def test_many_trees(self):\n        ts = msprime.simulate(\n            8, recombination_rate=10, mutation_rate=10, random_seed=234\n        )\n     
   assert ts.num_trees > 1\n        assert ts.num_sites > 1\n        self.verify(ts)\n\n    def test_single_tree_jukes_cantor(self):\n        ts = msprime.simulate(6, random_seed=1, mutation_rate=1)\n        ts = tsutil.jukes_cantor(ts, 20, 1, seed=10)\n        self.verify(ts)\n\n    def test_single_tree_multichar_mutations(self):\n        ts = msprime.simulate(6, random_seed=1, mutation_rate=1)\n        ts = tsutil.insert_multichar_mutations(ts)\n        self.verify(ts)\n\n    def test_many_trees_infinite_sites(self):\n        ts = msprime.simulate(6, recombination_rate=2, mutation_rate=2, random_seed=1)\n        assert ts.num_sites > 0\n        assert ts.num_trees > 2\n        self.verify(ts)\n\n    def test_wright_fisher_initial_generation(self):\n        tables = wf.wf_sim(\n            6, 5, seed=3, deep_history=True, initial_generation_samples=True, num_loci=2\n        )\n        tables.sort()\n        tables.simplify()\n        ts = msprime.mutate(tables.tree_sequence(), rate=0.08, random_seed=2)\n        assert ts.num_sites > 0\n        self.verify(ts)\n\n    def test_wright_fisher_simplified(self):\n        tables = wf.wf_sim(\n            9,\n            10,\n            seed=1,\n            deep_history=True,\n            initial_generation_samples=False,\n            num_loci=5,\n        )\n        tables.sort()\n        ts = tables.tree_sequence().simplify()\n        ts = msprime.mutate(ts, rate=0.2, random_seed=1234)\n        assert ts.num_sites > 0\n        self.verify(ts)\n\n    def test_empty_ts(self):\n        tables = tskit.TableCollection(1.0)\n        for _ in range(10):\n            tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)\n        ts = tables.tree_sequence()\n        self.verify(ts)\n\n\ndef isolated_samples_genotype_matrix(ts):\n    \"\"\"\n    Returns the genotype matrix for the specified tree sequence\n    where isolated samples are marked with MISSING_DATA.\n    \"\"\"\n    G = ts.genotype_matrix()\n    samples = ts.samples()\n    
sample_index_map = np.zeros(ts.num_nodes, dtype=int) - 1\n    for index, sample in enumerate(samples):\n        sample_index_map[sample] = index\n    for tree in ts.trees():\n        for site in tree.sites():\n            for root in tree.roots:\n                # An isolated sample is any root that has no children.\n                if tree.left_child(root) == -1:\n                    assert sample_index_map[root] != -1\n                    G[site.id, sample_index_map[root]] = -1\n    return G\n\n\nclass TestVariantGenerator:\n    \"\"\"\n    Tests the variants() method to ensure the output is consistent.\n    \"\"\"\n\n    def get_tree_sequence(self):\n        ts = msprime.simulate(\n            10, length=10, recombination_rate=1, mutation_rate=10, random_seed=3\n        )\n        assert ts.get_num_mutations() > 10\n        return ts\n\n    def test_dtype(self):\n        ts = self.get_tree_sequence()\n        for var in ts.variants():\n            assert var.genotypes.dtype == np.int32\n\n    def test_dtype_conversion(self):\n        # Check if we hit any issues if we assume the variants are uint8\n        # as they were prior to version 0.2.0\n        ts = self.get_tree_sequence()\n        G = ts.genotype_matrix().astype(np.uint8)\n        assert G.dtype == np.uint8\n        for var in ts.variants():\n            assert np.array_equal(G[var.index], var.genotypes)\n            assert np.all(G[var.index] == var.genotypes)\n            assert [var.alleles[g] for g in var.genotypes] == [\n                var.alleles[g] for g in G[var.index]\n            ]\n            G[var.index, :] = var.genotypes\n            assert np.array_equal(G[var.index], var.genotypes)\n\n    def test_multichar_alleles(self):\n        ts = tsutil.insert_multichar_mutations(self.get_tree_sequence())\n        for var in ts.variants():\n            assert len(var.alleles) == 2\n            assert var.site.ancestral_state == var.alleles[0]\n            assert 
var.site.mutations[0].derived_state == var.alleles[1]\n            assert all(0 <= var.genotypes)\n            assert all(var.genotypes <= 1)\n\n    def test_many_alleles(self):\n        ts = self.get_tree_sequence()\n        tables = ts.dump_tables()\n        tables.sites.clear()\n        tables.mutations.clear()\n        # This gives us a total of 360 permutations.\n        alleles = list(map(\"\".join, itertools.permutations(\"ABCDEF\", 4)))\n        assert len(alleles) > 127\n        tables.sites.add_row(0, alleles[0])\n        parent = -1\n        num_alleles = 1\n        for allele in alleles[1:]:\n            ts = tables.tree_sequence()\n            var = next(ts.variants())\n            assert not var.has_missing_data\n            assert var.num_alleles == num_alleles\n            assert len(var.alleles) == num_alleles\n            assert list(var.alleles) == alleles[:num_alleles]\n            assert var.alleles[var.genotypes[0]] == alleles[num_alleles - 1]\n            for u in ts.samples():\n                if u != 0:\n                    assert var.alleles[var.genotypes[u]] == alleles[0]\n            tables.mutations.add_row(0, 0, allele, parent=parent)\n            parent += 1\n            num_alleles += 1\n\n    def test_many_alleles_missing_data(self):\n        ts = self.get_tree_sequence()\n        tables = ts.dump_tables()\n        tables.sites.clear()\n        tables.mutations.clear()\n        # Add an isolated sample\n        tables.nodes.add_row(flags=1, time=0)\n        # This gives us a total of 360 permutations.\n        alleles = list(map(\"\".join, itertools.permutations(\"ABCDEF\", 4)))\n        assert len(alleles) > 127\n        tables.sites.add_row(0, alleles[0])\n        parent = -1\n        num_alleles = 1\n        for allele in alleles[1:]:\n            ts = tables.tree_sequence()\n\n            var = next(ts.variants())\n            assert var.has_missing_data\n            assert var.num_alleles == num_alleles\n            assert 
len(var.alleles) == num_alleles + 1\n            assert list(var.alleles)[:-1] == alleles[:num_alleles]\n            assert var.alleles[-1] is None\n            assert var.alleles[var.genotypes[0]] == alleles[num_alleles - 1]\n            assert var.genotypes[-1] == -1\n            samples = ts.samples()\n            for u in samples[:-1]:\n                if u != 0:\n                    assert var.alleles[var.genotypes[u]] == alleles[0]\n            tables.mutations.add_row(0, 0, allele, parent=parent)\n            parent += 1\n            num_alleles += 1\n\n    def test_site_information(self):\n        ts = self.get_tree_sequence()\n        for site, variant in zip(ts.sites(), ts.variants()):\n            assert site.position == variant.position\n            assert site == variant.site\n\n    def test_no_mutations(self):\n        ts = msprime.simulate(10)\n        assert ts.get_num_mutations() == 0\n        variants = list(ts.variants())\n        assert len(variants) == 0\n\n    @pytest.mark.parametrize(\"samples\", [None, [1, 2], [2, 4], []])\n    def test_genotype_matrix(self, samples):\n        ts = self.get_tree_sequence()\n        num_samples = ts.num_samples if samples is None else len(samples)\n        G = np.empty((ts.num_sites, num_samples), dtype=np.int32)\n        for v in ts.variants(samples=samples):\n            G[v.index, :] = v.genotypes\n        if samples is None:\n            G2 = ts.genotype_matrix()\n        else:\n            G2 = ts.genotype_matrix(samples=samples)\n        assert np.array_equal(G, G2)\n        assert G2.dtype == np.int32\n\n    def test_recurrent_mutations_over_samples(self):\n        ts = self.get_tree_sequence()\n        tables = ts.dump_tables()\n        tables.sites.clear()\n        tables.mutations.clear()\n        num_sites = 5\n        for j in range(num_sites):\n            tables.sites.add_row(\n                position=j * ts.sequence_length / num_sites, ancestral_state=\"0\"\n            )\n            for u in 
range(ts.sample_size):\n                tables.mutations.add_row(site=j, node=u, derived_state=\"1\")\n        ts = tables.tree_sequence()\n        variants = list(ts.variants())\n        assert len(variants) == num_sites\n        for site, variant in zip(ts.sites(), variants):\n            assert site.position == variant.position\n            assert site == variant.site\n            assert site.id == variant.index\n            assert variant.alleles == (\"0\", \"1\")\n            assert np.all(variant.genotypes == np.ones(ts.sample_size))\n\n    def test_silent_mutations(self):\n        ts = self.get_tree_sequence()\n        tree = next(ts.trees())\n        tables = ts.dump_tables()\n        for u in tree.nodes():\n            for sample in tree.samples(u):\n                if sample != u:\n                    tables.sites.clear()\n                    tables.mutations.clear()\n                    site = tables.sites.add_row(position=0, ancestral_state=\"0\")\n                    tables.mutations.add_row(site=site, node=u, derived_state=\"1\")\n                    tables.mutations.add_row(site=site, node=sample, derived_state=\"1\")\n                    tables.compute_mutation_parents()\n                    ts_new = tables.tree_sequence()\n                    assert all([v.genotypes[sample] == 1 for v in ts_new.variants()])\n\n    def test_zero_samples(self):\n        ts = self.get_tree_sequence()\n        for var1, var2 in zip(ts.variants(), ts.variants(samples=[])):\n            assert var1.site == var2.site\n            assert var1.alleles == var2.alleles\n            assert var2.genotypes.shape[0] == 0\n\n    def test_samples(self):\n        n = 4\n        ts = msprime.simulate(\n            n, length=5, recombination_rate=1, mutation_rate=5, random_seed=2\n        )\n        assert ts.num_sites > 1\n        samples = list(range(n))\n        # Generate all possible sample lists.\n        for j in range(n + 1):\n            for s in 
itertools.permutations(samples, j):\n                s = np.array(s, dtype=np.int32)\n                count = 0\n                for var1, var2 in zip(ts.variants(), ts.variants(samples=s)):\n                    assert var1.site == var2.site\n                    assert var1.alleles == var2.alleles\n                    assert np.array_equal(var1.samples, ts.samples())\n                    assert np.array_equal(var2.samples, s)\n                    assert var2.genotypes.shape == (len(s),)\n                    assert np.array_equal(var1.genotypes[s], var2.genotypes)\n                    count += 1\n                assert count == ts.num_sites\n\n    def test_samples_64bit(self):\n        ts = msprime.simulate(4, length=5, mutation_rate=5, random_seed=2)\n        s = np.where(ts.nodes_time == 0)[0]  # normally returns 64 bit ints\n        next(ts.variants(samples=s))\n        s = np.array(s, dtype=np.int64)  # cast just to make sure\n        next(ts.variants(samples=s))\n\n    def test_samples_missing_data(self):\n        n = 4\n        ts = msprime.simulate(\n            n, length=5, recombination_rate=1, mutation_rate=5, random_seed=2\n        )\n        assert ts.num_sites > 1\n        tables = ts.dump_tables()\n        tables.delete_intervals([[0.5, 0.6]])\n        tables.sites.add_row(0.5, ancestral_state=\"0\")\n        tables.sort()\n        ts = tables.tree_sequence()\n        samples = list(range(n))\n        # Generate all possible sample lists.\n        for j in range(1, n + 1):\n            for s in itertools.permutations(samples, j):\n                s = np.array(s, dtype=np.int32)\n                count = 0\n                for var1, var2 in zip(ts.variants(), ts.variants(samples=s)):\n                    assert var1.site == var2.site\n                    assert var1.alleles == var2.alleles\n                    assert var2.genotypes.shape == (len(s),)\n                    assert np.array_equal(var1.genotypes[s], var2.genotypes)\n                    count 
+= 1\n                assert count == ts.num_sites\n\n    def test_non_sample_samples(self):\n        # We don't have to use sample nodes. This does make the terminology confusing\n        # but it's probably still the best option.\n        ts = msprime.simulate(\n            10, length=5, recombination_rate=1, mutation_rate=5, random_seed=2\n        )\n        tables = ts.dump_tables()\n        tables.nodes.set_columns(\n            flags=np.zeros_like(tables.nodes.flags) + tskit.NODE_IS_SAMPLE,\n            time=tables.nodes.time,\n        )\n        all_samples_ts = tables.tree_sequence()\n        assert all_samples_ts.num_samples == ts.num_nodes\n\n        count = 0\n        samples = range(ts.num_nodes)\n        for var1, var2 in zip(\n            all_samples_ts.variants(isolated_as_missing=False),\n            ts.variants(samples=samples, isolated_as_missing=False),\n        ):\n            assert var1.site == var2.site\n            assert var1.alleles == var2.alleles\n            assert var2.genotypes.shape == (len(samples),)\n            assert np.array_equal(var1.genotypes, var2.genotypes)\n            count += 1\n        assert count == ts.num_sites\n\n    def verify_jukes_cantor(self, ts):\n        assert np.array_equal(ts.genotype_matrix(), ts.genotype_matrix())\n        tree = ts.first()\n        for variant in ts.variants():\n            assert not variant.has_missing_data\n            mutations = {\n                mutation.node: mutation.derived_state\n                for mutation in variant.site.mutations\n            }\n            for sample_index, u in enumerate(ts.samples()):\n                while u not in mutations and u != tskit.NULL:\n                    u = tree.parent(u)\n                state1 = mutations.get(u, variant.site.ancestral_state)\n                state2 = variant.alleles[variant.genotypes[sample_index]]\n                assert state1 == state2\n\n    def test_jukes_cantor_n5(self):\n        ts = msprime.simulate(5, 
random_seed=2)\n        ts = tsutil.jukes_cantor(ts, 5, 1, seed=2)\n        self.verify_jukes_cantor(ts)\n\n    def test_jukes_cantor_n20(self):\n        ts = msprime.simulate(20, random_seed=2)\n        ts = tsutil.jukes_cantor(ts, 5, 1, seed=2)\n        self.verify_jukes_cantor(ts)\n\n    def test_zero_edge_missing_data(self):\n        ts = msprime.simulate(10, random_seed=2, mutation_rate=2)\n        tables = ts.dump_tables()\n        tables.keep_intervals([[0.25, 0.75]])\n        # add some sites in the deleted regions\n        tables.sites.add_row(0.1, \"A\")\n        tables.sites.add_row(0.2, \"A\")\n        tables.sites.add_row(0.8, \"A\")\n        tables.sites.add_row(0.9, \"A\")\n        tables.sort()\n        ts = tables.tree_sequence()\n        Gnm = ts.genotype_matrix(isolated_as_missing=False)\n        assert np.all(Gnm[0] == 0)\n        assert np.all(Gnm[1] == 0)\n        assert np.all(Gnm[-1] == 0)\n        assert np.all(Gnm[-2] == 0)\n        Gm = isolated_samples_genotype_matrix(ts)\n        assert np.all(Gm[0] == -1)\n        assert np.all(Gm[1] == -1)\n        assert np.all(Gm[-1] == -1)\n        assert np.all(Gm[-2] == -1)\n        Gm2 = ts.genotype_matrix(isolated_as_missing=True)\n        assert np.array_equal(Gm, Gm2)\n\n        # Test deprecated param\n\n        with pytest.warns(FutureWarning):\n            Gi = ts.genotype_matrix(impute_missing_data=True)\n        assert np.array_equal(Gnm, Gi)\n        with pytest.warns(FutureWarning):\n            Gni = ts.genotype_matrix(impute_missing_data=False)\n        assert np.array_equal(Gm, Gni)\n\n        with pytest.warns(FutureWarning):\n            G = ts.genotype_matrix(isolated_as_missing=False, impute_missing_data=True)\n        assert np.array_equal(Gnm, G)\n        with pytest.warns(FutureWarning):\n            G = ts.genotype_matrix(isolated_as_missing=True, impute_missing_data=False)\n        assert np.array_equal(Gm, G)\n\n    def test_empty_ts_missing_data(self):\n        tables = 
tskit.TableCollection(1.0)\n        tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)\n        tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)\n        tables.sites.add_row(0.5, \"A\")\n        ts = tables.tree_sequence()\n        variants = list(ts.variants())\n        assert len(variants) == 1\n        var = variants[0]\n        assert var.alleles == (\"A\", None)\n        assert var.num_alleles == 1\n        assert np.all(var.genotypes == -1)\n\n    def test_empty_ts_incomplete_samples(self):\n        # https://github.com/tskit-dev/tskit/issues/776\n        tables = tskit.TableCollection(1.0)\n        tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)\n        tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)\n        tables.sites.add_row(0.5, \"A\")\n        ts = tables.tree_sequence()\n        variants = list(ts.variants(samples=[0]))\n        assert list(variants[0].genotypes) == [-1]\n        variants = list(ts.variants(samples=[1]))\n        assert list(variants[0].genotypes) == [-1]\n\n    def test_missing_data_samples(self):\n        tables = tskit.TableCollection(1.0)\n        tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)\n        tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)\n        tables.sites.add_row(0.5, \"A\")\n        tables.mutations.add_row(0, 0, \"T\")\n        ts = tables.tree_sequence()\n\n        # If we have no samples we still get a list of variants.\n        variants = list(ts.variants(samples=[]))\n        assert len(variants[0].genotypes) == 0\n        assert not variants[0].has_missing_data\n        assert variants[0].alleles == (\"A\", \"T\")\n\n        # If we have a single sample that's not missing, there's no\n        # missing data.\n        variants = list(ts.variants(samples=[0]))\n        assert len(variants[0].genotypes) == 1\n        assert variants[0].genotypes[0] == 1\n        assert not variants[0].has_missing_data\n        assert variants[0].alleles == (\"A\", \"T\")\n\n        # If we have a single sample that is missing, there is\n   
     # missing data.\n        variants = list(ts.variants(samples=[1]))\n        assert len(variants[0].genotypes) == 1\n        assert variants[0].genotypes[0] == -1\n        assert variants[0].has_missing_data\n        assert variants[0].alleles == (\"A\", \"T\", None)\n\n    def test_mutation_over_isolated_sample_not_missing(self):\n        tables = tskit.TableCollection(1.0)\n        tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)\n        tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)\n        tables.sites.add_row(0.5, \"A\")\n        tables.mutations.add_row(0, 0, \"T\")\n        ts = tables.tree_sequence()\n        variants = list(ts.variants())\n        assert len(variants) == 1\n        var = variants[0]\n        assert var.alleles == (\"A\", \"T\", None)\n        assert var.num_alleles == 2\n        assert list(var.genotypes) == [1, -1]\n\n    def test_multiple_mutations_over_isolated_sample(self):\n        tables = tskit.TableCollection(1.0)\n        tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)\n        tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)\n        tables.sites.add_row(0.5, \"A\")\n        tables.mutations.add_row(0, 0, \"T\")\n        tables.mutations.add_row(0, 0, \"G\", parent=0)\n        ts = tables.tree_sequence()\n        variants = list(ts.variants())\n        assert len(variants) == 1\n        var = variants[0]\n        assert var.alleles == (\"A\", \"T\", \"G\", None)\n        assert var.num_alleles == 3\n        assert len(var.site.mutations) == 2\n        assert list(var.genotypes) == [2, -1]\n\n    def test_snipped_tree_sequence_missing_data(self):\n        ts = msprime.simulate(\n            10, length=10, recombination_rate=0.1, mutation_rate=10, random_seed=3\n        )\n        tables = ts.dump_tables()\n        tables.delete_intervals([[4, 6]], simplify=False)\n        tables.sites.add_row(4, ancestral_state=\"0\")\n        tables.sites.add_row(5, ancestral_state=\"0\")\n        tables.sites.add_row(5.999999, 
ancestral_state=\"0\")\n        tables.sort()\n        ts = tables.tree_sequence()\n        G = ts.genotype_matrix()\n        num_missing = 0\n        for var in ts.variants():\n            if 4 <= var.site.position < 6:\n                assert var.has_missing_data\n                assert np.all(var.genotypes == tskit.MISSING_DATA)\n                num_missing += 1\n            else:\n                assert not var.has_missing_data\n                assert np.all(var.genotypes != tskit.MISSING_DATA)\n            assert np.array_equal(var.genotypes, G[var.site.id])\n        assert num_missing == 3\n\n        G = ts.genotype_matrix(isolated_as_missing=False)\n        for var in ts.variants(isolated_as_missing=False):\n            if 4 <= var.site.position < 6:\n                assert not var.has_missing_data\n                assert np.all(var.genotypes == 0)\n            else:\n                assert not var.has_missing_data\n                assert np.all(var.genotypes != tskit.MISSING_DATA)\n            assert np.array_equal(var.genotypes, G[var.site.id])\n\n    def test_snipped_tree_sequence_mutations_over_isolated(self):\n        ts = msprime.simulate(\n            10, length=10, recombination_rate=0.1, mutation_rate=10, random_seed=3\n        )\n        tables = ts.dump_tables()\n        tables.delete_intervals([[4, 6]], simplify=False)\n        missing_site = tables.sites.add_row(4, ancestral_state=\"0\")\n        tables.mutations.add_row(missing_site, node=0, derived_state=\"1\")\n        # Add another site in which all the samples are marked with a mutation\n        # to the ancestral state. Note: this would normally not be allowed because\n        # there's no state change. 
However, this allows us to mark a sample\n        # as not-missing, so it's an important feature.\n        missing_site = tables.sites.add_row(5, ancestral_state=\"0\")\n        for u in range(10):\n            tables.mutations.add_row(missing_site, node=u, derived_state=\"0\")\n        tables.sort()\n        ts = tables.tree_sequence()\n        G = ts.genotype_matrix()\n        missing_found = False\n        non_missing_found = False\n        for var in ts.variants():\n            if var.site.position == 4:\n                assert var.has_missing_data\n                assert var.genotypes[0] == 1\n                assert np.all(var.genotypes[1:] == tskit.MISSING_DATA)\n                missing_found += 1\n            elif var.site.position == 5:\n                assert not var.has_missing_data\n                assert np.all(var.genotypes == 0)\n                non_missing_found = 1\n            else:\n                assert not var.has_missing_data\n                assert np.all(var.genotypes != tskit.MISSING_DATA)\n            assert np.array_equal(var.genotypes, G[var.site.id])\n        assert non_missing_found\n        assert missing_found\n\n    def get_missing_data_ts(self):\n        tables = tskit.TableCollection(1.0)\n        tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)\n        tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)\n        tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)\n        s = tables.sites.add_row(0, \"A\")\n        tables.mutations.add_row(site=s, derived_state=\"B\", node=1)\n        tables.mutations.add_row(site=s, derived_state=\"C\", node=2)\n        s = tables.sites.add_row(0.5, \"\")\n        tables.mutations.add_row(site=s, derived_state=\"A long string\", node=2)\n        return tables.tree_sequence()\n\n    def test_states(self):\n        ts = self.get_missing_data_ts()\n        v_iter = ts.variants(isolated_as_missing=False)\n        v = next(v_iter)\n        assert np.array_equal(v.states(), np.array([\"A\", \"B\", \"C\"]))\n        
v = next(v_iter)\n        assert np.array_equal(v.states(), np.array([\"\", \"\", \"A long string\"]))\n        # With no missing data, it shouldn't matter if the missing string = an allele\n        assert np.array_equal(\n            v.states(missing_data_string=\"\"), np.array([\"\", \"\", \"A long string\"])\n        )\n\n        v_iter = ts.variants(isolated_as_missing=True)\n        v = next(v_iter)\n        assert np.array_equal(v.states(), np.array([\"N\", \"B\", \"C\"]))\n        v = next(v_iter)\n        assert np.array_equal(v.states(), np.array([\"N\", \"N\", \"A long string\"]))\n        assert np.array_equal(\n            v.states(missing_data_string=\"MISSING\"),\n            np.array([\"MISSING\", \"MISSING\", \"A long string\"]),\n        )\n\n    @pytest.mark.parametrize(\"missing\", [True, False])\n    def test_states_haplotypes_equiv(self, missing):\n        ts = msprime.sim_ancestry(2, sequence_length=20, random_seed=1)\n        ts = msprime.sim_mutations(ts, rate=0.1, random_seed=1)\n        assert ts.num_sites > 5\n        tables = ts.dump_tables()\n        tables.delete_intervals([[0, ts.site(4).position]])\n        tables.sites.replace_with(ts.tables.sites)\n        ts = tables.tree_sequence()\n        states = np.array([v.states() for v in ts.variants(isolated_as_missing=missing)])\n        for h1, h2 in zip(ts.haplotypes(isolated_as_missing=missing), states.T):\n            assert h1 == \"\".join(h2)\n\n    @pytest.mark.parametrize(\"s\", [\"\", \"A long string\", True, np.nan, 0, -1])\n    def test_bad_states(self, s):\n        ts = self.get_missing_data_ts()\n        v_iter = ts.variants(isolated_as_missing=True)\n        v = next(v_iter)\n        v = next(v_iter)\n        match = \"existing allele\" if isinstance(s, str) else \"not a string\"\n        with pytest.raises(ValueError, match=match):\n            v.states(missing_data_string=s)\n\n\nclass TestLimitInterval:\n    def test_simple_case(self, ts_fixture):\n        ts = 
ts_fixture\n        test_variant = tskit.Variant(ts)\n        test_variant.decode(1)\n        for v in ts.variants(left=ts.site(1).position, right=ts.site(2).position):\n            # should only decode the first variant\n            assert v.site.id == 1\n            assert np.all(v.genotypes == test_variant.genotypes)\n            assert v.alleles == test_variant.alleles\n\n    @pytest.mark.parametrize(\n        [\"left\", \"expected\"],\n        [\n            (None, [0, 1, 2, 3, 4]),\n            (0, [0, 1, 2, 3, 4]),\n            (0.999, [1, 2, 3, 4]),\n            (1, [1, 2, 3, 4]),\n            (3.999, [4]),\n            (4, [4]),\n            (4.00001, []),\n            (4.99999, []),\n            (np.array([4.99999])[0], []),\n        ],\n    )\n    def test_left(self, left, expected):\n        tables = tskit.TableCollection(5)\n        for x in range(int(tables.sequence_length)):\n            tables.sites.add_row(position=x, ancestral_state=\"A\")\n        ts = tables.tree_sequence()\n        positions = [var.site.position for var in ts.variants(left=left)]\n        assert positions == expected\n\n    @pytest.mark.parametrize(\n        [\"right\", \"expected\"],\n        [\n            (None, [0, 1, 2, 3, 4]),\n            (5, [0, 1, 2, 3, 4]),\n            (4.00001, [0, 1, 2, 3, 4]),\n            (4.0, [0, 1, 2, 3]),\n            (3.9999, [0, 1, 2, 3]),\n            (0.00001, [0]),\n            (np.array([1e-200])[0], [0]),\n        ],\n    )\n    def test_right(self, right, expected):\n        tables = tskit.TableCollection(5)\n        for x in range(int(tables.sequence_length)):\n            tables.sites.add_row(position=x, ancestral_state=\"A\")\n        ts = tables.tree_sequence()\n        positions = [var.site.position for var in ts.variants(right=right)]\n        assert positions == expected\n\n    @pytest.mark.parametrize(\"bad_left\", [-1, 10, 100, np.nan, np.inf, -np.inf])\n    def test_bad_left(self, bad_left):\n        ts = 
tskit.TableCollection(10).tree_sequence()\n        with pytest.raises(ValueError, match=\"`left` not between\"):\n            list(ts.variants(left=bad_left))\n\n    @pytest.mark.parametrize(\"bad_right\", [-1, 0, 100, np.nan, np.inf, -np.inf])\n    def test_bad_right(self, bad_right):\n        ts = tskit.TableCollection(10).tree_sequence()\n        with pytest.raises(ValueError, match=\"`right` not between\"):\n            list(ts.variants(right=bad_right))\n\n    def test_bad_left_right(self):\n        ts = tskit.TableCollection(10).tree_sequence()\n        with pytest.raises(ValueError, match=\"must be less than\"):\n            list(ts.variants(left=1, right=1))\n\n\nclass TestHaplotypeGenerator:\n    \"\"\"\n    Tests the haplotype generation code.\n    \"\"\"\n\n    def verify_haplotypes(self, n, haplotypes):\n        \"\"\"\n        Verify that the specified set of haplotypes is consistent.\n        \"\"\"\n        assert len(haplotypes) == n\n        m = len(haplotypes[0])\n        for h in haplotypes:\n            assert len(h) == m\n        # Examine each column in H; we must have a mixture of 0s and 1s\n        for k in range(m):\n            zeros = 0\n            ones = 0\n            col = \"\"\n            for j in range(n):\n                b = haplotypes[j][k]\n                zeros += b == \"0\"\n                ones += b == \"1\"\n                col += b\n            assert zeros + ones == n\n\n    def verify_tree_sequence(self, tree_sequence):\n        n = tree_sequence.sample_size\n        m = tree_sequence.num_sites\n        haplotypes = list(tree_sequence.haplotypes())\n        A = np.zeros((n, m), dtype=\"u1\")\n        B = np.zeros((n, m), dtype=\"u1\")\n        for j, h in enumerate(haplotypes):\n            assert len(h) == m\n            A[j] = np.frombuffer(h.encode(\"ascii\"), np.uint8) - ord(\"0\")\n        for variant in tree_sequence.variants():\n            B[:, variant.index] = variant.genotypes\n        assert np.all(A == B)\n   
     self.verify_haplotypes(n, haplotypes)\n\n    def verify_simulation(self, n, m, r, theta):\n        \"\"\"\n        Verifies a simulation for the specified parameters.\n        \"\"\"\n        recomb_map = msprime.RecombinationMap.uniform_map(m, r, m)\n        tree_sequence = msprime.simulate(\n            n, recombination_map=recomb_map, mutation_rate=theta\n        )\n        self.verify_tree_sequence(tree_sequence)\n\n    def test_random_parameters(self):\n        num_random_sims = 10\n        for _ in range(num_random_sims):\n            n = random.randint(2, 50)\n            m = random.randint(10, 200)\n            r = random.random()\n            theta = random.uniform(0, 2)\n            self.verify_simulation(n, m, r, theta)\n\n    def test_nonbinary_trees(self):\n        bottlenecks = [\n            msprime.SimpleBottleneck(0.01, 0, proportion=0.05),\n            msprime.SimpleBottleneck(0.02, 0, proportion=0.25),\n            msprime.SimpleBottleneck(0.03, 0, proportion=1),\n        ]\n        ts = msprime.simulate(\n            10,\n            length=100,\n            recombination_rate=1,\n            demographic_events=bottlenecks,\n            random_seed=1,\n        )\n        self.verify_tree_sequence(ts)\n\n    def test_acgt_mutations(self):\n        ts = msprime.simulate(10, mutation_rate=10)\n        assert ts.num_sites > 0\n        tables = ts.dump_tables()\n        sites = tables.sites\n        mutations = tables.mutations\n        sites.set_columns(\n            position=sites.position,\n            ancestral_state=np.zeros(ts.num_sites, dtype=np.int8) + ord(\"A\"),\n            ancestral_state_offset=np.arange(ts.num_sites + 1, dtype=np.uint32),\n        )\n        mutations.set_columns(\n            site=mutations.site,\n            node=mutations.node,\n            derived_state=np.zeros(ts.num_sites, dtype=np.int8) + ord(\"T\"),\n            derived_state_offset=np.arange(ts.num_sites + 1, dtype=np.uint32),\n        )\n        tsp = 
tables.tree_sequence()\n        H = [h.replace(\"0\", \"A\").replace(\"1\", \"T\") for h in ts.haplotypes()]\n        assert H == list(tsp.haplotypes())\n\n    def test_fails_multiletter_mutations(self):\n        ts = msprime.simulate(10, random_seed=2)\n        tables = ts.dump_tables()\n        tables.sites.add_row(0, \"ACTG\")\n        tsp = tables.tree_sequence()\n        with pytest.raises(TypeError):\n            list(tsp.haplotypes())\n\n    def test_fails_deletion_mutations(self):\n        ts = msprime.simulate(10, random_seed=2)\n        tables = ts.dump_tables()\n        tables.sites.add_row(0, \"\")\n        tsp = tables.tree_sequence()\n        with pytest.raises(TypeError):\n            list(tsp.haplotypes())\n\n    def test_nonascii_mutations(self):\n        ts = msprime.simulate(10, random_seed=2)\n        tables = ts.dump_tables()\n        tables.sites.add_row(0, chr(169))  # Copyright symbol\n        tsp = tables.tree_sequence()\n        with pytest.raises(TypeError):\n            list(tsp.haplotypes())\n\n    def test_recurrent_mutations_over_samples(self):\n        ts = msprime.simulate(10, random_seed=2)\n        num_sites = 5\n        tables = ts.dump_tables()\n        for j in range(num_sites):\n            tables.sites.add_row(\n                position=j * ts.sequence_length / num_sites, ancestral_state=\"0\"\n            )\n            for u in range(ts.sample_size):\n                tables.mutations.add_row(site=j, node=u, derived_state=\"1\")\n        ts_new = tables.tree_sequence()\n        ones = \"1\" * num_sites\n        for h in ts_new.haplotypes():\n            assert ones == h\n\n    def test_silent_mutations(self):\n        ts = msprime.simulate(10, random_seed=2)\n        tables = ts.dump_tables()\n        tree = next(ts.trees())\n        for u in tree.children(tree.root):\n            tables.sites.clear()\n            tables.mutations.clear()\n            site = tables.sites.add_row(position=0, ancestral_state=\"0\")\n           
 tables.mutations.add_row(site=site, node=tree.root, derived_state=\"1\")\n            tables.mutations.add_row(site=site, node=u, derived_state=\"1\")\n            tables.compute_mutation_parents()\n            ts_new = tables.tree_sequence()\n            all(h == 1 for h in ts_new.haplotypes())\n\n    def test_back_mutations(self):\n        base_ts = msprime.simulate(10, random_seed=2)\n        for j in [1, 2, 3]:\n            ts = tsutil.insert_branch_mutations(base_ts, mutations_per_branch=j)\n            self.verify_tree_sequence(ts)\n\n    def test_missing_data(self):\n        tables = tskit.TableCollection(1.0)\n        tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)\n        tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)\n        tables.sites.add_row(0.5, \"A\")\n        ts = tables.tree_sequence()\n        with pytest.raises(ValueError):\n            list(ts.haplotypes(missing_data_character=\"A\"))\n        for c in (\"-\", \".\", \"a\"):\n            h = list(ts.haplotypes(missing_data_character=c))\n            assert h == [c, c]\n        h = list(ts.haplotypes(isolated_as_missing=True))\n        assert h == [\"N\", \"N\"]\n        h = list(ts.haplotypes(isolated_as_missing=False))\n        assert h == [\"A\", \"A\"]\n        h = list(ts.haplotypes())\n        assert h == [\"N\", \"N\"]\n        # Test deprecated method\n        with pytest.warns(FutureWarning):\n            h = list(ts.haplotypes(impute_missing_data=True))\n        assert h == [\"A\", \"A\"]\n        with pytest.warns(FutureWarning):\n            h = list(ts.haplotypes(impute_missing_data=False))\n        assert h == [\"N\", \"N\"]\n        with pytest.warns(FutureWarning):\n            h = list(ts.haplotypes(isolated_as_missing=True, impute_missing_data=True))\n        assert h == [\"N\", \"N\"]\n        with pytest.warns(FutureWarning):\n            h = list(ts.haplotypes(isolated_as_missing=True, impute_missing_data=False))\n        assert h == [\"N\", \"N\"]\n        with 
pytest.warns(FutureWarning):\n            h = list(ts.haplotypes(isolated_as_missing=False, impute_missing_data=True))\n        assert h == [\"A\", \"A\"]\n        with pytest.warns(FutureWarning):\n            h = list(ts.haplotypes(isolated_as_missing=False, impute_missing_data=False))\n        assert h == [\"A\", \"A\"]\n\n    def test_restrict_samples(self):\n        tables = tskit.TableCollection(1.0)\n        tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)\n        tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)\n        tables.sites.add_row(0.5, \"A\")\n        tables.mutations.add_row(0, 0, derived_state=\"B\")\n        ts = tables.tree_sequence()\n        haplotypes = list(ts.haplotypes(samples=[0], isolated_as_missing=False))\n        assert haplotypes == [\"B\"]\n        haplotypes = list(ts.haplotypes(samples=[1], isolated_as_missing=False))\n        assert haplotypes == [\"A\"]\n\n    def test_restrict_positions(self):\n        tables = tskit.TableCollection(1.0)\n        tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)\n        tables.sites.add_row(0.1, \"A\")\n        tables.sites.add_row(0.2, \"B\")\n        tables.sites.add_row(0.3, \"C\")\n        tables.sites.add_row(0.4, \"D\")\n        ts = tables.tree_sequence()\n        haplotypes = list(ts.haplotypes(left=0.2, right=0.4, isolated_as_missing=False))\n        assert haplotypes == [\"BC\"]\n\n\nclass TestUserAlleles:\n    \"\"\"\n    Tests the functionality of providing a user-specified allele mapping.\n    \"\"\"\n\n    def test_simple_01(self):\n        ts = msprime.simulate(10, mutation_rate=5, random_seed=2)\n        assert ts.num_sites > 2\n        G1 = ts.genotype_matrix()\n        G2 = ts.genotype_matrix(alleles=(\"0\", \"1\"))\n        assert np.array_equal(G1, G2)\n        for v1, v2 in itertools.zip_longest(\n            ts.variants(), ts.variants(alleles=(\"0\", \"1\"))\n        ):\n            assert v1.alleles == v2.alleles\n            assert v1.site == v2.site\n            assert 
np.array_equal(v1.genotypes, v2.genotypes)\n\n    def test_simple_01_trailing_alleles(self):\n        ts = msprime.simulate(10, mutation_rate=5, random_seed=2)\n        assert ts.num_sites > 2\n        G1 = ts.genotype_matrix()\n        alleles = (\"0\", \"1\", \"2\", \"xxxxx\")\n        G2 = ts.genotype_matrix(alleles=alleles)\n        assert np.array_equal(G1, G2)\n        for v1, v2 in itertools.zip_longest(ts.variants(), ts.variants(alleles=alleles)):\n            assert v2.alleles == alleles\n            assert v1.site == v2.site\n            assert np.array_equal(v1.genotypes, v2.genotypes)\n\n    def test_simple_01_leading_alleles(self):\n        ts = msprime.simulate(10, mutation_rate=5, random_seed=2)\n        assert ts.num_sites > 2\n        G1 = ts.genotype_matrix()\n        alleles = (\"A\", \"B\", \"C\", \"0\", \"1\")\n        G2 = ts.genotype_matrix(alleles=alleles)\n        assert np.array_equal(G1 + 3, G2)\n        for v1, v2 in itertools.zip_longest(ts.variants(), ts.variants(alleles=alleles)):\n            assert v2.alleles == alleles\n            assert v1.site == v2.site\n            assert np.array_equal(v1.genotypes + 3, v2.genotypes)\n\n    def test_simple_01_duplicate_alleles(self):\n        ts = msprime.simulate(10, mutation_rate=5, random_seed=2)\n        assert ts.num_sites > 2\n        G1 = ts.genotype_matrix()\n        alleles = (\"0\", \"0\", \"1\")\n        G2 = ts.genotype_matrix(alleles=alleles)\n        index = np.where(G1 == 1)\n        G1[index] = 2\n        assert np.array_equal(G1, G2)\n        for v1, v2 in itertools.zip_longest(ts.variants(), ts.variants(alleles=alleles)):\n            assert v2.alleles == alleles\n            assert v1.site == v2.site\n            g = np.array(v1.genotypes)\n            index = np.where(g == 1)\n            g[index] = 2\n            assert np.array_equal(g, v2.genotypes)\n\n    def test_simple_acgt(self):\n        ts = msprime.simulate(10, random_seed=2)\n        ts = msprime.mutate(\n       
     ts, rate=4, random_seed=2, model=msprime.InfiniteSites(msprime.NUCLEOTIDES)\n        )\n        assert ts.num_sites > 2\n        alleles = tskit.ALLELES_ACGT\n        G = ts.genotype_matrix(alleles=alleles)\n        for v1, v2 in itertools.zip_longest(ts.variants(), ts.variants(alleles=alleles)):\n            assert v2.alleles == alleles\n            assert v1.site == v2.site\n            h1 = \"\".join(v1.alleles[g] for g in v1.genotypes)\n            h2 = \"\".join(v2.alleles[g] for g in v2.genotypes)\n            assert h1 == h2\n            assert np.array_equal(v2.genotypes, G[v1.site.id])\n\n    def test_missing_alleles(self):\n        ts = msprime.simulate(10, random_seed=2)\n        ts = msprime.mutate(\n            ts, rate=4, random_seed=2, model=msprime.InfiniteSites(msprime.NUCLEOTIDES)\n        )\n        assert ts.num_sites > 2\n        bad_allele_examples = [\n            tskit.ALLELES_01,\n            tuple([\"A\"]),\n            (\"C\", \"T\", \"G\"),\n            (\"AA\", \"C\", \"T\", \"G\"),\n            tuple([\"ACTG\"]),\n        ]\n        for bad_alleles in bad_allele_examples:\n            with pytest.raises(exceptions.LibraryError):\n                ts.genotype_matrix(alleles=bad_alleles)\n            with pytest.raises(exceptions.LibraryError):\n                list(ts.variants(alleles=bad_alleles))\n\n    def test_too_many_alleles(self):\n        ts = msprime.simulate(10, mutation_rate=5, random_seed=2)\n        for n in range(128, 138):\n            bad_alleles = tuple(\"0\" for _ in range(n))\n            with pytest.raises(exceptions.LibraryError):\n                ts.genotype_matrix(alleles=bad_alleles)\n            with pytest.raises(exceptions.LibraryError):\n                list(ts.variants(alleles=bad_alleles))\n\n    def test_zero_allele(self):\n        ts = msprime.simulate(10, mutation_rate=5, random_seed=2)\n        with pytest.raises(ValueError):\n            ts.genotype_matrix(alleles=tuple())\n        with 
pytest.raises(ValueError):\n            list(ts.variants(alleles=tuple()))\n\n    def test_missing_data(self):\n        tables = tskit.TableCollection(1)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        tables.sites.add_row(0.5, \"0\")\n        tables.mutations.add_row(0, 0, \"1\")\n\n        ts = tables.tree_sequence()\n        for isolated_as_missing in [True, False]:\n            G1 = ts.genotype_matrix(isolated_as_missing=isolated_as_missing)\n            G2 = ts.genotype_matrix(\n                isolated_as_missing=isolated_as_missing, alleles=tskit.ALLELES_01\n            )\n            assert np.array_equal(G1, G2)\n            vars1 = ts.variants(isolated_as_missing=isolated_as_missing)\n            vars2 = ts.variants(\n                isolated_as_missing=isolated_as_missing, alleles=tskit.ALLELES_01\n            )\n            for v1, v2 in itertools.zip_longest(vars1, vars2):\n                assert v2.alleles == v1.alleles\n                assert v1.site == v2.site\n                assert np.array_equal(v1.genotypes, v2.genotypes)\n\n\nclass TestUserAllelesRoundTrip:\n    \"\"\"\n    Tests that we correctly produce haplotypes in a variety of situations for\n    the user specified allele map encoding.\n    \"\"\"\n\n    def verify(self, ts, alleles):\n        for v1, v2 in itertools.zip_longest(ts.variants(), ts.variants(alleles=alleles)):\n            h1 = [v1.alleles[g] for g in v1.genotypes]\n            h2 = [v2.alleles[g] for g in v2.genotypes]\n            assert h1 == h2\n\n    def test_simple_01(self):\n        ts = msprime.simulate(5, mutation_rate=2, random_seed=3)\n        assert ts.num_sites > 3\n        valid_alleles = [\n            tskit.ALLELES_01,\n            (\"0\", \"1\", \"xry\"),\n            (\"xry\", \"0\", \"1\", \"xry\"),\n            tuple(str(j) for j in range(127)),\n            tuple([\"0\" for j in range(126)] + [\"1\"]),\n        
]\n        for alleles in valid_alleles:\n            self.verify(ts, alleles)\n\n    def test_simple_acgt(self):\n        ts = msprime.simulate(5, random_seed=3)\n        ts = msprime.mutate(\n            ts, rate=4, random_seed=3, model=msprime.InfiniteSites(msprime.NUCLEOTIDES)\n        )\n        assert ts.num_sites > 3\n        valid_alleles = [\n            tskit.ALLELES_ACGT,\n            (\"A\", \"C\", \"T\", \"G\", \"AAAAAAAAAAAAAA\"),\n            (\"AA\", \"CC\", \"TT\", \"GG\", \"A\", \"C\", \"T\", \"G\"),\n        ]\n        for alleles in valid_alleles:\n            self.verify(ts, alleles)\n\n    def test_jukes_cantor(self):\n        ts = msprime.simulate(6, random_seed=1, mutation_rate=1)\n        ts = tsutil.jukes_cantor(ts, 20, 1, seed=10)\n        valid_alleles = [\n            tskit.ALLELES_ACGT,\n            (\"A\", \"C\", \"T\", \"G\", \"AAAAAAAAAAAAAA\"),\n            (\"AA\", \"CC\", \"TT\", \"GG\", \"A\", \"C\", \"T\", \"G\"),\n        ]\n        for alleles in valid_alleles:\n            self.verify(ts, alleles)\n\n    def test_multichar_mutations(self):\n        ts = msprime.simulate(6, random_seed=1, recombination_rate=2)\n        ts = tsutil.insert_multichar_mutations(ts)\n        assert ts.num_sites > 5\n        all_alleles = set()\n        for var in ts.variants():\n            all_alleles.update(var.alleles)\n        all_alleles = tuple(all_alleles)\n        self.verify(ts, all_alleles)\n        self.verify(ts, all_alleles[::-1])\n\n    def test_simple_01_missing_data(self):\n        ts = msprime.simulate(6, mutation_rate=2, random_seed=3)\n        tables = ts.dump_tables()\n        # Add another sample node. 
This will be missing data everywhere.\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        ts = tables.tree_sequence()\n        assert ts.num_sites > 3\n        valid_alleles = [\n            tskit.ALLELES_01,\n            (\"0\", \"1\", \"xry\"),\n            (\"xry\", \"0\", \"1\", \"xry\"),\n            tuple(str(j) for j in range(127)),\n            tuple([\"0\" for j in range(126)] + [\"1\"]),\n        ]\n        for alleles in valid_alleles:\n            self.verify(ts, alleles)\n\n\nclass TestBinaryTreeExample:\n    # 2.00┊   4   ┊\n    #     ┊ ┏━┻┓  ┊\n    # 1.00┊ ┃  3  ┊\n    #     ┊ ┃ ┏┻┓ ┊\n    # 0.00┊ 0 1 2 ┊\n    #     0      10\n    #      |    |\n    #  pos 2    9\n    #  anc A    T\n    @tests.cached_example\n    def ts(self):\n        ts = tskit.Tree.generate_balanced(3, span=10).tree_sequence\n        tables = ts.dump_tables()\n        tables.sites.add_row(2, ancestral_state=\"A\")\n        tables.sites.add_row(9, ancestral_state=\"T\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"G\")\n        tables.mutations.add_row(site=1, node=3, derived_state=\"C\")\n        return tables.tree_sequence()\n\n    def test_haplotypes(self):\n        H = list(self.ts().haplotypes())\n        assert H[0] == \"GT\"\n        assert H[1] == \"AC\"\n        assert H[2] == \"AC\"\n\n    def test_haplotypes_empty_interval(self):\n        ts = self.ts()\n        H = list(ts.haplotypes(left=4, right=5))\n        assert H == [\"\", \"\", \"\"]\n\n    def test_genotypes(self):\n        G = self.ts().genotype_matrix()\n        Gp = [[1, 0, 0], [0, 1, 1]]\n        np.testing.assert_array_equal(G, Gp)\n\n    def test_alignments_default(self):\n        A = list(self.ts().alignments())\n        assert A[0] == \"NNGNNNNNNT\"\n        assert A[1] == \"NNANNNNNNC\"\n        assert A[2] == \"NNANNNNNNC\"\n\n    def test_alignments_restricted(self):\n        ts = self.ts()\n        samples = ts.samples()\n        # Take the first 2 in 
reverse order\n        A = list(ts.alignments(left=1, right=9, samples=samples[1::-1]))\n        assert A[0] == \"NANNNNNN\"\n        assert A[1] == \"NGNNNNNN\"\n\n    def test_empty_samples(self):\n        ts = self.ts()\n        A = list(ts.alignments(samples=[]))\n        assert len(A) == 0\n\n    def test_non_sample_samples(self):\n        ts = self.ts()\n        assert list(ts.alignments(samples=[4])) == [\"NNANNNNNNT\"]\n        assert list(ts.alignments(samples=[3])) == [\"NNANNNNNNC\"]\n\n    def test_alignments_samples_order_preserved(self):\n        ts = self.ts()\n        # Custom non-default, unique order\n        rows = list(ts.alignments(samples=[2, 0, 1]))\n        assert rows == [\n            \"NNANNNNNNC\",  # sample 2\n            \"NNGNNNNNNT\",  # sample 0\n            \"NNANNNNNNC\",  # sample 1\n        ]\n\n    def test_variants_internal_nodes(self):\n        ts = self.ts()\n        # Root node 4: present across span, no mutations directly; always ancestral\n        vars4 = list(ts.variants(samples=[4]))\n        assert len(vars4) == 2\n        assert [v.alleles[v.genotypes[0]] for v in vars4] == [\"A\", \"T\"]\n        assert [v.has_missing_data for v in vars4] == [False, False]\n        # isolated_as_missing=False should not change results here\n        vars4_i = list(ts.variants(samples=[4], isolated_as_missing=False))\n        assert [v.alleles[v.genotypes[0]] for v in vars4_i] == [\"A\", \"T\"]\n        assert [v.has_missing_data for v in vars4_i] == [False, False]\n        # Internal node 3: mutation at site 1 gives derived state there\n        vars3 = list(ts.variants(samples=[3]))\n        assert len(vars3) == 2\n        assert [v.alleles[v.genotypes[0]] for v in vars3] == [\"A\", \"C\"]\n        assert [v.has_missing_data for v in vars3] == [False, False]\n        vars3_i = list(ts.variants(samples=[3], isolated_as_missing=False))\n        assert [v.alleles[v.genotypes[0]] for v in vars3_i] == [\"A\", \"C\"]\n        assert 
[v.has_missing_data for v in vars3_i] == [False, False]\n\n    def test_genotype_matrix_internal_nodes(self):\n        ts = self.ts()\n        import numpy as np\n\n        np.testing.assert_array_equal(\n            ts.genotype_matrix(samples=[4]), np.array([[0], [0]], dtype=np.int32)\n        )\n        np.testing.assert_array_equal(\n            ts.genotype_matrix(samples=[4], isolated_as_missing=False),\n            ts.genotype_matrix(samples=[4]),\n        )\n        np.testing.assert_array_equal(\n            ts.genotype_matrix(samples=[3]), np.array([[0], [1]], dtype=np.int32)\n        )\n        np.testing.assert_array_equal(\n            ts.genotype_matrix(samples=[3], isolated_as_missing=False),\n            ts.genotype_matrix(samples=[3]),\n        )\n\n    def test_haplotypes_internal_nodes(self):\n        ts = self.ts()\n        assert list(ts.haplotypes(samples=[4])) == [\"AT\"]\n        assert list(ts.haplotypes(samples=[4], isolated_as_missing=False)) == [\"AT\"]\n        assert list(ts.haplotypes(samples=[3])) == [\"AC\"]\n        assert list(ts.haplotypes(samples=[3], isolated_as_missing=False)) == [\"AC\"]\n\n    def test_alignments_missing_data_char(self):\n        A = list(self.ts().alignments(missing_data_character=\"x\"))\n        assert A[0] == \"xxGxxxxxxT\"\n        assert A[1] == \"xxAxxxxxxC\"\n        assert A[2] == \"xxAxxxxxxC\"\n\n    def test_alignments_reference_sequence(self):\n        ref = \"0123456789\"\n        A = list(self.ts().alignments(reference_sequence=ref))\n        assert A[0] == \"01G345678T\"\n        assert A[1] == \"01A345678C\"\n        assert A[2] == \"01A345678C\"\n\n    def test_alignments_reference_sequence_embedded_null(self):\n        # This is a total corner case, but just want to make sure\n        # we do something sensible.\n        ref = \"0123\" + \"\\0\" + \"56789\"\n        A = list(self.ts().alignments(reference_sequence=ref))\n        assert A[0] == \"01G3\\x005678T\"\n        assert A[1] == 
\"01A3\\x005678C\"\n        assert A[2] == \"01A3\\x005678C\"\n\n    def test_fasta_default(self):\n        expected = textwrap.dedent(\n            \"\"\"\\\n            >n0\n            NNGNNNNNNT\n            >n1\n            NNANNNNNNC\n            >n2\n            NNANNNNNNC\n            \"\"\"\n        )\n        assert expected == self.ts().as_fasta()\n\n    def test_fasta_missing_Q(self):\n        expected = textwrap.dedent(\n            \"\"\"\\\n            >n0\n            QQGQQQQQQT\n            >n1\n            QQAQQQQQQC\n            >n2\n            QQAQQQQQQC\n            \"\"\"\n        )\n        assert expected == self.ts().as_fasta(missing_data_character=\"Q\")\n\n    def test_fasta_reference_sequence(self):\n        ref = \"0123456789\"\n        expected = textwrap.dedent(\n            \"\"\"\\\n            >n0\n            01G345678T\n            >n1\n            01A345678C\n            >n2\n            01A345678C\n            \"\"\"\n        )\n        assert expected == self.ts().as_fasta(reference_sequence=ref)\n\n    def test_nexus_default(self):\n        expected = textwrap.dedent(\n            \"\"\"\\\n            #NEXUS\n            BEGIN TAXA;\n              DIMENSIONS NTAX=3;\n              TAXLABELS n0 n1 n2;\n            END;\n            BEGIN DATA;\n              DIMENSIONS NCHAR=10;\n              FORMAT DATATYPE=DNA MISSING=?;\n              MATRIX\n                n0 ??G??????T\n                n1 ??A??????C\n                n2 ??A??????C\n              ;\n            END;\n            BEGIN TREES;\n              TREE t0^10 = [&R] (n0:2,(n1:1,n2:1):1);\n            END;\n            \"\"\"\n        )\n        assert expected == self.ts().as_nexus()\n\n    def test_nexus_missing_N(self):\n        expected = textwrap.dedent(\n            \"\"\"\\\n            #NEXUS\n            BEGIN TAXA;\n              DIMENSIONS NTAX=3;\n              TAXLABELS n0 n1 n2;\n            END;\n            BEGIN DATA;\n              DIMENSIONS 
NCHAR=10;\n              FORMAT DATATYPE=DNA MISSING=N;\n              MATRIX\n                n0 NNGNNNNNNT\n                n1 NNANNNNNNC\n                n2 NNANNNNNNC\n              ;\n            END;\n            BEGIN TREES;\n              TREE t0^10 = [&R] (n0:2,(n1:1,n2:1):1);\n            END;\n            \"\"\"\n        )\n        assert expected == self.ts().as_nexus(missing_data_character=\"N\")\n\n    def test_nexus_reference_sequence(self):\n        ref = \"0123456789\"\n        expected = textwrap.dedent(\n            \"\"\"\\\n            #NEXUS\n            BEGIN TAXA;\n              DIMENSIONS NTAX=3;\n              TAXLABELS n0 n1 n2;\n            END;\n            BEGIN DATA;\n              DIMENSIONS NCHAR=10;\n              FORMAT DATATYPE=DNA MISSING=?;\n              MATRIX\n                n0 01G345678T\n                n1 01A345678C\n                n2 01A345678C\n              ;\n            END;\n            BEGIN TREES;\n              TREE t0^10 = [&R] (n0:2,(n1:1,n2:1):1);\n            END;\n            \"\"\"\n        )\n        assert expected == self.ts().as_nexus(reference_sequence=ref)\n\n\nclass TestBinaryTreeWithReferenceExample:\n    # 2.00┊   4   ┊\n    #     ┊ ┏━┻┓  ┊\n    # 1.00┊ ┃  3  ┊\n    #     ┊ ┃ ┏┻┓ ┊\n    # 0.00┊ 0 1 2 ┊\n    #     0      10\n    #      |    |\n    #  pos 2    9\n    #  anc A    T\n    @tests.cached_example\n    def ts(self):\n        ts = tskit.Tree.generate_balanced(3, span=10).tree_sequence\n        tables = ts.dump_tables()\n        tables.sites.add_row(2, ancestral_state=\"A\")\n        tables.sites.add_row(9, ancestral_state=\"T\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"G\")\n        tables.mutations.add_row(site=1, node=3, derived_state=\"C\")\n        tables.reference_sequence.data = \"ACGTACGTAC\"\n        return tables.tree_sequence()\n\n    def test_alignments_default(self):\n        A = list(self.ts().alignments())\n        assert A[0] == \"ACGTACGTAT\"\n      
  assert A[1] == \"ACATACGTAC\"\n        assert A[2] == \"ACATACGTAC\"\n\n    def test_alignments_missing_data_char(self):\n        A = list(self.ts().alignments(missing_data_character=\"x\"))\n        assert A[0] == \"ACGTACGTAT\"\n        assert A[1] == \"ACATACGTAC\"\n        assert A[2] == \"ACATACGTAC\"\n\n    def test_alignments_restricted_embedded_reference(self):\n        ts = self.ts()\n        # Use embedded reference (\"ACGTACGTAC\"). Slice [1,9) -> \"CGTACGTA\".\n        A = list(ts.alignments(left=1, right=9))\n        # Site at pos 2 overlays: sample 0 gets 'G' (derived), others 'A' (ancestral).\n        assert A[0] == \"CGTACGTA\"\n        assert A[1] == \"CATACGTA\"\n        assert A[2] == \"CATACGTA\"\n\n    def test_alignments_reference_sequence(self):\n        ref = \"0123456789\"\n        A = list(self.ts().alignments(reference_sequence=ref))\n        assert A[0] == \"01G345678T\"\n        assert A[1] == \"01A345678C\"\n        assert A[2] == \"01A345678C\"\n\n    def test_fasta_default(self):\n        expected = textwrap.dedent(\n            \"\"\"\\\n            >n0\n            ACGTACGTAT\n            >n1\n            ACATACGTAC\n            >n2\n            ACATACGTAC\n            \"\"\"\n        )\n        assert expected == self.ts().as_fasta()\n\n    def test_fasta_reference_sequence(self):\n        ref = \"0123456789\"\n        expected = textwrap.dedent(\n            \"\"\"\\\n            >n0\n            01G345678T\n            >n1\n            01A345678C\n            >n2\n            01A345678C\n            \"\"\"\n        )\n        assert expected == self.ts().as_fasta(reference_sequence=ref)\n\n    def test_nexus_default(self):\n        expected = textwrap.dedent(\n            \"\"\"\\\n            #NEXUS\n            BEGIN TAXA;\n              DIMENSIONS NTAX=3;\n              TAXLABELS n0 n1 n2;\n            END;\n            BEGIN DATA;\n              DIMENSIONS NCHAR=10;\n              FORMAT DATATYPE=DNA MISSING=?;\n           
   MATRIX\n                n0 ACGTACGTAT\n                n1 ACATACGTAC\n                n2 ACATACGTAC\n              ;\n            END;\n            BEGIN TREES;\n              TREE t0^10 = [&R] (n0:2,(n1:1,n2:1):1);\n            END;\n            \"\"\"\n        )\n        assert expected == self.ts().as_nexus()\n\n    def test_nexus_reference_sequence(self):\n        ref = \"0123456789\"\n        expected = textwrap.dedent(\n            \"\"\"\\\n            #NEXUS\n            BEGIN TAXA;\n              DIMENSIONS NTAX=3;\n              TAXLABELS n0 n1 n2;\n            END;\n            BEGIN DATA;\n              DIMENSIONS NCHAR=10;\n              FORMAT DATATYPE=DNA MISSING=?;\n              MATRIX\n                n0 01G345678T\n                n1 01A345678C\n                n2 01A345678C\n              ;\n            END;\n            BEGIN TREES;\n              TREE t0^10 = [&R] (n0:2,(n1:1,n2:1):1);\n            END;\n            \"\"\"\n        )\n        assert expected == self.ts().as_nexus(reference_sequence=ref)\n\n\nclass TestMissingDataExample:\n    # 2.00┊   4     ┊\n    #     ┊ ┏━┻┓    ┊\n    # 1.00┊ ┃  3    ┊\n    #     ┊ ┃ ┏┻┓   ┊\n    # 0.00┊ 0 1 2 5 ┊\n    #     0        10\n    #      |      |\n    #  pos 2      9\n    #  anc A      T\n    @tests.cached_example\n    def ts(self):\n        ts = tskit.Tree.generate_balanced(3, span=10).tree_sequence\n        tables = ts.dump_tables()\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        tables.sites.add_row(2, ancestral_state=\"A\")\n        tables.sites.add_row(9, ancestral_state=\"T\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"G\")\n        tables.mutations.add_row(site=1, node=3, derived_state=\"C\")\n        return tables.tree_sequence()\n\n    def test_haplotypes(self):\n        H = list(self.ts().haplotypes())\n        assert H[0] == \"GT\"\n        assert H[1] == \"AC\"\n        assert H[2] == \"AC\"\n        assert H[3] == \"NN\"\n\n    def 
test_haplotypes_missing_data_char(self):\n        H = list(self.ts().haplotypes(missing_data_character=\"?\"))\n        assert H[0] == \"GT\"\n        assert H[1] == \"AC\"\n        assert H[2] == \"AC\"\n        assert H[3] == \"??\"\n\n    def test_genotypes(self):\n        G = self.ts().genotype_matrix()\n        Gp = [[1, 0, 0, -1], [0, 1, 1, -1]]\n        np.testing.assert_array_equal(G, Gp)\n\n    def test_alignments_default(self):\n        A = list(self.ts().alignments())\n        assert A[0] == \"NNGNNNNNNT\"\n        assert A[1] == \"NNANNNNNNC\"\n        assert A[2] == \"NNANNNNNNC\"\n        assert A[3] == \"NNNNNNNNNN\"\n\n    def test_alignments_impute_missing(self):\n        ref = \"N\" * 10\n        A = list(self.ts().alignments(reference_sequence=ref, isolated_as_missing=False))\n        assert A[0] == \"NNGNNNNNNT\"\n        assert A[1] == \"NNANNNNNNC\"\n        assert A[2] == \"NNANNNNNNC\"\n        assert A[3] == \"NNANNNNNNT\"\n\n    def test_alignments_missing_char(self):\n        A = list(self.ts().alignments(missing_data_character=\"z\"))\n        assert A[0] == \"zzGzzzzzzT\"\n        assert A[1] == \"zzAzzzzzzC\"\n        assert A[2] == \"zzAzzzzzzC\"\n        assert A[3] == \"zzzzzzzzzz\"\n\n    def test_alignments_missing_char_ref(self):\n        A = list(self.ts().alignments())\n        assert A[0] == \"NNGNNNNNNT\"\n        assert A[1] == \"NNANNNNNNC\"\n        assert A[2] == \"NNANNNNNNC\"\n        assert A[3] == \"NNNNNNNNNN\"\n\n    def test_alignments_reference_sequence(self):\n        ref = \"0123456789\"\n        A = list(self.ts().alignments(reference_sequence=ref))\n        assert A[0] == \"01G345678T\"\n        assert A[1] == \"01A345678C\"\n        assert A[2] == \"01A345678C\"\n        assert A[3] == \"NNNNNNNNNN\"\n\n    def test_alignments_reference_sequence_missing_data_char(self):\n        ref = \"0123456789\"\n        A = list(\n            self.ts().alignments(reference_sequence=ref, missing_data_character=\"Q\")\n    
    )\n        assert A[0] == \"01G345678T\"\n        assert A[1] == \"01A345678C\"\n        assert A[2] == \"01A345678C\"\n        assert A[3] == \"QQQQQQQQQQ\"\n\n    def test_alignments_left_right_subinterval(self):\n        ts = self.ts()\n        # Use a custom reference and a subinterval [2, 8)\n        ref = \"A\" * 10\n        got = list(ts.alignments(reference_sequence=ref, left=2, right=8))\n        assert got == [\"GAAAAA\", \"AAAAAA\", \"AAAAAA\", \"NNNNNN\"]\n\n    def test_fasta_reference_sequence(self):\n        ref = \"0123456789\"\n        expected = textwrap.dedent(\n            \"\"\"\\\n            >n0\n            01G345678T\n            >n1\n            01A345678C\n            >n2\n            01A345678C\n            >n5\n            NNNNNNNNNN\n            \"\"\"\n        )\n        assert expected == self.ts().as_fasta(reference_sequence=ref)\n\n    def test_fasta_reference_sequence_missing_data_char(self):\n        ref = \"0123456789\"\n        expected = textwrap.dedent(\n            \"\"\"\\\n            >n0\n            01G345678T\n            >n1\n            01A345678C\n            >n2\n            01A345678C\n            >n5\n            QQQQQQQQQQ\n            \"\"\"\n        )\n        assert expected == self.ts().as_fasta(\n            reference_sequence=ref, missing_data_character=\"Q\"\n        )\n\n    def test_fasta_impute_missing(self):\n        ref = \"N\" * 10\n        expected = textwrap.dedent(\n            \"\"\"\\\n            >n0\n            NNGNNNNNNT\n            >n1\n            NNANNNNNNC\n            >n2\n            NNANNNNNNC\n            >n5\n            NNANNNNNNT\n            \"\"\"\n        )\n        assert expected == self.ts().as_fasta(\n            reference_sequence=ref, isolated_as_missing=False\n        )\n\n    # Note: the nexus tree output isn't compatible with our representation of\n    # missing data as trees with isolated roots (newick parsers won't accept\n    # this as valid input), so we set 
include_trees=False for these examples.\n    def test_nexus_reference_sequence(self):\n        ref = \"0123456789\"\n        expected = textwrap.dedent(\n            \"\"\"\\\n            #NEXUS\n            BEGIN TAXA;\n              DIMENSIONS NTAX=4;\n              TAXLABELS n0 n1 n2 n5;\n            END;\n            BEGIN DATA;\n              DIMENSIONS NCHAR=10;\n              FORMAT DATATYPE=DNA MISSING=?;\n              MATRIX\n                n0 01G345678T\n                n1 01A345678C\n                n2 01A345678C\n                n5 ??????????\n              ;\n            END;\n            \"\"\"\n        )\n        assert expected == self.ts().as_nexus(\n            reference_sequence=ref, include_trees=False\n        )\n\n    def test_nexus_reference_sequence_missing_data_char(self):\n        ref = \"0123456789\"\n        expected = textwrap.dedent(\n            \"\"\"\\\n            #NEXUS\n            BEGIN TAXA;\n              DIMENSIONS NTAX=4;\n              TAXLABELS n0 n1 n2 n5;\n            END;\n            BEGIN DATA;\n              DIMENSIONS NCHAR=10;\n              FORMAT DATATYPE=DNA MISSING=Q;\n              MATRIX\n                n0 01G345678T\n                n1 01A345678C\n                n2 01A345678C\n                n5 QQQQQQQQQQ\n              ;\n            END;\n            \"\"\"\n        )\n        assert expected == self.ts().as_nexus(\n            reference_sequence=ref,\n            missing_data_character=\"Q\",\n            include_trees=False,\n        )\n\n    def test_nexus_impute_missing(self):\n        ref = \"0123456789\"\n        expected = textwrap.dedent(\n            \"\"\"\\\n            #NEXUS\n            BEGIN TAXA;\n              DIMENSIONS NTAX=4;\n              TAXLABELS n0 n1 n2 n5;\n            END;\n            BEGIN DATA;\n              DIMENSIONS NCHAR=10;\n              FORMAT DATATYPE=DNA MISSING=?;\n              MATRIX\n                n0 01G345678T\n                n1 01A345678C\n             
   n2 01A345678C\n                n5 01A345678T\n              ;\n            END;\n            \"\"\"\n        )\n        assert expected == self.ts().as_nexus(\n            reference_sequence=ref,\n            isolated_as_missing=False,\n            include_trees=False,\n        )\n\n\nclass TestAlignmentsPartialIsolation:\n    def build_ts(self):\n        # sequence length 10, sample node covers only [3,7)\n        tables = tskit.TableCollection(10)\n        parent = tables.nodes.add_row(time=1)\n        child = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        tables.edges.add_row(left=3, right=7, parent=parent, child=child)\n        # Add a site inside the covered region with a mutation on the child\n        site_id = tables.sites.add_row(position=5, ancestral_state=\"A\")\n        tables.mutations.add_row(site=site_id, node=child, derived_state=\"G\")\n        tables.sort()\n        return tables.tree_sequence()\n\n    def test_whole_window_missing_at_ends(self):\n        ts = self.build_ts()\n        ref = \"0123456789\"\n        # Node is isolated outside [3,7): expect missing there; inside use ref,\n        # with site overlay at 5\n        got = list(ts.alignments(samples=[1], reference_sequence=ref))\n        assert got == [\"NNN34G6NNN\"]\n\n    def test_subwindow(self):\n        ts = self.build_ts()\n        ref = \"0123456789\"\n        # Request [2,8): expect missing at 2 and 7, ref inside, site overlay at 5\n        got = list(ts.alignments(samples=[1], reference_sequence=ref, left=2, right=8))\n        assert got == [\"N34G6N\"]\n\n\nclass TestMultiRootExample:\n    # 1.00┊  4   5  ┊\n    #     ┊ ┏┻┓ ┏┻┓ ┊\n    # 0.00┊ 0 1 2 3 ┊\n    #     0        10\n    #       |     |\n    #  pos  2     8\n    #  anc  G     C\n    @tests.cached_example\n    def ts(self):\n        tree = tskit.Tree.generate_balanced(4, arity=2, span=10)\n        tables = tree.tree_sequence.dump_tables()\n        edges = tables.edges.copy()\n        
tables.edges.clear()\n        for edge in edges:\n            if edge.parent != 6:\n                tables.edges.append(edge)\n        tables.sites.add_row(2, ancestral_state=\"G\")\n        tables.sites.add_row(8, ancestral_state=\"C\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"T\")\n        tables.mutations.add_row(site=1, node=5, derived_state=\"A\")\n        return tables.tree_sequence()\n\n    def test_haplotypes(self):\n        H = list(self.ts().haplotypes())\n        assert H[0] == \"TC\"\n        assert H[1] == \"GC\"\n        assert H[2] == \"GA\"\n        assert H[3] == \"GA\"\n\n    def test_genotypes(self):\n        G = self.ts().genotype_matrix()\n        Gp = [[1, 0, 0, 0], [0, 0, 1, 1]]\n        np.testing.assert_array_equal(G, Gp)\n\n    def test_alignments_default(self):\n        A = list(self.ts().alignments())\n        assert A[0] == \"NNTNNNNNCN\"\n        assert A[1] == \"NNGNNNNNCN\"\n        assert A[2] == \"NNGNNNNNAN\"\n        assert A[3] == \"NNGNNNNNAN\"\n\n    def test_alignments_N_ref(self):\n        A = list(self.ts().alignments(reference_sequence=\"N\" * 10))\n        assert A[0] == \"NNTNNNNNCN\"\n        assert A[1] == \"NNGNNNNNCN\"\n        assert A[2] == \"NNGNNNNNAN\"\n        assert A[3] == \"NNGNNNNNAN\"\n\n    def test_alignments_multichar_allele_raises(self):\n        tables = self.ts().dump_tables()\n        tables.sites.clear()\n        tables.mutations.clear()\n        tables.sites.add_row(2, ancestral_state=\"AC\")\n        tables.sort()\n        ts_bad = tables.tree_sequence()\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_BAD_ALLELE_LENGTH\"):\n            next(ts_bad.alignments())\n\n    def test_fasta_reference_sequence(self):\n        ref = \"0123456789\"\n        expected = textwrap.dedent(\n            \"\"\"\\\n            >n0\n            01T34567C9\n            >n1\n            01G34567C9\n            >n2\n            01G34567A9\n            >n3\n            
01G34567A9\n            \"\"\"\n        )\n        assert expected == self.ts().as_fasta(reference_sequence=ref)\n\n\nclass TestInternalNodeMissingness:\n    @tests.cached_example\n    def ts_missing(self):\n        # Start from a balanced tree with span=10\n        # then delete ancestry over [4,6)\n        # Internal node is then isolated across [4,6)\n        ts = tskit.Tree.generate_balanced(3, span=10).tree_sequence\n        tables = ts.dump_tables()\n        tables.delete_intervals([[4, 6]], simplify=False)\n        # Ensure there are sites within the deleted region\n        tables.sites.add_row(4, ancestral_state=\"A\")\n        tables.sites.add_row(5, ancestral_state=\"A\")\n        tables.sites.add_row(5.999999, ancestral_state=\"A\")\n        tables.sort()\n        return tables.tree_sequence()\n\n    def test_variants_internal_isolated(self):\n        ts = self.ts_missing()\n        # Choose an internal node id (3) in this balanced tree\n        vars3 = list(ts.variants(samples=[3], isolated_as_missing=True))\n        assert len(vars3) == 3\n        assert all(v.has_missing_data for v in vars3)\n        assert all(v.alleles[-1] is None for v in vars3)\n        assert all(v.genotypes[0] == tskit.MISSING_DATA for v in vars3)\n        # With imputation, internal isolated node maps to ancestral\n        vars3_i = list(ts.variants(samples=[3], isolated_as_missing=False))\n        assert [v.genotypes[0] for v in vars3_i] == [0, 0, 0]\n        assert not any(v.has_missing_data for v in vars3_i)\n\n    def test_genotype_matrix_internal_isolated(self):\n        import numpy as np\n\n        ts = self.ts_missing()\n        Gm = ts.genotype_matrix(samples=[3], isolated_as_missing=True)\n        np.testing.assert_array_equal(\n            Gm.flatten(), np.array([-1, -1, -1], dtype=np.int32)\n        )\n        Gi = ts.genotype_matrix(samples=[3], isolated_as_missing=False)\n        np.testing.assert_array_equal(Gi.flatten(), np.array([0, 0, 0], dtype=np.int32))\n\n  
  def test_haplotypes_internal_isolated(self):\n        ts = self.ts_missing()\n        H = list(ts.haplotypes(samples=[3], isolated_as_missing=True))\n        assert H == [\"NNN\"]\n        Hq = list(\n            ts.haplotypes(\n                samples=[3], isolated_as_missing=True, missing_data_character=\"?\"\n            )\n        )\n        assert Hq == [\"???\"]\n\n\nclass TestVariantTopologyCombos:\n    def test_all_parent_child_combinations(self):\n        # Build a simple tree with one site and add an isolated node. Then request\n        # genotypes for nodes that realise each combination of (parent, left_child)\n        # nullness in tsk_variant_mark_missing_any.\n        tables = tskit.Tree.generate_balanced(3, span=10).tree_sequence.dump_tables()\n        # Add an isolated node (no edges present anywhere)\n        iso = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        # Add a single site with ancestral allele only\n        tables.sites.add_row(1.0, ancestral_state=\"A\")\n        ts = tables.tree_sequence()\n\n        tree = ts.first()\n        # Select a root that has children (exclude isolated root(s))\n        root_candidates = [r for r in tree.roots if tree.left_child(r) != tskit.NULL]\n        assert len(root_candidates) >= 1\n        root = root_candidates[0]  # parent NULL, left_child != NULL\n        # Choose a leaf sample present in the tree: parent != NULL, left_child NULL\n        leaf = next(\n            u\n            for u in ts.samples()\n            if tree.parent(u) != tskit.NULL and tree.left_child(u) == tskit.NULL\n        )\n        # Choose an internal non-root node: parent != NULL, left_child != NULL\n        internal = next(\n            u\n            for u in tree.nodes()\n            if tree.parent(u) != tskit.NULL and tree.left_child(u) != tskit.NULL\n        )\n\n        v = next(\n            ts.variants(samples=[iso, root, leaf, internal], isolated_as_missing=True)\n        )\n        # Only the isolated 
node should be marked missing; others should be ancestral (0)\n        np.testing.assert_array_equal(\n            v.genotypes, np.array([tskit.MISSING_DATA, 0, 0, 0], dtype=np.int32)\n        )\n        assert v.has_missing_data\n\n\nclass TestInternalNode:\n    @tests.cached_example\n    def ts_single_tree(self):\n        return tskit.Tree.generate_balanced(3, span=10).tree_sequence\n\n    @tests.cached_example\n    def ts_isolated_internal(self):\n        # Add a non-sample internal node u=5 (no edges), and add sites.\n        ts = self.ts_single_tree()\n        tables = ts.dump_tables()\n        u = tables.nodes.add_row(flags=0, time=1)\n        tables.sites.add_row(2, ancestral_state=\"A\")\n        tables.sites.add_row(9, ancestral_state=\"T\")\n        return tables.tree_sequence(), u\n\n    def test_isolated_internal_node_all_missing(self):\n        ts, u = self.ts_isolated_internal()\n        # Both sites missing for internal node\n        V = list(ts.variants(samples=[u], isolated_as_missing=True))\n        assert [v.genotypes[0] for v in V] == [tskit.MISSING_DATA] * len(V)\n        assert all(v.has_missing_data and v.alleles[-1] is None for v in V)\n        np.testing.assert_array_equal(\n            ts.genotype_matrix(samples=[u], isolated_as_missing=True).flatten(),\n            np.array([-1, -1], dtype=np.int32),\n        )\n        assert list(ts.haplotypes(samples=[u], isolated_as_missing=True)) == [\"NN\"]\n        assert [\n            v.genotypes[0] for v in ts.variants(samples=[u], isolated_as_missing=False)\n        ] == [0, 0]\n\n    @tests.cached_example\n    def ts_dead_branch(self):\n        # Create a dead branch: two non-sample nodes x (root) -> y (leaf) over full span.\n        # This topology is not reachable from sample roots, but present in the\n        # tree arrays for the current tree.\n        #\n        # Dead branch (x to y) unattached to sample-reachable roots\n        # 2.00┊      x         ┊\n        #     ┊      ┃         
┊\n        # 1.00┊      y         ┊\n        #     ┊                ┊\n        # 0.00┊ 0     1     2  ┊  (main balanced tree; x/y not reachable)\n        #     0             10\n        # Sites: pos 3 (anc=A), pos 7 (anc=C); mutation at pos 7 on x 'T'\n        ts = self.ts_single_tree()\n        tables = ts.dump_tables()\n        x = tables.nodes.add_row(flags=0, time=2)\n        y = tables.nodes.add_row(flags=0, time=1)\n        tables.edges.add_row(0, tables.sequence_length, parent=x, child=y)\n        # Sites to probe ancestral/derived states\n        tables.sites.add_row(3, ancestral_state=\"A\")\n        s1 = tables.sites.add_row(7, ancestral_state=\"C\")\n        # Mutation on x at s1\n        tables.mutations.add_row(site=s1, node=x, derived_state=\"T\")\n        tables.sort()\n        return tables.tree_sequence(), x, y\n\n    def test_dead_branch_internal_and_leaf(self):\n        ts, x, y = self.ts_dead_branch()\n        # y is a leaf on a dead branch (no children, parent=x),\n        # so not isolated (parent != NULL).\n        Vy = list(ts.variants(samples=[y], isolated_as_missing=True))\n        # y inherits ancestral at site 3 and derived at site 7 via parent x\n        assert [vy.alleles[vy.genotypes[0]] for vy in Vy] == [\"A\", \"T\"]\n        assert not any(vy.has_missing_data for vy in Vy)\n        # x is internal (has child), so not isolated.\n        # At site 7 it is mutated; site 3 ancestral.\n        Vx = list(ts.variants(samples=[x], isolated_as_missing=True))\n        assert [vx.alleles[vx.genotypes[0]] for vx in Vx] == [\"A\", \"T\"]\n        assert not any(vx.has_missing_data for vx in Vx)\n\n    @tests.cached_example\n    def ts_presence_switch(self):\n        # Construct two trees with a breakpoint at 5. 
Internal node a present on [0,5),\n        # absent on [5,10).\n        #\n        # Two trees; breakpoint at 5\n        # Tree 0: [0,5)                Tree 1: [5,10)\n        # 1.00┊  a                     1.00┊  b\n        # 0.00┊  0    (1 isolated)     0.00┊  0   (1 isolated)\n        #     0         5              5          10\n        # Sites: pos 2 (anc=A), pos 7 (anc=C)\n        # Mutation: pos 2 on a 'G'\n        tables = tskit.TableCollection(10)\n        s0 = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        s1 = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)  # s1 (isolated)\n        a = tables.nodes.add_row(flags=0, time=1)\n        b = tables.nodes.add_row(flags=0, time=1)\n        # Left tree: a->s0 on [0,5); right tree: b->s0 on [5,10)\n        tables.edges.add_row(0, 5, parent=a, child=s0)\n        tables.edges.add_row(5, 10, parent=b, child=s0)\n        # s1 remains isolated across entire span\n        # Sites: one on each side of breakpoint\n        s_left = tables.sites.add_row(2, ancestral_state=\"A\")\n        tables.sites.add_row(7, ancestral_state=\"C\")\n        # Mutation on a at s_left so a has derived there\n        tables.mutations.add_row(site=s_left, node=a, derived_state=\"G\")\n        tables.sort()\n        ts = tables.tree_sequence()\n        return ts, a, s1\n\n    def test_internal_presence_switch(self):\n        ts, a, _ = self.ts_presence_switch()\n        # Site at 2 (left): a present & derived; Site at 7 (right): a absent - isolated\n        V = list(ts.variants(samples=[a], isolated_as_missing=True))\n        assert [v.genotypes[0] for v in V] == [1, tskit.MISSING_DATA]\n        assert [\n            (v.alleles[v.genotypes[0]] if v.genotypes[0] != -1 else None) for v in V\n        ] == [\n            \"G\",\n            None,\n        ]\n        # Imputed ancestral at right site\n        assert [\n            vi.genotypes[0] for vi in ts.variants(samples=[a], isolated_as_missing=False)\n        ] 
== [1, 0]\n        assert list(ts.haplotypes(samples=[a], isolated_as_missing=False)) == [\"GC\"]\n        assert list(ts.haplotypes(samples=[a], isolated_as_missing=True)) == [\"GN\"]\n\n    def test_isolated_sample_mark_missing_any(self):\n        ts, _, s1 = self.ts_presence_switch()\n        V = list(ts.variants(samples=[s1], isolated_as_missing=True))\n        assert len(V) == 2\n        assert [v.genotypes[0] for v in V] == [tskit.MISSING_DATA] * len(V)\n        assert all(v.has_missing_data and v.alleles[-1] is None for v in V)\n        np.testing.assert_array_equal(\n            ts.genotype_matrix(samples=[s1], isolated_as_missing=True),\n            np.full((2, 1), tskit.MISSING_DATA, dtype=np.int32),\n        )\n\n    @tests.cached_example\n    def ts_mutation_overrides_missing(self):\n        # Similar to presence_switch, but add a mutation on a at the right-side site.\n        # Even though a is absent over [5,10), a mutation at the site (pos 7) makes\n        # the allele known (overrides missingness at the site).\n        ts, a, _ = self.ts_presence_switch()\n        tables = ts.dump_tables()\n        # Add a mutation on a at the right-side site (position 7)\n        # Find the site id for position 7\n        pos_to_id = {s.position: s.id for s in ts.sites()}\n        s_right = pos_to_id[7.0]\n        tables.mutations.add_row(site=s_right, node=a, derived_state=\"T\")\n        tables.sort()\n        return tables.tree_sequence(), a\n\n    def test_mutation_overrides_missing(self):\n        ts, a = self.ts_mutation_overrides_missing()\n        # Now both sites have derived alleles for a,\n        # even though a is absent on the right interval.\n        V = list(ts.variants(samples=[a], isolated_as_missing=True))\n        assert [v.alleles[v.genotypes[0]] for v in V] == [\"G\", \"T\"]\n        assert not any(v.has_missing_data for v in V)\n\n    def test_mixed_samples_ordering(self):\n        # Combine internal and sample nodes; ensure order and 
mapping preserved\n        ts, a, _ = self.ts_presence_switch()\n        samples = [a] + list(ts.samples())  # [internal, sample0, sample1]\n        # Build expected by joining per-node results\n        rows = []\n        for u in samples:\n            row = [\n                v.genotypes[0]\n                for v in ts.variants(samples=[u], isolated_as_missing=True)\n            ]\n            rows.append(row)\n        # Single call with mixed samples should match stacked rows\n        # Build matrix via variants over mixed sample list\n        G_rows = []\n        for v in ts.variants(samples=samples, isolated_as_missing=True):\n            G_rows.append(v.genotypes.tolist())\n        # Compare\n        assert G_rows == [list(col) for col in zip(*rows)]\n\n    def test_variants_copy_false_internal(self):\n        ts, a, _ = self.ts_presence_switch()\n        it = ts.variants(samples=[a], isolated_as_missing=True, copy=False)\n        v1 = next(it)\n        # Hold onto v1, decode next site updates same object\n        val1 = (v1.site.position, v1.genotypes.copy())\n        v2 = next(it)\n        assert v1 is v2\n        # v1 now reflects second site\n        assert v1.site.position != val1[0]\n\n\nclass TestAlignmentsErrors:\n    @tests.cached_example\n    def simplest_ts(self):\n        tables = tskit.TableCollection(1)\n        tables.nodes.add_row(flags=1, time=0)\n        return tables.tree_sequence()\n\n    def test_non_discrete_genome(self):\n        ts = tskit.TableCollection(1.1).tree_sequence()\n        assert not ts.discrete_genome\n        with pytest.raises(ValueError, match=\"defined for discrete genomes\"):\n            next(ts.alignments())\n\n    @pytest.mark.parametrize(\"ref_length\", [1, 9, 11])\n    def test_reference_length_mismatch(self, ref_length):\n        tables = tskit.TableCollection(10)\n        tables.reference_sequence.data = \"A\" * ref_length\n        ts = tables.tree_sequence()\n        with pytest.raises(\n            ValueError, 
match=\"must be equal to the tree sequence length\"\n        ):\n            next(ts.alignments())\n\n    @pytest.mark.parametrize(\"ref\", [\"\", \"xy\"])\n    def test_reference_sequence_length_mismatch(self, ref):\n        ts = self.simplest_ts()\n        with pytest.raises(\n            ValueError, match=\"must be equal to the tree sequence length\"\n        ):\n            next(ts.alignments(reference_sequence=ref))\n\n    @pytest.mark.parametrize(\"ref\", [\"À\", \"┃\", \"α\"])\n    def test_non_ascii_references(self, ref):\n        ts = self.simplest_ts()\n        with pytest.raises(UnicodeEncodeError):\n            next(ts.alignments(reference_sequence=ref))\n\n    @pytest.mark.parametrize(\"ref\", [\"À\", \"┃\", \"α\"])\n    def test_non_ascii_embedded_references(self, ref):\n        tables = tskit.TableCollection(1)\n        tables.nodes.add_row(flags=1, time=0)\n        tables.reference_sequence.data = ref\n        ts = tables.tree_sequence()\n        with pytest.raises(UnicodeEncodeError):\n            next(ts.alignments())\n\n    @pytest.mark.parametrize(\"missing_data_char\", [\"À\", \"┃\", \"α\"])\n    def test_non_ascii_missing_data_char(self, missing_data_char):\n        ts = self.simplest_ts()\n        with pytest.raises(UnicodeEncodeError):\n            next(ts.alignments(missing_data_character=missing_data_char))\n\n    def test_multichar_missing_data_char(self):\n        ts = self.simplest_ts()\n        # Multi-character missing symbol is invalid\n        with pytest.raises(TypeError):\n            next(ts.alignments(reference_sequence=\"A\", missing_data_character=\"NN\"))\n\n    def test_missing_char_clashes_with_allele(self):\n        # If the missing character equals an allele present at a site, error\n        tables = tskit.TableCollection(3)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        tables.sites.add_row(1, ancestral_state=\"A\")\n        ts 
= tables.tree_sequence()\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_MISSING_CHAR_COLLISION\"):\n            next(ts.alignments(missing_data_character=\"A\"))\n\n    def test_invalid_negative_node(self):\n        ts = self.simplest_ts()\n        with pytest.raises(tskit.LibraryError, match=\"out of bounds\"):\n            next(ts.alignments(samples=[-1]))\n\n    def test_invalid_out_of_bounds_node(self):\n        ts = self.simplest_ts()\n        with pytest.raises(tskit.LibraryError, match=\"out of bounds\"):\n            next(ts.alignments(samples=[ts.num_nodes]))\n\n    def test_bad_left(self):\n        ts = tskit.TableCollection(10).tree_sequence()\n        with pytest.raises(ValueError, match=\"integer\"):\n            next(ts.alignments(left=0.1))\n\n    def test_bad_right(self):\n        ts = tskit.TableCollection(10).tree_sequence()\n        with pytest.raises(ValueError, match=\"integer\"):\n            next(ts.alignments(right=1.1))\n\n    def test_bad_restricted(self):\n        tables = tskit.TableCollection(10)\n        tables.reference_sequence.data = \"A\" * 7\n        ts = tables.tree_sequence()\n        with pytest.raises(\n            ValueError, match=\"must be equal to the tree sequence length\"\n        ):\n            next(ts.alignments(right=8))\n\n    def test_no_samples_default(self):\n        # No sample nodes: default alignments result is empty\n        tables = tskit.TableCollection(5)\n        # Add a non-sample node only\n        tables.nodes.add_row(flags=0, time=0)\n        ts = tables.tree_sequence()\n        A = list(ts.alignments())\n        assert len(A) == 0\n\n    def test_boundary_sites_left_and_right(self):\n        # Sites at the boundaries 0 and L-1 overlay correctly\n        L = 5\n        tables = tskit.TableCollection(L)\n        a = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        b = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        p = tables.nodes.add_row(time=1)\n   
     tables.edges.add_row(0, L, parent=p, child=a)\n        s0 = tables.sites.add_row(0, ancestral_state=\"A\")\n        s4 = tables.sites.add_row(L - 1, ancestral_state=\"T\")\n        # Mutate at position 0 for node a; position L-1 for node b\n        tables.mutations.add_row(site=s0, node=a, derived_state=\"G\")\n        tables.mutations.add_row(site=s4, node=b, derived_state=\"C\")\n        ts = tables.tree_sequence()\n        A = list(ts.alignments(reference_sequence=\"N\" * L))\n        assert A[0] == \"GNNNT\"\n        assert A[1] == \"NNNNC\"\n\n    def test_reference_sequence_too_short_with_interval(self):\n        # Explicit ref shorter than [left,right) span should error\n        tables = tskit.TableCollection(10)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        ts = tables.tree_sequence()\n        with pytest.raises(\n            ValueError, match=\"must be equal to the tree sequence length\"\n        ):\n            next(ts.alignments(reference_sequence=\"A\" * 5, left=2, right=8))\n\n    def test_reference_sequence_length_must_match_sequence(self):\n        # Explicit ref length must match full sequence length\n        tables = tskit.TableCollection(10)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        ts = tables.tree_sequence()\n        with pytest.raises(\n            ValueError, match=\"must be equal to the tree sequence length\"\n        ):\n            next(ts.alignments(reference_sequence=\"A\" * 7, left=2, right=8))\n\n\nclass TestAlignmentExamples:\n    @pytest.mark.parametrize(\"ts\", get_example_discrete_genome_tree_sequences())\n    def test_defaults(self, ts):\n        A = list(ts.alignments())\n        assert len(A) == ts.num_samples\n        H = list(ts.haplotypes())\n        pos = ts.tables.sites.position.astype(int)\n        for a, h in map(np.array, zip(A, H)):\n            last = 0\n            for j, x in enumerate(pos):\n                assert a[last:x] == \"N\" * (x - last)\n     
           assert a[x] == h[j]\n                last = x + 1\n\n    @pytest.mark.parametrize(\"ts\", get_example_discrete_genome_tree_sequences())\n    def test_reference_sequence(self, ts):\n        ref = tskit.random_nucleotides(ts.sequence_length, seed=1234)\n        A = list(ts.alignments(reference_sequence=ref, isolated_as_missing=False))\n        assert len(A) == ts.num_samples\n        H = list(ts.haplotypes(isolated_as_missing=False))\n        pos = ts.tables.sites.position.astype(int)\n        for a, h in map(np.array, zip(A, H)):\n            last = 0\n            for j, x in enumerate(pos):\n                assert a[last:x] == ref[last:x]\n                assert a[x] == h[j]\n                last = x + 1\n            assert a[last:] == ref[last:]\n\n\n# Reference implementation for alignments (tests only)\ndef _reference_alignments(\n    ts,\n    *,\n    reference_sequence=None,\n    missing_data_character=None,\n    isolated_as_missing=None,\n    samples=None,\n    left=None,\n    right=None,\n):\n    if not ts.discrete_genome:\n        raise ValueError(\"sequence alignments only defined for discrete genomes\")\n    interval = ts._check_genomic_range(left, right, ensure_integer=True)\n    missing_data_character = (\n        \"N\" if missing_data_character is None else missing_data_character\n    )\n    if isolated_as_missing is None:\n        isolated_as_missing = True\n    L = interval.span\n    sample_ids = ts.samples() if samples is None else list(samples)\n    if reference_sequence is None:\n        if ts.has_reference_sequence():\n            reference_sequence = ts.reference_sequence.data[\n                interval.left : interval.right\n            ]\n        else:\n            reference_sequence = missing_data_character * L\n    if len(reference_sequence) != L:\n        raise ValueError(\n            \"The reference sequence must be equal to the tree sequence length\"\n        )\n    ref_array = 
np.frombuffer(reference_sequence.encode(\"ascii\"), dtype=np.int8)\n    if len(sample_ids) == 0:\n        return list()\n\n    H, (first_site_id, last_site_id) = ts._haplotypes_array(\n        interval=interval,\n        isolated_as_missing=isolated_as_missing,\n        missing_data_character=missing_data_character,\n        samples=sample_ids,\n    )\n    site_pos = ts.sites_position.astype(np.int64)[first_site_id : last_site_id + 1]\n    missing_val = ord(missing_data_character)\n    a = np.empty(L, dtype=np.int8)\n    for i, u in enumerate(sample_ids):\n        a[:] = ref_array\n        if isolated_as_missing:\n            for t in ts.trees():\n                li = max(interval.left, int(t.interval.left))\n                ri = min(interval.right, int(t.interval.right))\n                if ri > li and t.is_isolated(u):\n                    a[li - interval.left : ri - interval.left] = missing_val\n        if H.shape[1] > 0:\n            a[site_pos - interval.left] = H[i]\n        yield a.tobytes().decode(\"ascii\")\n\n\nclass TestAlignmentsReferenceImpl:\n    @pytest.mark.parametrize(\n        \"case\",\n        [\n            \"default\",\n            \"isolated_false_with_ref\",\n            \"interval\",\n        ],\n    )\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_against_python_reference(self, ts, case):\n        kwargs = {}\n        L = int(ts.sequence_length)\n        if case == \"isolated_false_with_ref\":\n            kwargs = {\"reference_sequence\": \"A\" * L, \"isolated_as_missing\": False}\n        elif case == \"interval\":\n            L = int(ts.sequence_length)\n            if L <= 1:\n                left, right = 0, L\n            else:\n                left, right = 1, max(2, L - 1)\n            kwargs = {\"left\": left, \"right\": right}\n        try:\n            got = list(ts.alignments(**kwargs))\n        except Exception as e1:\n            ex1 = e1\n            got = None\n        else:\n            
ex1 = None\n        try:\n            exp = list(_reference_alignments(ts, **kwargs))\n        except Exception as e2:\n            ex2 = e2\n            exp = None\n        else:\n            ex2 = None\n        if ex1 or ex2:\n            # The C backend may be stricter than the Python reference in some\n            # invalid-data situations (e.g. multi-character alleles or other\n            # format issues), and can raise a LibraryError or FileFormatError\n            # where the reference raises TypeError/ValueError, or even succeeds.\n            assert ex1 is not None\n            if ex2 is None:\n                return\n            if isinstance(ex1, tskit.LibraryError) and isinstance(\n                ex2, (TypeError, ValueError)\n            ):\n                return\n            assert type(ex1) is type(ex2)\n        else:\n            assert got == exp\n\n\n#\n# Tests for allele_remap\n#\n@pytest.mark.parametrize(\n    \"alleles_from, alleles_to, allele_map\",\n    [\n        # Case 1: alleles_to is shorter than alleles_from.\n        (\n            [\"A\", \"C\", \"G\", \"T\"],\n            [\"G\", \"C\"],\n            np.array([2, 1, 0, 3], dtype=\"uint32\"),\n        ),\n        # Case 2: alleles_to is longer than alleles_from.\n        (\n            [\"G\", \"C\"],\n            [\"A\", \"C\", \"G\", \"T\"],\n            np.array([2, 1], dtype=\"uint32\"),\n        ),\n        # Case 3: alleles_to is empty.\n        (\n            [\"A\", \"C\", \"G\", \"T\"],\n            [],\n            np.array([0, 1, 2, 3], dtype=\"uint32\"),\n        ),\n        # Case 4: alleles_from is empty.\n        (\n            [],\n            [\"A\", \"C\", \"G\", \"T\"],\n            np.array([], dtype=\"uint32\"),\n        ),\n        # Case 5: Both lists are empty.\n        (\n            [],\n            [],\n            np.array([], dtype=\"uint32\"),\n        ),\n        # Case 6: Both lists are tuples.\n        (\n            
(\"A\", \"C\", \"G\", \"T\"),\n            np.array([2, 1], dtype=\"uint32\"),\n        ),\n        # Case 7: Both lists are numpy arrays.\n        (\n            np.array((\"G\", \"C\")),\n            np.array((\"A\", \"C\", \"G\", \"T\")),\n            np.array([2, 1], dtype=\"uint32\"),\n        ),\n        # Case 8: Lists are of two different types.\n        (\n            np.array((\"G\", \"C\")),\n            [\"A\", \"C\", \"G\", \"T\"],\n            np.array([2, 1], dtype=\"uint32\"),\n        ),\n        # Case 9: Lists contain elements of arbitrary types.\n        (\n            [\"ABBA\", \"CDCD\"],\n            [\"ABBA\", \"CDCD\", \"EFEF\", \"GG\", 18],\n            np.array([0, 1], dtype=\"uint32\"),\n        ),\n        # Case 10: Lists contain unicode characters.\n        (\n            [\"\\u1f1e8\", \"\\u1f1eC\"],\n            [\"\\u1f1eC\", \"\\u1f1e8\", \"\\u1f1e6\", \"\\u1f1f3\"],\n            np.array([1, 0], dtype=\"uint32\"),\n        ),\n    ],\n)\ndef test_allele_remap(alleles_from, alleles_to, allele_map):\n    assert np.array_equal(allele_map, allele_remap(alleles_from, alleles_to))\n\n\nclass TestVariant:\n    # Much more in-depth testing of variant decoding is done via the ts.variants\n    # method as it existed before this class was publicly creatable.\n    def test_variant_init(self, ts_fixture):\n        v = tskit.Variant(ts_fixture)\n        assert np.array_equal(v.samples, np.array(ts_fixture.samples()))\n        assert v.alleles == ()\n        assert v.num_alleles == 0\n        assert v.isolated_as_missing\n        v = tskit.Variant(ts_fixture, samples=[43, 1])\n        assert np.array_equal(v.samples, np.array([43, 1]))\n        v = tskit.Variant(ts_fixture, alleles=(\"A\", \"💩\"))\n        assert v.alleles == (\"A\", \"💩\")\n        v = tskit.Variant(ts_fixture, isolated_as_missing=False)\n        assert not v.isolated_as_missing\n\n    def test_not_decoded(self, ts_fixture):\n        variant = tskit.Variant(ts_fixture)\n       
 assert variant.index == tskit.NULL\n        with pytest.raises(ValueError, match=\"not yet been decoded\"):\n            _ = variant.site\n        assert variant.alleles == ()\n        with pytest.raises(ValueError, match=\"not yet been decoded\"):\n            assert variant.genotypes\n        assert not variant.has_missing_data\n        assert variant.num_alleles == 0\n        with pytest.raises(ValueError, match=\"not yet been decoded\"):\n            _ = variant.position\n        assert np.array_equal(variant.samples, np.array(ts_fixture.samples()))\n\n    def test_variant_decode(self, ts_fixture):\n        v = tskit.Variant(ts_fixture)\n        v.decode(2)\n        assert v.index == 2\n        assert np.array_equal(v.samples, np.array(ts_fixture.samples()))\n        assert v.alleles == (\"A\", \"T\", \"G\", \"C\", None)\n        # No need to check contents as done in other tests\n        assert len(v.genotypes) == ts_fixture.num_samples\n\n    def test_variant_num_missing(self, ts_fixture):\n        variant = next(ts_fixture.variants())\n        assert variant.num_missing > 0\n        assert variant.num_missing == np.sum(variant.genotypes == -1)\n\n    def test_variant_counts(self, ts_fixture):\n        variant = next(ts_fixture.variants())\n        assert len(variant.alleles) > 2\n        assert None in variant.alleles\n        counts = variant.counts()\n        assert len(counts) == len(variant.alleles)\n        assert np.sum(list(counts.values())) == ts_fixture.num_samples\n        assert counts[None] == variant.num_missing\n        assert ts_fixture.num_samples > variant.num_missing\n        for i, v in enumerate(variant.alleles):\n            if v is not None:\n                assert np.sum(variant.genotypes == i) == counts[v]\n\n    def test_variant_counts_empty(self):\n        tables = tskit.TableCollection(sequence_length=1)\n        tables.sites.add_row(0, ancestral_state=\"💩\")\n        ts = tables.tree_sequence()\n        variant = 
next(ts.variants())\n        assert len(variant.counts()) == 1\n        assert variant.counts()[\"💩\"] == 0\n\n    def test_variant_simple_frequencies(self):\n        simple_tree = tskit.Tree.generate_balanced(4)\n        simple_ts = simple_tree.tree_sequence\n        tables = simple_ts.dump_tables()\n        tables.sites.add_row(position=0.3, ancestral_state=\"AS0\")\n        tables.sites.add_row(position=0.6, ancestral_state=\"AS1\")\n        tables.mutations.add_row(site=0, derived_state=\"DS0_0\", node=0)\n        tables.mutations.add_row(site=0, derived_state=\"DS0_3\", node=3)\n        tables.mutations.add_row(site=1, derived_state=\"DS1\", node=simple_tree.parent(0))\n        ts = tables.tree_sequence()\n        variant_0 = next(ts.variants())\n        freqs = variant_0.frequencies()\n        assert len(freqs) == 3\n        assert np.allclose(freqs[\"AS0\"], 0.5)\n        assert np.allclose(freqs[\"DS0_0\"], 0.25)\n        assert np.allclose(freqs[\"DS0_3\"], 0.25)\n        variant_1 = next(ts.variants(left=0.5))\n        freqs = variant_1.frequencies()\n        assert len(freqs) == 2\n        assert np.allclose(freqs[\"AS1\"], 0.5)\n        assert np.allclose(freqs[\"DS1\"], 0.5)\n\n    def test_variant_frequencies(self, ts_fixture):\n        variant = next(ts_fixture.variants())\n        assert variant.num_missing > 0\n        freqs = variant.frequencies()\n        assert len(freqs) == len(variant.alleles)\n        assert np.allclose(np.sum(list(freqs.values())), 1)\n        for i, v in enumerate(variant.alleles):\n            if v is None:\n                f = np.sum(variant.genotypes == tskit.NULL) / ts_fixture.num_samples\n            else:\n                f = np.sum(variant.genotypes == i) / ts_fixture.num_samples\n            assert np.allclose(f, freqs[v])\n\n        freqs = variant.frequencies(remove_missing=True)\n        assert len(freqs) == len(variant.alleles) - 1\n        for i, v in enumerate(variant.alleles[:-1]):\n            f = 
np.sum(variant.genotypes == i) / (\n                ts_fixture.num_samples - variant.num_missing\n            )\n            assert np.allclose(f, freqs[v])\n\n    def test_variant_frequencies_limit_samples(self, ts_fixture):\n        assert ts_fixture.num_samples > 1\n        variant = next(ts_fixture.variants(samples=ts_fixture.samples()[0:1]))\n        assert len(variant.genotypes) == 1\n        allele = variant.alleles[variant.genotypes[0]]\n        freqs = variant.frequencies()\n        assert freqs[allele] == 1\n        # should be one freq of 1 and all the rest zero\n        assert list(freqs.values()).count(0) == len(freqs) - 1\n\n    def test_variant_nonsample_freqs(self):\n        simple_tree = tskit.Tree.generate_balanced(4)\n        nonsample_node_left = simple_tree.parent(0)\n        nonsample_node_right = simple_tree.parent(3)\n        assert nonsample_node_left != nonsample_node_right\n        simple_ts = simple_tree.tree_sequence\n        tables = simple_ts.dump_tables()\n        tables.sites.add_row(position=0, ancestral_state=\"As\")\n        tables.mutations.add_row(site=0, derived_state=\"Ds\", node=nonsample_node_left)\n        ts = tables.tree_sequence()\n        samples = [nonsample_node_left, nonsample_node_right]\n        samples += list(simple_tree.children(nonsample_node_right))\n        variant = next(ts.variants(samples=samples, isolated_as_missing=False))\n        freqs = variant.frequencies()\n        assert np.allclose(freqs[\"Ds\"], 0.25)  # Just nonsample_node_left has the Ds\n        assert np.allclose(freqs[\"As\"], 0.75)\n\n    def test_variant_frequencies_no_samples(self, ts_fixture, caplog):\n        tables = ts_fixture.dump_tables()\n        tables.nodes.flags = np.zeros_like(tables.nodes.flags)\n        ts = tables.tree_sequence()\n        variant = next(ts.variants())\n        assert ts.num_samples == 0\n        with caplog.at_level(logging.WARNING):\n            freqs = variant.frequencies()\n            assert 
caplog.text.count(\"frequencies undefined\") == 1\n        assert np.all(np.isnan(list(freqs.values())))\n\n    def test_variant_str(self):\n        \"\"\"\n        Test using a simple dummy tree sequence for testing.\n        It has only one tree and one site, whose variant has the alleles\n        ('A', 'T', 'G', '💩', '', 'TAG', None).\n        \"\"\"\n        tables = tskit.TableCollection(10)\n        for _ in np.arange(6):\n            tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        tables.sites.add_row(position=5, ancestral_state=\"A\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"T\")\n        tables.mutations.add_row(site=0, node=1, derived_state=\"G\")\n        tables.mutations.add_row(site=0, node=2, derived_state=\"💩\")\n        tables.mutations.add_row(site=0, node=3, derived_state=\"\")\n        tables.mutations.add_row(site=0, node=4, derived_state=\"TAG\")\n        ts = tables.tree_sequence()\n        v = next(ts.variants())\n        assert v.alleles == (\"A\", \"T\", \"G\", \"💩\", \"\", \"TAG\", None)\n        assert isinstance(str(v), str)\n        assert re.match(\n            textwrap.dedent(\n                r\"\"\"\n                ╔═+╗\n                ║Variant\\s*║\n                ╠═+╤═+╣\n                ║Site id\\s*│\\s*0║\n                ╟─+┼─+╢\n                ║Site position\\s*│\\s*[0-9\\.]+║\n                ╟─+┼─+╢\n                ║Number of samples\\s*│\\s*[0-9]+║\n                ╟─+┼─+╢\n                ║Number of alleles\\s*│\\s*[0-9]+║\n                ╟─+┼─+╢\n                ║Samples with allele \\'A\\'\\s*│\\s*[0-9]+\\s*\\([0-9\\.]+\\%\\)║\n                ╟─+┼─+╢\n                ║Samples with allele \\'T\\'\\s*│\\s*[0-9]+\\s*\\([0-9\\.]+\\%\\)║\n                ╟─+┼─+╢\n                ║Samples with allele \\'G\\'\\s*│\\s*[0-9]+\\s*\\([0-9\\.]+\\%\\)║\n                ╟─+┼─+╢\n                ║Samples with allele \\'💩\\'\\s*│\\s*[0-9]+\\s*\\([0-9\\.]+\\%\\)║\n                
╟─+┼─+╢\n                ║Samples with allele \\'\\'\\s*│\\s*[0-9]+\\s*\\([0-9\\.]+\\%\\)║\n                ╟─+┼─+╢\n                ║Samples with allele \\'TAG\\'\\s*│\\s*[0-9]+\\s*\\([0-9\\.]+\\%\\)║\n                ╟─+┼─+╢\n                ║Samples with allele missing\\s*│\\s*[0-9]+\\s*\\([0-9\\.]+\\%\\)║\n                ╟─+┼─+╢\n                ║Has missing data\\s*│\\s*True║\n                ╟─+┼─+╢\n                ║Isolated as missing\\s*│\\s*True║\n                ╚═+╧═+╝\n                \"\"\"[1:]\n            ),\n            str(v),\n        )\n\n    def test_variant_str_no_samples(self):\n        tables = tskit.TableCollection(10)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        tables.sites.add_row(position=5, ancestral_state=\"A\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"T\")\n        ts = tables.tree_sequence()\n        v = next(ts.variants(samples=[]))\n        for allele in v.alleles:\n            if allele is not None:\n                assert (\n                    re.search(\n                        rf\"║Samples with allele '{allele}'\\s*│\\s*0\\s*\\(nan\\%\\)║\", str(v)\n                    )\n                    is not None\n                )\n\n    def test_variant_str_no_site(self):\n        tables = tskit.TableCollection(10)\n        ts = tables.tree_sequence()\n        v = tskit.Variant(ts)\n        s = str(v)\n        assert len(s.splitlines()) == 5\n        assert (\n            \"This variant has not yet been decoded at a specific site, \"\n            + \"call Variant.decode to set the site\"\n            in s\n        )\n\n    def test_variant_html_repr(self, ts_fixture):\n        v = next(ts_fixture.variants())\n        html = v._repr_html_()\n        # Parse to check valid\n        ElementTree.fromstring(html)\n        assert len(html) > 1900\n\n    def test_variant_html_repr_no_site(self):\n        tables = tskit.TableCollection(10)\n        ts = tables.tree_sequence()\n        v 
= tskit.Variant(ts)\n        html = v._repr_html_()\n        ElementTree.fromstring(html)\n        assert len(html) > 1600\n\n    def test_variant_repr(self, ts_fixture):\n        v = next(ts_fixture.variants())\n        str_rep = repr(v)\n        assert len(str_rep) > 0 and len(str_rep) < 10000\n        assert re.search(r\"\\AVariant\", str_rep)\n        assert re.search(rf\"\\'site\\': Site\\(id={v.site.id}\", str_rep)\n        assert re.search(rf\"position={v.position}\", str_rep)\n        alleles = re.escape(\"'alleles': \" + str(v.alleles))\n        assert re.search(rf\"{alleles}\", str_rep)\n        assert re.search(r\"\\'genotypes\\': array\\(\\[\", str_rep)\n        assert re.search(rf\"position={v.position}\", str_rep)\n        assert re.search(rf\"\\'has_missing_data\\': {v.has_missing_data}\", str_rep)\n        assert re.search(rf\"\\'isolated_as_missing\\': {v.isolated_as_missing}\", str_rep)\n"
  },
  {
    "path": "python/tests/test_haplotype_matching.py",
    "content": "# MIT License\n#\n# Copyright (c) 2019-2024 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nPython implementation of the Li and Stephens forwards and backwards algorithms.\n\"\"\"\n\nimport warnings\n\nimport lshmm as ls\nimport msprime\nimport numpy as np\nimport numpy.testing as nt\nimport pytest\n\nimport _tskit\nimport tskit\nfrom tests import tsutil\n\nMISSING = -1\n\n\ndef check_alleles(alleles, m):\n    \"\"\"\n    Checks the specified allele list and returns a list of lists\n    of alleles of length num_sites.\n    If alleles is a 1D list of strings, assume that this list is used\n    for each site and return num_sites copies of this list.\n    Otherwise, raise a ValueError if alleles is not a list of length\n    num_sites.\n    \"\"\"\n    if isinstance(alleles[0], str):\n        return [alleles for _ in range(m)], np.int8([len(alleles) for _ in range(m)])\n    if len(alleles) != m:\n        raise 
ValueError(\"Malformed alleles list\")\n    n_alleles = np.int8([(len(alleles_site)) for alleles_site in alleles])\n    return alleles, n_alleles\n\n\ndef mirror_coordinates(ts):\n    \"\"\"\n    Returns a copy of the specified tree sequence in which all\n    coordinates x are transformed into L - x.\n    \"\"\"\n    L = ts.sequence_length\n    tables = ts.dump_tables()\n    left = tables.edges.left\n    right = tables.edges.right\n    tables.edges.left = L - right\n    tables.edges.right = L - left\n    tables.sites.position = L - tables.sites.position  # + 1\n    # TODO migrations.\n    tables.sort()\n    return tables.tree_sequence()\n\n\nclass ValueTransition:\n    \"\"\"Simple struct holding value transition values.\"\"\"\n\n    def __init__(self, tree_node=-1, value=-1, value_index=-1):\n        self.tree_node = tree_node\n        self.value = value\n        self.value_index = value_index\n\n    def copy(self):\n        return ValueTransition(\n            self.tree_node,\n            self.value,\n            self.value_index,\n        )\n\n    def __repr__(self):\n        return repr(self.__dict__)\n\n    def __str__(self):\n        return repr(self)\n\n\nclass LsHmmAlgorithm:\n    \"\"\"\n    Abstract superclass of Li and Stephens HMM algorithm.\n    \"\"\"\n\n    def __init__(\n        self, ts, rho, mu, alleles, n_alleles, precision=10, scale_mutation=False\n    ):\n        self.ts = ts\n        self.mu = mu\n        self.rho = rho\n        self.precision = precision\n        # The array of ValueTransitions.\n        self.T = []\n        # indexes in to the T array for each node.\n        self.T_index = np.zeros(ts.num_nodes, dtype=int) - 1\n        # The number of nodes underneath each element in the T array.\n        self.N = np.zeros(ts.num_nodes, dtype=int)\n        # Efficiently compute the allelic state at a site\n        self.allelic_state = np.zeros(ts.num_nodes, dtype=int) - 1\n        # TreeIndexes so we can can update T and T_index between 
trees.\n        self.tree_pos = tsutil.TreeIndexes(ts)\n        self.parent = np.zeros(self.ts.num_nodes, dtype=int) - 1\n        self.tree = tskit.Tree(self.ts)\n        self.output = None\n        # Vector of the number of alleles at each site\n        self.n_alleles = n_alleles\n        self.alleles = alleles\n        self.scale_mutation_based_on_n_alleles = scale_mutation\n\n    def check_integrity(self):\n        M = [st.tree_node for st in self.T if st.tree_node != -1]\n        assert np.all(self.T_index[M] >= 0)\n        index = np.ones_like(self.T_index, dtype=bool)\n        index[M] = 0\n        assert np.all(self.T_index[index] == -1)\n        for j, st in enumerate(self.T):\n            if st.tree_node != -1:\n                assert j == self.T_index[st.tree_node]\n\n    def compress(self):\n        tree = self.tree\n        T = self.T\n        T_index = self.T_index\n\n        values = np.unique(list(st.value if st.tree_node != -1 else 1e200 for st in T))\n        for st in T:\n            if st.tree_node != -1:\n                st.value_index = np.searchsorted(values, st.value)\n\n        child = np.zeros(len(values), dtype=int)\n        num_values = len(values)\n        value_count = np.zeros(num_values, dtype=int)\n\n        def compute(u, parent_state):\n            value_count[:] = 0\n            for v in tree.children(u):\n                child[:] = optimal_set[v]\n                # If the set for a given child is empty, then we know it inherits\n                # directly from the parent state and must be a singleton set.\n                if np.sum(child) == 0:\n                    child[parent_state] = 1\n                for j in range(num_values):\n                    value_count[j] += child[j]\n            max_value_count = np.max(value_count)\n            optimal_set[u, :] = 0\n            optimal_set[u, value_count == max_value_count] = 1\n\n        optimal_set = np.zeros((tree.tree_sequence.num_nodes, len(values)), dtype=int)\n        
t_node_time = [-1 if st.tree_node == -1 else tree.time(st.tree_node) for st in T]\n        order = np.argsort(t_node_time)\n        for j in order:\n            st = T[j]\n            u = st.tree_node\n            if u != -1:\n                # Compute the value at this node\n                state = st.value_index\n                if tree.is_internal(u):\n                    compute(u, state)\n                else:\n                    # A[u, state] = 1\n                    optimal_set[u, state] = 1\n                # Find parent state\n                v = tree.parent(u)\n                if v != -1:\n                    while T_index[v] == -1:\n                        v = tree.parent(v)\n                    parent_state = T[T_index[v]].value_index\n                    v = tree.parent(u)\n                    while T_index[v] == -1:\n                        compute(v, parent_state)\n                        v = tree.parent(v)\n\n        T_old = [st.copy() for st in T]\n        T.clear()\n        T_parent = []\n\n        old_state = T_old[T_index[tree.root]].value_index\n        new_state = np.argmax(optimal_set[tree.root])\n\n        T.append(ValueTransition(tree_node=tree.root, value=values[new_state]))\n        T_parent.append(-1)\n        stack = [(tree.root, old_state, new_state, 0)]\n        while len(stack) > 0:\n            u, old_state, new_state, t_parent = stack.pop()\n            for v in tree.children(u):\n                old_child_state = old_state\n                if T_index[v] != -1:\n                    old_child_state = T_old[T_index[v]].value_index\n                if np.sum(optimal_set[v]) > 0:\n                    new_child_state = new_state\n                    child_t_parent = t_parent\n\n                    if optimal_set[v, new_state] == 0:\n                        new_child_state = np.argmax(optimal_set[v])\n                        child_t_parent = len(T)\n                        T_parent.append(t_parent)\n                        T.append(\n   
                         ValueTransition(tree_node=v, value=values[new_child_state])\n                        )\n                    stack.append((v, old_child_state, new_child_state, child_t_parent))\n                else:\n                    if old_child_state != new_state:\n                        T_parent.append(t_parent)\n                        T.append(\n                            ValueTransition(tree_node=v, value=values[old_child_state])\n                        )\n\n        for st in T_old:\n            if st.tree_node != -1:\n                T_index[st.tree_node] = -1\n        for j, st in enumerate(T):\n            T_index[st.tree_node] = j\n            self.N[j] = tree.num_samples(st.tree_node)\n        for j in range(len(T)):\n            if T_parent[j] != -1:\n                self.N[T_parent[j]] -= self.N[j]\n\n    def update_tree(self, direction=tskit.FORWARD):\n        \"\"\"\n        Update the internal data structures to move on to the next tree.\n        \"\"\"\n        parent = self.parent\n        T_index = self.T_index\n        T = self.T\n        if direction == tskit.FORWARD:\n            self.tree_pos.next()\n        else:\n            self.tree_pos.prev()\n        assert self.tree_pos.index == self.tree.index\n\n        for j in range(\n            self.tree_pos.out_range.start, self.tree_pos.out_range.stop, direction\n        ):\n            e = self.tree_pos.out_range.order[j]\n            edge = self.ts.edge(e)\n            u = edge.child\n            if T_index[u] == -1:\n                # Make sure the subtree we're detaching has an T_index-value at the root.\n                while T_index[u] == -1:\n                    u = parent[u]\n                    assert u != -1\n                T_index[edge.child] = len(T)\n                T.append(\n                    ValueTransition(tree_node=edge.child, value=T[T_index[u]].value)\n                )\n            parent[edge.child] = -1\n\n        for j in range(\n            
self.tree_pos.in_range.start, self.tree_pos.in_range.stop, direction\n        ):\n            e = self.tree_pos.in_range.order[j]\n            edge = self.ts.edge(e)\n            parent[edge.child] = edge.parent\n            u = edge.parent\n            if parent[edge.parent] == -1:\n                # Grafting onto a new root.\n                if T_index[edge.parent] == -1:\n                    T_index[edge.parent] = len(T)\n                    T.append(\n                        ValueTransition(\n                            tree_node=edge.parent, value=T[T_index[edge.child]].value\n                        )\n                    )\n            else:\n                # Grafting into an existing subtree.\n                while T_index[u] == -1:\n                    u = parent[u]\n                    assert u != -1\n            assert T_index[u] != -1 and T_index[edge.child] != -1\n            if T[T_index[u]].value == T[T_index[edge.child]].value:\n                st = T[T_index[edge.child]]\n                # Mark the lower ValueTransition as unused.\n                st.value = -1\n                st.tree_node = -1\n                T_index[edge.child] = -1\n\n        # We can have values left over still pointing to old roots. 
Remove\n        for root in self.tree.roots:\n            if T_index[root] != -1:\n                # Use a special marker here to designate the real roots.\n                T[T_index[root]].value_index = -2\n        for vt in T:\n            if vt.tree_node != -1:\n                if parent[vt.tree_node] == -1 and vt.value_index != -2:\n                    T_index[vt.tree_node] = -1\n                    vt.tree_node = -1\n                vt.value_index = -1\n\n    def update_probabilities(self, site, haplotype_state):\n        tree = self.tree\n        T_index = self.T_index\n        T = self.T\n        alleles = self.alleles[site.id]\n        allelic_state = self.allelic_state\n        # Set the allelic_state for this site.\n        allelic_state[tree.root] = alleles.index(site.ancestral_state)\n\n        for mutation in site.mutations:\n            u = mutation.node\n            allelic_state[u] = alleles.index(mutation.derived_state)\n            if T_index[u] == -1:\n                while T_index[u] == tskit.NULL:\n                    u = tree.parent(u)\n                T_index[mutation.node] = len(T)\n                T.append(\n                    ValueTransition(tree_node=mutation.node, value=T[T_index[u]].value)\n                )\n\n        for st in T:\n            u = st.tree_node\n            if u != -1:\n                # Get the allelic_state at u. 
TODO we can cache these states to\n                # avoid some upward traversals.\n                v = u\n                while allelic_state[v] == -1:\n                    v = tree.parent(v)\n                    assert v != -1\n                match = haplotype_state == allelic_state[v]\n                is_query_missing = haplotype_state == MISSING\n                # Note that the node u is used only by Viterbi\n                st.value = self.compute_next_probability(\n                    site.id, st.value, match, is_query_missing, u\n                )\n\n        # Unset the states\n        allelic_state[tree.root] = -1\n        for mutation in site.mutations:\n            allelic_state[mutation.node] = -1\n\n    def process_site(self, site, haplotype_state):\n        self.update_probabilities(site, haplotype_state)\n        self.compress()\n        s = self.compute_normalisation_factor()\n        for st in self.T:\n            assert st.tree_node != tskit.NULL\n            # if st.tree_node != tskit.NULL:\n            st.value /= s\n            st.value = round(st.value, self.precision)\n        self.output.store_site(site.id, s, [(st.tree_node, st.value) for st in self.T])\n\n    def compute_emission_proba(self, site_id, is_match):\n        mu = self.mu[site_id]\n        n_alleles = self.n_alleles[site_id]\n        if self.scale_mutation_based_on_n_alleles:\n            if is_match:\n                # Scale mutation based on the number of alleles\n                # - so the mutation rate is the mutation rate to one of the\n                # alleles. 
The overall mutation rate is then\n                # (n_alleles - 1) * mutation_rate.\n                p_e = 1 - (n_alleles - 1) * mu\n            else:\n                p_e = mu - mu * (n_alleles == 1)\n                # Added boolean in case we're at an invariant site\n        else:\n            # No scaling based on the number of alleles\n            #  - so the mutation rate is the mutation rate to anything.\n            # This means that we must rescale the mutation rate to a different\n            # allele, by the number of alleles.\n            if n_alleles == 1:  # In case we're at an invariant site\n                if is_match:\n                    p_e = 1\n                else:\n                    p_e = 0\n            else:\n                if is_match:\n                    p_e = 1 - mu\n                else:\n                    p_e = mu / (n_alleles - 1)\n        return p_e\n\n    def initialise(self, value):\n        self.tree.clear()\n        for u in self.ts.samples():\n            j = len(self.T)\n            self.T_index[u] = j\n            self.T.append(ValueTransition(tree_node=u, value=value))\n\n    def run(self, h):\n        n = self.ts.num_samples\n        self.initialise(1 / n)\n        while self.tree.next():\n            self.update_tree()\n            for site in self.tree.sites():\n                self.process_site(site, h[site.id])\n        return self.output\n\n    def compute_normalisation_factor(self):\n        raise NotImplementedError()\n\n    def compute_next_probability(\n        self, site_id, p_last, is_match, is_query_missing, node\n    ):\n        raise NotImplementedError()\n\n\nclass ForwardAlgorithm(LsHmmAlgorithm):\n    \"\"\"\n    The Li and Stephens forward algorithm.\n    \"\"\"\n\n    def __init__(\n        self, ts, rho, mu, alleles, n_alleles, scale_mutation=False, precision=10\n    ):\n        super().__init__(\n            ts,\n            rho,\n            mu,\n            alleles,\n            n_alleles,\n      
      precision=precision,\n            scale_mutation=scale_mutation,\n        )\n        self.output = CompressedMatrix(ts)\n\n    def compute_normalisation_factor(self):\n        s = 0\n        for j, st in enumerate(self.T):\n            assert st.tree_node != tskit.NULL\n            # assert self.N[j] > 0\n            s += self.N[j] * st.value\n        return s\n\n    def compute_next_probability(\n        self, site_id, p_last, is_match, is_query_missing, node\n    ):\n        rho = self.rho[site_id]\n        n = self.ts.num_samples\n        if is_query_missing:\n            p_e = 1.0\n        else:\n            p_e = self.compute_emission_proba(site_id, is_match)\n        p_t = p_last * (1 - rho) + rho / n\n        return p_t * p_e\n\n\nclass BackwardAlgorithm(ForwardAlgorithm):\n    \"\"\"\n    The Li and Stephens backward algorithm.\n    \"\"\"\n\n    def compute_next_probability(\n        self, site_id, p_next, is_match, is_query_missing, node\n    ):\n        if is_query_missing:\n            p_e = 1.0\n        else:\n            p_e = self.compute_emission_proba(site_id, is_match)\n        return p_next * p_e\n\n    def process_site(self, site, haplotype_state, s):\n        # FIXME see nodes in the C code for why we have two calls to\n        # compress\n        # https://github.com/tskit-dev/tskit/issues/2803\n        self.compress()\n        self.output.store_site(\n            site.id,\n            s,\n            [(st.tree_node, st.value) for st in self.T],\n        )\n        self.update_probabilities(site, haplotype_state)\n        # FIXME see nodes in the C code for why we have two calls to\n        # compress\n        self.compress()\n        b_last_sum = self.compute_normalisation_factor()\n        n = self.ts.num_samples\n        rho = self.rho[site.id]\n        for st in self.T:\n            if st.tree_node != tskit.NULL:\n                st.value = rho * b_last_sum / n + (1 - rho) * st.value\n                st.value /= s\n                
st.value = round(st.value, self.precision)\n\n    def run(self, h, normalisation_factor):\n        self.initialise(value=1)\n        while self.tree.prev():\n            self.update_tree(direction=tskit.REVERSE)\n            for site in reversed(list(self.tree.sites())):\n                self.process_site(site, h[site.id], normalisation_factor[site.id])\n        return self.output\n\n\nclass ViterbiAlgorithm(LsHmmAlgorithm):\n    \"\"\"\n    Runs the Li and Stephens Viterbi algorithm.\n    \"\"\"\n\n    def __init__(\n        self, ts, rho, mu, alleles, n_alleles, scale_mutation=False, precision=10\n    ):\n        super().__init__(\n            ts,\n            rho,\n            mu,\n            alleles,\n            n_alleles,\n            precision=precision,\n            scale_mutation=scale_mutation,\n        )\n        self.output = ViterbiMatrix(ts)\n\n    def compute_normalisation_factor(self):\n        max_st = ValueTransition(value=-1)\n        for st in self.T:\n            assert st.tree_node != tskit.NULL\n            if st.value > max_st.value:\n                max_st = st\n        if max_st.value == 0:\n            raise ValueError(\n                \"Trying to match non-existent allele with zero mutation rate\"\n            )\n        return max_st.value\n\n    def compute_next_probability(\n        self, site_id, p_last, is_match, is_query_missing, node\n    ):\n        rho = self.rho[site_id]\n        n = self.ts.num_samples\n\n        p_no_recomb = p_last * (1 - rho + rho / n)\n        p_recomb = rho / n\n        recombination_required = False\n        if p_no_recomb > p_recomb:\n            p_t = p_no_recomb\n        else:\n            p_t = p_recomb\n            recombination_required = True\n        self.output.add_recombination_required(site_id, node, recombination_required)\n\n        if is_query_missing:\n            p_e = 1.0\n        else:\n            p_e = self.compute_emission_proba(site_id, is_match)\n\n        return p_t * 
p_e\n\n\ndef assert_compressed_matrices_equal(cm1, cm2):\n    nt.assert_array_almost_equal(cm1.normalisation_factor, cm2.normalisation_factor)\n\n    for j in range(cm1.num_sites):\n        site1 = cm1.get_site(j)\n        site2 = cm2.get_site(j)\n        assert len(site1) == len(site2)\n        site1 = dict(site1)\n        site2 = dict(site2)\n\n        assert set(site1.keys()) == set(site2.keys())\n        for node in site1.keys():\n            # TODO  the precision value should be used as a parameter here\n            nt.assert_allclose(site1[node], site2[node], rtol=1e-5, atol=1e-8)\n\n\nclass CompressedMatrix:\n    \"\"\"\n    Class representing a num_samples x num_sites matrix compressed by a\n    tree sequence. Each site is represented by a set of (node, value)\n    pairs, which act as \"mutations\", i.e., any sample that descends\n    from a particular node will inherit that value (unless any other\n    values are on the path).\n    \"\"\"\n\n    def __init__(self, ts):\n        self.ts = ts\n        self.num_sites = ts.num_sites\n        self.num_samples = ts.num_samples\n        self.value_transitions = [None for _ in range(self.num_sites)]\n        self.normalisation_factor = np.zeros(self.num_sites)\n\n    def store_site(self, site, normalisation_factor, value_transitions):\n        assert all(u >= 0 for u, _ in value_transitions)\n        self.normalisation_factor[site] = normalisation_factor\n        self.value_transitions[site] = value_transitions\n\n    # Expose the same API as the low-level classes\n\n    @property\n    def num_transitions(self):\n        a = [len(self.value_transitions[j]) for j in range(self.num_sites)]\n        return np.array(a, dtype=np.int32)\n\n    def get_site(self, site):\n        return self.value_transitions[site]\n\n    def decode(self):\n        \"\"\"\n        Decodes the tree encoding of the values into an explicit\n        matrix.\n        \"\"\"\n        sample_index_map = np.zeros(self.ts.num_nodes, dtype=int) - 
1\n        sample_index_map[self.ts.samples()] = np.arange(self.ts.num_samples)\n        A = np.zeros((self.num_sites, self.num_samples))\n        for tree in self.ts.trees():\n            for site in tree.sites():\n                for node, value in self.value_transitions[site.id]:\n                    for u in tree.samples(node):\n                        j = sample_index_map[u]\n                        A[site.id, j] = value\n        return A\n\n\nclass ViterbiMatrix(CompressedMatrix):\n    \"\"\"\n    Class representing the compressed Viterbi matrix.\n    \"\"\"\n\n    def __init__(self, ts):\n        super().__init__(ts)\n        # Tuple containing the site, the node in the tree, and whether\n        # recombination is required\n        self.recombination_required = [(-1, 0, False)]\n\n    def add_recombination_required(self, site, node, required):\n        self.recombination_required.append((site, node, required))\n\n    def choose_sample(self, site_id, tree):\n        max_value = -1\n        u = -1\n        for node, value in self.value_transitions[site_id]:\n            if value > max_value:\n                max_value = value\n                u = node\n        assert u != -1\n\n        transition_nodes = [u for (u, _) in self.value_transitions[site_id]]\n        while not tree.is_sample(u):\n            for v in tree.children(u):\n                if v not in transition_nodes:\n                    u = v\n                    break\n            else:\n                raise AssertionError(\"could not find path\")\n        return u\n\n    def traceback(self):\n        # Run the traceback.\n        m = self.ts.num_sites\n        match = np.zeros(m, dtype=int)\n        recombination_tree = np.zeros(self.ts.num_nodes, dtype=int) - 1\n        tree = tskit.Tree(self.ts)\n        tree.last()\n        current_node = -1\n\n        rr_index = len(self.recombination_required) - 1\n        for site in reversed(self.ts.sites()):\n            while tree.interval.left > 
site.position:\n                tree.prev()\n            assert tree.interval.left <= site.position < tree.interval.right\n\n            # Fill in the recombination tree\n            j = rr_index\n            while self.recombination_required[j][0] == site.id:\n                u, required = self.recombination_required[j][1:]\n                recombination_tree[u] = required\n                j -= 1\n\n            if current_node == -1:\n                current_node = self.choose_sample(site.id, tree)\n            match[site.id] = current_node\n\n            # Now traverse up the tree from the current node. The first marked node\n            # we meet tells us whether we need to recombine.\n            u = current_node\n            while u != -1 and recombination_tree[u] == -1:\n                u = tree.parent(u)\n\n            assert u != -1\n            if recombination_tree[u] == 1:\n                # Need to switch at the next site.\n                current_node = -1\n            # Reset the nodes in the recombination tree.\n            j = rr_index\n            while self.recombination_required[j][0] == site.id:\n                u, required = self.recombination_required[j][1:]\n                recombination_tree[u] = -1\n                j -= 1\n            rr_index = j\n\n        return match\n\n\ndef get_site_alleles(ts, h, alleles):\n    if alleles is None:\n        n_alleles = np.zeros(ts.num_sites, dtype=np.int8) - 1\n        for j in range(ts.num_sites):\n            uniq_alleles = np.unique(np.append(ts.genotype_matrix()[j, :], h[j]))\n            uniq_alleles = uniq_alleles[uniq_alleles != MISSING]\n            n_alleles[j] = len(uniq_alleles)\n        assert np.all(n_alleles > 0)\n        alleles = tskit.ALLELES_ACGT\n        if len(set(alleles).intersection(next(ts.variants()).alleles)) == 0:\n            alleles = tskit.ALLELES_01\n            if len(set(alleles).intersection(next(ts.variants()).alleles)) == 0:\n                raise ValueError(\n      
              \"\"\"Alleles list could not be identified.\n                    Please pass a list of lists of alleles of length m,\n                    or a list of alleles (e.g. tskit.ALLELES_ACGT)\"\"\"\n                )\n        alleles = [alleles for _ in range(ts.num_sites)]\n    else:\n        alleles, n_alleles = check_alleles(alleles, ts.num_sites)\n    return alleles, n_alleles\n\n\ndef ls_forward_tree(\n    h, ts, rho, mu, precision=30, alleles=None, scale_mutation_based_on_n_alleles=False\n):\n    alleles, n_alleles = get_site_alleles(ts, h, alleles)\n    fa = ForwardAlgorithm(\n        ts,\n        rho,\n        mu,\n        alleles,\n        n_alleles,\n        precision=precision,\n        scale_mutation=scale_mutation_based_on_n_alleles,\n    )\n    return fa.run(h)\n\n\ndef ls_backward_tree(h, ts, rho, mu, normalisation_factor, precision=30, alleles=None):\n    alleles, n_alleles = get_site_alleles(ts, h, alleles)\n    ba = BackwardAlgorithm(\n        ts,\n        rho,\n        mu,\n        alleles,\n        n_alleles,\n        precision=precision,\n    )\n    return ba.run(h, normalisation_factor)\n\n\ndef ls_viterbi_tree(\n    h, ts, rho, mu, precision=30, alleles=None, scale_mutation_based_on_n_alleles=False\n):\n    alleles, n_alleles = get_site_alleles(ts, h, alleles)\n    va = ViterbiAlgorithm(\n        ts,\n        rho,\n        mu,\n        alleles,\n        n_alleles,\n        precision=precision,\n        scale_mutation=scale_mutation_based_on_n_alleles,\n    )\n    return va.run(h)\n\n\nclass LSBase:\n    \"\"\"Superclass of Li and Stephens tests.\"\"\"\n\n    def example_haplotypes(self, ts):\n        H = ts.genotype_matrix()\n        s = H[:, 0].reshape(1, H.shape[0])\n        H = H[:, 1:]\n\n        haplotypes = [\n            s,\n            H[:, -1].reshape(1, H.shape[0]),\n        ]\n        s_tmp = s.copy()\n        s_tmp[0, -1] = MISSING\n        haplotypes.append(s_tmp)\n        s_tmp = s.copy()\n        s_tmp[0, ts.num_sites // 
2] = MISSING\n        haplotypes.append(s_tmp)\n        s_tmp = s.copy()\n        s_tmp[0, :] = MISSING\n        haplotypes.append(s_tmp)\n\n        return H, haplotypes\n\n    def example_parameters_haplotypes(self, ts, seed=42):\n        \"\"\"Returns an iterator over combinations of haplotype,\n        recombination and mutation rates.\"\"\"\n        np.random.seed(seed)\n        H, haplotypes = self.example_haplotypes(ts)\n        n = H.shape[1]\n        m = ts.get_num_sites()\n\n        # Here we have equal mutation and recombination\n        r = np.zeros(m) + 0.01\n        mu = np.zeros(m) + 0.01\n        r[0] = 0\n\n        for s in haplotypes:\n            yield n, H, s, r, mu\n\n        # FIXME removing these as tests are abominably slow.\n        # We'll be refactoring all this to use pytest anyway, so let's not\n        # worry too much about coverage for now.\n        # # Mixture of random and extremes\n        # rs = [np.zeros(m) + 0.999, np.zeros(m) + 1e-6, np.random.rand(m)]\n        # mus = [np.zeros(m) + 0.33, np.zeros(m) + 1e-6, np.random.rand(m) * 0.33]\n\n        # import itertools\n        # for s, r, mu in itertools.product(haplotypes, rs, mus):\n        #     r[0] = 0\n        #     yield n, H, s, r, mu\n\n    def assertAllClose(self, A, B):\n        \"\"\"Assert that all entries of two matrices are 'close'\"\"\"\n        assert np.allclose(A, B, rtol=1e-5, atol=1e-8)\n\n    # Define a bunch of very small tree-sequences for testing a collection\n    # of parameters on\n    def test_simple_n_10_no_recombination(self):\n        ts = msprime.simulate(\n            10, recombination_rate=0, mutation_rate=0.5, random_seed=42\n        )\n        assert ts.num_sites > 3\n        self.verify(ts)\n\n    def test_simple_n_10_no_recombination_high_mut(self):\n        ts = msprime.simulate(10, recombination_rate=0, mutation_rate=3, random_seed=42)\n        assert ts.num_sites > 3\n        self.verify(ts)\n\n    def 
test_simple_n_10_no_recombination_higher_mut(self):\n        ts = msprime.simulate(20, recombination_rate=0, mutation_rate=3, random_seed=42)\n        assert ts.num_sites > 3\n        self.verify(ts)\n\n    def test_simple_n_6(self):\n        ts = msprime.simulate(6, recombination_rate=2, mutation_rate=7, random_seed=42)\n        assert ts.num_sites > 5\n        self.verify(ts)\n\n    def test_simple_n_8(self):\n        ts = msprime.simulate(8, recombination_rate=2, mutation_rate=5, random_seed=42)\n        assert ts.num_sites > 5\n        self.verify(ts)\n\n    def test_simple_n_8_high_recombination(self):\n        ts = msprime.simulate(8, recombination_rate=20, mutation_rate=5, random_seed=42)\n        assert ts.num_trees > 15\n        assert ts.num_sites > 5\n        self.verify(ts)\n\n    def test_simple_n_16(self):\n        ts = msprime.simulate(16, recombination_rate=2, mutation_rate=5, random_seed=42)\n        assert ts.num_sites > 5\n        self.verify(ts)\n\n    # # Define a bunch of very small tree-sequences for testing a collection\n    # # of parameters on\n    # def test_simple_n_10_no_recombination_blah(self):\n    #     ts = msprime.sim_ancestry(\n    #         samples=10,\n    #         recombination_rate=0,\n    #         random_seed=42,\n    #         sequence_length=10,\n    #         population_size=10000,\n    #     )\n    #     ts = msprime.sim_mutations(ts, rate=1e-5, random_seed=42)\n    #     assert ts.num_sites > 3\n    #     self.verify(ts)\n\n    # def test_simple_n_6_blah(self):\n    # ts = msprime.sim_ancestry(\n    #     samples=6,\n    #     recombination_rate=1e-4,\n    #     random_seed=42,\n    #     sequence_length=40,\n    #     population_size=10000,\n    # )\n    # ts = msprime.sim_mutations(ts, rate=1e-3, random_seed=42)\n    #     assert ts.num_sites > 5\n    #     self.verify(ts)\n\n    # def test_simple_n_8_blah(self):\n    #     ts = msprime.sim_ancestry(\n    #         samples=8,\n    #         
recombination_rate=1e-4,\n    #         random_seed=42,\n    #         sequence_length=20,\n    #         population_size=10000,\n    #     )\n    #     ts = msprime.sim_mutations(ts, rate=1e-4, random_seed=42)\n    #     assert ts.num_sites > 5\n    #     assert ts.num_trees > 15\n    #     self.verify(ts)\n\n    # def test_simple_n_16_blah(self):\n    #     ts = msprime.sim_ancestry(\n    #         samples=16,\n    #         recombination_rate=1e-2,\n    #         random_seed=42,\n    #         sequence_length=20,\n    #         population_size=10000,\n    #     )\n    #     ts = msprime.sim_mutations(ts, rate=1e-4, random_seed=42)\n    #     assert ts.num_sites > 5\n    #     self.verify(ts)\n\n    def verify(self, ts):\n        raise NotImplementedError()\n\n\nclass FBAlgorithmBase(LSBase):\n    \"\"\"Base for forwards backwards algorithm tests.\"\"\"\n\n\nclass VitAlgorithmBase(LSBase):\n    \"\"\"Base for viterbi algorithm tests.\"\"\"\n\n\nclass TestMirroringHap(FBAlgorithmBase):\n    \"\"\"Tests that mirroring the tree sequence and running forwards and backwards\n    algorithms gives the same log-likelihood of observing the data.\"\"\"\n\n    def verify(self, ts):\n        for n, H, s, r, mu in self.example_parameters_haplotypes(ts):\n            # Note, need to remove the first sample from the ts, and ensure that\n            # invariant sites aren't removed.\n            ts_check = ts.simplify(range(1, n + 1), filter_sites=False)\n            cm = ls_forward_tree(s[0, :], ts_check, r, mu)\n            ll_tree = np.sum(np.log10(cm.normalisation_factor))\n\n            ts_check_mirror = mirror_coordinates(ts_check)\n            r_flip = np.insert(np.flip(r)[:-1], 0, 0)\n            cm_mirror = ls_forward_tree(\n                np.flip(s[0, :]), ts_check_mirror, r_flip, np.flip(mu)\n            )\n            ll_mirror_tree = np.sum(np.log10(cm_mirror.normalisation_factor))\n            self.assertAllClose(ll_tree, ll_mirror_tree)\n\n            # Ensure that 
the decoded matrices are the same\n            flipped_H = np.flip(H, axis=0)\n            flipped_s = np.flip(s, axis=1)\n            F_mirror_matrix, c, ll = ls.forwards(\n                reference_panel=flipped_H,\n                query=flipped_s,\n                ploidy=1,\n                prob_recombination=r_flip,\n                prob_mutation=np.flip(mu),\n                scale_mutation_rate=False,\n            )\n\n            self.assertAllClose(F_mirror_matrix, cm_mirror.decode())\n            self.assertAllClose(ll, ll_tree)\n\n\nclass TestForwardHapTree(FBAlgorithmBase):\n    \"\"\"Tests that the tree algorithm computes the same forward matrix as the\n    simple method.\"\"\"\n\n    def verify(self, ts):\n        for n, H, s, r, mu in self.example_parameters_haplotypes(ts):\n            for scale_mutation in [False, True]:\n                with warnings.catch_warnings():\n                    warnings.simplefilter(\"ignore\")\n                    # Warning from lshmm:\n                    # Passed a vector of mutation rates, but rescaling each mutation\n                    # rate conditional on the number of alleles\n                    F, c, ll = ls.forwards(\n                        reference_panel=H,\n                        query=s,\n                        ploidy=1,\n                        prob_recombination=r,\n                        prob_mutation=mu,\n                        scale_mutation_rate=scale_mutation,\n                    )\n                # Note, need to remove the first sample from the ts, and ensure\n                # that invariant sites aren't removed.\n                ts_check = ts.simplify(range(1, n + 1), filter_sites=False)\n                cm = ls_forward_tree(\n                    s[0, :],\n                    ts_check,\n                    r,\n                    mu,\n                    scale_mutation_based_on_n_alleles=scale_mutation,\n                )\n                self.assertAllClose(cm.decode(), F)\n               
 ll_tree = np.sum(np.log10(cm.normalisation_factor))\n                self.assertAllClose(ll, ll_tree)\n\n\nclass TestForwardBackwardTree(FBAlgorithmBase):\n    \"\"\"Tests that the tree algorithm computes the same forward matrix as the\n    simple method.\"\"\"\n\n    def verify(self, ts):\n        for n, H, s, r, mu in self.example_parameters_haplotypes(ts):\n            F, c, ll = ls.forwards(\n                reference_panel=H,\n                query=s,\n                ploidy=1,\n                prob_recombination=r,\n                prob_mutation=mu,\n                scale_mutation_rate=False,\n            )\n            B = ls.backwards(\n                reference_panel=H,\n                query=s,\n                ploidy=1,\n                normalisation_factor_from_forward=c,\n                prob_recombination=r,\n                prob_mutation=mu,\n                scale_mutation_rate=False,\n            )\n\n            # Note, need to remove the first sample from the ts, and ensure that\n            # invariant sites aren't removed.\n            ts_check = ts.simplify(range(1, n + 1), filter_sites=False)\n            c_f = ls_forward_tree(s[0, :], ts_check, r, mu)\n            ll_tree = np.sum(np.log10(c_f.normalisation_factor))\n\n            c_b = ls_backward_tree(\n                s[0, :],\n                ts_check,\n                r,\n                mu,\n                c_f.normalisation_factor,\n            )\n            B_tree = c_b.decode()\n\n            F_tree = c_f.decode()\n\n            self.assertAllClose(B, B_tree)\n            self.assertAllClose(F, F_tree)\n            self.assertAllClose(ll, ll_tree)\n\n\nclass TestTreeViterbiHap(VitAlgorithmBase):\n    \"\"\"Test that we have the same log-likelihood between tree and matrix\n    implementations\"\"\"\n\n    def verify(self, ts):\n        for n, H, s, r, mu in self.example_parameters_haplotypes(ts):\n            path, ll = ls.viterbi(\n                reference_panel=H,\n               
 query=s,\n                ploidy=1,\n                prob_recombination=r,\n                prob_mutation=mu,\n                scale_mutation_rate=False,\n            )\n            ts_check = ts.simplify(range(1, n + 1), filter_sites=False)\n            cm = ls_viterbi_tree(s[0, :], ts_check, r, mu)\n            ll_tree = np.sum(np.log10(cm.normalisation_factor))\n            self.assertAllClose(ll, ll_tree)\n\n            # Now, need to ensure that the likelihood of the preferred path is\n            # the same as ll_tree (and ll).\n            path_tree = cm.traceback()\n            ll_check = ls.path_loglik(\n                reference_panel=H,\n                query=s,\n                ploidy=1,\n                path=path_tree,\n                prob_recombination=r,\n                prob_mutation=mu,\n                scale_mutation_rate=False,\n            )\n            self.assertAllClose(ll, ll_check)\n\n\n# TODO add params to run the various checks\ndef check_viterbi(ts, h, recombination=None, mutation=None):\n    h = np.array(h).astype(np.int8)\n    m = ts.num_sites\n    assert len(h) == m\n    if recombination is None:\n        recombination = np.zeros(ts.num_sites) + 1e-9\n    if mutation is None:\n        mutation = np.zeros(ts.num_sites)\n    precision = 22\n\n    G = ts.genotype_matrix()\n    s = h.reshape(1, m)\n\n    path, ll = ls.viterbi(\n        reference_panel=G,\n        query=s,\n        ploidy=1,\n        prob_recombination=recombination,\n        prob_mutation=mutation,\n        scale_mutation_rate=False,\n    )\n    assert np.isscalar(ll)\n\n    cm = ls_viterbi_tree(h, ts, rho=recombination, mu=mutation)\n    ll_tree = np.sum(np.log10(cm.normalisation_factor))\n    assert np.isscalar(ll_tree)\n    nt.assert_allclose(ll_tree, ll)\n\n    # Check that the likelihood of the preferred path is\n    # the same as ll_tree (and ll).\n    path_tree = cm.traceback()\n    ll_check = ls.path_loglik(\n        reference_panel=G,\n        query=s,\n       
 ploidy=1,\n        path=path_tree,\n        prob_recombination=recombination,\n        prob_mutation=mutation,\n        scale_mutation_rate=False,\n    )\n    nt.assert_allclose(ll_check, ll)\n\n    ll_ts = ts._ll_tree_sequence\n    ls_hmm = _tskit.LsHmm(ll_ts, recombination, mutation, precision=precision)\n    cm_lib = _tskit.ViterbiMatrix(ll_ts)\n    ls_hmm.viterbi_matrix(h, cm_lib)\n    path_lib = cm_lib.traceback()\n\n    # Not true in general, but let's see how far it goes\n    nt.assert_array_equal(path_lib, path_tree)\n\n    nt.assert_allclose(cm_lib.normalisation_factor, cm.normalisation_factor)\n\n    return path\n\n\n# TODO add params to run the various checks\ndef check_forward_matrix(ts, h, recombination=None, mutation=None):\n    precision = 22\n    h = np.array(h).astype(np.int8)\n    n = ts.num_samples\n    m = ts.num_sites\n    assert len(h) == m\n    if recombination is None:\n        recombination = np.zeros(ts.num_sites) + 1e-9\n    if mutation is None:\n        mutation = np.zeros(ts.num_sites)\n\n    G = ts.genotype_matrix()\n    s = h.reshape(1, m)\n\n    F, c, ll = ls.forwards(\n        reference_panel=G,\n        query=s,\n        ploidy=1,\n        prob_recombination=recombination,\n        prob_mutation=mutation,\n        scale_mutation_rate=False,\n    )\n    assert F.shape == (m, n)\n    assert c.shape == (m,)\n    assert np.isscalar(ll)\n\n    cm = ls_forward_tree(\n        h, ts, recombination, mutation, scale_mutation_based_on_n_alleles=False\n    )\n    F2 = cm.decode()\n    nt.assert_allclose(F, F2)\n    nt.assert_allclose(c, cm.normalisation_factor)\n    ll_tree = np.sum(np.log10(cm.normalisation_factor))\n    nt.assert_allclose(ll_tree, ll)\n\n    ll_ts = ts._ll_tree_sequence\n    ls_hmm = _tskit.LsHmm(ll_ts, recombination, mutation, precision=precision)\n    cm_lib = _tskit.CompressedMatrix(ll_ts)\n    ls_hmm.forward_matrix(h, cm_lib)\n    F3 = cm_lib.decode()\n\n    assert_compressed_matrices_equal(cm, cm_lib)\n\n    
nt.assert_allclose(F, F3)\n    nt.assert_allclose(c, cm_lib.normalisation_factor)\n    return cm_lib\n\n\ndef check_backward_matrix(ts, h, forward_cm, recombination=None, mutation=None):\n    precision = 22\n    h = np.array(h).astype(np.int8)\n    m = ts.num_sites\n    assert len(h) == m\n    if recombination is None:\n        recombination = np.zeros(ts.num_sites) + 1e-9\n    if mutation is None:\n        mutation = np.zeros(ts.num_sites)\n\n    G = ts.genotype_matrix()\n    s = h.reshape(1, m)\n\n    B = ls.backwards(\n        reference_panel=G,\n        query=s,\n        ploidy=1,\n        normalisation_factor_from_forward=forward_cm.normalisation_factor,\n        prob_recombination=recombination,\n        prob_mutation=mutation,\n        scale_mutation_rate=False,\n    )\n\n    backward_cm = ls_backward_tree(\n        h,\n        ts,\n        recombination,\n        mutation,\n        forward_cm.normalisation_factor,\n        precision=precision,\n    )\n    nt.assert_array_equal(\n        backward_cm.normalisation_factor, forward_cm.normalisation_factor\n    )\n\n    ll_ts = ts._ll_tree_sequence\n    ls_hmm = _tskit.LsHmm(ll_ts, recombination, mutation, precision=precision)\n    cm_lib = _tskit.CompressedMatrix(ll_ts)\n    ls_hmm.backward_matrix(h, forward_cm.normalisation_factor, cm_lib)\n\n    assert_compressed_matrices_equal(backward_cm, cm_lib)\n\n    B_lib = cm_lib.decode()\n    B_tree = backward_cm.decode()\n    nt.assert_allclose(B_tree, B_lib)\n    nt.assert_allclose(B, B_lib)\n\n\ndef add_unique_sample_mutations(ts, start=0):\n    \"\"\"\n    Adds a mutation for each of the samples at equally spaced locations\n    along the genome.\n    \"\"\"\n    tables = ts.dump_tables()\n    L = int(ts.sequence_length)\n    assert L % ts.num_samples == 0\n    gap = L // ts.num_samples\n    x = start\n    for u in ts.samples():\n        site = tables.sites.add_row(position=x, ancestral_state=\"0\")\n        tables.mutations.add_row(site=site, derived_state=\"1\", 
node=u)\n        x += gap\n    return tables.tree_sequence()\n\n\nclass TestSingleBalancedTreeExample:\n    # 3.00┊    6    ┊\n    #     ┊  ┏━┻━┓  ┊\n    # 2.00┊  4   5  ┊\n    #     ┊ ┏┻┓ ┏┻┓ ┊\n    # 1.00┊ 0 1 2 3 ┊\n    #     0         8\n\n    @staticmethod\n    def ts():\n        return add_unique_sample_mutations(\n            tskit.Tree.generate_balanced(4, span=8).tree_sequence,\n            start=1,\n        )\n\n    @pytest.mark.parametrize(\"j\", [0, 1, 2, 3])\n    def test_match_sample(self, j):\n        ts = self.ts()\n        h = np.zeros(4)\n        h[j] = 1\n        path = check_viterbi(ts, h)\n        nt.assert_array_equal([j, j, j, j], path)\n        cm = check_forward_matrix(ts, h)\n        check_backward_matrix(ts, h, cm)\n\n    @pytest.mark.parametrize(\"j\", [1, 2])\n    def test_match_sample_missing_flanks(self, j):\n        ts = self.ts()\n        h = np.zeros(4)\n        h[0] = -1\n        h[-1] = -1\n        h[j] = 1\n        path = check_viterbi(ts, h)\n        nt.assert_array_equal([j, j, j, j], path)\n        cm = check_forward_matrix(ts, h)\n        check_backward_matrix(ts, h, cm)\n\n    def test_switch_each_sample(self):\n        ts = self.ts()\n        h = np.ones(4)\n        path = check_viterbi(ts, h)\n        nt.assert_array_equal([0, 1, 2, 3], path)\n        cm = check_forward_matrix(ts, h)\n        check_backward_matrix(ts, h, cm)\n\n    def test_switch_each_sample_missing_flanks(self):\n        ts = self.ts()\n        h = np.ones(4)\n        h[0] = -1\n        h[-1] = -1\n        path = check_viterbi(ts, h)\n        nt.assert_array_equal([1, 1, 2, 2], path)\n        cm = check_forward_matrix(ts, h)\n        check_backward_matrix(ts, h, cm)\n\n    def test_switch_each_sample_missing_middle(self):\n        ts = self.ts()\n        h = np.ones(4)\n        h[1:3] = -1\n        path = check_viterbi(ts, h)\n        # Implementation of Viterbi switches at right-most position\n        nt.assert_array_equal([0, 3, 3, 3], path)\n        
cm = check_forward_matrix(ts, h)\n        check_backward_matrix(ts, h, cm)\n\n\nclass TestSimulationExamples:\n    @pytest.mark.slow\n    @pytest.mark.parametrize(\"n\", [3, 10, 50])\n    @pytest.mark.parametrize(\"L\", [1, 10, 100])\n    def test_continuous_genome(self, n, L):\n        ts = msprime.simulate(\n            n, length=L, recombination_rate=1, mutation_rate=1, random_seed=42\n        )\n        h = np.zeros(ts.num_sites, dtype=np.int8)\n        # NOTE this is a bit slow at the moment but we can disable the Python\n        # implementation once testing has been improved on smaller examples.\n        # Add ``compare_py=False`` to these calls.\n        check_viterbi(ts, h)\n        cm = check_forward_matrix(ts, h)\n        check_backward_matrix(ts, h, cm)\n"
  },
  {
    "path": "python/tests/test_highlevel.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2025 Tskit Developers\n# Copyright (c) 2015-2018 University of Oxford\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTest cases for the high level interface to tskit.\n\"\"\"\n\nimport collections\nimport dataclasses\nimport decimal\nimport inspect\nimport io\nimport itertools\nimport json\nimport math\nimport os\nimport pathlib\nimport pickle\nimport platform\nimport random\nimport re\nimport tempfile\nimport unittest\nimport uuid as _uuid\nimport warnings\nfrom xml.etree import ElementTree\n\nimport kastore\nimport msprime\nimport networkx as nx\nimport numpy as np\nimport pytest\nfrom numpy.testing import assert_array_equal\n\nimport _tskit\nimport tests as tests\nimport tests.simplify as simplify\nimport tests.tsutil as tsutil\nimport tskit\nimport tskit.metadata as metadata\nimport tskit.util as util\nfrom tskit import UNKNOWN_TIME\n\n\ndef traversal_preorder(tree, root=None):\n    
roots = tree.roots if root is None else [root]\n    for node in roots:\n        yield node\n        for child in tree.children(node):\n            yield from traversal_preorder(tree, child)\n\n\ndef traversal_postorder(tree, root=None):\n    roots = tree.roots if root is None else [root]\n    for node in roots:\n        for child in tree.children(node):\n            yield from traversal_postorder(tree, child)\n        yield node\n\n\ndef traversal_inorder(tree, root=None):\n    roots = tree.roots if root is None else [root]\n    for node in roots:\n        children = list(tree.children(node))\n        half = len(children) // 2\n        for child in children[:half]:\n            yield from traversal_inorder(tree, child)\n        yield node\n        for child in children[half:]:\n            yield from traversal_inorder(tree, child)\n\n\ndef traversal_levelorder(tree, root=None):\n    yield from sorted(list(tree.nodes(root)), key=lambda u: tree.depth(u))\n\n\ndef _traversal_minlex_postorder(tree, u):\n    \"\"\"\n    For a given input ID u, this function returns a tuple whose first value\n    is the minimum leaf node ID under node u, and whose second value is\n    a list containing the minlex postorder for the subtree rooted at node u.\n    The first value is needed for sorting, and the second value is what\n    finally gets returned.\n    \"\"\"\n    children = tree.children(u)\n    if len(children) > 0:\n        children_return = [_traversal_minlex_postorder(tree, c) for c in children]\n        # sorts by first value, which is the minimum leaf node ID\n        children_return.sort(key=lambda x: x[0])\n        minlex_postorder = []\n        for _, child_minlex_postorder in children_return:\n            minlex_postorder.extend(child_minlex_postorder)\n        minlex_postorder.extend([u])\n        return (children_return[0][0], minlex_postorder)\n    else:\n        return (u, [u])\n\n\ndef traversal_minlex_postorder(tree, root=None):\n    roots = tree.roots if root is 
None else [root]\n    root_lists = [_traversal_minlex_postorder(tree, node) for node in roots]\n    for _, node_list in sorted(root_lists, key=lambda x: x[0]):\n        yield from node_list\n\n\ndef traversal_timeasc(tree, root=None):\n    yield from sorted(tree.nodes(root), key=lambda u: (tree.time(u), u))\n\n\ndef traversal_timedesc(tree, root=None):\n    yield from sorted(tree.nodes(root), key=lambda u: (tree.time(u), u), reverse=True)\n\n\ntraversal_map = {\n    \"preorder\": traversal_preorder,\n    \"postorder\": traversal_postorder,\n    \"inorder\": traversal_inorder,\n    \"levelorder\": traversal_levelorder,\n    \"breadthfirst\": traversal_levelorder,\n    \"minlex_postorder\": traversal_minlex_postorder,\n    \"timeasc\": traversal_timeasc,\n    \"timedesc\": traversal_timedesc,\n}\n\n\ndef simple_get_pairwise_diversity(haplotypes):\n    \"\"\"\n    Returns the value of pi for the specified haplotypes.\n    \"\"\"\n    # Very simplistic algorithm...\n    n = len(haplotypes)\n    pi = 0\n    for k in range(n):\n        for j in range(k):\n            for u, v in zip(haplotypes[j], haplotypes[k]):\n                pi += u != v\n    return 2 * pi / (n * (n - 1))\n\n\ndef simplify_tree_sequence(ts, samples, filter_sites=True):\n    \"\"\"\n    Simple tree-by-tree algorithm to get a simplify of a tree sequence.\n    \"\"\"\n    s = simplify.Simplifier(ts, samples, filter_sites=filter_sites)\n    return s.simplify()\n\n\n@pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\nclass TestLinkAncestorsExamples:\n    def test_link_ancestors_runs_and_is_sane(self, ts):\n        # Can't link ancestors when edges have metadata.\n        if ts.tables.edges.metadata_schema != tskit.MetadataSchema(schema=None):\n            pytest.skip(\"link_ancestors does not support edges with metadata\")\n\n        samples = ts.samples()\n        if len(samples) == 0:\n            pytest.skip(\"Tree sequence has no samples\")\n\n        # Prefer internal nodes as 
ancestors; fall back to samples if none.\n        ancestor_nodes = [u.id for u in ts.nodes() if not u.is_sample()]\n        if len(ancestor_nodes) == 0:\n            ancestor_nodes = list(samples)\n\n        # Keep argument sizes modest for large examples.\n        samples = samples[: min(len(samples), 10)]\n        ancestors = ancestor_nodes[: min(len(ancestor_nodes), 10)]\n\n        result = ts.link_ancestors(samples, ancestors)\n        assert isinstance(result, tskit.EdgeTable)\n\n        # Basic invariants on the returned table.\n        assert np.all(result.left >= 0)\n        assert np.all(result.right <= ts.sequence_length)\n        if result.num_rows > 0:\n            assert np.all(result.left < result.right)\n            assert set(result.parent).issubset(set(range(ts.num_nodes)))\n            assert set(result.child).issubset(set(range(ts.num_nodes)))\n\n        # Parity with mutable TableCollection implementation.\n        mutable_result = ts.dump_tables().link_ancestors(samples, ancestors)\n        assert result == mutable_result\n\n        # Parity with immutable TableCollection, when available.\n        if getattr(_tskit, \"HAS_NUMPY_2\", False):\n            immutable_result = ts.tables.link_ancestors(samples, ancestors)\n            assert result == immutable_result\n\n\ndef oriented_forests(n):\n    \"\"\"\n    Implementation of Algorithm O from TAOCP section 7.2.1.6.\n    Generates all canonical n-node oriented forests.\n    \"\"\"\n    p = [k - 1 for k in range(0, n + 1)]\n    k = 1\n    while k != 0:\n        yield p\n        if p[n] > 0:\n            p[n] = p[p[n]]\n            yield p\n        k = n\n        while k > 0 and p[k] == 0:\n            k -= 1\n        if k != 0:\n            j = p[k]\n            d = k - j\n            not_done = True\n            while not_done:\n                if p[k - d] == p[j]:\n                    p[k] = p[j]\n                else:\n                    p[k] = p[k - d] + d\n                if k == n:\n       
             not_done = False\n                else:\n                    k += 1\n\n\ndef get_mrca(pi, x, y):\n    \"\"\"\n    Returns the most recent common ancestor of nodes x and y in the\n    oriented forest pi.\n    \"\"\"\n    x_parents = [x]\n    j = x\n    while j != 0:\n        j = pi[j]\n        x_parents.append(j)\n    y_parents = {y: None}\n    j = y\n    while j != 0:\n        j = pi[j]\n        y_parents[j] = None\n    # We have the complete list of parents for x and y back to root.\n    mrca = 0\n    j = 0\n    while x_parents[j] not in y_parents:\n        j += 1\n    mrca = x_parents[j]\n    return mrca\n\n\ndef get_samples(ts, time=None, population=None):\n    samples = []\n    for node in ts.nodes():\n        keep = bool(node.is_sample())\n        if time is not None:\n            if isinstance(time, (int, float)):\n                keep &= np.isclose(node.time, time)\n            if isinstance(time, (tuple, list, np.ndarray)):\n                keep &= node.time >= time[0]\n                keep &= node.time < time[1]\n        if population is not None:\n            keep &= node.population == population\n        if keep:\n            samples.append(node.id)\n    return np.array(samples)\n\n\nclass TestTreeTraversals:\n    def test_bad_traversal_order(self, simple_degree2_ts_fixture):\n        tree = simple_degree2_ts_fixture.first()\n        for bad_order in [\"pre\", \"post\", \"preorderorder\", (\"x\",), b\"preorder\"]:\n            with pytest.raises(ValueError, match=\"Traversal order\"):\n                tree.nodes(order=bad_order)\n\n    @pytest.mark.parametrize(\"order\", list(traversal_map.keys()))\n    def test_returned_types(self, order):\n        ts = msprime.sim_ancestry(2, random_seed=234)\n        tree = ts.first()\n        iterator = tree.nodes(order=order)\n        assert isinstance(iterator, collections.abc.Iterable)\n        lst = list(iterator)\n        assert len(lst) > 0\n        for u in lst:\n            assert isinstance(u, 
int)\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    @pytest.mark.parametrize(\"order\", list(traversal_map.keys()))\n    def test_traversals_virtual_root(self, ts, order):\n        tree = ts.first()\n        node_list2 = list(traversal_map[order](tree, tree.virtual_root))\n        node_list1 = list(tree.nodes(tree.virtual_root, order=order))\n        assert tree.virtual_root in node_list1\n        assert node_list1 == node_list2\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    @pytest.mark.parametrize(\"order\", list(traversal_map.keys()))\n    def test_traversals(self, ts, order):\n        tree = next(ts.trees())\n        traverser = traversal_map[order]\n        node_list1 = list(tree.nodes(order=order))\n        node_list2 = list(traverser(tree))\n        assert node_list1 == node_list2\n\n    def test_binary_example(self):\n        t = tskit.Tree.generate_balanced(5)\n        #     8\n        #  ┏━━┻━┓\n        #  ┃    7\n        #  ┃  ┏━┻┓\n        #  5  ┃  6\n        # ┏┻┓ ┃ ┏┻┓\n        # 0 1 2 3 4\n\n        def f(node=None, order=None):\n            return list(t.nodes(node, order))\n\n        assert f(order=\"preorder\") == [8, 5, 0, 1, 7, 2, 6, 3, 4]\n        assert f(order=\"postorder\") == [0, 1, 5, 2, 3, 4, 6, 7, 8]\n        assert f(order=\"inorder\") == [0, 5, 1, 8, 2, 7, 3, 6, 4]\n        assert f(order=\"levelorder\") == [8, 5, 7, 0, 1, 2, 6, 3, 4]\n        assert f(order=\"breadthfirst\") == [8, 5, 7, 0, 1, 2, 6, 3, 4]\n        assert f(order=\"timeasc\") == [0, 1, 2, 3, 4, 5, 6, 7, 8]\n        assert f(order=\"timedesc\") == [8, 7, 6, 5, 4, 3, 2, 1, 0]\n        assert f(order=\"minlex_postorder\") == [0, 1, 5, 2, 3, 4, 6, 7, 8]\n\n        q = t.virtual_root\n        assert f(q, order=\"preorder\") == [q, 8, 5, 0, 1, 7, 2, 6, 3, 4]\n        assert f(q, order=\"postorder\") == [0, 1, 5, 2, 3, 4, 6, 7, 8, q]\n        assert f(q, order=\"inorder\") == [q, 0, 5, 1, 8, 2, 7, 3, 6, 
4]\n        assert f(q, order=\"levelorder\") == [q, 8, 5, 7, 0, 1, 2, 6, 3, 4]\n        assert f(q, order=\"breadthfirst\") == [q, 8, 5, 7, 0, 1, 2, 6, 3, 4]\n        assert f(q, order=\"timeasc\") == [0, 1, 2, 3, 4, 5, 6, 7, 8, q]\n        assert f(q, order=\"timedesc\") == [q, 8, 7, 6, 5, 4, 3, 2, 1, 0]\n        assert f(q, order=\"minlex_postorder\") == [0, 1, 5, 2, 3, 4, 6, 7, 8, q]\n\n        assert f(7, order=\"preorder\") == [7, 2, 6, 3, 4]\n        assert f(7, order=\"postorder\") == [2, 3, 4, 6, 7]\n        assert f(7, order=\"inorder\") == [2, 7, 3, 6, 4]\n        assert f(7, order=\"levelorder\") == [7, 2, 6, 3, 4]\n        assert f(7, order=\"breadthfirst\") == [7, 2, 6, 3, 4]\n        assert f(7, order=\"timeasc\") == [2, 3, 4, 6, 7]\n        assert f(7, order=\"timedesc\") == [7, 6, 4, 3, 2]\n        assert f(7, order=\"minlex_postorder\") == [2, 3, 4, 6, 7]\n\n    def test_ternary_example(self):\n        t = tskit.Tree.generate_balanced(7, arity=3)\n        #      10\n        #  ┏━━━┳┻━━━┓\n        #  7   8    9\n        # ┏┻┓ ┏┻┓ ┏━╋━┓\n        # 0 1 2 3 4 5 6\n\n        def f(node=None, order=None):\n            return list(t.nodes(node, order))\n\n        assert f(order=\"preorder\") == [10, 7, 0, 1, 8, 2, 3, 9, 4, 5, 6]\n        assert f(order=\"postorder\") == [0, 1, 7, 2, 3, 8, 4, 5, 6, 9, 10]\n        assert f(order=\"inorder\") == [0, 7, 1, 10, 2, 8, 3, 4, 9, 5, 6]\n        assert f(order=\"levelorder\") == [10, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6]\n        assert f(order=\"breadthfirst\") == [10, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6]\n        assert f(order=\"timeasc\") == list(range(11))\n        assert f(order=\"timedesc\") == list(reversed(range(11)))\n        assert f(order=\"minlex_postorder\") == [0, 1, 7, 2, 3, 8, 4, 5, 6, 9, 10]\n\n        q = t.virtual_root\n        assert f(q, order=\"preorder\") == [q, 10, 7, 0, 1, 8, 2, 3, 9, 4, 5, 6]\n        assert f(q, order=\"postorder\") == [0, 1, 7, 2, 3, 8, 4, 5, 6, 9, 10, q]\n        assert f(q, 
order=\"inorder\") == [q, 0, 7, 1, 10, 2, 8, 3, 4, 9, 5, 6]\n        assert f(q, order=\"levelorder\") == [q, 10, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6]\n        assert f(q, order=\"breadthfirst\") == [q, 10, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6]\n        assert f(q, order=\"timeasc\") == list(range(12))\n        assert f(q, order=\"timedesc\") == list(reversed(range(12)))\n        assert f(q, order=\"minlex_postorder\") == [0, 1, 7, 2, 3, 8, 4, 5, 6, 9, 10, q]\n\n        assert f(9, order=\"preorder\") == [9, 4, 5, 6]\n        assert f(9, order=\"postorder\") == [4, 5, 6, 9]\n        assert f(9, order=\"inorder\") == [4, 9, 5, 6]\n        assert f(9, order=\"levelorder\") == [9, 4, 5, 6]\n        assert f(9, order=\"breadthfirst\") == [9, 4, 5, 6]\n        assert f(9, order=\"timeasc\") == [4, 5, 6, 9]\n        assert f(9, order=\"timedesc\") == [9, 6, 5, 4]\n        assert f(9, order=\"minlex_postorder\") == [4, 5, 6, 9]\n\n    def test_multiroot_example(self):\n        tables = tskit.Tree.generate_balanced(7, arity=3).tree_sequence.dump_tables()\n        tables.edges.truncate(len(tables.edges) - 3)\n        t = tables.tree_sequence().first()\n\n        #  7   8    9\n        # ┏┻┓ ┏┻┓ ┏━╋━┓\n        # 0 1 2 3 4 5 6\n        def f(node=None, order=None):\n            return list(t.nodes(node, order))\n\n        assert f(order=\"preorder\") == [7, 0, 1, 8, 2, 3, 9, 4, 5, 6]\n        assert f(order=\"postorder\") == [0, 1, 7, 2, 3, 8, 4, 5, 6, 9]\n        assert f(order=\"inorder\") == [0, 7, 1, 2, 8, 3, 4, 9, 5, 6]\n        assert f(order=\"levelorder\") == [7, 8, 9, 0, 1, 2, 3, 4, 5, 6]\n        assert f(order=\"breadthfirst\") == [7, 8, 9, 0, 1, 2, 3, 4, 5, 6]\n        assert f(order=\"timeasc\") == list(range(10))\n        assert f(order=\"timedesc\") == list(reversed(range(10)))\n        assert f(order=\"minlex_postorder\") == [0, 1, 7, 2, 3, 8, 4, 5, 6, 9]\n\n        q = t.virtual_root\n        assert f(q, order=\"preorder\") == [q, 7, 0, 1, 8, 2, 3, 9, 4, 5, 6]\n        
assert f(q, order=\"postorder\") == [0, 1, 7, 2, 3, 8, 4, 5, 6, 9, q]\n        assert f(q, order=\"inorder\") == [0, 7, 1, q, 2, 8, 3, 4, 9, 5, 6]\n        assert f(q, order=\"levelorder\") == [q, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6]\n        assert f(q, order=\"breadthfirst\") == [q, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6]\n        assert f(q, order=\"timeasc\") == list(range(10)) + [q]\n        assert f(q, order=\"timedesc\") == [q] + list(reversed(range(10)))\n        assert f(q, order=\"minlex_postorder\") == [0, 1, 7, 2, 3, 8, 4, 5, 6, 9, q]\n\n        assert f(9, order=\"preorder\") == [9, 4, 5, 6]\n        assert f(9, order=\"postorder\") == [4, 5, 6, 9]\n        assert f(9, order=\"inorder\") == [4, 9, 5, 6]\n        assert f(9, order=\"levelorder\") == [9, 4, 5, 6]\n        assert f(9, order=\"breadthfirst\") == [9, 4, 5, 6]\n        assert f(9, order=\"minlex_postorder\") == [4, 5, 6, 9]\n        assert f(9, order=\"timeasc\") == [4, 5, 6, 9]\n        assert f(9, order=\"timedesc\") == [9, 6, 5, 4]\n\n    def test_multiroot_non_lexical_example(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time    population  individual  metadata\n        0   1   0.000000    0   -1  b''\n        1   1   0.000000    0   -1  b''\n        2   1   0.000000    0   -1  b''\n        3   1   0.000000    0   -1  b''\n        4   1   0.000000    0   -1  b''\n        5   1   0.000000    0   -1  b''\n        6   1   0.000000    0   -1  b''\n        7   1   0.000000    0   -1  b''\n        8   1   0.000000    0   -1  b''\n        9   1   0.000000    0   -1  b''\n        10  0   0.047734    0   -1  b''\n        11  0   0.061603    0   -1  b''\n        12  0   0.189503    0   -1  b''\n        13  0   0.275885    0   -1  b''\n        14  0   0.518301    0   -1  b''\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.000000    10000.000000    10  0\n        0.000000    10000.000000    10  2\n  
      0.000000    10000.000000    11  9\n        0.000000    10000.000000    11  10\n        0.000000    10000.000000    12  3\n        0.000000    10000.000000    12  7\n        0.000000    10000.000000    13  5\n        0.000000    10000.000000    13  11\n        0.000000    10000.000000    14  1\n        0.000000    10000.000000    14  8\n        \"\"\"\n        )\n        ts = tskit.load_text(\n            nodes, edges, sequence_length=10000, strict=False, base64_metadata=False\n        )\n        t = ts.first()\n\n        # Note: this is drawn out in \"tree\" order.\n        #                 14\n        #                 ┏┻┓\n        #          13     ┃ ┃\n        #         ┏━┻━┓   ┃ ┃\n        #     12  ┃   ┃   ┃ ┃\n        #     ┏┻┓ ┃   ┃   ┃ ┃\n        #     ┃ ┃ ┃  11   ┃ ┃\n        #     ┃ ┃ ┃ ┏━┻┓  ┃ ┃\n        #     ┃ ┃ ┃ ┃ 10  ┃ ┃\n        #     ┃ ┃ ┃ ┃ ┏┻┓ ┃ ┃\n        # 4 6 3 7 5 9 0 2 1 8\n\n        def f(node=None, order=None):\n            return list(t.nodes(node, order))\n\n        pre = f(order=\"preorder\")\n        post = f(order=\"postorder\")\n        inord = f(order=\"inorder\")\n        level = f(order=\"levelorder\")\n        breadth = f(order=\"breadthfirst\")\n        timeasc = f(order=\"timeasc\")\n        timedesc = f(order=\"timedesc\")\n        minlex = f(order=\"minlex_postorder\")\n        assert pre == [4, 6, 12, 3, 7, 13, 5, 11, 9, 10, 0, 2, 14, 1, 8]\n        assert post == [4, 6, 3, 7, 12, 5, 9, 0, 2, 10, 11, 13, 1, 8, 14]\n        assert inord == [4, 6, 3, 12, 7, 5, 13, 9, 11, 0, 10, 2, 1, 14, 8]\n        assert level == [4, 6, 12, 13, 14, 3, 7, 5, 11, 1, 8, 9, 10, 0, 2]\n        assert breadth == [4, 6, 12, 13, 14, 3, 7, 5, 11, 1, 8, 9, 10, 0, 2]\n        assert timeasc == list(range(15))\n        assert timedesc == list(reversed(range(15)))\n\n        # And the minlex tree:\n        #         14\n        #         ┏┻┓\n        #    13   ┃ ┃\n        #   ┏━┻━┓ ┃ ┃\n        #   ┃   ┃ ┃ ┃ 12\n        #   ┃   ┃ ┃ ┃ ┏┻┓\n       
 #  11   ┃ ┃ ┃ ┃ ┃\n        #  ┏┻━┓ ┃ ┃ ┃ ┃ ┃\n        # 10  ┃ ┃ ┃ ┃ ┃ ┃\n        # ┏┻┓ ┃ ┃ ┃ ┃ ┃ ┃\n        # 0 2 9 5 1 8 3 7 4 6\n        assert minlex == [0, 2, 10, 9, 11, 5, 13, 1, 8, 14, 3, 7, 12, 4, 6]\n\n    @pytest.mark.parametrize(\n        [\"order\", \"expected\"],\n        [\n            (\"preorder\", [[9, 6, 2, 3, 7, 4, 5, 0, 1], [10, 4, 8, 5, 0, 1, 6, 2, 3]]),\n            (\"inorder\", [[2, 6, 3, 9, 4, 7, 0, 5, 1], [4, 10, 0, 5, 1, 8, 2, 6, 3]]),\n            (\"postorder\", [[2, 3, 6, 4, 0, 1, 5, 7, 9], [4, 0, 1, 5, 2, 3, 6, 8, 10]]),\n            (\"levelorder\", [[9, 6, 7, 2, 3, 4, 5, 0, 1], [10, 4, 8, 5, 6, 0, 1, 2, 3]]),\n            (\n                \"breadthfirst\",\n                [[9, 6, 7, 2, 3, 4, 5, 0, 1], [10, 4, 8, 5, 6, 0, 1, 2, 3]],\n            ),\n            (\"timeasc\", [[0, 1, 2, 3, 4, 5, 6, 7, 9], [0, 1, 2, 3, 4, 5, 6, 8, 10]]),\n            (\"timedesc\", [[9, 7, 6, 5, 4, 3, 2, 1, 0], [10, 8, 6, 5, 4, 3, 2, 1, 0]]),\n            (\n                \"minlex_postorder\",\n                [[0, 1, 5, 4, 7, 2, 3, 6, 9], [0, 1, 5, 2, 3, 6, 8, 4, 10]],\n            ),\n        ],\n    )\n    def test_ts_example(self, order, expected):\n        # 1.20┊           ┊  10       ┊\n        #     ┊           ┊ ┏━┻━━┓    ┊\n        # 0.90┊     9     ┊ ┃    ┃    ┊\n        #     ┊  ┏━━┻━┓   ┊ ┃    ┃    ┊\n        # 0.60┊  ┃    ┃   ┊ ┃    8    ┊\n        #     ┊  ┃    ┃   ┊ ┃  ┏━┻━┓  ┊\n        # 0.44┊  ┃    7   ┊ ┃  ┃   ┃  ┊\n        #     ┊  ┃  ┏━┻┓  ┊ ┃  ┃   ┃  ┊\n        # 0.21┊  6  ┃  ┃  ┊ ┃  ┃   6  ┊\n        #     ┊ ┏┻┓ ┃  ┃  ┊ ┃  ┃  ┏┻┓ ┊\n        # 0.15┊ ┃ ┃ ┃  5  ┊ ┃  5  ┃ ┃ ┊\n        #     ┊ ┃ ┃ ┃ ┏┻┓ ┊ ┃ ┏┻┓ ┃ ┃ ┊\n        # 0.00┊ 2 3 4 0 1 ┊ 4 0 1 2 3 ┊\n        #   0.00        0.50        1.00\n        nodes = \"\"\"\\\n        id      is_sample   population      time\n        0       1       0               0.00000000000000\n        1       1       0               0.00000000000000\n        2       1       0               
0.00000000000000\n        3       1       0               0.00000000000000\n        4       1       0               0.00000000000000\n        5       0       0               0.14567111023387\n        6       0       0               0.21385545626353\n        7       0       0               0.43508024345063\n        8       0       0               0.60156352971203\n        9       0       0               0.90000000000000\n        10      0       0               1.20000000000000\n        \"\"\"\n        edges = \"\"\"\\\n        id      left            right           parent  child\n        0       0.00000000      1.00000000      5       0,1\n        1       0.00000000      1.00000000      6       2,3\n        2       0.00000000      0.50000000      7       4,5\n        3       0.50000000      1.00000000      8       5,6\n        4       0.00000000      0.50000000      9       6,7\n        5       0.50000000      1.00000000      10      4,8\n        \"\"\"\n        ts = tskit.load_text(\n            nodes=io.StringIO(nodes), edges=io.StringIO(edges), strict=False\n        )\n        tree_orders = [list(tree.nodes(order=order)) for tree in ts.trees()]\n        assert tree_orders == expected\n\n    def test_polytomy_inorder(self):\n        \"\"\"\n        If there are N children, current inorder traversal first visits\n        floor(N/2) children, then the parent, then the remaining children.\n        Here we explicitly test that behaviour.\n        \"\"\"\n        #\n        #    __4__\n        #   / / \\ \\\n        #  0 1   2 3\n        #\n        nodes_polytomy_4 = \"\"\"\\\n        id      is_sample   population      time\n        0       1       0               0.00000000000000\n        1       1       0               0.00000000000000\n        2       1       0               0.00000000000000\n        3       1       0               0.00000000000000\n        4       0       0               1.00000000000000\n        \"\"\"\n        edges_polytomy_4 = \"\"\"\\\n      
  id      left            right           parent  child\n        0       0.00000000      1.00000000      4       0,1,2,3\n        \"\"\"\n        #\n        #    __5__\n        #   / /|\\ \\\n        #  0 1 2 3 4\n        #\n        nodes_polytomy_5 = \"\"\"\\\n        id      is_sample   population      time\n        0       1       0               0.00000000000000\n        1       1       0               0.00000000000000\n        2       1       0               0.00000000000000\n        3       1       0               0.00000000000000\n        4       1       0               0.00000000000000\n        5       0       0               1.00000000000000\n        \"\"\"\n        edges_polytomy_5 = \"\"\"\\\n        id      left            right           parent  child\n        0       0.00000000      1.00000000      5       0,1,2,3,4\n        \"\"\"\n        for nodes_string, edges_string, expected_result in [\n            [nodes_polytomy_4, edges_polytomy_4, [[0, 1, 4, 2, 3]]],\n            [nodes_polytomy_5, edges_polytomy_5, [[0, 1, 5, 2, 3, 4]]],\n        ]:\n            ts = tskit.load_text(\n                nodes=io.StringIO(nodes_string),\n                edges=io.StringIO(edges_string),\n                strict=False,\n            )\n            tree_orders = []\n            for tree in ts.trees():\n                tree_orders.append(list(tree.nodes(order=\"inorder\")))\n            assert tree_orders == expected_result\n\n    def test_minlex_postorder_multiple_roots(self):\n        #\n        #    10    8     9     11\n        #   / \\   / \\   / \\   / \\\n        #  5   3 2   4 6   7 1   0\n        #\n        nodes_string = \"\"\"\\\n        id      is_sample   population      time\n        0       1       0               0.00000000000000\n        1       1       0               0.00000000000000\n        2       1       0               0.00000000000000\n        3       1       0               0.00000000000000\n        4       1       0               
0.00000000000000\n        5       1       0               0.00000000000000\n        6       1       0               0.00000000000000\n        7       1       0               0.00000000000000\n        8       0       0               1.00000000000000\n        9       0       0               1.00000000000000\n        10      0       0               1.00000000000000\n        11      0       0               1.00000000000000\n        \"\"\"\n        edges_string = \"\"\"\\\n        id      left            right           parent  child\n        0       0.00000000      1.00000000      8       2,4\n        1       0.00000000      1.00000000      9       6,7\n        2       0.00000000      1.00000000      10      5,3\n        3       0.00000000      1.00000000      11      1,0\n        \"\"\"\n        expected_result = [[0, 1, 11, 2, 4, 8, 3, 5, 10, 6, 7, 9]]\n        ts = tskit.load_text(\n            nodes=io.StringIO(nodes_string),\n            edges=io.StringIO(edges_string),\n            strict=False,\n        )\n        tree_orders = []\n        for tree in ts.trees():\n            tree_orders.append(list(tree.nodes(order=\"minlex_postorder\")))\n        assert tree_orders == expected_result\n\n\nclass TestMRCA:\n    \"\"\"\n    Test both the tree.mrca and tree.tmrca methods.\n    \"\"\"\n\n    t = tskit.Tree.generate_balanced(3)\n    #  4\n    # ┏━┻┓\n    # ┃  3\n    # ┃ ┏┻┓\n    # 0 1 2\n\n    @pytest.mark.parametrize(\"args, expected\", [((2, 1), 3), ((0, 1, 2), 4)])\n    def test_two_or_more_args(self, args, expected):\n        assert self.t.mrca(*args) == expected\n        assert self.t.tmrca(*args) == self.t.tree_sequence.nodes_time[expected]\n\n    def test_less_than_two_args(self):\n        with pytest.raises(ValueError):\n            self.t.mrca(1)\n        with pytest.raises(ValueError):\n            self.t.tmrca(1)\n\n    def test_no_args(self):\n        with pytest.raises(ValueError):\n            self.t.mrca()\n        with pytest.raises(ValueError):\n    
        self.t.tmrca()\n\n    def test_same_args(self):\n        assert self.t.mrca(0, 0, 0, 0) == 0\n        assert self.t.tmrca(0, 0, 0, 0) == self.t.tree_sequence.nodes_time[0]\n\n    def test_different_tree_levels(self):\n        assert self.t.mrca(0, 3) == 4\n        assert self.t.tmrca(0, 3) == self.t.tree_sequence.nodes_time[4]\n\n    def test_out_of_bounds_args(self):\n        with pytest.raises(ValueError):\n            self.t.mrca(0, 6)\n        with pytest.raises(ValueError):\n            self.t.tmrca(0, 6)\n\n    def test_virtual_root_arg(self):\n        assert self.t.mrca(0, 5) == 5\n        assert np.isposinf(self.t.tmrca(0, 5))\n\n    def test_multiple_roots(self):\n        ts = tskit.Tree.generate_balanced(10).tree_sequence\n        ts = ts.delete_intervals([ts.first().interval])\n        assert ts.first().mrca(*ts.samples()) == tskit.NULL\n        # We decided to raise an error for tmrca here, rather than report inf\n        # see https://github.com/tskit-dev/tskit/issues/2801\n        with pytest.raises(ValueError, match=\"do not share a common ancestor\"):\n            ts.first().tmrca(0, 6)\n\n\nclass TestPathLength:\n    t = tskit.Tree.generate_balanced(9)\n    #         16\n    #    ┏━━━━┻━━━┓\n    #    ┃       15\n    #    ┃     ┏━━┻━┓\n    #   11     ┃   14\n    #  ┏━┻━┓   ┃  ┏━┻┓\n    #  9  10  12  ┃ 13\n    # ┏┻┓ ┏┻┓ ┏┻┓ ┃ ┏┻┓\n    # 0 1 2 3 4 5 6 7 8\n\n    def test_tmrca_leaf(self):\n        assert self.t.path_length(0, 16) == 3\n        assert self.t.path_length(16, 0) == 3\n        assert self.t.path_length(7, 16) == 4\n\n    def test_equal_depth(self):\n        assert self.t.path_length(5, 16) == self.t.depth(5)\n\n    def test_two_leaves(self):\n        assert self.t.path_length(0, 8) == 7\n\n    def test_two_leaves_depth(self):\n        assert self.t.path_length(0, 8) == self.t.depth(0) + self.t.depth(8)\n\n    @pytest.mark.parametrize(\"args\", [[], [1], [1, 2, 3]])\n    def test_bad_num_args(self, args):\n        with 
pytest.raises(TypeError):\n            self.t.path_length(*args)\n\n    @pytest.mark.parametrize(\"bad_arg\", [[], \"1\"])\n    def test_bad_arg_type(self, bad_arg):\n        with pytest.raises(TypeError):\n            self.t.path_length(0, bad_arg)\n        with pytest.raises(TypeError):\n            self.t.path_length(bad_arg, 0)\n\n    def test_same_args(self):\n        assert self.t.path_length(10, 10) == 0\n\n    def test_different_tree_levels(self):\n        assert self.t.path_length(1, 10) == 3\n\n    def test_out_of_bounds_args(self):\n        with pytest.raises(ValueError):\n            self.t.path_length(0, 20)\n\n    @pytest.mark.parametrize(\"u\", range(17))\n    def test_virtual_root_arg(self, u):\n        assert self.t.path_length(u, self.t.virtual_root) == self.t.depth(u) + 1\n        assert self.t.path_length(self.t.virtual_root, u) == self.t.depth(u) + 1\n\n    def test_both_args_virtual_root(self):\n        assert self.t.path_length(self.t.virtual_root, self.t.virtual_root) == 0\n\n    def test_no_mrca(self):\n        tree = self.t.copy()\n        tree.clear()\n        assert math.isinf(tree.path_length(0, 1))\n\n\nclass TestMRCACalculator:\n    \"\"\"\n    Class to test the Schieber-Vishkin algorithm.\n\n    These tests are included here as we use the MRCA calculator below in\n    our tests.\n    \"\"\"\n\n    def test_all_oriented_forests(self):\n        # Runs through all possible oriented forests and checks all possible\n        # node pairs using an inferior algorithm.\n        for n in range(2, 9):\n            for pi in oriented_forests(n):\n                sv = tests.MRCACalculator(pi)\n                for j in range(1, n + 1):\n                    for k in range(1, j + 1):\n                        mrca = get_mrca(pi, j, k)\n                        assert mrca == sv.get_mrca(j, k)\n\n\nclass HighLevelTestCase:\n    \"\"\"\n    Superclass of tests on the high level interface.\n    \"\"\"\n\n    def verify_tree_mrcas(self, st):\n        # 
Check the mrcas\n        oriented_forest = [st.get_parent(j) for j in range(st.tree_sequence.num_nodes)]\n        mrca_calc = tests.MRCACalculator(oriented_forest)\n        # We've done exhaustive tests elsewhere, no need to go\n        # through the combinations.\n        for j in range(st.tree_sequence.num_nodes):\n            mrca = st.get_mrca(0, j)\n            assert mrca == mrca_calc.get_mrca(0, j)\n            if mrca != tskit.NULL:\n                assert st.get_time(mrca) == st.get_tmrca(0, j)\n\n    def verify_tree_branch_lengths(self, tree):\n        for u in tree.tree_sequence.samples():\n            while tree.parent(u) != tskit.NULL:\n                length = tree.time(tree.parent(u)) - tree.time(u)\n                assert length > 0.0\n                assert tree.branch_length(u) == length\n                u = tree.parent(u)\n            assert tree.parent(u) == tskit.NULL\n            assert tree.branch_length(u) == 0\n\n    def verify_tree_structure(self, st):\n        roots = set()\n        for u in st.samples():\n            # verify the path to root\n            assert st.is_sample(u)\n            times = []\n            while st.get_parent(u) != tskit.NULL:\n                v = st.get_parent(u)\n                times.append(st.get_time(v))\n                assert st.get_time(v) >= 0.0\n                assert u in st.get_children(v)\n                u = v\n            roots.add(u)\n            assert times == sorted(times)\n        assert sorted(list(roots)) == sorted(st.roots)\n        assert len(st.roots) == st.num_roots\n        u = st.left_root\n        roots = []\n        while u != tskit.NULL:\n            roots.append(u)\n            u = st.right_sib(u)\n        assert roots == st.roots\n        # Do a top-down traversal, and make sure we meet all the samples.\n        samples = []\n        for root in st.roots:\n            stack = [root]\n            while len(stack) > 0:\n                u = stack.pop()\n                assert u != 
tskit.NULL\n                if st.is_sample(u):\n                    samples.append(u)\n                if st.is_leaf(u):\n                    assert len(st.get_children(u)) == 0\n                else:\n                    for c in reversed(st.get_children(u)):\n                        stack.append(c)\n                # Check that we get the correct number of samples at each\n                # node.\n                assert st.get_num_samples(u) == len(list(st.samples(u)))\n                assert st.get_num_tracked_samples(u) == 0\n        assert sorted(samples) == sorted(st.samples())\n        # Check the parent dict\n        pi = st.get_parent_dict()\n        for root in st.roots:\n            assert root not in pi\n        for k, v in pi.items():\n            assert st.get_parent(k) == v\n        assert st.num_samples() == len(samples)\n        assert sorted(st.samples()) == sorted(samples)\n\n    def verify_tree_depths(self, st):\n        for root in st.roots:\n            stack = [(root, 0)]\n            while len(stack) > 0:\n                u, depth = stack.pop()\n                assert st.depth(u) == depth\n                for c in st.children(u):\n                    stack.append((c, depth + 1))\n\n    def verify_tree(self, st):\n        self.verify_tree_mrcas(st)\n        self.verify_tree_branch_lengths(st)\n        self.verify_tree_structure(st)\n        self.verify_tree_depths(st)\n\n    def verify_trees(self, ts):\n        pts = tests.PythonTreeSequence(ts)\n        iter1 = ts.trees()\n        iter2 = pts.trees()\n        length = 0\n        num_trees = 0\n        breakpoints = [0]\n        for st1, st2 in zip(iter1, iter2):\n            assert st1.get_sample_size() == ts.get_sample_size()\n            roots = set()\n            for u in ts.samples():\n                root = u\n                while st1.get_parent(root) != tskit.NULL:\n                    root = st1.get_parent(root)\n                roots.add(root)\n            assert st1.left_root == 
st2.left_root\n            assert sorted(list(roots)) == sorted(st1.roots)\n            assert st1.roots == st2.roots\n            if len(roots) == 0:\n                assert st1.root == tskit.NULL\n            elif len(roots) == 1:\n                assert st1.root == list(roots)[0]\n            else:\n                with pytest.raises(ValueError):\n                    _ = st1.root\n            assert st2 == st1\n            assert not (st2 != st1)\n            left, right = st1.get_interval()\n            breakpoints.append(right)\n            assert left == pytest.approx(length)\n            assert left >= 0\n            assert right > left\n            assert right <= ts.get_sequence_length()\n            length += right - left\n            self.verify_tree(st1)\n            num_trees += 1\n        with pytest.raises(StopIteration):\n            next(iter1)\n        with pytest.raises(StopIteration):\n            next(iter2)\n        assert ts.get_num_trees() == num_trees\n        assert breakpoints == list(ts.breakpoints())\n        assert length == pytest.approx(ts.get_sequence_length())\n\n\nclass TestNumpySamples:\n    \"\"\"\n    Tests that we correctly handle samples as numpy arrays when passed to\n    various methods.\n    \"\"\"\n\n    def get_tree_sequence(self, num_demes=4, times=None, n=40):\n        if times is None:\n            times = [0]\n        return msprime.simulate(\n            samples=[\n                msprime.Sample(time=t, population=j % num_demes)\n                for j in range(n)\n                for t in times\n            ],\n            population_configurations=[\n                msprime.PopulationConfiguration() for _ in range(num_demes)\n            ],\n            migration_matrix=[\n                [int(j != k) for j in range(num_demes)] for k in range(num_demes)\n            ],\n            random_seed=1,\n            mutation_rate=10,\n        )\n\n    def test_samples(self):\n        d = 4\n        ts = 
self.get_tree_sequence(d)\n        assert np.array_equal(ts.samples(), np.arange(ts.num_samples, dtype=np.int32))\n        total = 0\n        for pop in range(d):\n            subsample = ts.samples(pop)\n            total += subsample.shape[0]\n            assert np.array_equal(subsample, ts.samples(population=pop))\n            assert list(subsample) == [\n                node.id\n                for node in ts.nodes()\n                if node.population == pop and node.is_sample()\n            ]\n        assert total == ts.num_samples\n\n    @pytest.mark.parametrize(\"pop\", [\"string\", \"\", \"0\", np.arange(2), 0.0, 0.5, np.nan])\n    def test_bad_samples(self, pop):\n        ts = tskit.Tree.generate_balanced(4).tree_sequence\n        with pytest.raises(ValueError, match=\"must be an integer ID\"):\n            ts.samples(population=pop)\n\n    @pytest.mark.parametrize(\"pop\", [0, np.int32(0), np.int64(0), np.uint32(0)])\n    def test_good_samples(self, pop):\n        ts = msprime.sim_ancestry(2)\n        assert np.array_equiv(ts.samples(population=pop), ts.samples())\n\n    @pytest.mark.parametrize(\"time\", [0, 0.1, 1 / 3, 1 / 4, 5 / 7])\n    def test_samples_time(self, time):\n        ts = self.get_tree_sequence(num_demes=2, n=20, times=[time, 0.2, 1, 15])\n        assert np.array_equal(get_samples(ts, time=time), ts.samples(time=time))\n        for population in (None, 0):\n            assert np.array_equal(\n                get_samples(ts, time=time, population=population),\n                ts.samples(time=time, population=population),\n            )\n\n    @pytest.mark.parametrize(\n        \"time_interval\",\n        [\n            [0, 0.1],\n            (0, 1 / 3),\n            np.array([1 / 4, 2 / 3]),\n            (0.345, 5 / 7),\n            (-1, 1),\n        ],\n    )\n    def test_samples_time_interval(self, time_interval):\n        rng = np.random.default_rng(seed=931)\n        times = rng.uniform(low=time_interval[0], high=2 * 
time_interval[1], size=20)\n        ts = self.get_tree_sequence(num_demes=2, n=1, times=times)\n        assert np.array_equal(\n            get_samples(ts, time=time_interval),\n            ts.samples(time=time_interval),\n        )\n        for population in (None, 0):\n            assert np.array_equal(\n                get_samples(ts, time=time_interval, population=population),\n                ts.samples(time=time_interval, population=population),\n            )\n\n    def test_samples_example(self):\n        tables = tskit.TableCollection(sequence_length=10)\n        time = [0, 0, 1, 1, 1, 3, 3.00001, 3.0 - 0.0001, 1 / 3]\n        pops = [1, 3, 1, 2, 1, 1, 1, 3, 1]\n        for _ in range(max(pops) + 1):\n            tables.populations.add_row()\n        for t, p in zip(time, pops):\n            tables.nodes.add_row(\n                flags=tskit.NODE_IS_SAMPLE,\n                time=t,\n                population=p,\n            )\n        # add not-samples also\n        for t, p in zip(time, pops):\n            tables.nodes.add_row(\n                flags=0,\n                time=t,\n                population=p,\n            )\n        ts = tables.tree_sequence()\n        assert np.array_equal(\n            ts.samples(),\n            np.arange(len(time)),\n        )\n        assert np.array_equal(\n            ts.samples(time=[0, np.inf]),\n            np.arange(len(time)),\n        )\n        assert np.array_equal(\n            ts.samples(time=0),\n            [0, 1],\n        )\n        # default tolerance is 1e-5\n        assert np.array_equal(\n            ts.samples(time=0.3333333),\n            [8],\n        )\n        assert np.array_equal(\n            ts.samples(time=3),\n            [5, 6],\n        )\n        assert np.array_equal(\n            ts.samples(time=1),\n            [2, 3, 4],\n        )\n        assert np.array_equal(\n            ts.samples(time=1, population=2),\n            [3],\n        )\n        assert np.array_equal(\n           
 ts.samples(population=0),\n            [],\n        )\n        assert np.array_equal(\n            ts.samples(population=1),\n            [0, 2, 4, 5, 6, 8],\n        )\n        assert np.array_equal(\n            ts.samples(population=2),\n            [3],\n        )\n        assert np.array_equal(\n            ts.samples(time=[0, 3]),\n            [0, 1, 2, 3, 4, 7, 8],\n        )\n        # note tuple instead of array\n        assert np.array_equal(\n            ts.samples(time=(1, 3)),\n            [2, 3, 4, 7],\n        )\n        assert np.array_equal(\n            ts.samples(time=[0, 3], population=1),\n            [0, 2, 4, 8],\n        )\n        assert np.array_equal(\n            ts.samples(time=[0.333333, 3]),\n            [2, 3, 4, 7, 8],\n        )\n        assert np.array_equal(\n            ts.samples(time=[100, np.inf]),\n            [],\n        )\n        assert np.array_equal(\n            ts.samples(time=-1),\n            [],\n        )\n        assert np.array_equal(\n            ts.samples(time=[-100, 100]),\n            np.arange(len(time)),\n        )\n        assert np.array_equal(\n            ts.samples(time=[-100, -1]),\n            [],\n        )\n\n    def test_samples_time_errors(self):\n        ts = self.get_tree_sequence(4)\n        # error incorrect types\n        with pytest.raises(ValueError):\n            ts.samples(time=\"s\")\n        with pytest.raises(ValueError):\n            ts.samples(time=[])\n        with pytest.raises(ValueError):\n            ts.samples(time=np.array([1, 2, 3]))\n        with pytest.raises(ValueError):\n            ts.samples(time=(1, 2, 3))\n        # error using min and max switched\n        with pytest.raises(ValueError):\n            ts.samples(time=(2.4, 1))\n\n    def test_samples_args(self, ts_fixture):\n        ts_fixture.samples(1)\n        with pytest.raises(TypeError, match=\"takes from 1 to 2 positional arguments\"):\n            ts_fixture.samples(1, 2)\n\n    def 
test_genotype_matrix_indexing(self):\n        num_demes = 4\n        ts = self.get_tree_sequence(num_demes)\n        G = ts.genotype_matrix()\n        for d in range(num_demes):\n            samples = ts.samples(population=d)\n            total = 0\n            for tree in ts.trees(tracked_samples=samples):\n                for mutation in tree.mutations():\n                    total += tree.num_tracked_samples(mutation.node)\n            assert total == np.sum(G[:, samples])\n\n    def test_genotype_indexing(self):\n        num_demes = 6\n        ts = self.get_tree_sequence(num_demes)\n        for d in range(num_demes):\n            samples = ts.samples(population=d)\n            total = 0\n            for tree in ts.trees(tracked_samples=samples):\n                for mutation in tree.mutations():\n                    total += tree.num_tracked_samples(mutation.node)\n            other_total = 0\n            for variant in ts.variants():\n                other_total += np.sum(variant.genotypes[samples])\n            assert total == other_total\n\n    def test_pairwise_diversity(self):\n        num_demes = 6\n        ts = self.get_tree_sequence(num_demes)\n        pi1 = ts.pairwise_diversity(ts.samples())\n        pi2 = ts.pairwise_diversity()\n        assert pi1 == pi2\n        for d in range(num_demes):\n            samples = ts.samples(population=d)\n            pi1 = ts.pairwise_diversity(samples)\n            pi2 = ts.pairwise_diversity(list(samples))\n            assert pi1 == pi2\n\n    def test_simplify(self):\n        num_demes = 3\n        ts = self.get_tree_sequence(num_demes)\n        sts = ts.simplify(samples=ts.samples())\n        assert ts.num_samples == sts.num_samples\n        for d in range(num_demes):\n            samples = ts.samples(population=d)\n            sts = ts.simplify(samples=samples)\n            assert sts.num_samples == samples.shape[0]\n\n\nclass TestTreeSequence(HighLevelTestCase):\n    \"\"\"\n    Tests for the tree sequence 
object.\n    \"\"\"\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_row_getter(self, ts):\n        for table_name, table in ts.tables_dict.items():\n            sequence = getattr(ts, table_name)()\n            element_name = table_name[:-1]  # cut off the \"s\": \"edges\" -> \"edge\"\n            element_accessor = getattr(ts, element_name)\n            for i, n in enumerate(sequence):\n                assert element_accessor(i) == n\n                assert element_accessor(-(table.num_rows - i)) == n\n            with pytest.raises(IndexError):\n                element_accessor(table.num_rows)\n            with pytest.raises(IndexError):\n                element_accessor(-(table.num_rows + 1))\n\n    @pytest.mark.parametrize(\"index\", [0.1, float(0), None, np.array([0, 1]), np.inf])\n    def test_bad_row_getter(self, index, simple_degree2_ts_fixture):\n        for table_name in simple_degree2_ts_fixture.tables_dict.keys():\n            element_name = table_name[:-1]  # cut off the \"s\": \"edges\" -> \"edge\"\n            element_accessor = getattr(simple_degree2_ts_fixture, element_name)\n            if element_name == \"site\" and index is None:\n                # special case\n                match = \"id or position must be provided\"\n            else:\n                match = \"integer type\"\n            with pytest.raises(TypeError, match=match):\n                element_accessor(index)\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_discrete_genome(self, ts):\n        def is_discrete(a):\n            return np.all(np.floor(a) == a)\n\n        tables = ts.tables\n        discrete_genome = (\n            is_discrete([tables.sequence_length])\n            and is_discrete(tables.edges.left)\n            and is_discrete(tables.edges.right)\n            and is_discrete(tables.sites.position)\n            and is_discrete(tables.migrations.left)\n            and 
is_discrete(tables.migrations.right)\n        )\n        assert ts.discrete_genome == discrete_genome\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_discrete_time(self, ts):\n        def is_discrete(a):\n            return np.all(np.logical_or(np.floor(a) == a, tskit.is_unknown_time(a)))\n\n        tables = ts.tables\n        discrete_time = (\n            is_discrete(tables.nodes.time)\n            and is_discrete(tables.mutations.time)\n            and is_discrete(tables.migrations.time)\n        )\n        assert ts.discrete_time == discrete_time\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_trees(self, ts):\n        self.verify_trees(ts)\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_mutations(self, ts):\n        self.verify_mutations(ts)\n\n    @pytest.mark.skipif(not _tskit.HAS_NUMPY_2, reason=\"Requires NumPy 2.0 or higher\")\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_mutation_inherited_state_property(self, ts):\n        inherited_states = ts.mutations_inherited_state\n        for mut in ts.mutations():\n            expected = inherited_states[mut.id]\n            actual = mut.inherited_state\n            assert actual == expected\n\n            if mut.parent == tskit.NULL:\n                expected_direct = ts.site(mut.site).ancestral_state\n            else:\n                expected_direct = ts.mutation(mut.parent).derived_state\n            assert actual == expected_direct\n\n    def verify_pairwise_diversity(self, ts):\n        haplotypes = ts.genotype_matrix(isolated_as_missing=False).T\n        if ts.num_samples == 0:\n            with pytest.raises(ValueError, match=\"at least one element\"):\n                ts.get_pairwise_diversity()\n            return\n        pi1 = ts.get_pairwise_diversity()\n        pi2 = simple_get_pairwise_diversity(haplotypes)\n        assert pi1 
== pytest.approx(pi2)\n        assert pi1 >= 0.0\n        assert not math.isnan(pi1)\n        # Check for a subsample.\n        num_samples = ts.get_sample_size() // 2 + 1\n        samples = list(ts.samples())[:num_samples]\n        pi1 = ts.get_pairwise_diversity(samples)\n        pi2 = simple_get_pairwise_diversity([haplotypes[j] for j in range(num_samples)])\n        assert pi1 == pytest.approx(pi2)\n        assert pi1 >= 0.0\n        assert not math.isnan(pi1)\n\n    @pytest.mark.slow\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_pairwise_diversity(self, ts):\n        self.verify_pairwise_diversity(ts)\n\n    @pytest.mark.parametrize(\"order\", [\"abc\", 0, 1, False])\n    def test_bad_node_iteration_order(self, order):\n        ts = tskit.TableCollection(1).tree_sequence()\n        with pytest.raises(ValueError, match=\"order\"):\n            ts.nodes(order=order)\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_node_iteration_order(self, ts):\n        order = [n.id for n in ts.nodes()]\n        assert order == list(range(ts.num_nodes))\n        order = [n.id for n in ts.nodes(order=\"id\")]\n        assert order == list(range(ts.num_nodes))\n        order = np.array([n.id for n in ts.nodes(order=\"timeasc\")], dtype=int)\n        assert np.all(ts.nodes_time[order] == np.sort(ts.nodes_time))\n        # Check it conforms to the order of parents in the edge table\n        parent_only_order = order[np.isin(order, ts.edges_parent)]\n        edge_parents = np.concatenate(\n            (ts.edges_parent[:-1][np.diff(ts.edges_parent) != 0], ts.edges_parent[-1:])\n        )\n        assert np.all(parent_only_order == edge_parents)\n\n    def verify_edgesets(self, ts):\n        \"\"\"\n        Verifies that the edgesets we return are equivalent to the original edges.\n        \"\"\"\n        new_edges = []\n        for edgeset in ts.edgesets():\n            assert edgeset.children == 
sorted(edgeset.children)\n            assert len(edgeset.children) > 0\n            for child in edgeset.children:\n                new_edges.append(\n                    tskit.Edge(edgeset.left, edgeset.right, edgeset.parent, child)\n                )\n        # squash the edges.\n        t = ts.tables.nodes.time\n        new_edges.sort(key=lambda e: (t[e.parent], e.parent, e.child, e.left))\n\n        squashed = []\n        if len(new_edges) > 0:\n            last_e = new_edges[0]\n            for e in new_edges[1:]:\n                condition = (\n                    e.parent != last_e.parent\n                    or e.child != last_e.child\n                    or e.left != last_e.right\n                )\n                if condition:\n                    squashed.append(last_e)\n                    last_e = e\n                last_e.right = e.right\n            squashed.append(last_e)\n            # reset the IDs\n            for i, e in enumerate(squashed):\n                e.id = i\n        edges = list(ts.edges())\n        assert len(squashed) == len(edges)\n        assert edges == squashed\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_edge_ids(self, ts):\n        for index, edge in enumerate(ts.edges()):\n            assert edge.id == index\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_edge_span_property(self, ts):\n        for edge in ts.edges():\n            assert edge.span == edge.right - edge.left\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_edge_interval_property(self, ts):\n        for edge in ts.edges():\n            assert edge.interval == (edge.left, edge.right)\n        if ts.num_trees == 1 and ts.num_edges > 0:\n            for edge in ts.edges():\n                assert edge.interval == ts.first().interval\n\n    def test_edgesets(self):\n        tested = False\n        # We manually loop in this test to test the 
example tree sequences are working\n        for ts in tsutil.get_example_tree_sequences(pytest_params=False):\n            # Can't get edgesets with metadata\n            if ts.tables.edges.metadata_schema == tskit.MetadataSchema(None):\n                self.verify_edgesets(ts)\n                tested = True\n        assert tested\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_breakpoints(self, ts):\n        breakpoints = ts.breakpoints(as_array=True)\n        assert breakpoints.shape == (ts.num_trees + 1,)\n        other = np.fromiter(iter([0] + [t.interval.right for t in ts.trees()]), float)\n        assert np.array_equal(other, breakpoints)\n        # in case downstream code has\n        for j, x in enumerate(ts.breakpoints()):\n            assert breakpoints[j] == x\n            assert isinstance(x, float)\n        assert j == ts.num_trees\n\n    def verify_coalescence_records(self, ts):\n        \"\"\"\n        Checks that the coalescence records we output are correct.\n        \"\"\"\n        edgesets = list(ts.edgesets())\n        records = list(ts.records())\n        assert len(edgesets) == len(records)\n        for edgeset, record in zip(edgesets, records):\n            assert edgeset.left == record.left\n            assert edgeset.right == record.right\n            assert edgeset.parent == record.node\n            assert edgeset.children == record.children\n            parent = ts.node(edgeset.parent)\n            assert parent.time == record.time\n            assert parent.population == record.population\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_coalescence_records(self, ts):\n        self.verify_coalescence_records(ts)\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_compute_mutation_parent(self, ts):\n        tables = ts.dump_tables()\n        before = tables.mutations.parent[:]\n        
tables.compute_mutation_parents()\n        parent = ts.tables.mutations.parent\n        assert np.array_equal(parent, before)\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_compute_mutation_time(self, ts):\n        tables = ts.dump_tables()\n        python_time = tsutil.compute_mutation_times(ts)\n        tables.compute_mutation_times()\n        assert np.allclose(python_time, tables.mutations.time, rtol=1e-10, atol=1e-10)\n        # Check we have valid times\n        tables.tree_sequence()\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_tracked_samples(self, ts):\n        # Should be empty list by default.\n        for tree in ts.trees():\n            assert tree.num_tracked_samples() == 0\n            for u in tree.nodes():\n                assert tree.num_tracked_samples(u) == 0\n        samples = list(ts.samples())\n        tracked_samples = samples[:2]\n        for tree in ts.trees(tracked_samples=tracked_samples):\n            nu = [0 for j in range(ts.num_nodes)]\n            assert tree.num_tracked_samples() == len(tracked_samples)\n            for j in tracked_samples:\n                u = j\n                while u != tskit.NULL:\n                    nu[u] += 1\n                    u = tree.parent(u)\n            for u, count in enumerate(nu):\n                assert tree.num_tracked_samples(u) == count\n            assert tree.num_tracked_samples(tree.virtual_root) == len(tracked_samples)\n\n    def test_tracked_samples_is_first_arg(self):\n        ts = tskit.Tree.generate_balanced(6).tree_sequence\n        samples = [0, 1, 2]\n        tree = next(ts.trees(samples))\n        assert tree.num_tracked_samples() == 3\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_deprecated_sample_aliases(self, ts):\n        # Ensure that we get the same results from the various combinations\n        # of leaf_lists, sample_lists etc.\n        
samples = list(ts.samples())[:2]\n        # tracked leaves/samples\n        trees_new = ts.trees(tracked_samples=samples)\n        trees_old = ts.trees(tracked_leaves=samples)\n        for t_new, t_old in zip(trees_new, trees_old):\n            for u in t_new.nodes():\n                assert t_new.num_tracked_samples(u) == t_old.get_num_tracked_leaves(u)\n        trees_new = ts.trees()\n        trees_old = ts.trees()\n        for t_new, t_old in zip(trees_new, trees_old):\n            for u in t_new.nodes():\n                assert t_new.num_samples(u) == t_old.get_num_leaves(u)\n                assert list(t_new.samples(u)) == list(t_old.get_leaves(u))\n        for on in [True, False]:\n            trees_new = ts.trees(sample_lists=on)\n            trees_old = ts.trees(leaf_lists=on)\n            for t_new, t_old in zip(trees_new, trees_old):\n                for u in t_new.nodes():\n                    assert t_new.num_samples(u) == t_old.get_num_leaves(u)\n                    assert list(t_new.samples(u)) == list(t_old.get_leaves(u))\n\n    def verify_samples(self, ts):\n        # We should get the same list of samples if we use the low-level\n        # sample lists or a simple traversal.\n        samples1 = []\n        for t in ts.trees(sample_lists=False):\n            samples1.append(list(t.samples()))\n        samples2 = []\n        for t in ts.trees(sample_lists=True):\n            samples2.append(list(t.samples()))\n        assert samples1 == samples2\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_samples(self, ts):\n        self.verify_samples(ts)\n        pops = {node.population for node in ts.nodes()}\n        for pop in pops:\n            subsample = ts.samples(pop)\n            assert np.array_equal(subsample, ts.samples(population=pop))\n            assert np.array_equal(subsample, ts.samples(population_id=pop))\n            assert list(subsample) == [\n                node.id\n                for node in 
ts.nodes()\n                if node.population == pop and node.is_sample()\n            ]\n        with pytest.raises(ValueError):\n            ts.samples(population=0, population_id=0)\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_first_last(self, ts):\n        for kwargs in [{}, {\"tracked_samples\": ts.samples()}]:\n            t1 = ts.first(**kwargs)\n            t2 = next(ts.trees())\n            assert t1 is not t2\n            assert t1.parent_dict == t2.parent_dict\n            assert t1.index == 0\n            if \"tracked_samples\" in kwargs:\n                assert t1.num_tracked_samples() == ts.num_samples\n            else:\n                assert t1.num_tracked_samples() == 0\n\n            t1 = ts.last(**kwargs)\n            t2 = next(reversed(ts.trees()))\n            assert t1 is not t2\n            assert t1.parent_dict == t2.parent_dict\n            assert t1.index == ts.num_trees - 1\n            if \"tracked_samples\" in kwargs:\n                assert t1.num_tracked_samples() == ts.num_samples\n            else:\n                assert t1.num_tracked_samples() == 0\n\n    def test_trees_interface(self):\n        # Use a tree sequence guaranteed to have node 0 as the first sample node\n        ts = tskit.Tree.generate_balanced(10).tree_sequence\n        for t in ts.trees():\n            assert t.get_num_samples(0) == 1\n            assert t.get_num_tracked_samples(0) == 0\n            assert list(t.samples(0)) == [0]\n            assert t.tree_sequence is ts\n\n        for t in ts.trees(tracked_samples=[0]):\n            assert t.get_num_samples(0) == 1\n            assert t.get_num_tracked_samples(0) == 1\n            assert list(t.samples(0)) == [0]\n\n        for t in ts.trees(sample_lists=True):\n            assert t.get_num_samples(0) == 1\n            assert t.get_num_tracked_samples(0) == 0\n            assert list(t.samples(0)) == [0]\n\n    @pytest.mark.parametrize(\"ts\", 
tsutil.get_example_tree_sequences())\n    def test_get_pairwise_diversity(self, ts):\n        with pytest.raises(ValueError, match=\"at least one element\"):\n            ts.get_pairwise_diversity([])\n        samples = list(ts.samples())\n        if len(samples) == 0:\n            with pytest.raises(\n                ValueError, match=\"Sample sets must contain at least one element\"\n            ):\n                ts.get_pairwise_diversity()\n        else:\n            assert ts.get_pairwise_diversity() == ts.get_pairwise_diversity(samples)\n            assert ts.get_pairwise_diversity(samples[:2]) == ts.get_pairwise_diversity(\n                list(reversed(samples[:2]))\n            )\n\n    def test_populations(self):\n        more_than_zero = False\n        for ts in tsutil.get_example_tree_sequences(pytest_params=False):\n            N = ts.num_populations\n            if N > 0:\n                more_than_zero = True\n            pops = list(ts.populations())\n            assert len(pops) == N\n            for j in range(N):\n                assert pops[j] == ts.population(j)\n                assert pops[j].id == j\n        assert more_than_zero\n\n    def test_individuals(self):\n        more_than_zero = False\n        mapped_to_nodes = False\n        for ts in tsutil.get_example_tree_sequences(pytest_params=False):\n            ind_node_map = collections.defaultdict(list)\n            for node in ts.nodes():\n                if node.individual != tskit.NULL:\n                    ind_node_map[node.individual].append(node.id)\n            if len(ind_node_map) > 0:\n                mapped_to_nodes = True\n            N = ts.num_individuals\n            if N > 0:\n                more_than_zero = True\n            inds = list(ts.individuals())\n            assert len(inds) == N\n            for j in range(N):\n                assert inds[j] == ts.individual(j)\n                assert inds[j].id == j\n                assert isinstance(inds[j].parents, 
np.ndarray)\n                assert isinstance(inds[j].location, np.ndarray)\n                assert isinstance(inds[j].nodes, np.ndarray)\n                assert ind_node_map[j] == list(inds[j].nodes)\n\n        assert more_than_zero\n        assert mapped_to_nodes\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_get_population(self, ts):\n        # Deprecated interface for ts.node(id).population\n        N = ts.get_num_nodes()\n        with pytest.raises(ValueError):\n            ts.get_population(-1)\n        with pytest.raises(ValueError):\n            ts.get_population(N)\n        with pytest.raises(ValueError):\n            ts.get_population(N + 1)\n        for node in range(N):\n            assert ts.get_population(node) == ts.node(node).population\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_get_time(self, ts):\n        # Deprecated interface for ts.node(id).time\n        N = ts.get_num_nodes()\n        with pytest.raises(ValueError):\n            ts.get_time(-1)\n        with pytest.raises(ValueError):\n            ts.get_time(N)\n        with pytest.raises(ValueError):\n            ts.get_time(N + 1)\n        for u in range(N):\n            assert ts.get_time(u) == ts.node(u).time\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_max_root_time(self, ts):\n        oldest = None\n        for tree in ts.trees():\n            for root in tree.roots:\n                oldest = (\n                    tree.time(root) if oldest is None else max(oldest, tree.time(root))\n                )\n        if oldest is None:\n            assert pytest.raises(ValueError, match=\"max()\")\n        else:\n            assert oldest == ts.max_root_time\n\n    def test_max_root_time_corner_cases(self):\n        tables = tskit.TableCollection(1)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        
tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=1)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=2)\n        tables.nodes.add_row(flags=0, time=3)\n        assert tables.tree_sequence().max_root_time == 2\n        tables.edges.add_row(0, 1, 1, 0)\n        assert tables.tree_sequence().max_root_time == 2\n        tables.edges.add_row(0, 1, 3, 1)\n        assert tables.tree_sequence().max_root_time == 3\n\n    def test_subset_reverse_all_nodes(self):\n        ts = tskit.Tree.generate_comb(5).tree_sequence\n        assert np.all(ts.samples() == np.arange(ts.num_samples))\n        flipped_ids = np.flip(np.arange(ts.num_nodes))\n        new_ts = ts.subset(flipped_ids)\n        assert set(new_ts.samples()) == set(flipped_ids[np.arange(ts.num_samples)])\n        r1 = ts.first().rank()\n        r2 = new_ts.first().rank()\n        assert r1.shape == r2.shape\n        assert r1.label != r2.label\n\n    def test_subset_reverse_internal_nodes(self):\n        ts = tskit.Tree.generate_balanced(5).tree_sequence\n        internal_nodes = np.ones(ts.num_nodes, dtype=bool)\n        internal_nodes[ts.samples()] = False\n        node_ids = np.arange(ts.num_nodes)\n        node_ids[internal_nodes] = np.flip(node_ids[internal_nodes])\n        new_ts = ts.subset(node_ids)\n        assert np.any(new_ts.nodes_time != ts.nodes_time)\n        assert new_ts.first().rank() == ts.first().rank()\n\n    def test_deprecated_apis(self):\n        ts = msprime.simulate(10, random_seed=1)\n        assert ts.get_ll_tree_sequence() == ts.ll_tree_sequence\n        assert ts.get_sample_size() == ts.sample_size\n        assert ts.get_sample_size() == ts.num_samples\n        assert ts.get_sequence_length() == ts.sequence_length\n        assert ts.get_num_trees() == ts.num_trees\n        assert ts.get_num_mutations() == ts.num_mutations\n        assert ts.get_num_nodes() == ts.num_nodes\n        assert ts.get_pairwise_diversity() == ts.pairwise_diversity()\n        samples = 
ts.samples()\n        assert ts.get_pairwise_diversity(samples) == ts.pairwise_diversity(samples)\n        assert np.array_equal(ts.get_samples(), ts.samples())\n\n    def test_sites(self):\n        some_sites = False\n        for ts in tsutil.get_example_tree_sequences(pytest_params=False):\n            tables = ts.dump_tables()\n            sites = tables.sites\n            mutations = tables.mutations\n            assert ts.num_sites == len(sites)\n            assert ts.num_mutations == len(mutations)\n            previous_pos = -1\n            mutation_index = 0\n            ancestral_state = tskit.unpack_strings(\n                sites.ancestral_state, sites.ancestral_state_offset\n            )\n            derived_state = tskit.unpack_strings(\n                mutations.derived_state, mutations.derived_state_offset\n            )\n\n            for index, site in enumerate(ts.sites()):\n                s2 = ts.site(site.id)\n                assert s2 == site\n                s3 = ts.site(position=site.position)\n                assert s3 == site\n                assert site.position == sites.position[index]\n                assert site.position > previous_pos\n                previous_pos = site.position\n                assert ancestral_state[index] == site.ancestral_state\n                assert site.id == index\n                for mutation in site.mutations:\n                    m2 = ts.mutation(mutation.id)\n                    assert m2 == mutation\n                    assert mutation.site == site.id\n                    assert mutation.site == mutations.site[mutation_index]\n                    assert mutation.node == mutations.node[mutation_index]\n                    assert mutation.parent == mutations.parent[mutation_index]\n                    assert mutation.id == mutation_index\n                    assert derived_state[mutation_index] == mutation.derived_state\n                    mutation_index += 1\n                some_sites = True\n          
  total_sites = 0\n            for tree in ts.trees():\n                assert len(list(tree.sites())) == tree.num_sites\n                total_sites += tree.num_sites\n            assert ts.num_sites == total_sites\n            assert mutation_index == len(mutations)\n        assert some_sites\n\n    def verify_mutations(self, ts):\n        other_mutations = []\n        for site in ts.sites():\n            for mutation in site.mutations:\n                other_mutations.append(mutation)\n        mutations = list(ts.mutations())\n        assert ts.num_mutations == len(other_mutations)\n        assert ts.num_mutations == len(mutations)\n        for mut, other_mut in zip(mutations, other_mutations):\n            assert mut == other_mut\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_sites_mutations(self, ts):\n        # Check that the mutations iterator returns the correct values.\n        self.verify_mutations(ts)\n\n    def test_removed_methods(self):\n        ts = tskit.TableCollection(1).tree_sequence()\n        with pytest.raises(NotImplementedError):\n            ts.get_num_records()\n        with pytest.raises(NotImplementedError):\n            ts.diffs()\n        with pytest.raises(NotImplementedError):\n            ts.newick_trees()\n        with pytest.raises(NotImplementedError):\n            ts.to_nexus()\n\n    def test_dump_pathlib(self, ts_fixture, tmp_path):\n        path = tmp_path / \"tmp.trees\"\n        assert path.exists\n        assert path.is_file\n        ts_fixture.dump(path)\n        other_ts = tskit.load(path)\n        assert ts_fixture.tables == other_ts.tables\n\n    @pytest.mark.skipif(platform.system() == \"Windows\", reason=\"Windows doesn't raise\")\n    def test_dump_load_errors(self):\n        ts = msprime.simulate(5, random_seed=1)\n        # Try to dump/load files we don't have access to or don't exist.\n        for func in [ts.dump, tskit.load]:\n            for f in [\"/\", 
\"/test.trees\", \"/dir_does_not_exist/x.trees\"]:\n                with pytest.raises(OSError):\n                    func(f)\n                try:\n                    func(f)\n                except OSError as e:\n                    message = str(e)\n                    assert len(message) > 0\n            f = \"/\" + 4000 * \"x\"\n            with pytest.raises(OSError):\n                func(f)\n            try:\n                func(f)\n            except OSError as e:\n                message = str(e)\n            assert \"File name too long\" in message\n            for bad_filename in [[], None, {}]:\n                with pytest.raises(TypeError):\n                    func(bad_filename)\n\n    def test_zlib_compression_warning(self, ts_fixture, tmp_path):\n        temp_file = tmp_path / \"tmp.trees\"\n        with warnings.catch_warnings(record=True) as w:\n            ts_fixture.dump(temp_file, zlib_compression=True)\n            assert len(w) == 1\n            assert issubclass(w[0].category, RuntimeWarning)\n        with warnings.catch_warnings(record=True) as w:\n            ts_fixture.dump(temp_file, zlib_compression=False)\n            assert len(w) == 0\n\n    def test_tables_sequence_length_round_trip(self):\n        for sequence_length in [0.1, 1, 10, 100]:\n            ts = msprime.simulate(5, length=sequence_length, random_seed=1)\n            assert ts.sequence_length == sequence_length\n            tables = ts.tables\n            assert tables.sequence_length == sequence_length\n            new_ts = tables.tree_sequence()\n            assert new_ts.sequence_length == sequence_length\n\n    def test_migrations(self):\n        ts = msprime.simulate(\n            population_configurations=[\n                msprime.PopulationConfiguration(10),\n                msprime.PopulationConfiguration(10),\n            ],\n            migration_matrix=[[0, 1], [1, 0]],\n            random_seed=2,\n            record_migrations=True,\n        )\n        
assert ts.num_migrations > 0\n        migrations = list(ts.migrations())\n        assert len(migrations) == ts.num_migrations\n        for migration in migrations:\n            assert migration.source in [0, 1]\n            assert migration.dest in [0, 1]\n            assert migration.time > 0\n            assert migration.left == 0\n            assert migration.right == 1\n            assert 0 <= migration.node < ts.num_nodes\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_len_trees(self, ts):\n        tree_iter = ts.trees()\n        assert len(tree_iter) == ts.num_trees\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_list(self, ts):\n        for kwargs in [{}, {\"tracked_samples\": ts.samples()}]:\n            tree_list = ts.aslist(**kwargs)\n            assert len(tree_list) == ts.num_trees\n            assert len(set(map(id, tree_list))) == ts.num_trees\n            for index, tree in enumerate(tree_list):\n                assert index == tree.index\n            for t1, t2 in zip(tree_list, ts.trees(**kwargs)):\n                assert t1 == t2\n                assert t1.parent_dict == t2.parent_dict\n                if \"tracked_samples\" in kwargs:\n                    assert t1.num_tracked_samples() == ts.num_samples\n                    assert t2.num_tracked_samples() == ts.num_samples\n                else:\n                    assert t1.num_tracked_samples() == 0\n                    assert t2.num_tracked_samples() == 0\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_reversed_trees(self, ts):\n        index = ts.num_trees - 1\n        tree_list = ts.aslist()\n        for tree in reversed(ts.trees()):\n            assert tree.index == index\n            t2 = tree_list[index]\n            assert tree.interval == t2.interval\n            assert tree.parent_dict == t2.parent_dict\n            index -= 1\n\n    
@pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_at_index(self, ts):\n        for kwargs in [{}, {\"tracked_samples\": ts.samples()}]:\n            tree_list = ts.aslist(**kwargs)\n            for index in list(range(ts.num_trees)) + [-1]:\n                t1 = tree_list[index]\n                t2 = ts.at_index(index, **kwargs)\n                assert t1 == t2\n                assert t1.interval == t2.interval\n                assert t1.parent_dict == t2.parent_dict\n                if \"tracked_samples\" in kwargs:\n                    assert t2.num_tracked_samples() == ts.num_samples\n                else:\n                    assert t2.num_tracked_samples() == 0\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_at(self, ts):\n        for kwargs in [{}, {\"tracked_samples\": ts.samples()}]:\n            tree_list = ts.aslist(**kwargs)\n            for t1 in tree_list:\n                left, right = t1.interval\n                mid = left + (right - left) / 2\n                for pos in [left, left + 1e-9, mid, right - 1e-9]:\n                    t2 = ts.at(pos, **kwargs)\n                    assert t1 == t2\n                    assert t1.interval == t2.interval\n                    assert t1.parent_dict == t2.parent_dict\n                if right < ts.sequence_length:\n                    t2 = ts.at(right, **kwargs)\n                    t3 = tree_list[t1.index + 1]\n                    assert t3 == t2\n                    assert t3.interval == t2.interval\n                    assert t3.parent_dict == t2.parent_dict\n                if \"tracked_samples\" in kwargs:\n                    assert t2.num_tracked_samples() == ts.num_samples\n                else:\n                    assert t2.num_tracked_samples() == 0\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_sequence_iteration(self, ts):\n        for table_name in ts.tables_dict.keys():\n      
      sequence = getattr(ts, table_name)()\n            length = getattr(ts, \"num_\" + table_name)\n            # Test __iter__\n            i = None\n            for i, n in enumerate(sequence):\n                assert i == n.id\n            if i is not None:\n                assert n.id == (length - 1 if length else 0)\n            if table_name == \"mutations\":\n                # Mutations are not currently sequences, so have no len or idx access\n                with pytest.raises(TypeError):\n                    len(sequence)\n                if length != 0:\n                    with pytest.raises(TypeError):\n                        sequence[0]\n            else:\n                # Test __len__\n                assert len(sequence) == length\n                # Test __getitem__ on the last item in the sequence\n                if length != 0:\n                    assert sequence[length - 1] == n  # +ive indexing\n                    assert sequence[-1] == n  # -ive indexing\n                with pytest.raises(IndexError):\n                    sequence[length]\n                # Test reverse\n                i = None\n                for i, n in enumerate(reversed(sequence)):\n                    assert i == length - 1 - n.id\n                if i is not None:\n                    assert n.id == 0\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_load_tables(self, ts):\n        tables = ts.dump_tables()\n        tables.drop_index()\n\n        # Tables not in tc not rebuilt as per default, so error\n        with pytest.raises(\n            _tskit.LibraryError, match=\"Table collection must be indexed\"\n        ):\n            assert tskit.TreeSequence.load_tables(tables).tables.has_index()\n\n        # Tables not in tc, but rebuilt\n        assert tskit.TreeSequence.load_tables(\n            tables, build_indexes=True\n        ).tables.has_index()\n\n        tables.build_index()\n        # Tables in tc, not rebuilt\n   
     assert tskit.TreeSequence.load_tables(\n            tables, build_indexes=False\n        ).tables.has_index()\n        # Tables in tc, and rebuilt\n        assert tskit.TreeSequence.load_tables(tables).tables.has_index()\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_html_repr(self, ts):\n        html = ts._repr_html_()\n        # Parse to check valid\n        ElementTree.fromstring(html)\n        assert len(html) > 5000\n        assert f\"<tr><td>Trees</td><td>{ts.num_trees:,}</td></tr>\" in html\n        assert f\"<tr><td>Time Units</td><td>{ts.time_units}</td></tr>\" in html\n        for table in ts.tables.table_name_map:\n            assert f\"<td>{table.capitalize()}</td>\" in html\n        if ts.num_provenances > 0:\n            assert (\n                f\"<td>{json.loads(ts.provenance(0).record)['software']['name']}</td>\"\n                in html\n            )\n\n    def test_bad_provenance(self, ts_fixture):\n        tables = ts_fixture.dump_tables()\n        tables.provenances.add_row(\"bad\", \"bad\")\n        ts = tables.tree_sequence()\n        assert \"Could not parse provenance\" in ts._repr_html_()\n\n    def test_provenance_summary_html(self, ts_fixture):\n        tables = ts_fixture.dump_tables()\n        for _ in range(20):\n            # Add a row with isotimestamp\n            tables.provenances.add_row(\"foo\", \"bar\")\n        assert \"... 15 more\" in tables.tree_sequence()._repr_html_()\n\n    def test_html_repr_limit(self, ts_fixture):\n        tables = ts_fixture.dump_tables()\n        d = {n: n for n in range(50)}\n        d[0] = \"N\" * 200\n        tables.metadata = d\n        ts = tables.tree_sequence()\n        assert \"... 
and 20 more\" in ts._repr_html_()\n        assert \"NN...\" in ts._repr_html_()\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_str(self, ts):\n        s = str(ts)\n        assert len(s) > 999\n        assert re.search(rf\"║Trees *│ *{ts.num_trees}║\", s)\n        assert re.search(rf\"║Time Units *│ *{ts.time_units}║\", s)\n        for table in ts.tables.table_name_map:\n            assert re.search(rf\"║{table.capitalize()} *│\", s)\n\n    @pytest.mark.skip(\"FIXME nbytes\")\n    def test_nbytes(self, tmp_path, ts_fixture):\n        ts_fixture.dump(tmp_path / \"tables\")\n        store = kastore.load(tmp_path / \"tables\")\n        for v in store.values():\n            # Check we really have data in every field\n            assert v.nbytes > 0\n        nbytes = sum(\n            array.nbytes\n            for name, array in store.items()\n            # nbytes is the size of asdict, so exclude file format items\n            if name not in [\"format/version\", \"format/name\", \"uuid\"]\n        )\n        assert nbytes == ts_fixture.nbytes\n\n    def test_equals(self):\n        # Here we don't use the fixture as we'd like to run the same sim twice\n        pop_configs = [msprime.PopulationConfiguration(5) for _ in range(2)]\n        migration_matrix = [[0, 1], [1, 0]]\n        t1 = msprime.simulate(\n            population_configurations=pop_configs,\n            migration_matrix=migration_matrix,\n            mutation_rate=1,\n            record_migrations=True,\n            random_seed=1,\n        )\n        t2 = msprime.simulate(\n            population_configurations=pop_configs,\n            migration_matrix=migration_matrix,\n            mutation_rate=1,\n            record_migrations=True,\n            random_seed=1,\n        )\n\n        assert t1 == t1\n        assert t1 == t1.dump_tables().tree_sequence()\n        assert t1.dump_tables().tree_sequence() == t1\n\n        # The provenances may or may not be equal 
depending on the clock\n        # precision for record. So clear them first.\n        tb1 = t1.dump_tables()\n        tb2 = t2.dump_tables()\n        tb1.provenances.clear()\n        tb2.provenances.clear()\n        t1 = tb1.tree_sequence()\n        t2 = tb2.tree_sequence()\n\n        assert t1 == t2\n        assert t1 == t2\n        assert not (t1 != t2)\n        # We don't do more as this is the same code path as TableCollection.__eq__\n\n    def test_equals_options(self, ts_fixture):\n        t1 = ts_fixture\n        # Take a copy\n        t2 = ts_fixture.dump_tables().tree_sequence()\n\n        def modify(ts, func):\n            tc = ts.dump_tables()\n            func(tc)\n            return tc.tree_sequence()\n\n        t1 = modify(t1, lambda tc: tc.provenances.add_row(\"random stuff\"))\n        assert not (t1 == t2)\n        assert t1.equals(t2, ignore_provenance=True)\n        assert t2.equals(t1, ignore_provenance=True)\n        assert not (t1.equals(t2))\n        assert not (t2.equals(t1))\n        t1 = modify(t1, lambda tc: tc.provenances.clear())\n        t2 = modify(t2, lambda tc: tc.provenances.clear())\n        assert t1.equals(t2)\n        assert t2.equals(t1)\n\n        tc = t1.dump_tables()\n        tc.metadata_schema = tskit.MetadataSchema({\"codec\": \"json\", \"type\": \"object\"})\n        t1 = tc.tree_sequence()\n        tc = t1.dump_tables()\n        tc.metadata = {\"hello\": \"world\"}\n        t1 = tc.tree_sequence()\n\n        assert not t1.equals(t2)\n        assert t1.equals(t2, ignore_ts_metadata=True)\n        assert not t2.equals(t1)\n        assert t2.equals(t1, ignore_ts_metadata=True)\n        tc = t2.dump_tables()\n        tc.metadata_schema = t1.metadata_schema\n        t2 = tc.tree_sequence()\n        assert not t1.equals(t2)\n        assert t1.equals(t2, ignore_ts_metadata=True)\n        assert not t2.equals(t1)\n        assert t2.equals(t1, ignore_ts_metadata=True)\n\n        t1 = modify(t1, lambda tc: 
tc.provenances.add_row(\"random stuff\"))\n        assert not t1.equals(t2)\n        assert not t1.equals(t2, ignore_ts_metadata=True)\n        assert not t1.equals(t2, ignore_provenance=True)\n        assert t1.equals(t2, ignore_ts_metadata=True, ignore_provenance=True)\n\n        t1 = modify(t1, lambda tc: tc.provenances.clear())\n        t2 = modify(t2, lambda tc: setattr(tc, \"metadata\", t1.metadata))  # noqa: B010\n        assert t1.equals(t2)\n        assert t2.equals(t1)\n\n        # Empty out tables to test ignore_tables flag\n        tc = t2.dump_tables()\n        tc.individuals.truncate(0)\n        tc.nodes.truncate(0)\n        tc.edges.truncate(0)\n        tc.migrations.truncate(0)\n        tc.sites.truncate(0)\n        tc.mutations.truncate(0)\n        tc.populations.truncate(0)\n        t2 = tc.tree_sequence()\n        assert not t1.equals(t2)\n        assert t1.equals(t2, ignore_tables=True)\n        # Empty out reference to test ignore_reference_sequence flag\n        tc = t1.dump_tables()\n        tc.reference_sequence.clear()\n        t2 = tc.tree_sequence()\n        assert not t1.equals(t2)\n        assert t1.equals(t2, ignore_reference_sequence=True)\n        # Make t1 and t2 equal again\n        t2 = t1.dump_tables().tree_sequence()\n        assert t1.equals(t2)\n        assert t2.equals(t1)\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_tree_node_edges(self, ts):\n        edge_visited = np.zeros(ts.num_edges, dtype=bool)\n        for tree in ts.trees():\n            mapping = tree.edge_array\n            node_mapped = mapping >= 0\n            edge_visited[mapping[node_mapped]] = True\n            # Note that tree.nodes() does not necessarily list all the nodes\n            # in the tree topology, only the ones that descend from a root.\n            # Therefore if not all the topological trees in a single `Tree` have\n            # a root, we can have edges above nodes that are not listed. 
This\n            # happens, for example, in a tree with no sample nodes.\n            assert np.sum(node_mapped) >= len(list(tree.nodes())) - tree.num_roots\n            for u in tree.nodes():\n                if tree.parent(u) == tskit.NULL:\n                    assert mapping[u] == tskit.NULL\n                else:\n                    edge = ts.edge(mapping[u])\n                    assert edge.child == u\n                    assert edge.left <= tree.interval.left\n                    assert edge.right >= tree.interval.right\n        assert np.all(edge_visited)\n\n    def verify_individual_vectors(self, ts):\n        verify_times = np.repeat(np.nan, ts.num_individuals)\n        verify_populations = np.repeat(tskit.NULL, ts.num_individuals)\n        for ind in ts.individuals():\n            if len(ind.nodes) > 0:\n                t = {ts.node(n).time for n in ind.nodes}\n                p = {ts.node(n).population for n in ind.nodes}\n                assert len(t) <= 1\n                assert len(p) <= 1\n                verify_times[ind.id] = t.pop()\n                verify_populations[ind.id] = p.pop()\n\n        times = ts.individuals_time\n        populations = ts.individuals_population\n        assert np.array_equal(times, verify_times, equal_nan=True)\n        assert np.array_equal(populations, verify_populations, equal_nan=True)\n        times2 = ts.individuals_time\n        populations2 = ts.individuals_population\n        assert np.array_equal(times, times2, equal_nan=True)\n        assert np.array_equal(populations, populations2, equal_nan=True)\n        # check aliases also\n        times3 = ts.individual_times\n        populations3 = ts.individual_populations\n        assert np.array_equal(times, times3, equal_nan=True)\n        assert np.array_equal(populations, populations3, equal_nan=True)\n\n    def test_individuals_population_errors(self):\n        t = tskit.TableCollection(sequence_length=1)\n        t.individuals.add_row()\n        
t.individuals.add_row()\n        for j in range(2):\n            t.populations.add_row()\n            t.nodes.add_row(time=0, population=j, individual=0)\n        ts = t.tree_sequence()\n        with pytest.raises(\n            _tskit.LibraryError, match=\"TSK_ERR_INDIVIDUAL_POPULATION_MISMATCH\"\n        ):\n            _ = ts.individuals_population\n        # inconsistent but NULL populations are also an error\n        t.nodes.clear()\n        t.nodes.add_row(time=0, population=1, individual=0)\n        t.nodes.add_row(time=0, population=tskit.NULL, individual=0)\n        ts = t.tree_sequence()\n        with pytest.raises(\n            _tskit.LibraryError, match=\"TSK_ERR_INDIVIDUAL_POPULATION_MISMATCH\"\n        ):\n            _ = ts.individuals_population\n        t.nodes.clear()\n        t.nodes.add_row(time=0, population=tskit.NULL, individual=1)\n        t.nodes.add_row(time=0, population=0, individual=1)\n        ts = t.tree_sequence()\n        with pytest.raises(\n            _tskit.LibraryError, match=\"TSK_ERR_INDIVIDUAL_POPULATION_MISMATCH\"\n        ):\n            _ = ts.individuals_population\n\n    def test_individuals_time_errors(self):\n        t = tskit.TableCollection(sequence_length=1)\n        t.individuals.add_row()\n        for j in range(2):\n            t.nodes.add_row(time=j, individual=0)\n        ts = t.tree_sequence()\n        with pytest.raises(\n            _tskit.LibraryError, match=\"TSK_ERR_INDIVIDUAL_TIME_MISMATCH\"\n        ):\n            _ = ts.individuals_time\n\n    @pytest.mark.parametrize(\"n\", [1, 10])\n    def test_individual_vectors(self, n):\n        d = msprime.Demography.island_model([10] * n, 0.1)\n        ts = msprime.sim_ancestry(\n            {pop.name: 10 for pop in d.populations},\n            demography=d,\n            random_seed=100 + n,\n            model=\"dtwf\",\n        )\n        ts = tsutil.insert_random_consistent_individuals(ts, seed=100 + n)\n        assert ts.num_individuals > 10\n        
self.verify_individual_vectors(ts)\n\n    def test_individuals_location_errors(self):\n        t = tskit.TableCollection(sequence_length=1)\n        t.individuals.add_row(location=[1.0, 2.0])\n        t.individuals.add_row(location=[0.0])\n        ts = t.tree_sequence()\n        with pytest.raises(ValueError, match=\"locations\"):\n            _ = ts.individuals_location\n\n        t.clear()\n        t.individuals.add_row(location=[1.0, 2.0])\n        t.individuals.add_row(location=[])\n        t.individuals.add_row(location=[1.0, 2.0])\n        t.individuals.add_row(location=[])\n        ts = t.tree_sequence()\n        with pytest.raises(ValueError, match=\"locations\"):\n            _ = ts.individuals_location\n\n    @pytest.mark.parametrize(\"nlocs\", [0, 1, 4])\n    @pytest.mark.parametrize(\"num_indivs\", [0, 3])\n    def test_individuals_location(self, nlocs, num_indivs):\n        t = tskit.TableCollection(sequence_length=1)\n        locs = np.array([j + np.arange(nlocs) for j in range(num_indivs)])\n        if len(locs) == 0:\n            locs = locs.reshape((num_indivs, 0))\n        for j in range(num_indivs):\n            t.individuals.add_row(location=locs[j])\n        ts = t.tree_sequence()\n        ts_locs = ts.individuals_location\n        assert locs.shape == ts_locs.shape\n        assert np.array_equal(locs, ts_locs)\n        locs2 = ts.individuals_location\n        assert np.array_equal(ts_locs, locs2)\n        # test alias\n        locs3 = ts.individual_locations\n        assert np.array_equal(ts_locs, locs3)\n\n    def verify_individual_properties(self, ts):\n        for ind in ts.individuals():\n            times = [ts.node(n).time for n in ind.nodes]\n            if len(set(times)) > 1:\n                with pytest.raises(ValueError, match=\"mis-matched times\"):\n                    _ = ind.time\n            elif len(times) == 0:\n                assert tskit.is_unknown_time(ind.time)\n            else:\n                assert len(set(times)) 
== 1\n                assert times[0] == ind.time\n                # test accessing more than once in case we mess up with {}.pop()\n                assert times[0] == ind.time\n            pops = [ts.node(n).population for n in ind.nodes]\n            if len(set(pops)) > 1:\n                with pytest.raises(ValueError, match=\"mis-matched populations\"):\n                    _ = ind.population\n            elif len(pops) == 0:\n                assert ind.population is tskit.NULL\n            else:\n                assert len(set(pops)) == 1\n                assert ind.population == pops[0]\n                # test accessing more than once in case we mess up with {}.pop()\n                assert ind.population == pops[0]\n\n    def test_individual_getter_population(self):\n        tables = tskit.TableCollection(sequence_length=1)\n        for _ in range(2):\n            tables.populations.add_row()\n        pop_list = [\n            ((), tskit.NULL),\n            ((tskit.NULL,), tskit.NULL),\n            ((1,), 1),\n            ((1, 1, 1), 1),\n            ((tskit.NULL, 1), \"ERR\"),\n            ((0, tskit.NULL), \"ERR\"),\n            ((0, 1), \"ERR\"),\n        ]\n        for pops, _ in pop_list:\n            j = tables.individuals.add_row()\n            for p in pops:\n                tables.nodes.add_row(time=0, population=p, individual=j)\n        ts = tables.tree_sequence()\n        for ind, (_, p) in zip(ts.individuals(), pop_list):\n            if p == \"ERR\":\n                with pytest.raises(ValueError, match=\"mis-matched populations\"):\n                    _ = ind.population\n            else:\n                assert p == ind.population\n\n    def test_individual_getter_time(self):\n        tables = tskit.TableCollection(sequence_length=1)\n        time_list = [\n            ((), tskit.UNKNOWN_TIME),\n            ((0.0,), 0.0),\n            ((1, 1, 1), 1),\n            ((4.0, 1), \"ERR\"),\n            ((0, 4.0), \"ERR\"),\n        ]\n        for 
times, _ in time_list:\n            j = tables.individuals.add_row()\n            for t in times:\n                tables.nodes.add_row(time=t, individual=j)\n        ts = tables.tree_sequence()\n        for ind, (_, t) in zip(ts.individuals(), time_list):\n            if t == \"ERR\":\n                with pytest.raises(ValueError, match=\"mis-matched times\"):\n                    _ = ind.time\n            elif tskit.is_unknown_time(t):\n                assert tskit.is_unknown_time(ind.time)\n            else:\n                assert t == ind.time\n\n    @pytest.mark.parametrize(\"n\", [1, 10])\n    def test_individual_properties(self, n):\n        # tests for the .time and .population attributes of\n        # the Individual class\n        d = msprime.Demography.island_model([10] * n, 0.1)\n        ts = msprime.sim_ancestry(\n            {pop.name: int(150 / n) for pop in d.populations},\n            demography=d,\n            random_seed=100 + n,\n            model=\"dtwf\",\n        )\n        ts = tsutil.insert_random_consistent_individuals(ts, seed=100 + n)\n        assert ts.num_individuals > 10\n        self.verify_individual_properties(ts)\n        ts = tsutil.insert_random_ploidy_individuals(ts, seed=100 + n)\n        assert ts.num_individuals > 10\n        self.verify_individual_properties(ts)\n\n    @pytest.mark.parametrize(\n        \"array\",\n        [\n            \"individuals_flags\",\n            \"nodes_time\",\n            \"nodes_flags\",\n            \"nodes_population\",\n            \"nodes_individual\",\n            \"edges_left\",\n            \"edges_right\",\n            \"edges_parent\",\n            \"edges_child\",\n            \"sites_position\",\n            \"mutations_site\",\n            \"mutations_node\",\n            \"mutations_parent\",\n            \"mutations_time\",\n            \"migrations_left\",\n            \"migrations_right\",\n            \"migrations_node\",\n            \"migrations_source\",\n            
\"migrations_dest\",\n            \"migrations_time\",\n            \"indexes_edge_insertion_order\",\n            \"indexes_edge_removal_order\",\n        ],\n    )\n    def test_array_attr_properties(self, ts_fixture, array):\n        ts = ts_fixture\n        a = getattr(ts, array)\n        assert isinstance(a, np.ndarray)\n        with pytest.raises(AttributeError):\n            setattr(ts, array, None)\n        with pytest.raises(AttributeError):\n            delattr(ts, array)\n        with pytest.raises(ValueError, match=\"read-only\"):\n            a[:] = 1\n\n    def test_arrays_equal_to_tables(self, ts_fixture):\n        ts = ts_fixture\n        tables = ts.tables\n\n        assert_array_equal(ts.individuals_flags, tables.individuals.flags)\n\n        assert_array_equal(ts.nodes_flags, tables.nodes.flags)\n        assert_array_equal(ts.nodes_population, tables.nodes.population)\n        assert_array_equal(ts.nodes_time, tables.nodes.time)\n        assert_array_equal(ts.nodes_individual, tables.nodes.individual)\n\n        assert_array_equal(ts.edges_left, tables.edges.left)\n        assert_array_equal(ts.edges_right, tables.edges.right)\n        assert_array_equal(ts.edges_parent, tables.edges.parent)\n        assert_array_equal(ts.edges_child, tables.edges.child)\n\n        assert_array_equal(ts.sites_position, tables.sites.position)\n\n        assert_array_equal(ts.mutations_site, tables.mutations.site)\n        assert_array_equal(ts.mutations_node, tables.mutations.node)\n        assert_array_equal(ts.mutations_parent, tables.mutations.parent)\n        assert_array_equal(ts.mutations_time, tables.mutations.time)\n\n        assert_array_equal(ts.migrations_left, tables.migrations.left)\n        assert_array_equal(ts.migrations_right, tables.migrations.right)\n        assert_array_equal(ts.migrations_node, tables.migrations.node)\n        assert_array_equal(ts.migrations_source, tables.migrations.source)\n        assert_array_equal(ts.migrations_dest, 
tables.migrations.dest)\n        assert_array_equal(ts.migrations_time, tables.migrations.time)\n\n        assert_array_equal(\n            ts.indexes_edge_insertion_order, tables.indexes.edge_insertion_order\n        )\n        assert_array_equal(\n            ts.indexes_edge_removal_order, tables.indexes.edge_removal_order\n        )\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_impute_unknown_mutations_time(self, ts):\n        # Tests for method='min'\n        imputed_time = ts.impute_unknown_mutations_time(method=\"min\")\n        mutations = ts.tables.mutations\n        nodes_time = ts.nodes_time\n        table_time = np.zeros(len(mutations))\n\n        for mut_idx, mut in enumerate(mutations):\n            if tskit.is_unknown_time(mut.time):\n                node_time = nodes_time[mut.node]\n                table_time[mut_idx] = node_time\n            else:\n                table_time[mut_idx] = mut.time\n\n        assert np.allclose(imputed_time, table_time, rtol=1e-10, atol=1e-10)\n\n        # Check we have valid times\n        tables = ts.dump_tables()\n        tables.mutations.time = imputed_time\n        tables.sort()\n        tables.tree_sequence()\n\n        # Test for unallowed methods\n        with pytest.raises(\n            ValueError, match=\"Mutations time imputation method must be chosen\"\n        ):\n            ts.impute_unknown_mutations_time(method=\"foobar\")\n\n    @pytest.mark.parametrize(\n        \"mutations, error\",\n        [\n            ([], None),\n            (\n                [{\"node\": 0, \"parent\": -1}, {\"node\": 1, \"parent\": -1}],\n                None,\n            ),  # On parallel branches, no parents\n            (\n                [\n                    {\"node\": 4, \"parent\": -1},\n                    {\"node\": 0, \"parent\": 0},\n                    {\"node\": 1, \"parent\": 0},\n                ],\n                None,\n            ),  # On parallel branches, 
legal parent\n            (\n                [{\"node\": 0, \"parent\": -1}, {\"node\": 0, \"parent\": 0}],\n                None,\n            ),  # On same node\n            (\n                [{\"node\": 0, \"parent\": -1}, {\"node\": 0, \"parent\": -1}],\n                \"not consistent with the topology\",\n            ),  # On same node without parents\n            (\n                [\n                    {\"node\": 3, \"parent\": -1},\n                    {\"node\": 0, \"parent\": 0},\n                    {\"node\": 1, \"parent\": 0},\n                ],\n                \"not consistent with the topology\",\n            ),  # On parallel branches, parent on parallel branches\n            (\n                [\n                    {\"node\": 5, \"parent\": -1},\n                    {\"node\": 0, \"parent\": 0},\n                    {\"node\": 1, \"parent\": 0},\n                ],\n                \"not consistent with the topology\",\n            ),  # On parallel branches, parent high on parallel\n            (\n                [\n                    {\"node\": 3, \"parent\": -1},\n                    {\"node\": 0, \"parent\": 0},\n                    {\"node\": 7, \"parent\": 0},\n                ],\n                \"not consistent with the topology\",\n            ),  # On parallel branches, parent on different root\n            (\n                [\n                    {\"node\": 0, \"parent\": -1},\n                    {\"node\": 1, \"parent\": 0},\n                ],\n                \"not consistent with the topology\",\n            ),  # parent on parallel branch\n            (\n                [\n                    {\"node\": 6, \"parent\": -1},\n                    {\"node\": 6, \"parent\": 0},\n                ],\n                None,\n            ),  # parent above root\n            (\n                [\n                    {\"node\": 6, \"parent\": -1},\n                    {\"node\": 6, \"parent\": -1},\n                ],\n                
\"not consistent with the topology\",\n            ),  # parent above root, no parents\n        ],\n    )\n    def test_mutation_parent_errors(self, mutations, error):\n        tables = tskit.TableCollection(sequence_length=1)\n        tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE)\n        tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE)\n        tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE)\n        tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE)\n        tables.nodes.add_row(time=1)\n        tables.nodes.add_row(time=1)\n        tables.nodes.add_row(time=2)\n        tables.nodes.add_row(time=3)\n        tables.edges.add_row(left=0, right=1, parent=4, child=0)\n        tables.edges.add_row(left=0, right=1, parent=4, child=1)\n        tables.edges.add_row(left=0, right=1, parent=5, child=2)\n        tables.edges.add_row(left=0, right=1, parent=5, child=3)\n        tables.edges.add_row(left=0, right=1, parent=6, child=4)\n        tables.edges.add_row(left=0, right=1, parent=6, child=5)\n        tables.sites.add_row(position=0.5, ancestral_state=\"A\")\n\n        for mut in mutations:\n            tables.mutations.add_row(**{\"derived_state\": \"G\", \"site\": 0, **mut})\n\n        if error is not None:\n            with pytest.raises(_tskit.LibraryError, match=error):\n                tables.tree_sequence()\n        else:\n            tables.tree_sequence()\n\n    def test_union(self, ts_fixture):\n        # most of the union tests are in test_tables.py, here we just sanity check\n        tables = ts_fixture.dump_tables()\n        tables.migrations.clear()  # migrations not supported in union()\n        ts = tables.tree_sequence()\n        tables = tskit.TableCollection(ts.sequence_length)\n        tables.time_units = ts.time_units\n        empty = tables.tree_sequence()\n        union_ts = empty.union(\n            ts,\n            node_mapping=np.full(ts.num_nodes, tskit.NULL, dtype=int),\n            all_edges=True,\n        
    all_mutations=True,\n            check_shared_equality=False,\n        )\n        union_ts.tables.assert_equals(\n            ts.tables,\n            ignore_metadata=True,\n            ignore_reference_sequence=True,\n            ignore_provenance=True,\n        )\n\n\nclass TestSimplify:\n    # This class was factored out of the old TestHighlevel class 2022-12-13,\n    # and is a mishmash of different testing paradigms. There is some valuable\n    # testing done here, so it would be good to fully bring it up to date.\n\n    def verify_simplify_provenance(self, ts):\n        new_ts = ts.simplify()\n        assert new_ts.num_provenances == ts.num_provenances + 1\n        old = list(ts.provenances())\n        new = list(new_ts.provenances())\n        assert old == new[:-1]\n        # TODO call verify_provenance on this.\n        assert len(new[-1].timestamp) > 0\n        assert len(new[-1].record) > 0\n\n        new_ts = ts.simplify(record_provenance=False)\n        assert new_ts.tables.provenances == ts.tables.provenances\n\n    def verify_simplify_topology(self, ts, sample):\n        new_ts, node_map = ts.simplify(sample, map_nodes=True)\n        if len(sample) == 0:\n            assert new_ts.num_nodes == 0\n            assert new_ts.num_edges == 0\n            assert new_ts.num_sites == 0\n            assert new_ts.num_mutations == 0\n        elif len(sample) == 1:\n            assert new_ts.num_nodes == 1\n            assert new_ts.num_edges == 0\n        # The output samples should be 0...n\n        assert new_ts.num_samples == len(sample)\n        assert list(range(len(sample))) == list(new_ts.samples())\n        for j in range(new_ts.num_samples):\n            assert node_map[sample[j]] == j\n        for u in range(ts.num_nodes):\n            old_node = ts.node(u)\n            if node_map[u] != tskit.NULL:\n                new_node = new_ts.node(node_map[u])\n                assert old_node.time == new_node.time\n                assert old_node.population 
== new_node.population\n                assert old_node.metadata == new_node.metadata\n        for u in sample:\n            old_node = ts.node(u)\n            new_node = new_ts.node(node_map[u])\n            assert old_node.flags == new_node.flags\n            assert old_node.time == new_node.time\n            assert old_node.population == new_node.population\n            assert old_node.metadata == new_node.metadata\n        old_trees = ts.trees()\n        old_tree = next(old_trees)\n        assert ts.get_num_trees() >= new_ts.get_num_trees()\n        for new_tree in new_ts.trees():\n            new_left, new_right = new_tree.get_interval()\n            old_left, old_right = old_tree.get_interval()\n            # Skip ahead on the old tree until new_left is within its interval\n            while old_right <= new_left:\n                old_tree = next(old_trees)\n                old_left, old_right = old_tree.get_interval()\n            # If the MRCA of all pairs of samples is the same, then we have the\n            # same information. 
We limit this to at most 500 pairs\n            pairs = itertools.islice(itertools.combinations(sample, 2), 500)\n            for pair in pairs:\n                mapped_pair = [node_map[u] for u in pair]\n                mrca1 = old_tree.get_mrca(*pair)\n                mrca2 = new_tree.get_mrca(*mapped_pair)\n                if mrca1 == tskit.NULL:\n                    assert mrca2 == mrca1\n                else:\n                    assert mrca2 == node_map[mrca1]\n                    assert old_tree.get_time(mrca1) == new_tree.get_time(mrca2)\n                    assert old_tree.get_population(mrca1) == new_tree.get_population(\n                        mrca2\n                    )\n\n    def verify_simplify_equality(self, ts, sample):\n        for filter_sites in [False, True]:\n            s1, node_map1 = ts.simplify(\n                sample, map_nodes=True, filter_sites=filter_sites\n            )\n            t1 = s1.dump_tables()\n            s2, node_map2 = simplify_tree_sequence(ts, sample, filter_sites=filter_sites)\n            t2 = s2.dump_tables()\n            assert s1.num_samples == len(sample)\n            assert s2.num_samples == len(sample)\n            assert all(node_map1 == node_map2)\n            assert t1.individuals == t2.individuals\n            assert t1.nodes == t2.nodes\n            assert t1.edges == t2.edges\n            assert t1.migrations == t2.migrations\n            assert t1.sites == t2.sites\n            assert t1.mutations == t2.mutations\n            assert t1.populations == t2.populations\n\n    def verify_simplify_variants(self, ts, sample):\n        subset = ts.simplify(sample)\n        sample_map = {u: j for j, u in enumerate(ts.samples())}\n        # Need to map IDs back to their sample indexes\n        s = np.array([sample_map[u] for u in sample])\n        # Build a map of genotypes by position\n        full_genotypes = {}\n        for variant in ts.variants(isolated_as_missing=False):\n            alleles = 
[variant.alleles[g] for g in variant.genotypes]\n            full_genotypes[variant.position] = alleles\n        for variant in subset.variants(isolated_as_missing=False):\n            if variant.position in full_genotypes:\n                a1 = [full_genotypes[variant.position][u] for u in s]\n                a2 = [variant.alleles[g] for g in variant.genotypes]\n                assert a1 == a2\n\n    def verify_tables_api_equality(self, ts):\n        for samples in [None, list(ts.samples()), ts.samples()]:\n            tables = ts.dump_tables()\n            tables.simplify(samples=samples)\n            tables.assert_equals(\n                ts.simplify(samples=samples).dump_tables(),\n                ignore_timestamps=True,\n            )\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_simplify_tables_equality(self, ts):\n        # Can't simplify edges with metadata\n        if ts.tables.edges.metadata_schema == tskit.MetadataSchema(schema=None):\n            self.verify_tables_api_equality(ts)\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_simplify_provenance(self, ts):\n        # Can't simplify edges with metadata\n        if ts.tables.edges.metadata_schema == tskit.MetadataSchema(schema=None):\n            self.verify_simplify_provenance(ts)\n\n    # TODO this test needs to be broken up into discrete bits, so that we can\n    # test them independently. 
A way of getting a random-ish subset of samples\n    # from the pytest param would be useful.\n    @pytest.mark.slow\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_simplify(self, ts):\n        # Can't simplify edges with metadata\n        if ts.tables.edges.metadata_schema == tskit.MetadataSchema(schema=None):\n            n = ts.num_samples\n            sample_sizes = {0}\n            if n > 1:\n                sample_sizes |= {1}\n            if n > 2:\n                sample_sizes |= {2, max(2, n // 2), n - 1}\n            for k in sample_sizes:\n                subset = random.sample(list(ts.samples()), k)\n                self.verify_simplify_topology(ts, subset)\n                self.verify_simplify_equality(ts, subset)\n                self.verify_simplify_variants(ts, subset)\n\n    def test_simplify_bugs(self):\n        prefix = os.path.join(os.path.dirname(__file__), \"data\", \"simplify-bugs\")\n        j = 1\n        while True:\n            nodes_file = os.path.join(prefix, f\"{j:02d}-nodes.txt\")\n            if not os.path.exists(nodes_file):\n                break\n            edges_file = os.path.join(prefix, f\"{j:02d}-edges.txt\")\n            sites_file = os.path.join(prefix, f\"{j:02d}-sites.txt\")\n            mutations_file = os.path.join(prefix, f\"{j:02d}-mutations.txt\")\n            with (\n                open(nodes_file) as nodes,\n                open(edges_file) as edges,\n                open(sites_file) as sites,\n                open(mutations_file) as mutations,\n            ):\n                ts = tskit.load_text(\n                    nodes=nodes,\n                    edges=edges,\n                    sites=sites,\n                    mutations=mutations,\n                    strict=False,\n                )\n            samples = list(ts.samples())\n            self.verify_simplify_equality(ts, samples)\n            j += 1\n        assert j > 1\n\n    def 
test_simplify_migrations_fails(self):\n        ts = msprime.simulate(\n            population_configurations=[\n                msprime.PopulationConfiguration(10),\n                msprime.PopulationConfiguration(10),\n            ],\n            migration_matrix=[[0, 1], [1, 0]],\n            random_seed=2,\n            record_migrations=True,\n        )\n        assert ts.num_migrations > 0\n        # We don't support simplify with migrations, so should fail.\n        with pytest.raises(_tskit.LibraryError):\n            ts.simplify()\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_no_update_sample_flags_no_filter_nodes(self, ts):\n        # Can't simplify edges with metadata\n        if ts.tables.edges.metadata_schema == tskit.MetadataSchema(schema=None):\n            k = min(ts.num_samples, 3)\n            subset = ts.samples()[:k]\n            ts1 = ts.simplify(subset)\n            ts2 = ts.simplify(subset, update_sample_flags=False, filter_nodes=False)\n            assert ts1.num_samples == len(subset)\n            assert ts2.num_samples == ts.num_samples\n            assert ts1.num_edges == ts2.num_edges\n            assert ts2.tables.nodes == ts.tables.nodes\n\n\nclass TestMinMaxTime:\n    def get_example_tree_sequence(self, use_unknown_time):\n        \"\"\"\n        Min time is set to 0.1.\n        Max time is set to 2.0.\n        \"\"\"\n        tables = tskit.TableCollection(sequence_length=2)\n        tables.nodes.add_row(flags=1, time=0.1)\n        tables.nodes.add_row(flags=1, time=0.1)\n        tables.nodes.add_row(flags=1, time=0.1)\n        tables.nodes.add_row(flags=0, time=1)\n        tables.nodes.add_row(flags=0, time=2)\n        tables.edges.add_row(left=0, right=2, parent=3, child=0)\n        tables.edges.add_row(left=0, right=2, parent=3, child=1)\n        tables.edges.add_row(left=0, right=2, parent=4, child=2)\n        tables.edges.add_row(left=0, right=2, parent=4, child=3)\n        
tables.sites.add_row(position=0, ancestral_state=\"0\")\n        tables.sites.add_row(position=1, ancestral_state=\"0\")\n        if use_unknown_time:\n            tables.mutations.add_row(\n                site=0, node=2, derived_state=\"1\", time=tskit.UNKNOWN_TIME\n            )\n            tables.mutations.add_row(\n                site=1, node=3, derived_state=\"1\", time=tskit.UNKNOWN_TIME\n            )\n        else:\n            tables.mutations.add_row(site=0, node=2, derived_state=\"1\", time=0.5)\n            tables.mutations.add_row(site=1, node=3, derived_state=\"1\", time=1.5)\n        ts = tables.tree_sequence()\n        return ts\n\n    def get_empty_tree_sequence(self):\n        \"\"\"\n        Min time is initialised to positive infinity.\n        Max time is initialised to negative infinity.\n        \"\"\"\n        tables = tskit.TableCollection(sequence_length=2)\n        ts = tables.tree_sequence()\n        return ts\n\n    def test_example(self):\n        ts = self.get_example_tree_sequence(use_unknown_time=False)\n        expected_min_time = min(ts.nodes_time.min(), ts.mutations_time.min())\n        expected_max_time = max(ts.nodes_time.max(), ts.mutations_time.max())\n        assert ts.min_time == expected_min_time\n        assert ts.max_time == expected_max_time\n\n    def test_example_unknown_mutation_times(self):\n        ts = self.get_example_tree_sequence(use_unknown_time=True)\n        expected_min_time = ts.nodes_time.min()\n        expected_max_time = ts.nodes_time.max()\n        assert ts.min_time == expected_min_time\n        assert ts.max_time == expected_max_time\n\n    def test_empty(self):\n        ts = self.get_empty_tree_sequence()\n        assert ts.min_time == np.inf\n        assert ts.max_time == -np.inf\n\n\nclass TestSiteAlleles:\n    def test_no_mutations(self):\n        tables = tskit.TableCollection(sequence_length=1)\n        tables.sites.add_row(0, ancestral_state=\"\")\n        site = 
tables.tree_sequence().site(0)\n        assert site.alleles == {\"\"}\n\n    @pytest.mark.parametrize(\"k\", range(5))\n    def test_k_mutations(self, k):\n        tables = tskit.TableCollection(sequence_length=1)\n        tables.sites.add_row(0, ancestral_state=\"ABC\")\n        tables.nodes.add_row(1, 0)\n        tables.nodes.add_row(1, 0)  # will not have any mutations => missing\n        for j in range(k):\n            tables.mutations.add_row(site=0, node=0, derived_state=str(j))\n        tables.build_index()\n        tables.compute_mutation_parents()\n        ts = tables.tree_sequence()\n        variant = next(ts.variants())\n        assert variant.has_missing_data\n        assert len(variant.site.alleles) == k + 1\n        assert \"ABC\" in variant.site.alleles\n        assert variant.site.alleles == set(variant.alleles[:-1])\n\n\nclass TestEdgeDiffs:\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_correct_trees_forward(self, ts):\n        parent = np.full(ts.num_nodes + 1, tskit.NULL, dtype=np.int32)\n        for edge_diff, tree in itertools.zip_longest(ts.edge_diffs(), ts.trees()):\n            assert edge_diff.interval == tree.interval\n            for edge in edge_diff.edges_out:\n                parent[edge.child] = tskit.NULL\n            for edge in edge_diff.edges_in:\n                parent[edge.child] = edge.parent\n            assert_array_equal(parent, tree.parent_array)\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_correct_trees_reverse(self, ts):\n        parent = np.full(ts.num_nodes + 1, tskit.NULL, dtype=np.int32)\n        iterator = itertools.zip_longest(\n            ts.edge_diffs(direction=tskit.REVERSE), reversed(ts.trees())\n        )\n        for edge_diff, tree in iterator:\n            assert edge_diff.interval == tree.interval\n            for edge in edge_diff.edges_out:\n                parent[edge.child] = tskit.NULL\n            for edge in 
edge_diff.edges_in:\n                parent[edge.child] = edge.parent\n            assert_array_equal(parent, tree.parent_array)\n\n    def test_elements_are_like_named_tuple(self, simple_degree2_ts_fixture):\n        for val in simple_degree2_ts_fixture.edge_diffs():\n            assert len(val) == 3\n            assert val[0] == val.interval\n            assert val[1] == val.edges_out\n            assert val[2] == val.edges_in\n\n    @pytest.mark.parametrize(\"direction\", [-6, \"forward\", None])\n    def test_bad_direction(self, direction, simple_degree2_ts_fixture):\n        ts = simple_degree2_ts_fixture\n        with pytest.raises(ValueError, match=\"direction must be\"):\n            ts.edge_diffs(direction=direction)\n\n    @pytest.mark.parametrize(\"direction\", [tskit.FORWARD, tskit.REVERSE])\n    def test_edge_properties(self, direction, simple_degree2_ts_fixture):\n        ts = simple_degree2_ts_fixture\n        edge_ids = set()\n        for _, e_out, e_in in ts.edge_diffs(direction=direction):\n            for edge in e_in:\n                assert edge.id not in edge_ids\n                edge_ids.add(edge.id)\n                assert ts.edge(edge.id) == edge\n            for edge in e_out:\n                assert ts.edge(edge.id) == edge\n        assert edge_ids == set(range(ts.num_edges))\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    @pytest.mark.parametrize(\"direction\", [tskit.FORWARD, tskit.REVERSE])\n    def test_include_terminal(self, ts, direction):\n        edges = set()\n        i = 0\n        diffs = ts.edge_diffs(include_terminal=True, direction=direction)\n        parent = np.full(ts.num_nodes + 1, tskit.NULL, dtype=np.int32)\n        for (left, right), e_out, e_in in diffs:  # noqa: B007\n            for e in e_out:\n                edges.remove(e.id)\n                parent[e.child] = tskit.NULL\n            for e in e_in:\n                edges.add(e.id)\n                parent[e.child] = e.parent\n   
         i += 1\n        assert np.all(parent == tskit.NULL)\n        assert i == ts.num_trees + 1\n        assert len(edges) == 0\n        # On last iteration, interval is empty\n        if direction == tskit.FORWARD:\n            assert left == ts.sequence_length\n            assert right == ts.sequence_length\n        else:\n            assert left == 0\n            assert right == 0\n\n\nclass TestTreeSequenceMethodSignatures:\n    ts = msprime.simulate(10, random_seed=1234)\n\n    def test_kwargs_only(self):\n        with pytest.raises(TypeError, match=\"argument\"):\n            tskit.Tree(self.ts, [], True)\n        with pytest.raises(TypeError, match=\"argument\"):\n            self.ts.trees([], True)\n        with pytest.raises(TypeError, match=\"argument\"):\n            self.ts.haplotypes(True)\n        with pytest.raises(TypeError, match=\"argument\"):\n            self.ts.variants(True)\n        with pytest.raises(TypeError, match=\"argument\"):\n            self.ts.genotype_matrix(True)\n        with pytest.raises(TypeError, match=\"argument\"):\n            self.ts.simplify([], True)\n        with pytest.raises(TypeError, match=\"argument\"):\n            self.ts.draw_svg(\"filename\", True)\n        with pytest.raises(TypeError, match=\"argument\"):\n            tskit.TreeSequence.load_tables(tskit.TableCollection(1), True)\n\n    def test_trees_params(self):\n        \"\"\"\n        The initial .trees() iterator parameters should match those in Tree.__init__()\n        \"\"\"\n        tree_class_params = list(inspect.signature(tskit.Tree).parameters.items())\n        trees_iter_params = list(\n            inspect.signature(tskit.TreeSequence.trees).parameters.items()\n        )\n        # Skip the first param, which is `tree_sequence` and `self` respectively\n        tree_class_params = tree_class_params[1:]\n        # The trees iterator has some extra (deprecated) aliases\n        trees_iter_params = trees_iter_params[1:-3]\n        assert 
trees_iter_params == tree_class_params\n\n\nclass TestTreeSequenceMetadata:\n    metadata_tables = [\n        \"node\",\n        \"edge\",\n        \"site\",\n        \"mutation\",\n        \"migration\",\n        \"individual\",\n        \"population\",\n    ]\n    metadata_schema = tskit.MetadataSchema(\n        {\n            \"codec\": \"json\",\n            \"title\": \"Example Metadata\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"table\": {\"type\": \"string\"},\n                \"string_prop\": {\"type\": \"string\"},\n                \"num_prop\": {\"type\": \"number\"},\n            },\n            \"required\": [\"table\", \"string_prop\", \"num_prop\"],\n            \"additionalProperties\": False,\n        },\n    )\n\n    def test_tree_sequence_metadata_schema(self):\n        tc = tskit.TableCollection(1)\n        ts = tc.tree_sequence()\n        assert repr(ts.metadata_schema) == repr(tskit.MetadataSchema(None))\n        tc.metadata_schema = self.metadata_schema\n        ts = tc.tree_sequence()\n        assert repr(ts.metadata_schema) == repr(self.metadata_schema)\n        with pytest.raises(AttributeError):\n            del ts.metadata_schema\n        with pytest.raises(AttributeError):\n            ts.metadata_schema = tskit.MetadataSchema(None)\n\n    def test_tree_sequence_metadata(self):\n        tc = tskit.TableCollection(1)\n        ts = tc.tree_sequence()\n        assert ts.metadata == b\"\"\n        tc.metadata_schema = self.metadata_schema\n        data = {\n            \"table\": \"tree-sequence\",\n            \"string_prop\": \"stringy\",\n            \"num_prop\": 42,\n        }\n        tc.metadata = data\n        ts = tc.tree_sequence()\n        assert ts.metadata == data\n        with pytest.raises(AttributeError):\n            ts.metadata = {\"should\": \"fail\"}\n        with pytest.raises(AttributeError):\n            del ts.metadata\n\n    def test_tree_sequence_time_units(self):\n        
tc = tskit.TableCollection(1)\n        ts = tc.tree_sequence()\n        assert ts.time_units == tskit.TIME_UNITS_UNKNOWN\n        tc.time_units = \"something else\"\n        ts = tc.tree_sequence()\n        assert ts.time_units == \"something else\"\n        with pytest.raises(AttributeError):\n            del ts.time_units\n        with pytest.raises(AttributeError):\n            ts.time_units = \"readonly\"\n        assert tskit.TIME_UNITS_UNKNOWN == \"unknown\"\n        assert tskit.TIME_UNITS_UNCALIBRATED == \"uncalibrated\"\n\n    def test_table_metadata_schemas(self):\n        ts = msprime.simulate(5)\n        for table in self.metadata_tables:\n            tables = ts.dump_tables()\n            # Set and read back a unique schema for each table\n            schema = tskit.MetadataSchema({\"codec\": \"json\", \"TEST\": f\"{table}-SCHEMA\"})\n            # Check via table API\n            getattr(tables, f\"{table}s\").metadata_schema = schema\n            assert repr(getattr(tables, f\"{table}s\").metadata_schema) == repr(schema)\n            for other_table in self.metadata_tables:\n                if other_table != table:\n                    assert repr(getattr(tables, f\"{other_table}s\").metadata_schema) == \"\"\n            # Check via tree-sequence API\n            new_ts = tskit.TreeSequence.load_tables(tables)\n            assert repr(getattr(new_ts.table_metadata_schemas, table)) == repr(schema)\n            for other_table in self.metadata_tables:\n                if other_table != table:\n                    assert (\n                        repr(getattr(new_ts.table_metadata_schemas, other_table)) == \"\"\n                    )\n            # Can't set schema via this API\n            with pytest.raises(AttributeError):\n                new_ts.table_metadata_schemas = {}\n                # or modify the schema tuple return object\n                with pytest.raises(dataclasses.exceptions.FrozenInstanceError):\n                    setattr(\n       
                 new_ts.table_metadata_schemas,\n                        table,\n                        tskit.MetadataSchema({\"codec\": \"json\"}),\n                    )\n\n    def test_table_metadata_round_trip_via_row_getters(self):\n        # A tree sequence with all entities\n        pop_configs = [msprime.PopulationConfiguration(5) for _ in range(2)]\n        migration_matrix = [[0, 1], [1, 0]]\n        ts = msprime.simulate(\n            population_configurations=pop_configs,\n            migration_matrix=migration_matrix,\n            mutation_rate=1,\n            record_migrations=True,\n            random_seed=1,\n        )\n        tables = ts.dump_tables()\n        tables.individuals.add_row(location=[1, 2, 3])\n        tables.individuals.add_row(location=[4, 5, 6])\n        ts = tables.tree_sequence()\n\n        for table in self.metadata_tables:\n            new_tables = ts.dump_tables()\n            tables_copy = ts.dump_tables()\n            table_obj = getattr(new_tables, f\"{table}s\")\n            table_obj.metadata_schema = self.metadata_schema\n            table_obj.clear()\n            # Write back the rows, but adding unique metadata\n            for j, row in enumerate(getattr(tables_copy, f\"{table}s\")):\n                row_data = dataclasses.asdict(row)\n                row_data[\"metadata\"] = {\n                    \"table\": table,\n                    \"string_prop\": f\"Row number{j}\",\n                    \"num_prop\": j,\n                }\n                table_obj.add_row(**row_data)\n            new_ts = new_tables.tree_sequence()\n            # Check that all tables have data otherwise we'll silently not check one\n            assert getattr(new_ts, f\"num_{table}s\") > 0\n            assert getattr(new_ts, f\"num_{table}s\") == getattr(ts, f\"num_{table}s\")\n            for j, row in enumerate(getattr(new_ts, f\"{table}s\")()):\n                assert row.metadata == {\n                    \"table\": table,\n              
      \"string_prop\": f\"Row number{row.id}\",\n                    \"num_prop\": row.id,\n                }\n                assert getattr(new_ts, f\"{table}\")(j).metadata == {\n                    \"table\": table,\n                    \"string_prop\": f\"Row number{row.id}\",\n                    \"num_prop\": row.id,\n                }\n\n\ndef test_pickle_round_trip(ts_fixture):\n    for protocol in range(pickle.HIGHEST_PROTOCOL + 1):\n        ts = pickle.loads(pickle.dumps(ts_fixture, protocol=protocol))\n        assert ts.tables == ts_fixture.tables\n        # Do some thing to check the ts is init'd properly\n        ts.draw_text()\n\n\nclass TestFileUuid(HighLevelTestCase):\n    \"\"\"\n    Tests that the file UUID attribute is handled correctly.\n    \"\"\"\n\n    def validate(self, ts):\n        with tempfile.TemporaryDirectory() as tempdir:\n            temp_file = pathlib.Path(tempdir) / \"tmp.trees\"\n            assert ts.file_uuid is None\n            ts.dump(temp_file)\n            other_ts = tskit.load(temp_file)\n            assert other_ts.file_uuid is not None\n            assert len(other_ts.file_uuid), 36\n            uuid = other_ts.file_uuid\n            other_ts = tskit.load(temp_file)\n            assert other_ts.file_uuid == uuid\n            assert ts.tables == other_ts.tables\n\n            # Check that the UUID is well-formed.\n            parsed = _uuid.UUID(\"{\" + uuid + \"}\")\n            assert str(parsed) == uuid\n\n            # Save the same tree sequence to the file. 
We should get a different UUID.\n            ts.dump(temp_file)\n            other_ts = tskit.load(temp_file)\n            assert other_ts.file_uuid is not None\n            assert other_ts.file_uuid != uuid\n\n            # Even saving a ts that has a UUID to another file changes the UUID\n            old_uuid = other_ts.file_uuid\n            other_ts.dump(temp_file)\n            assert other_ts.file_uuid == old_uuid\n            other_ts = tskit.load(temp_file)\n            assert other_ts.file_uuid is not None\n            assert other_ts.file_uuid != old_uuid\n\n            # Tables dumped from this ts are a deep copy, so they don't have\n            # the file_uuid.\n            tables = other_ts.dump_tables()\n            assert tables.file_uuid is None\n\n            # For now, ts.tables also returns a deep copy. This will hopefully\n            # change in the future though.\n            assert ts.tables.file_uuid is None\n\n    def test_simple_simulation(self):\n        ts = msprime.simulate(2, random_seed=1)\n        self.validate(ts)\n\n    def test_empty_tables(self):\n        tables = tskit.TableCollection(1)\n        self.validate(tables.tree_sequence())\n\n\nclass TestTreeSequenceTextIO(HighLevelTestCase):\n    \"\"\"\n    Tests for the tree sequence text IO.\n    \"\"\"\n\n    def verify_nodes_format(self, ts, nodes_file, precision, base64_metadata):\n        \"\"\"\n        Verifies that the nodes we output have the correct form.\n        \"\"\"\n\n        def convert(v):\n            return \"{:.{}f}\".format(v, precision)\n\n        output_nodes = nodes_file.read().splitlines()\n        assert len(output_nodes) - 1 == ts.num_nodes\n        assert list(output_nodes[0].split()) == [\n            \"id\",\n            \"is_sample\",\n            \"time\",\n            \"population\",\n            \"individual\",\n            \"metadata\",\n        ]\n        for node, line in zip(ts.nodes(), output_nodes[1:]):\n            splits = 
line.split(\"\\t\")\n            assert str(node.id) == splits[0]\n            assert str(node.is_sample()) == splits[1]\n            assert convert(node.time) == splits[2]\n            assert str(node.population) == splits[3]\n            assert str(node.individual) == splits[4]\n            if isinstance(node.metadata, bytes) and base64_metadata:\n                assert tests.base64_encode(node.metadata) == splits[5]\n            else:\n                assert repr(node.metadata) == splits[5]\n\n    def verify_edges_format(self, ts, edges_file, precision, base64_metadata):\n        \"\"\"\n        Verifies that the edges we output have the correct form.\n        \"\"\"\n\n        def convert(v):\n            return \"{:.{}f}\".format(v, precision)\n\n        output_edges = edges_file.read().splitlines()\n        assert len(output_edges) - 1 == ts.num_edges\n        assert list(output_edges[0].split()) == [\n            \"left\",\n            \"right\",\n            \"parent\",\n            \"child\",\n            \"metadata\",\n        ]\n        for edge, line in zip(ts.edges(), output_edges[1:]):\n            splits = line.split(\"\\t\")\n            assert convert(edge.left) == splits[0]\n            assert convert(edge.right) == splits[1]\n            assert str(edge.parent) == splits[2]\n            assert str(edge.child) == splits[3]\n            if isinstance(edge.metadata, bytes) and base64_metadata:\n                assert tests.base64_encode(edge.metadata) == splits[4]\n            else:\n                assert repr(edge.metadata) == splits[4]\n\n    def verify_sites_format(self, ts, sites_file, precision, base64_metadata):\n        \"\"\"\n        Verifies that the sites we output have the correct form.\n        \"\"\"\n\n        def convert(v):\n            return \"{:.{}f}\".format(v, precision)\n\n        output_sites = sites_file.read().splitlines()\n        assert len(output_sites) - 1 == ts.num_sites\n        assert list(output_sites[0].split()) 
== [\n            \"position\",\n            \"ancestral_state\",\n            \"metadata\",\n        ]\n        for site, line in zip(ts.sites(), output_sites[1:]):\n            splits = line.split(\"\\t\")\n            assert convert(site.position) == splits[0]\n            assert site.ancestral_state == splits[1]\n            if isinstance(site.metadata, bytes) and base64_metadata:\n                assert tests.base64_encode(site.metadata) == splits[2]\n            else:\n                assert repr(site.metadata) == splits[2]\n\n    def verify_mutations_format(self, ts, mutations_file, precision, base64_metadata):\n        \"\"\"\n        Verifies that the mutations we output have the correct form.\n        \"\"\"\n\n        def convert(v):\n            return \"{:.{}f}\".format(v, precision)\n\n        output_mutations = mutations_file.read().splitlines()\n        assert len(output_mutations) - 1 == ts.num_mutations\n        assert list(output_mutations[0].split()) == [\n            \"site\",\n            \"node\",\n            \"time\",\n            \"derived_state\",\n            \"parent\",\n            \"metadata\",\n        ]\n        mutations = [mut for site in ts.sites() for mut in site.mutations]\n        for mutation, line in zip(mutations, output_mutations[1:]):\n            splits = line.split(\"\\t\")\n            assert str(mutation.site) == splits[0]\n            assert str(mutation.node) == splits[1]\n            assert (\n                \"unknown\" if util.is_unknown_time(mutation.time) else str(mutation.time)\n            ) == splits[2]\n            assert str(mutation.derived_state) == splits[3]\n            assert str(mutation.parent) == splits[4]\n            if isinstance(mutation.metadata, bytes) and base64_metadata:\n                assert tests.base64_encode(mutation.metadata) == splits[5]\n            else:\n                assert repr(mutation.metadata) == splits[5]\n\n    def verify_individuals_format(\n        self, ts, 
individuals_file, precision, base64_metadata\n    ):\n        \"\"\"\n        Verifies that the individuals we output have the correct form.\n        \"\"\"\n\n        def convert(v):\n            return \"{:.{}f}\".format(v, precision)\n\n        output_individuals = individuals_file.read().splitlines()\n        assert len(output_individuals) - 1 == ts.num_individuals\n        assert list(output_individuals[0].split()) == [\n            \"id\",\n            \"flags\",\n            \"location\",\n            \"parents\",\n            \"metadata\",\n        ]\n        for individual, line in zip(ts.individuals(), output_individuals[1:]):\n            splits = line.split(\"\\t\")\n            assert str(individual.id) == splits[0]\n            assert str(individual.flags) == splits[1]\n            assert \",\".join(map(str, individual.location)) == splits[2]\n            assert \",\".join(map(str, individual.parents)) == splits[3]\n            if isinstance(individual.metadata, bytes) and base64_metadata:\n                assert tests.base64_encode(individual.metadata) == splits[4]\n            else:\n                assert repr(individual.metadata) == splits[4]\n\n    def verify_populations_format(\n        self, ts, populations_file, precision, base64_metadata\n    ):\n        \"\"\"\n        Verifies that the populations we output have the correct form.\n        \"\"\"\n\n        def convert(v):\n            return \"{:.{}f}\".format(v, precision)\n\n        output_populations = populations_file.read().splitlines()\n        assert len(output_populations) - 1 == ts.num_populations\n        assert list(output_populations[0].split()) == [\n            \"id\",\n            \"metadata\",\n        ]\n        for population, line in zip(ts.populations(), output_populations[1:]):\n            splits = line.split(\"\\t\")\n            assert str(population.id) == splits[0]\n            if isinstance(population.metadata, bytes) and base64_metadata:\n                assert 
tests.base64_encode(population.metadata) == splits[1]\n            else:\n                assert repr(population.metadata) == splits[1]\n\n    def verify_migrations_format(self, ts, migrations_file, precision, base64_metadata):\n        \"\"\"\n        Verifies that the migrations we output have the correct form.\n        \"\"\"\n\n        def convert(v):\n            return \"{:.{}f}\".format(v, precision)\n\n        output_migrations = migrations_file.read().splitlines()\n        assert len(output_migrations) - 1 == ts.num_migrations\n        assert list(output_migrations[0].split()) == [\n            \"left\",\n            \"right\",\n            \"node\",\n            \"source\",\n            \"dest\",\n            \"time\",\n            \"metadata\",\n        ]\n        for migration, line in zip(ts.migrations(), output_migrations[1:]):\n            splits = line.split(\"\\t\")\n            assert str(migration.left) == splits[0]\n            assert str(migration.right) == splits[1]\n            assert str(migration.node) == splits[2]\n            assert str(migration.source) == splits[3]\n            assert str(migration.dest) == splits[4]\n            assert str(migration.time) == splits[5]\n            if isinstance(migration.metadata, bytes) and base64_metadata:\n                assert tests.base64_encode(migration.metadata) == splits[6]\n            else:\n                assert repr(migration.metadata) == splits[6]\n\n    @pytest.mark.parametrize((\"precision\", \"base64_metadata\"), [(2, True), (7, False)])\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_output_format(self, precision, base64_metadata, ts):\n        nodes_file = io.StringIO()\n        edges_file = io.StringIO()\n        sites_file = io.StringIO()\n        mutations_file = io.StringIO()\n        individuals_file = io.StringIO()\n        populations_file = io.StringIO()\n        migrations_file = io.StringIO()\n        provenances_file = 
io.StringIO()\n        ts.dump_text(\n            nodes=nodes_file,\n            edges=edges_file,\n            sites=sites_file,\n            mutations=mutations_file,\n            individuals=individuals_file,\n            populations=populations_file,\n            migrations=migrations_file,\n            provenances=provenances_file,\n            precision=precision,\n            base64_metadata=base64_metadata,\n        )\n        nodes_file.seek(0)\n        edges_file.seek(0)\n        sites_file.seek(0)\n        mutations_file.seek(0)\n        individuals_file.seek(0)\n        populations_file.seek(0)\n        migrations_file.seek(0)\n        self.verify_nodes_format(ts, nodes_file, precision, base64_metadata)\n        self.verify_edges_format(ts, edges_file, precision, base64_metadata)\n        self.verify_sites_format(ts, sites_file, precision, base64_metadata)\n        self.verify_mutations_format(ts, mutations_file, precision, base64_metadata)\n        self.verify_individuals_format(ts, individuals_file, precision, base64_metadata)\n        self.verify_populations_format(ts, populations_file, precision, base64_metadata)\n        self.verify_migrations_format(ts, migrations_file, precision, base64_metadata)\n\n    def verify_approximate_equality(self, ts1, ts2):\n        \"\"\"\n        Verifies that the specified tree sequences are approximately\n        equal, taking into account the error incurred in exporting to text.\n        \"\"\"\n        assert ts1.sample_size == ts2.sample_size\n        assert ts1.sequence_length == ts2.sequence_length\n        assert ts1.num_nodes == ts2.num_nodes\n        assert ts1.num_edges == ts2.num_edges\n        assert ts1.num_sites == ts2.num_sites\n        assert ts1.num_mutations == ts2.num_mutations\n        assert ts1.num_populations == ts2.num_populations\n        assert ts1.num_migrations == ts2.num_migrations\n\n        checked = 0\n        for n1, n2 in zip(ts1.nodes(), ts2.nodes()):\n            assert 
n1.population == n2.population\n            assert n1.metadata == n2.metadata\n            assert n1.time == pytest.approx(n2.time)\n            checked += 1\n        assert checked == ts1.num_nodes\n\n        checked = 0\n        for r1, r2 in zip(ts1.edges(), ts2.edges()):\n            checked += 1\n            assert r1.left == pytest.approx(r2.left)\n            assert r1.right == pytest.approx(r2.right)\n            assert r1.parent == r2.parent\n            assert r1.child == r2.child\n        assert ts1.num_edges == checked\n\n        checked = 0\n        for s1, s2 in zip(ts1.sites(), ts2.sites()):\n            checked += 1\n            assert s1.position == pytest.approx(s2.position)\n            assert s1.ancestral_state == s2.ancestral_state\n            assert s1.metadata == s2.metadata\n            assert s1.mutations == s2.mutations\n        assert ts1.num_sites == checked\n\n        checked = 0\n        for s1, s2 in zip(ts1.mutations(), ts2.mutations()):\n            checked += 1\n            assert s1.site == s2.site\n            assert s1.node == s2.node\n            if not (math.isnan(s1.time) and math.isnan(s2.time)):\n                assert s1.time == pytest.approx(s2.time)\n            assert s1.derived_state == s2.derived_state\n            assert s1.parent == s2.parent\n            assert s1.metadata == s2.metadata\n        assert ts1.num_mutations == checked\n\n        checked = 0\n        for s1, s2 in zip(ts1.migrations(), ts2.migrations()):\n            checked += 1\n            assert s1.left == s2.left\n            assert s1.right == s2.right\n            assert s1.node == s2.node\n            assert s1.source == s2.source\n            assert s1.dest == s2.dest\n            assert s1.time == s2.time\n            assert s1.metadata == s2.metadata\n        assert ts1.num_migrations == checked\n\n        # Check the trees\n        check = 0\n        for t1, t2 in zip(ts1.trees(), ts2.trees()):\n            assert list(t1.nodes()) == 
list(t2.nodes())\n            check += 1\n        assert check == ts1.get_num_trees()\n\n    @pytest.mark.parametrize(\"ts1\", tsutil.get_example_tree_sequences())\n    def test_text_record_round_trip(self, ts1):\n        # Can't round trip without the schema\n        if ts1.tables.nodes.metadata_schema == tskit.MetadataSchema(None):\n            nodes_file = io.StringIO()\n            edges_file = io.StringIO()\n            sites_file = io.StringIO()\n            mutations_file = io.StringIO()\n            individuals_file = io.StringIO()\n            populations_file = io.StringIO()\n            migrations_file = io.StringIO()\n            ts1.dump_text(\n                nodes=nodes_file,\n                edges=edges_file,\n                sites=sites_file,\n                mutations=mutations_file,\n                individuals=individuals_file,\n                populations=populations_file,\n                migrations=migrations_file,\n                precision=16,\n            )\n            nodes_file.seek(0)\n            edges_file.seek(0)\n            sites_file.seek(0)\n            mutations_file.seek(0)\n            individuals_file.seek(0)\n            populations_file.seek(0)\n            migrations_file.seek(0)\n            ts2 = tskit.load_text(\n                nodes=nodes_file,\n                edges=edges_file,\n                sites=sites_file,\n                mutations=mutations_file,\n                individuals=individuals_file,\n                populations=populations_file,\n                migrations=migrations_file,\n                sequence_length=ts1.sequence_length,\n                strict=True,\n            )\n            tables1 = ts1.tables.copy()\n            # load_text performs a `sort`, which changes the order relative to\n            # the original tree sequence\n            tables1.sort()\n            ts1_sorted = tables1.tree_sequence()\n            self.verify_approximate_equality(ts1_sorted, ts2)\n\n    def 
test_empty_files(self):\n        nodes_file = io.StringIO(\"is_sample\\ttime\\n\")\n        edges_file = io.StringIO(\"left\\tright\\tparent\\tchild\\n\")\n        sites_file = io.StringIO(\"position\\tancestral_state\\n\")\n        mutations_file = io.StringIO(\"site\\tnode\\tderived_state\\n\")\n        individuals_file = io.StringIO(\"flags\\n\")\n        migrations_file = io.StringIO(\"left\\tright\\tnode\\tsource\\tdest\\ttime\\n\")\n        with pytest.raises(_tskit.LibraryError):\n            tskit.load_text(\n                nodes=nodes_file,\n                edges=edges_file,\n                sites=sites_file,\n                mutations=mutations_file,\n                individuals=individuals_file,\n                migrations=migrations_file,\n            )\n\n    def test_empty_files_sequence_length(self):\n        nodes_file = io.StringIO(\"is_sample\\ttime\\n\")\n        edges_file = io.StringIO(\"left\\tright\\tparent\\tchild\\n\")\n        sites_file = io.StringIO(\"position\\tancestral_state\\n\")\n        mutations_file = io.StringIO(\"site\\tnode\\tderived_state\\n\")\n        individuals_file = io.StringIO(\"flags\\n\")\n        migrations_file = io.StringIO(\"left\\tright\\tnode\\tsource\\tdest\\ttime\\n\")\n        ts = tskit.load_text(\n            nodes=nodes_file,\n            edges=edges_file,\n            sites=sites_file,\n            mutations=mutations_file,\n            individuals=individuals_file,\n            migrations=migrations_file,\n            sequence_length=100,\n        )\n        assert ts.sequence_length == 100\n        assert ts.num_nodes == 0\n        assert ts.num_edges == 0\n        assert ts.num_sites == 0\n        assert ts.num_mutations == 0\n        assert ts.num_individuals == 0\n        assert ts.num_migrations == 0\n\n    def test_load_text_no_populations(self):\n        nodes_file = io.StringIO(\"is_sample\\ttime\\tpopulation\\n1\\t0\\t2\\n\")\n        edges_file = 
io.StringIO(\"left\\tright\\tparent\\tchild\\n\")\n        ts = tskit.load_text(nodes_file, edges_file, sequence_length=100)\n        assert ts.num_nodes == 1\n        assert ts.num_populations == 3\n\n    def test_load_text_populations(self):\n        nodes_file = io.StringIO(\"is_sample\\ttime\\tpopulation\\n\")\n        edges_file = io.StringIO(\"left\\tright\\tparent\\tchild\\n\")\n        populations_file = io.StringIO(\"metadata\\nmetadata_1\\nmetadata_2\\n\")\n        ts = tskit.load_text(\n            nodes_file,\n            edges_file,\n            populations=populations_file,\n            sequence_length=100,\n            base64_metadata=False,\n        )\n        assert ts.num_populations == 2\n        assert ts.tables.populations[0].metadata == b\"metadata_1\"\n        assert ts.tables.populations[1].metadata == b\"metadata_2\"\n\n\nclass TestTree(HighLevelTestCase):\n    \"\"\"\n    Some simple tests on the tree API.\n    \"\"\"\n\n    def get_tree(self, sample_lists=False):\n        ts = msprime.simulate(10, random_seed=1, mutation_rate=1, record_full_arg=True)\n        return next(ts.trees(sample_lists=sample_lists))\n\n    def verify_mutations(self, tree):\n        assert tree.num_mutations > 0\n        other_mutations = []\n        for site in tree.sites():\n            for mutation in site.mutations:\n                other_mutations.append(mutation)\n        mutations = list(tree.mutations())\n        assert tree.num_mutations == len(other_mutations)\n        assert tree.num_mutations == len(mutations)\n        for mut, other_mut in zip(mutations, other_mutations):\n            assert mut == other_mut\n\n    def test_simple_mutations(self):\n        tree = self.get_tree()\n        self.verify_mutations(tree)\n\n    def test_complex_mutations(self):\n        ts = tsutil.insert_branch_mutations(msprime.simulate(10, random_seed=1))\n        self.verify_mutations(ts.first())\n\n    def test_str(self, ts_fixture):\n        t = ts_fixture.first()\n    
    assert isinstance(str(t), str)\n        pattern = re.compile(\n            r\"\"\"\n            ╔═+╗\\s*\n            ║Tree.*?║\\s*\n            ╠═+╤═+╣\\s*\n            ║Index.*?│\\s*[\\d\\u2009,]+║\\s*\n            ╟─+┼─+╢\\s*\n            ║Interval.*?│\\s*[\\d\\u2009,]+-[\\d\\u2009,]+\\s*\\([\\d\\u2009,]+\\)║\\s*\n            ╟─+┼─+╢\\s*\n            ║Roots.*?│\\s*[\\d\\u2009,]+║\\s*\n            ╟─+┼─+╢\\s*\n            ║Nodes.*?│\\s*[\\d\\u2009,]+║\\s*\n            ╟─+┼─+╢\\s*\n            ║Sites.*?│\\s*[\\d\\u2009,]+║\\s*\n            ╟─+┼─+╢\\s*\n            ║Mutations.*?│\\s*[\\d\\u2009,]+║\\s*\n            ╟─+┼─+╢\\s*\n            ║Total\\s*Branch\\s*Length.*?│\\s*[\\d\\u2009,]+\\.\\d+║\\s*\n            ╚═+╧═+╝\\s*\n            \"\"\",\n            re.VERBOSE | re.DOTALL,\n        )\n        assert pattern.search(str(t))\n\n    def test_html_repr(self, ts_fixture):\n        html = ts_fixture.first()._repr_html_()\n        # Parse to check valid\n        ElementTree.fromstring(html)\n        assert len(html) > 1900\n        assert \"<tr><td>Total Branch Length</td><td>\" in html\n\n    def test_samples(self):\n        for sample_lists in [True, False]:\n            t = self.get_tree(sample_lists)\n            n = t.get_sample_size()\n            all_samples = list(t.samples(t.get_root()))\n            assert sorted(all_samples) == list(range(n))\n            for j in range(n):\n                assert list(t.samples(j)) == [j]\n\n            def test_func(t, u):\n                \"\"\"\n                Simple test definition of the traversal.\n                \"\"\"\n                stack = [u]\n                while len(stack) > 0:\n                    v = stack.pop()\n                    if t.is_sample(v):\n                        yield v\n                    if t.is_internal(v):\n                        for c in reversed(t.get_children(v)):\n                            stack.append(c)\n\n            for u in t.nodes():\n                l1 = 
list(t.samples(u))\n                l2 = list(test_func(t, u))\n                assert l1 == l2\n                assert t.get_num_samples(u) == len(l1)\n\n    def test_num_children(self):\n        tree = self.get_tree()\n        for u in tree.nodes():\n            assert tree.num_children(u) == len(tree.children(u))\n\n    def test_ancestors(self):\n        tree = tskit.Tree.generate_balanced(10, arity=3)\n        ancestors_arrays = {u: [] for u in np.arange(tree.tree_sequence.num_nodes)}\n        ancestors_arrays[-1] = []\n        for u in tree.nodes(order=\"preorder\"):\n            parent = tree.parent(u)\n            if parent != tskit.NULL:\n                ancestors_arrays[u] = [parent] + ancestors_arrays[tree.parent(u)]\n        for u in tree.nodes():\n            assert list(tree.ancestors(u)) == ancestors_arrays[u]\n\n    def test_ancestors_empty(self):\n        ts = tskit.Tree.generate_comb(10).tree_sequence\n        tree = ts.delete_intervals([[0, 1]]).first()\n        for u in ts.samples():\n            assert len(list(tree.ancestors(u))) == 0\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_virtual_root_semantics(self, ts):\n        for tree in ts.trees():\n            assert math.isinf(tree.time(tree.virtual_root))\n            assert tree.depth(tree.virtual_root) == -1\n            assert tree.parent(tree.virtual_root) == -1\n            assert list(tree.children(tree.virtual_root)) == tree.roots\n            with pytest.raises(tskit.LibraryError, match=\"bounds\"):\n                tree.population(tree.virtual_root)\n\n    def test_root_properties(self):\n        tested = set()\n        for ts in tsutil.get_example_tree_sequences(pytest_params=False):\n            for tree in ts.trees():\n                if tree.has_single_root:\n                    tested.add(\"single\")\n                    assert tree.num_roots == 1\n                    assert tree.num_roots == 1\n                    assert tree.root != 
tskit.NULL\n                elif tree.has_multiple_roots:\n                    tested.add(\"multiple\")\n                    assert tree.num_roots > 1\n                    with pytest.raises(ValueError, match=\"More than one root exists\"):\n                        _ = tree.root\n                else:\n                    tested.add(\"zero\")\n                    assert tree.num_roots == 0\n                    assert tree.root == tskit.NULL\n        assert len(tested) == 3\n\n    def test_as_dict_of_dicts(self):\n        for ts in tsutil.get_example_tree_sequences(pytest_params=False):\n            tree = next(ts.trees())\n            adj_dod = tree.as_dict_of_dicts()\n            g = nx.DiGraph(adj_dod)\n\n            self.verify_nx_graph_topology(tree, g)\n            self.verify_nx_algorithm_equivalence(tree, g)\n            self.verify_nx_for_tutorial_algorithms(tree, g)\n        self.verify_nx_nearest_neighbor_search()\n\n    def verify_nx_graph_topology(self, tree, g):\n        assert set(tree.nodes()) == set(g.nodes)\n\n        assert set(tree.roots) == {n for n in g.nodes if g.in_degree(n) == 0}\n\n        assert set(tree.leaves()) == {n for n in g.nodes if g.out_degree(n) == 0}\n\n        # test if tree has no in-degrees > 1\n        if len(g) > 0:\n            assert nx.is_branching(g)\n\n    def verify_nx_algorithm_equivalence(self, tree, g):\n        for root in tree.roots:\n            assert nx.is_directed_acyclic_graph(g)\n\n            # test descendants\n            assert {u for u in tree.nodes() if tree.is_descendant(u, root)} == set(\n                nx.descendants(g, root)\n            ) | {root}\n\n            # test MRCA\n            if tree.tree_sequence.num_nodes < 20:\n                for u, v in itertools.combinations(tree.nodes(), 2):\n                    mrca = nx.lowest_common_ancestor(g, u, v)\n                    if mrca is None:\n                        mrca = -1\n                    assert tree.mrca(u, v) == mrca\n\n            # 
test node traversal modes\n            assert list(tree.nodes(root=root, order=\"breadthfirst\")) == [root] + [\n                v for u, v in nx.bfs_edges(g, root)\n            ]\n            assert list(tree.nodes(root=root, order=\"preorder\")) == list(\n                nx.dfs_preorder_nodes(g, root)\n            )\n\n    def verify_nx_for_tutorial_algorithms(self, tree, g):\n        # traversing upwards\n        for u in tree.leaves():\n            path = []\n            v = u\n            while v != tskit.NULL:\n                path.append(v)\n                v = tree.parent(v)\n\n            assert set(path) == {u} | nx.ancestors(g, u)\n            assert path == [u] + [\n                n1 for n1, n2, _ in nx.edge_dfs(g, u, orientation=\"reverse\")\n            ]\n\n        # traversals with information\n        def preorder_dist(tree, root):\n            stack = [(root, 0)]\n            while len(stack) > 0:\n                u, distance = stack.pop()\n                yield u, distance\n                for v in tree.children(u):\n                    stack.append((v, distance + 1))\n\n        for root in tree.roots:\n            assert {\n                k: v for k, v in preorder_dist(tree, root)\n            } == nx.shortest_path_length(g, source=root)\n\n        for root in tree.roots:\n            # new traversal: measuring time between root and MRCA\n            for u, v in itertools.combinations(nx.descendants(g, root), 2):\n                mrca = tree.mrca(u, v)\n                tmrca = tree.time(mrca)\n                assert tree.time(root) - tmrca == pytest.approx(\n                    nx.shortest_path_length(\n                        g, source=root, target=mrca, weight=\"branch_length\"\n                    )\n                )\n\n    def verify_nx_nearest_neighbor_search(self):\n        samples = [\n            msprime.Sample(0, 0),\n            msprime.Sample(0, 1),\n            msprime.Sample(0, 20),\n        ]\n        ts = msprime.simulate(\n    
        Ne=1e6,\n            samples=samples,\n            demographic_events=[\n                msprime.PopulationParametersChange(\n                    time=10, growth_rate=2, population_id=0\n                ),\n            ],\n            random_seed=42,\n        )\n\n        tree = ts.first()\n        g = nx.Graph(tree.as_dict_of_dicts())\n\n        dist_dod = collections.defaultdict(dict)\n        for source, target in itertools.combinations(tree.samples(), 2):\n            dist_dod[source][target] = nx.shortest_path_length(\n                g, source=source, target=target, weight=\"branch_length\"\n            )\n            dist_dod[target][source] = dist_dod[source][target]\n\n        nearest_neighbor_of = [min(dist_dod[u], key=dist_dod[u].get) for u in range(3)]\n        assert [2, 2, 1] == [nearest_neighbor_of[u] for u in range(3)]\n\n    def test_total_branch_length(self):\n        # Note: this definition works when we have no non-sample branches.\n        t1 = self.get_tree()\n        bl = 0\n        root = t1.get_root()\n        for node in t1.nodes():\n            if node != root:\n                bl += t1.get_branch_length(node)\n        assert bl > 0\n        assert t1.get_total_branch_length() == pytest.approx(bl)\n\n    def test_branch_length_empty_tree(self):\n        tables = tskit.TableCollection(1)\n        tables.nodes.add_row(flags=1, time=0)\n        tables.nodes.add_row(flags=1, time=0)\n        ts = tables.tree_sequence()\n        assert ts.num_trees == 1\n        tree = ts.first()\n        assert tree.branch_length(0) == 0\n        assert tree.branch_length(1) == 0\n        assert tree.total_branch_length == 0\n\n    @pytest.mark.parametrize(\"r_threshold\", [0, -1])\n    def test_bad_val_root_threshold(self, r_threshold):\n        with pytest.raises(ValueError, match=\"greater than 0\"):\n            tskit.Tree.generate_balanced(2, root_threshold=r_threshold)\n\n    @pytest.mark.parametrize(\"r_threshold\", [None, 0.5, 1.5, np.inf])\n  
  def test_bad_type_root_threshold(self, r_threshold):\n        with pytest.raises(TypeError):\n            tskit.Tree.generate_balanced(2, root_threshold=r_threshold)\n\n    def test_simple_root_threshold(self):\n        tree = tskit.Tree.generate_balanced(3, root_threshold=3)\n        assert tree.num_roots == 1\n        tree = tskit.Tree.generate_balanced(3, root_threshold=4)\n        assert tree.num_roots == 0\n\n    @pytest.mark.parametrize(\"root_threshold\", [1, 2, 3])\n    def test_is_root(self, root_threshold):\n        # Make a tree with multiple roots with different numbers of samples under each\n        ts = tskit.Tree.generate_balanced(5).tree_sequence\n        ts = ts.decapitate(ts.max_root_time - 0.1)\n        tables = ts.dump_tables()\n        tables.nodes.add_row(flags=0)  # Isolated non-sample\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE)  # Isolated sample\n        ts = tables.tree_sequence()\n        assert {ts.first().num_samples(u) for u in ts.first().roots} == {1, 2, 3}\n        tree = ts.first(root_threshold=root_threshold)\n        roots = set(tree.roots)\n        for u in range(ts.num_nodes):  # Will also test isolated nodes\n            assert tree.is_root(u) == (u in roots)\n\n    def test_is_descendant(self):\n        def is_descendant(tree, u, v):\n            path = []\n            while u != tskit.NULL:\n                path.append(u)\n                u = tree.parent(u)\n            return v in path\n\n        tree = self.get_tree()\n        for u, v in itertools.product(range(tree.tree_sequence.num_nodes), repeat=2):\n            assert is_descendant(tree, u, v) == tree.is_descendant(u, v)\n        # All nodes are descendents of themselves\n        for u in range(tree.tree_sequence.num_nodes + 1):\n            assert tree.is_descendant(u, u)\n        for bad_node in [-1, -2, tree.tree_sequence.num_nodes + 1]:\n            with pytest.raises(ValueError):\n                tree.is_descendant(0, bad_node)\n            with 
pytest.raises(ValueError):\n                tree.is_descendant(bad_node, 0)\n            with pytest.raises(ValueError):\n                tree.is_descendant(bad_node, bad_node)\n\n    def test_apis(self):\n        # tree properties\n        t1 = self.get_tree()\n        assert t1.get_root() == t1.root\n        assert t1.get_index() == t1.index\n        assert t1.get_interval() == t1.interval\n        assert t1.get_sample_size() == t1.sample_size\n        assert t1.get_num_mutations() == t1.num_mutations\n        assert t1.get_parent_dict() == t1.parent_dict\n        assert t1.get_total_branch_length() == t1.total_branch_length\n        assert t1.span == t1.interval.right - t1.interval.left\n        assert t1.mid == t1.interval.left + (t1.interval.right - t1.interval.left) / 2\n        # node properties\n        root = t1.get_root()\n        for node in t1.nodes():\n            if node != root:\n                assert t1.get_time(node) == t1.time(node)\n                assert t1.get_parent(node) == t1.parent(node)\n                assert t1.get_children(node) == t1.children(node)\n                assert t1.get_population(node) == t1.population(node)\n                assert t1.get_num_samples(node) == t1.num_samples(node)\n                assert t1.get_branch_length(node) == t1.branch_length(node)\n                assert t1.get_num_tracked_samples(node) == t1.num_tracked_samples(node)\n\n        pairs = itertools.islice(itertools.combinations(t1.nodes(), 2), 50)\n        for pair in pairs:\n            assert t1.get_mrca(*pair) == t1.mrca(*pair)\n            assert t1.get_tmrca(*pair) == t1.tmrca(*pair)\n\n    @pytest.mark.filterwarnings(\"ignore::FutureWarning\")\n    def test_deprecated_apis(self):\n        t1 = self.get_tree()\n        assert t1.get_length() == t1.span\n        assert t1.length == t1.span\n        assert t1.num_nodes == t1.tree_sequence.num_nodes\n\n    def test_deprecated_api_warnings(self):\n        # Deprecated and will be removed\n        t1 = 
self.get_tree()\n        with pytest.warns(FutureWarning, match=\"Tree.tree_sequence.num_nodes\"):\n            _ = t1.num_nodes\n\n    @pytest.mark.parametrize(\"skip\", [False, True])\n    def test_seek_index(self, skip):\n        ts = msprime.simulate(10, recombination_rate=3, length=5, random_seed=42)\n        N = ts.num_trees\n        assert ts.num_trees > 3\n        tree = tskit.Tree(ts)\n        for index in [0, N // 2, N - 1, 1]:\n            fresh_tree = tskit.Tree(ts)\n            assert fresh_tree.index == -1\n            fresh_tree.seek_index(index)\n            assert fresh_tree.index == index\n            tree.seek_index(index, skip)\n            assert_trees_equivalent(fresh_tree, tree)\n\n        tree = tskit.Tree(ts)\n        for index in [-1, -2, -N + 2, -N + 1, -N]:\n            fresh_tree = tskit.Tree(ts)\n            assert fresh_tree.index == -1\n            fresh_tree.seek_index(index)\n            tree.seek_index(index, skip)\n            assert fresh_tree.index == index + N\n            assert tree.index == index + N\n            assert_trees_equivalent(fresh_tree, tree)\n\n    def test_seek_index_errors(self):\n        tree = self.get_tree()\n        N = tree.tree_sequence.num_trees\n        with pytest.raises(IndexError):\n            tree.seek_index(N)\n        with pytest.raises(IndexError):\n            tree.seek_index(N + 1)\n        with pytest.raises(IndexError):\n            tree.seek_index(-N - 1)\n        with pytest.raises(IndexError):\n            tree.seek_index(-N - 2)\n\n    def test_first_last(self):\n        ts = msprime.simulate(10, recombination_rate=3, length=2, random_seed=42)\n        assert ts.num_trees > 3\n        tree = tskit.Tree(ts)\n        tree.first()\n        assert tree.index == 0\n        tree = tskit.Tree(ts)\n        tree.last()\n        assert tree.index == ts.num_trees - 1\n        tree = tskit.Tree(ts)\n        for _ in range(3):\n            tree.last()\n            assert tree.index == ts.num_trees 
- 1\n            tree.first()\n            assert tree.index == 0\n\n    def test_eq_different_tree_sequence(self):\n        ts = msprime.simulate(4, recombination_rate=1, length=2, random_seed=42)\n        copy = ts.dump_tables().tree_sequence()\n        for tree1, tree2 in zip(ts.aslist(), copy.aslist()):\n            assert tree1 != tree2\n\n    def test_next_prev(self):\n        ts = msprime.simulate(10, recombination_rate=3, length=3, random_seed=42)\n        assert ts.num_trees > 5\n        for index, tree in enumerate(ts.aslist()):\n            assert tree.index == index\n            j = index\n            while tree.next():\n                j += 1\n                assert tree.index == j\n            assert tree.index == -1\n            assert j + 1 == ts.num_trees\n        for index, tree in enumerate(ts.aslist()):\n            assert tree.index == index\n            j = index\n            while tree.prev():\n                j -= 1\n                assert tree.index == j\n            assert tree.index == -1\n            assert j == 0\n        tree.first()\n        tree.prev()\n        assert tree.index == -1\n        tree.last()\n        tree.next()\n        assert tree.index == -1\n\n    def test_interval(self):\n        ts = msprime.simulate(10, recombination_rate=1, random_seed=1)\n        assert ts.num_trees > 1\n        breakpoints = list(ts.breakpoints())\n        assert breakpoints[0] == 0\n        assert breakpoints[-1] == ts.sequence_length\n        for i, tree in enumerate(ts.trees()):\n            assert tree.interval.left == pytest.approx(breakpoints[i])\n            assert tree.interval.left == pytest.approx(breakpoints[i])\n            assert tree.interval.right == pytest.approx(breakpoints[i + 1])\n            assert tree.interval.right == pytest.approx(breakpoints[i + 1])\n            assert tree.interval.span == pytest.approx(\n                breakpoints[i + 1] - breakpoints[i]\n            )\n            assert tree.interval.mid == 
pytest.approx(\n                breakpoints[i] + (breakpoints[i + 1] - breakpoints[i]) / 2\n            )\n\n    def verify_tree_arrays(self, tree):\n        ts = tree.tree_sequence\n        N = ts.num_nodes + 1\n        assert tree.parent_array.shape == (N,)\n        assert tree.left_child_array.shape == (N,)\n        assert tree.right_child_array.shape == (N,)\n        assert tree.left_sib_array.shape == (N,)\n        assert tree.right_sib_array.shape == (N,)\n        assert tree.num_children_array.shape == (N,)\n        assert tree.edge_array.shape == (N,)\n        for u in range(N):\n            assert tree.parent(u) == tree.parent_array[u]\n            assert tree.left_child(u) == tree.left_child_array[u]\n            assert tree.right_child(u) == tree.right_child_array[u]\n            assert tree.left_sib(u) == tree.left_sib_array[u]\n            assert tree.right_sib(u) == tree.right_sib_array[u]\n            assert tree.num_children(u) == tree.num_children_array[u]\n            assert tree.edge(u) == tree.edge_array[u]\n\n    def verify_tree_arrays_python_ts(self, ts):\n        pts = tests.PythonTreeSequence(ts)\n        iter1 = ts.trees()\n        iter2 = pts.trees()\n        for st1, st2 in zip(iter1, iter2):\n            assert np.all(st1.parent_array == st2.parent)\n            assert np.all(st1.left_child_array == st2.left_child)\n            assert np.all(st1.right_child_array == st2.right_child)\n            assert np.all(st1.left_sib_array == st2.left_sib)\n            assert np.all(st1.right_sib_array == st2.right_sib)\n            assert np.all(st1.num_children_array == st2.num_children)\n            assert np.all(st1.edge_array == st2.edge)\n\n    def test_tree_arrays(self):\n        ts = msprime.simulate(10, recombination_rate=1, random_seed=1)\n        assert ts.num_trees > 1\n        self.verify_tree_arrays_python_ts(ts)\n        for tree in ts.trees():\n            self.verify_tree_arrays(tree)\n\n    @pytest.mark.parametrize(\n        
\"array\",\n        [\n            \"parent\",\n            \"left_child\",\n            \"right_child\",\n            \"left_sib\",\n            \"right_sib\",\n            \"num_children\",\n            \"edge\",\n        ],\n    )\n    def test_tree_array_properties(self, array):\n        name = array + \"_array\"\n        ts = msprime.simulate(10, random_seed=1)\n        tree = ts.first()\n        a = getattr(tree, name)\n        assert getattr(tree, name) is a\n        assert a.base is tree._ll_tree\n        with pytest.raises(AttributeError):\n            setattr(tree, name, None)\n        with pytest.raises(AttributeError):\n            delattr(tree, name)\n\n    def verify_empty_tree(self, tree):\n        ts = tree.tree_sequence\n        assert tree.index == -1\n        assert tree.parent_dict == {}\n        for u in range(ts.num_nodes):\n            assert tree.parent(u) == tskit.NULL\n            assert tree.left_child(u) == tskit.NULL\n            assert tree.right_child(u) == tskit.NULL\n            assert tree.num_children(u) == 0\n            assert tree.edge(u) == tskit.NULL\n            if not ts.node(u).is_sample():\n                assert tree.left_sib(u) == tskit.NULL\n                assert tree.right_sib(u) == tskit.NULL\n        # Samples should have left-sib right-sibs set\n        samples = ts.samples()\n        assert tree.left_root == samples[0]\n        for j in range(ts.num_samples):\n            if j > 0:\n                assert tree.left_sib(samples[j]) == samples[j - 1]\n            if j < ts.num_samples - 1:\n                assert tree.right_sib(samples[j]) == samples[j + 1]\n        self.verify_tree_arrays(tree)\n\n    def test_empty_tree(self):\n        ts = msprime.simulate(10, recombination_rate=3, length=3, random_seed=42)\n        assert ts.num_trees > 5\n        tree = tskit.Tree(ts)\n        self.verify_empty_tree(tree)\n        while tree.next():\n            pass\n        self.verify_empty_tree(tree)\n        while 
tree.prev():\n            pass\n        self.verify_empty_tree(tree)\n\n    def test_clear(self):\n        ts = msprime.simulate(10, recombination_rate=3, length=3, random_seed=42)\n        assert ts.num_trees > 5\n        tree = tskit.Tree(ts)\n        tree.first()\n        tree.clear()\n        self.verify_empty_tree(tree)\n        tree.last()\n        tree.clear()\n        self.verify_empty_tree(tree)\n        tree.seek_index(ts.num_trees // 2)\n        tree.clear()\n        self.verify_empty_tree(tree)\n\n    def verify_trees_identical(self, t1, t2):\n        assert t1.tree_sequence is t2.tree_sequence\n        assert np.all(t1.parent_array == t2.parent_array)\n        assert np.all(t1.left_child_array == t2.left_child_array)\n        assert np.all(t1.right_child_array == t2.right_child_array)\n        assert np.all(t1.left_sib_array == t2.left_sib_array)\n        assert np.all(t1.right_sib_array == t2.right_sib_array)\n        assert np.all(t1.num_children_array == t2.num_children_array)\n        assert np.all(t1.edge_array == t2.edge_array)\n        assert list(t1.sites()) == list(t2.sites())\n\n    def test_copy_seek(self):\n        ts = msprime.simulate(10, recombination_rate=3, length=3, random_seed=42)\n        assert ts.num_trees > 5\n        tree = tskit.Tree(ts)\n        copy = tree.copy()\n        self.verify_empty_tree(copy)\n        while tree.next():\n            copy = tree.copy()\n            self.verify_trees_identical(tree, copy)\n        while tree.prev():\n            copy = tree.copy()\n            self.verify_trees_identical(tree, copy)\n        tree.clear()\n        copy = tree.copy()\n        tree.first()\n        # Make sure the underlying arrays are different\n        assert np.any(tree.parent_array != copy.parent_array)\n        copy.first()\n        while tree.index != -1:\n            self.verify_trees_identical(tree, copy)\n            assert tree.next() == copy.next()\n        tree.last()\n        copy.last()\n        while 
tree.index != -1:\n            self.verify_trees_identical(tree, copy)\n            assert tree.prev() == copy.prev()\n        # Seek to middle and two independent trees.\n        tree.seek_index(ts.num_trees // 2)\n        left_copy = tree.copy()\n        right_copy = tree.copy()\n        self.verify_trees_identical(tree, left_copy)\n        self.verify_trees_identical(tree, right_copy)\n        left_copy.prev()\n        assert left_copy.index == tree.index - 1\n        right_copy.next()\n        assert right_copy.index == tree.index + 1\n\n    def test_copy_tracked_samples(self):\n        ts = msprime.simulate(10, recombination_rate=2, length=3, random_seed=42)\n        tree = tskit.Tree(ts, tracked_samples=[0, 1])\n        while tree.next():\n            copy = tree.copy()\n            for j in range(ts.num_nodes):\n                assert tree.num_tracked_samples(j) == copy.num_tracked_samples(j)\n        copy = tree.copy()\n        while tree.next():\n            copy.next()\n            for j in range(ts.num_nodes):\n                assert tree.num_tracked_samples(j) == copy.num_tracked_samples(j)\n\n    def test_copy_multiple_roots(self):\n        ts = msprime.simulate(20, recombination_rate=2, length=3, random_seed=42)\n        ts = ts.decapitate(np.max(ts.tables.nodes.time) / 2)\n        for root_threshold in [1, 2, 100]:\n            tree = tskit.Tree(ts, root_threshold=root_threshold)\n            copy = tree.copy()\n            assert copy.roots == tree.roots\n            assert copy.root_threshold == root_threshold\n            while tree.next():\n                copy = tree.copy()\n                assert copy.roots == tree.roots\n                assert copy.root_threshold == root_threshold\n            copy = tree.copy()\n            assert copy.roots == tree.roots\n            assert copy.root_threshold == root_threshold\n\n    def test_map_mutations(self):\n        ts = msprime.simulate(5, random_seed=42)\n        tree = ts.first()\n        genotypes 
= np.zeros(5, dtype=np.int8)\n        alleles = [str(j) for j in range(64)]\n        ancestral_state, transitions = tree.map_mutations(genotypes, alleles)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 0\n        for j in range(1, 64):\n            genotypes[0] = j\n            ancestral_state, transitions = tree.map_mutations(genotypes, alleles)\n            assert ancestral_state == \"0\"\n            assert len(transitions) == 1\n        for j in range(64, 67):\n            genotypes[0] = j\n            with pytest.raises(ValueError):\n                tree.map_mutations(genotypes, alleles)\n        tree.map_mutations([0] * 5, alleles)\n        tree.map_mutations(np.zeros(5, dtype=int), alleles)\n\n    def test_sample_count_deprecated(self):\n        ts = msprime.simulate(5, random_seed=42)\n        with warnings.catch_warnings(record=True) as w:\n            ts.trees(sample_counts=True)\n            assert len(w) == 1\n            assert issubclass(w[0].category, RuntimeWarning)\n\n        with warnings.catch_warnings(record=True) as w:\n            tskit.Tree(ts, sample_counts=False)\n            assert len(w) == 1\n            assert issubclass(w[0].category, RuntimeWarning)\n\n    def test_node_edges(self):\n        ts = msprime.simulate(5, recombination_rate=1, random_seed=42)\n        assert ts.num_trees > 2\n        edge_table = ts.tables.edges\n        for tree in ts.trees():\n            nodes = set(tree.nodes())\n            midpoint = sum(tree.interval) / 2\n            # mapping = tree._node_edges()\n            mapping = tree.edge_array\n            for node, edge in enumerate(mapping):\n                if node in nodes and tree.parent(node) != tskit.NULL:\n                    edge_above_node = np.where(\n                        np.logical_and.reduce(\n                            (\n                                edge_table.child == node,\n                                edge_table.left < midpoint,\n                     
           edge_table.right > midpoint,\n                            )\n                        )\n                    )[0]\n                    assert len(edge_above_node) == 1\n                    assert edge_above_node[0] == edge\n                else:\n                    assert edge == tskit.NULL\n\n\nclass TestSiblings:\n    def test_balanced_binary_tree(self):\n        t = tskit.Tree.generate_balanced(num_leaves=3)\n        assert t.has_single_root\n        # Nodes 0 to 2 are leaves\n        for u in range(2):\n            assert t.is_leaf(u)\n        assert t.siblings(0) == (3,)\n        assert t.siblings(1) == (2,)\n        assert t.siblings(2) == (1,)\n        # Node 3 is the internal node\n        assert t.is_internal(3)\n        assert t.siblings(3) == (0,)\n        # Node 4 is the root\n        assert 4 == t.root\n        assert t.siblings(4) == tuple()\n        # Node 5 is the virtual root\n        assert 5 == t.virtual_root\n        assert t.siblings(5) == tuple()\n\n    def test_star(self):\n        t = tskit.Tree.generate_star(num_leaves=3)\n        assert t.has_single_root\n        # Nodes 0 to 2 are leaves\n        for u in range(2):\n            assert t.is_leaf(u)\n        assert t.siblings(0) == (1, 2)\n        assert t.siblings(1) == (0, 2)\n        assert t.siblings(2) == (0, 1)\n        # Node 3 is the root\n        assert 3 == t.root\n        assert t.siblings(3) == tuple()\n        # Node 4 is the virtual root\n        assert 4 == t.virtual_root\n        assert t.siblings(4) == tuple()\n\n    def test_multiroot_tree(self):\n        ts = tskit.Tree.generate_balanced(4, arity=2).tree_sequence\n        t = ts.decapitate(ts.node(5).time).first()\n        assert t.has_multiple_roots\n        # Nodes 0 to 3 are leaves\n        assert t.siblings(0) == (1,)\n        assert t.siblings(1) == (0,)\n        assert t.siblings(2) == (3,)\n        assert t.siblings(3) == (2,)\n        # Nodes 4 and 5 are both roots\n        assert 4 in t.roots\n        
assert t.siblings(4) == (5,)\n        assert 5 in t.roots\n        assert t.siblings(5) == (4,)\n        # Node 7 is the virtual root\n        assert 7 == t.virtual_root\n        assert t.siblings(7) == tuple()\n\n    @pytest.mark.parametrize(\"flag,expected\", [(0, ()), (1, (2,))])\n    def test_isolated_node(self, flag, expected):\n        tables = tskit.Tree.generate_balanced(2, arity=2).tree_sequence.dump_tables()\n        tables.nodes.add_row(flags=flag)  # Add node 3\n        t = tables.tree_sequence().first()\n        assert t.is_isolated(3)\n        assert t.siblings(3) == expected\n\n\nclass TestNodeOrdering(HighLevelTestCase):\n    \"\"\"\n    Verify that we can use any node ordering for internal nodes\n    and get the same topologies.\n    \"\"\"\n\n    num_random_permutations = 10\n\n    def verify_tree_sequences_equal(self, ts1, ts2, approximate=False):\n        assert ts1.get_num_trees() == ts2.get_num_trees()\n        assert ts1.get_sample_size() == ts2.get_sample_size()\n        assert ts1.get_num_nodes() == ts2.get_num_nodes()\n        j = 0\n        for r1, r2 in zip(ts1.edges(), ts2.edges()):\n            assert r1.parent == r2.parent\n            assert r1.child == r2.child\n            if approximate:\n                assert r1.left == pytest.approx(r2.left)\n                assert r1.right == pytest.approx(r2.right)\n            else:\n                assert r1.left == r2.left\n                assert r1.right == r2.right\n            j += 1\n        assert ts1.num_edges == j\n        j = 0\n        for n1, n2 in zip(ts1.nodes(), ts2.nodes()):\n            assert n1.metadata == n2.metadata\n            assert n1.population == n2.population\n            if approximate:\n                assert n1.time == pytest.approx(n2.time)\n            else:\n                assert n1.time == n2.time\n            j += 1\n        assert ts1.num_nodes == j\n\n    def verify_random_permutation(self, ts):\n        n = ts.sample_size\n        node_map = {}\n       
 for j in range(n):\n            node_map[j] = j\n        internal_nodes = list(range(n, ts.num_nodes))\n        random.shuffle(internal_nodes)\n        for j, node in enumerate(internal_nodes):\n            node_map[n + j] = node\n        other_tables = tskit.TableCollection(ts.sequence_length)\n        # Insert the new nodes into the table.\n        inv_node_map = {v: k for k, v in node_map.items()}\n        for j in range(ts.num_nodes):\n            node = ts.node(inv_node_map[j])\n            other_tables.nodes.append(node)\n        for e in ts.edges():\n            other_tables.edges.append(\n                e.replace(parent=node_map[e.parent], child=node_map[e.child])\n            )\n        for _ in range(ts.num_populations):\n            other_tables.populations.add_row()\n        other_tables.sort()\n        other_ts = other_tables.tree_sequence()\n\n        assert ts.get_num_trees() == other_ts.get_num_trees()\n        assert ts.get_sample_size() == other_ts.get_sample_size()\n        assert ts.get_num_nodes() == other_ts.get_num_nodes()\n        j = 0\n        for t1, t2 in zip(ts.trees(), other_ts.trees()):\n            # Verify the topologies are identical. 
We do this by traversing\n            # upwards to the root for every sample and checking if we map to\n            # the correct node and time.\n            for u in range(n):\n                v_orig = u\n                v_map = u\n                while v_orig != tskit.NULL:\n                    assert node_map[v_orig] == v_map\n                    assert t1.get_time(v_orig) == t2.get_time(v_map)\n                    v_orig = t1.get_parent(v_orig)\n                    v_map = t2.get_parent(v_map)\n                assert v_orig == tskit.NULL\n                assert v_map == tskit.NULL\n            j += 1\n        assert j == ts.get_num_trees()\n        # Verify we can dump this new tree sequence OK.\n        with tempfile.TemporaryDirectory() as tempdir:\n            temp_file = pathlib.Path(tempdir) / \"tmp.trees\"\n            other_ts.dump(temp_file)\n            ts3 = tskit.load(temp_file)\n        self.verify_tree_sequences_equal(other_ts, ts3)\n        nodes_file = io.StringIO()\n        edges_file = io.StringIO()\n        # Also verify we can read the text version.\n        other_ts.dump_text(nodes=nodes_file, edges=edges_file, precision=14)\n        nodes_file.seek(0)\n        edges_file.seek(0)\n        ts3 = tskit.load_text(nodes_file, edges_file)\n        self.verify_tree_sequences_equal(other_ts, ts3, True)\n\n    def test_single_locus(self):\n        ts = msprime.simulate(7)\n        for _ in range(self.num_random_permutations):\n            self.verify_random_permutation(ts)\n\n    def test_multi_locus(self):\n        ts = msprime.simulate(20, recombination_rate=10)\n        for _ in range(self.num_random_permutations):\n            self.verify_random_permutation(ts)\n\n    def test_nonbinary(self):\n        ts = msprime.simulate(\n            sample_size=20,\n            recombination_rate=10,\n            demographic_events=[\n                msprime.SimpleBottleneck(time=0.5, population=0, proportion=1)\n            ],\n        )\n        # Make 
sure this really has some non-binary nodes\n        found = False\n        for t in ts.trees():\n            for u in t.nodes():\n                if len(t.children(u)) > 2:\n                    found = True\n                    break\n            if found:\n                break\n        assert found\n        for _ in range(self.num_random_permutations):\n            self.verify_random_permutation(ts)\n\n\ndef assert_trees_identical(t1, t2):\n    assert t1.tree_sequence == t2.tree_sequence\n    assert t1.index == t2.index\n    assert_array_equal(t1.parent_array, t2.parent_array)\n    assert_array_equal(t1.left_child_array, t2.left_child_array)\n    assert_array_equal(t1.left_sib_array, t2.left_sib_array)\n    assert_array_equal(t1.right_child_array, t2.right_child_array)\n    assert_array_equal(t1.right_sib_array, t2.right_sib_array)\n\n\ndef assert_trees_equivalent(t1, t2):\n    assert t1.tree_sequence == t2.tree_sequence\n    assert t1.index == t2.index\n    assert_array_equal(t1.parent_array, t2.parent_array)\n    assert_array_equal(t1.edge_array, t2.edge_array)\n    for u in range(t1.tree_sequence.num_nodes):\n        # this isn't fully testing the data model, but that's done elsewhere\n        assert sorted(t1.children(u)) == sorted(t2.children(u))\n\n\ndef assert_same_tree_different_order(t1, t2):\n    assert t1.tree_sequence == t2.tree_sequence\n    assert t1.index == t2.index\n    assert np.all(t1.parent_array == t2.parent_array)\n    assert not np.all(t1.left_child_array == t2.left_child_array)\n\n\ndef seek(tree, x):\n    \"\"\"\n    Python implementation of the seek algorithm. 
Useful for developing\n    tests.\n    \"\"\"\n    L = tree.tree_sequence.sequence_length\n    t_l, t_r = tree.interval\n    if x < t_l:\n        # |-----|-----|========|---------|\n        # 0     x    t_l      t_r        L\n        distance_left = t_l - x\n        distance_right = L - t_r + x\n    else:\n        # |------|========|------|-------|\n        # 0     t_l      t_r     x       L\n        distance_right = x - t_r\n        distance_left = t_l + L - x\n    if distance_right <= distance_left:\n        while not (tree.interval.left <= x < tree.interval.right):\n            tree.next()\n    else:\n        while not (tree.interval.left <= x < tree.interval.right):\n            tree.prev()\n\n\nclass TestSeekDirection:\n    \"\"\"\n    Test if we seek in the correct direction according to our hueristics.\n    \"\"\"\n\n    # 2.00┊       ┊   4   ┊   4   ┊   4   ┊\n    #     ┊       ┊ ┏━┻┓  ┊  ┏┻━┓ ┊  ┏┻━┓ ┊\n    # 1.00┊   3   ┊ ┃  3  ┊  3  ┃ ┊  3  ┃ ┊\n    #     ┊ ┏━╋━┓ ┊ ┃ ┏┻┓ ┊ ┏┻┓ ┃ ┊ ┏┻┓ ┃ ┊\n    # 0.00┊ 0 1 2 ┊ 0 1 2 ┊ 0 2 1 ┊ 0 1 2 ┊\n    #     0       1       2       3       4\n    @tests.cached_example\n    def ts(self):\n        return tsutil.all_trees_ts(3)\n\n    def get_tree_pair(self):\n        ts = self.ts()\n        t1 = tskit.Tree(ts)\n        t2 = tskit.Tree(ts)\n        # # Note: for development we can monkeypatch in the Python implementation\n        # # above like this:\n        # import functools\n        # t2.seek = functools.partial(seek, t2)\n        return t1, t2\n\n    @pytest.mark.parametrize(\"index\", range(4))\n    def test_index_from_different_directions(self, index):\n        # Check that we get different orderings of the children arrays\n        # for all trees when we go in different directions.\n        t1, t2 = self.get_tree_pair()\n        while t1.index != index:\n            t1.next()\n        while t2.index != index:\n            t2.prev()\n        assert_same_tree_different_order(t1, t2)\n\n    
@pytest.mark.parametrize(\"position\", [0, 1, 2, 3])\n    def test_seek_from_null(self, position):\n        t1, t2 = self.get_tree_pair()\n        t1.clear()\n        t1.seek(position)\n        t2.first()\n        t2.seek(position, skip=False)\n        assert_trees_identical(t1, t2)\n\n    @pytest.mark.parametrize(\"position\", [0, 1, 2, 3])\n    def test_skip_from_null(self, position):\n        t1, t2 = self.get_tree_pair()\n        t1.clear()\n        t1.seek(position)\n        t2.first()\n        t2.seek(position, skip=True)\n        assert_trees_equivalent(t1, t2)\n\n    @pytest.mark.parametrize(\"index\", range(3))\n    @pytest.mark.parametrize(\"skip\", [False, True])\n    def test_seek_next_tree(self, index, skip):\n        t1, t2 = self.get_tree_pair()\n        while t1.index != index:\n            t1.next()\n            t2.next()\n        t1.next()\n        t2.seek(index + 1, skip=skip)\n        assert_trees_identical(t1, t2)\n\n    @pytest.mark.parametrize(\"index\", [3, 2, 1])\n    @pytest.mark.parametrize(\"skip\", [False, True])\n    def test_seek_prev_tree(self, index, skip):\n        t1, t2 = self.get_tree_pair()\n        while t1.index != index:\n            t1.prev()\n            t2.prev()\n        t1.prev()\n        t2.seek(index - 1, skip=skip)\n        assert_trees_identical(t1, t2)\n\n    @pytest.mark.parametrize(\"skip\", [False, True])\n    def test_seek_1_from_0(self, skip):\n        t1, t2 = self.get_tree_pair()\n        t1.first()\n        t1.next()\n        t2.first()\n        t2.seek(1, skip)\n        assert_trees_identical(t1, t2)\n\n    @pytest.mark.parametrize(\"skip\", [False, True])\n    def test_seek_1_5_from_0(self, skip):\n        t1, t2 = self.get_tree_pair()\n        t1.first()\n        t1.next()\n        t2.first()\n        t2.seek(1.5, skip)\n        assert_trees_identical(t1, t2)\n\n    @pytest.mark.parametrize(\"skip\", [False, True])\n    def test_seek_1_5_from_1(self, skip):\n        t1, t2 = self.get_tree_pair()\n        
for _ in range(2):\n            t1.next()\n            t2.next()\n        t2.seek(1.5, skip)\n        assert_trees_identical(t1, t2)\n\n    @pytest.mark.parametrize(\"skip\", [False, True])\n    def test_seek_3_from_null(self, skip):\n        t1, t2 = self.get_tree_pair()\n        t1.last()\n        t2.seek(3, skip)\n        assert_trees_identical(t1, t2)\n\n    @pytest.mark.parametrize(\"skip\", [False, True])\n    def test_seek_3_from_null_prev(self, skip):\n        t1, t2 = self.get_tree_pair()\n        t1.last()\n        t1.prev()\n        t2.seek(3, skip)\n        t2.prev()\n        assert_trees_identical(t1, t2)\n\n    def test_seek_3_from_0(self):\n        t1, t2 = self.get_tree_pair()\n        t1.last()\n        t2.first()\n        t2.seek(3)\n        assert_trees_identical(t1, t2)\n\n    def test_skip_3_from_0(self):\n        t1, t2 = self.get_tree_pair()\n        t1.last()\n        t2.first()\n        t2.seek(3, True)\n        assert_trees_equivalent(t1, t2)\n\n    def test_skip_0_from_3(self):\n        t1, t2 = self.get_tree_pair()\n        t1.last()\n        t1.first()\n        t2.last()\n        t2.seek(0, True)\n        assert_trees_equivalent(t1, t2)\n\n    def test_seek_0_from_3(self):\n        t1, t2 = self.get_tree_pair()\n        t1.last()\n        t1.first()\n        t2.last()\n        t2.seek(0)\n        assert_trees_identical(t1, t2)\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_seek_mid_null_and_middle(self, ts):\n        breakpoints = ts.breakpoints(as_array=True)\n        mid = breakpoints[:-1] + np.diff(breakpoints) / 2\n        for index, x in enumerate(mid[:-1]):\n            t1 = tskit.Tree(ts)\n            t1.seek(x)\n            # Also seek to this point manually to make sure we're not\n            # reusing the seek from null under the hood.\n            t2 = tskit.Tree(ts)\n            if index <= ts.num_trees / 2:\n                while t2.index != index:\n                    t2.next()\n 
           else:\n                while t2.index != index:\n                    t2.prev()\n            assert_trees_equivalent(t1, t2)\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_seek_skip_middle(self, ts):\n        breakpoints = ts.breakpoints(as_array=True)\n        mid = breakpoints[:-1] + np.diff(breakpoints) / 2\n        for _, x in enumerate(mid[:-1]):\n            t1 = tskit.Tree(ts)\n            t1.seek(x, skip=False)\n            t2 = tskit.Tree(ts)\n            t2.seek(x, skip=True)\n            assert_trees_equivalent(t1, t2)\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_seek_last_then_prev(self, ts):\n        t1 = tskit.Tree(ts)\n        t1.seek(ts.sequence_length - 0.00001)\n        assert t1.index == ts.num_trees - 1\n        t2 = tskit.Tree(ts)\n        t2.prev()\n        assert_trees_identical(t1, t2)\n        t1.prev()\n        t2.prev()\n        assert_trees_identical(t1, t2)\n\n\nclass TestSeek:\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_new_seek_breakpoints(self, ts):\n        breakpoints = ts.breakpoints(as_array=True)\n        for index, left in enumerate(breakpoints[:-1]):\n            tree = tskit.Tree(ts)\n            tree.seek(left)\n            assert tree.index == index\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_new_seek_mid(self, ts):\n        breakpoints = ts.breakpoints(as_array=True)\n        mid = breakpoints[:-1] + np.diff(breakpoints) / 2\n        for index, left in enumerate(mid[:-1]):\n            tree = tskit.Tree(ts)\n            tree.seek(left)\n            assert tree.index == index\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_same_seek_breakpoints(self, ts):\n        breakpoints = ts.breakpoints(as_array=True)\n        tree = tskit.Tree(ts)\n        for index, left in 
enumerate(breakpoints[:-1]):\n            tree.seek(left)\n            assert tree.index == index\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_new_seek_breakpoints_reversed(self, ts):\n        breakpoints = ts.breakpoints(as_array=True)\n        for index, left in reversed(list(enumerate(breakpoints[:-1]))):\n            tree = tskit.Tree(ts)\n            tree.seek(left)\n            assert tree.index == index\n\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_same_seek_breakpoints_reversed(self, ts):\n        breakpoints = ts.breakpoints(as_array=True)\n        tree = tskit.Tree(ts)\n        for index, left in reversed(list(enumerate(breakpoints[:-1]))):\n            tree.seek(left)\n            assert tree.index == index\n\n    def test_example(self):\n        L = 10\n        ts = msprime.simulate(10, recombination_rate=3, length=L, random_seed=42)\n        assert ts.num_trees > 5\n        same_tree = tskit.Tree(ts)\n        for j in range(L):\n            for tree in [same_tree, tskit.Tree(ts)]:\n                tree.seek(j)\n                index = tree.index\n                assert tree.interval.left <= j < tree.interval.right\n                tree.seek(tree.interval.left)\n                assert tree.index == index\n                if tree.interval.right < L:\n                    tree.seek(tree.interval.right)\n                    assert tree.index == index + 1\n        for j in reversed(range(L)):\n            for tree in [same_tree, tskit.Tree(ts)]:\n                tree.seek(j)\n                assert tree.interval.left <= j < tree.interval.right\n\n    def test_errors(self, ts_fixture):\n        L = ts_fixture.sequence_length\n        tree = tskit.Tree(ts_fixture)\n        for bad_position in [-1, L, L + 1, -L]:\n            with pytest.raises(ValueError):\n                tree.seek(bad_position)\n\n\nclass SimpleContainersMixin:\n    \"\"\"\n    Tests for the 
SimpleContainer classes.\n    \"\"\"\n\n    def test_equality(self):\n        c1, c2 = self.get_instances(2)\n        assert c1 == c1\n        assert not (c1 == c2)\n        assert not (c1 != c1)\n        assert c1 != c2\n        (c3,) = self.get_instances(1)\n        assert c1 == c3\n        assert not (c1 != c3)\n\n    def test_repr(self):\n        (c,) = self.get_instances(1)\n        assert len(repr(c)) > 0\n\n\nclass SimpleContainersWithMetadataMixin:\n    \"\"\"\n    Tests for the SimpleContainerWithMetadata classes.\n    \"\"\"\n\n    def test_metadata(self):\n        # Test decoding\n        instances = self.get_instances(5)\n        for j, inst in enumerate(instances):\n            assert inst.metadata == (\"x\" * j) + \"decoded\"\n\n        # Decoder doesn't affect equality\n        (inst,) = self.get_instances(1)\n        (inst2,) = self.get_instances(1)\n        assert inst == inst2\n        inst._metadata = \"different\"\n        assert inst != inst2\n\n    def test_decoder_run_once(self):\n        # For a given instance, the decoded metadata should be cached, with the decoder\n        # called once\n        (inst,) = self.get_instances(1)\n        times_run = 0\n\n        # Hack in a tracing decoder\n        def decoder(m):\n            nonlocal times_run\n            times_run += 1\n            return m.decode() + \"decoded\"\n\n        inst._metadata_decoder = decoder\n        assert times_run == 0\n        _ = inst.metadata\n        assert times_run == 1\n        _ = inst.metadata\n        assert times_run == 1\n\n\nclass TestIndividualContainer(SimpleContainersMixin, SimpleContainersWithMetadataMixin):\n    def get_instances(self, n):\n        return [\n            tskit.Individual(\n                id=j,\n                flags=j,\n                location=[j],\n                parents=[j],\n                nodes=[j],\n                metadata=b\"x\" * j,\n                metadata_decoder=lambda m: m.decode() + \"decoded\",\n            )\n        
    for j in range(n)\n        ]\n\n\nclass TestNodeContainer(SimpleContainersMixin, SimpleContainersWithMetadataMixin):\n    def get_instances(self, n):\n        return [\n            tskit.Node(\n                id=j,\n                flags=j,\n                time=j,\n                population=j,\n                individual=j,\n                metadata=b\"x\" * j,\n                metadata_decoder=lambda m: m.decode() + \"decoded\",\n            )\n            for j in range(n)\n        ]\n\n\nclass TestEdgeContainer(SimpleContainersMixin, SimpleContainersWithMetadataMixin):\n    def get_instances(self, n):\n        return [\n            tskit.Edge(\n                left=j,\n                right=j,\n                parent=j,\n                child=j,\n                metadata=b\"x\" * j,\n                metadata_decoder=lambda m: m.decode() + \"decoded\",\n                id=j,\n            )\n            for j in range(n)\n        ]\n\n\nclass TestSiteContainer(SimpleContainersMixin, SimpleContainersWithMetadataMixin):\n    def get_instances(self, n):\n        return [\n            tskit.Site(\n                id=j,\n                position=j,\n                ancestral_state=\"A\" * j,\n                mutations=TestMutationContainer().get_instances(j),\n                metadata=b\"x\" * j,\n                metadata_decoder=lambda m: m.decode() + \"decoded\",\n            )\n            for j in range(n)\n        ]\n\n\nclass TestMutationContainer(SimpleContainersMixin, SimpleContainersWithMetadataMixin):\n    def get_instances(self, n):\n        return [\n            tskit.Mutation(\n                id=j,\n                site=j,\n                node=j,\n                time=j,\n                derived_state=\"A\" * j,\n                parent=j,\n                metadata=b\"x\" * j,\n                metadata_decoder=lambda m: m.decode() + \"decoded\",\n            )\n            for j in range(n)\n        ]\n\n    def test_nan_equality(self):\n        a 
= tskit.Mutation(\n            id=42,\n            site=42,\n            node=42,\n            time=UNKNOWN_TIME,\n            derived_state=\"A\" * 42,\n            parent=42,\n            metadata=b\"x\" * 42,\n            metadata_decoder=lambda m: m.decode() + \"decoded\",\n        )\n        b = tskit.Mutation(\n            id=42,\n            site=42,\n            node=42,\n            derived_state=\"A\" * 42,\n            parent=42,\n            metadata=b\"x\" * 42,\n            metadata_decoder=lambda m: m.decode() + \"decoded\",\n        )\n        c = tskit.Mutation(\n            id=42,\n            site=42,\n            node=42,\n            time=math.nan,\n            derived_state=\"A\" * 42,\n            parent=42,\n            metadata=b\"x\" * 42,\n            metadata_decoder=lambda m: m.decode() + \"decoded\",\n        )\n        assert a == a\n        assert a == b\n        assert not (a == c)\n        assert not (b == c)\n        assert not (a != a)\n        assert not (a != b)\n        assert a != c\n        assert c != c\n        assert not (c == c)\n\n\nclass TestMigrationContainer(SimpleContainersMixin, SimpleContainersWithMetadataMixin):\n    def get_instances(self, n):\n        return [\n            tskit.Migration(\n                id=j,\n                left=j,\n                right=j,\n                node=j,\n                source=j,\n                dest=j,\n                time=j,\n                metadata=b\"x\" * j,\n                metadata_decoder=lambda m: m.decode() + \"decoded\",\n            )\n            for j in range(n)\n        ]\n\n\nclass TestPopulationContainer(SimpleContainersMixin, SimpleContainersWithMetadataMixin):\n    def get_instances(self, n):\n        return [\n            tskit.Population(\n                id=j,\n                metadata=b\"x\" * j,\n                metadata_decoder=lambda m: m.decode() + \"decoded\",\n            )\n            for j in range(n)\n        ]\n\n\nclass 
TestProvenanceContainer(SimpleContainersMixin):\n    def get_instances(self, n):\n        return [\n            tskit.Provenance(id=j, timestamp=\"x\" * j, record=\"y\" * j) for j in range(n)\n        ]\n\n\nclass TestEdgesetContainer(SimpleContainersMixin):\n    def get_instances(self, n):\n        return [tskit.Edgeset(left=j, right=j, parent=j, children=j) for j in range(n)]\n\n\nclass TestContainersAppend:\n    def test_containers_append(self, ts_fixture):\n        \"\"\"\n        Test that the containers work with `Table.append`\n        \"\"\"\n        tables = ts_fixture.dump_tables()\n        tables.clear(clear_provenance=True)\n        for table_name in tskit.TABLE_NAMES:\n            table = getattr(tables, table_name)\n            for i in range(len(getattr(ts_fixture.tables, table_name))):\n                table.append(getattr(ts_fixture, table_name[:-1])(i))\n        ts_fixture.tables.assert_equals(tables)\n\n\nclass TestTskitConversionOutput(unittest.TestCase):\n    \"\"\"\n    Tests conversion output to ensure it is correct.\n    \"\"\"\n\n    @classmethod\n    def setUpClass(cls):\n        ts = msprime.simulate(\n            length=1,\n            recombination_rate=2,\n            mutation_rate=2,\n            random_seed=1,\n            migration_matrix=[[0, 1], [1, 0]],\n            population_configurations=[\n                msprime.PopulationConfiguration(5) for _ in range(2)\n            ],\n            record_migrations=True,\n        )\n        assert ts.num_migrations > 0\n        cls._tree_sequence = tsutil.insert_random_ploidy_individuals(ts)\n\n    def test_macs(self):\n        output = self._tree_sequence.to_macs().splitlines()\n        assert output[0].startswith(\"COMMAND:\")\n        assert output[1].startswith(\"SEED:\")\n        assert len(output) == 2 + self._tree_sequence.get_num_mutations()\n        n = self._tree_sequence.get_sample_size()\n        m = self._tree_sequence.get_sequence_length()\n        sites = 
list(self._tree_sequence.sites())\n        haplotypes = list(self._tree_sequence.haplotypes())\n        for site_id, line in enumerate(output[2:]):\n            splits = line.split()\n            assert splits[0] == \"SITE:\"\n            assert int(splits[1]) == site_id\n            position = sites[site_id].position / m\n            self.assertAlmostEqual(float(splits[2]), position)\n            col = splits[4]\n            assert len(col) == n\n            for j in range(n):\n                assert col[j] == haplotypes[j][site_id]\n\n    def test_macs_error(self):\n        tables = tskit.TableCollection(1)\n        tables.sites.add_row(position=0.5, ancestral_state=\"A\")\n        tables.nodes.add_row(time=1, flags=tskit.NODE_IS_SAMPLE)\n        tables.mutations.add_row(node=0, site=0, derived_state=\"FOO\")\n        ts = tables.tree_sequence()\n        with pytest.raises(\n            ValueError, match=\"macs output only supports single letter alleles\"\n        ):\n            ts.to_macs()\n\n\nclass TestTreeSequenceGetSite:\n    \"\"\"\n    Tests for getting Site objects from a TreeSequence object\n    by specifying the position.\n    \"\"\"\n\n    def get_example_ts_discrete_coordinates(self):\n        tables = tskit.TableCollection(sequence_length=10)\n        tables.sites.add_row(position=3, ancestral_state=\"A\")\n        tables.sites.add_row(position=5, ancestral_state=\"C\")\n        tables.sites.add_row(position=7, ancestral_state=\"G\")\n        return tables.tree_sequence()\n\n    def get_example_ts_continuous_coordinates(self):\n        tables = tskit.TableCollection(sequence_length=10)\n        tables.sites.add_row(position=0.5, ancestral_state=\"A\")\n        tables.sites.add_row(position=6.2, ancestral_state=\"C\")\n        tables.sites.add_row(position=8.3, ancestral_state=\"T\")\n        return tables.tree_sequence()\n\n    def get_example_ts_without_sites(self):\n        tables = tskit.TableCollection(sequence_length=10)\n        return 
tables.tree_sequence()\n\n    @pytest.mark.parametrize(\"id_\", [0, 1, 2])\n    def test_site_id(self, id_):\n        ts = self.get_example_ts_discrete_coordinates()\n        site = ts.site(id_)\n        assert site.id == id_\n\n    @pytest.mark.parametrize(\"position\", [3, 5, 7])\n    def test_position_discrete_coordinates(self, position):\n        ts = self.get_example_ts_discrete_coordinates()\n        site = ts.site(position=position)\n        assert site.position == position\n\n    @pytest.mark.parametrize(\"position\", [0.5, 6.2, 8.3])\n    def test_position_continuous_coordinates(self, position):\n        ts = self.get_example_ts_continuous_coordinates()\n        site = ts.site(position=position)\n        assert site.position == position\n\n    @pytest.mark.parametrize(\"position\", [0, 2.999999999, 5.000000001, 9])\n    def test_position_not_found(self, position):\n        with pytest.raises(ValueError, match=r\"There is no site at position\"):\n            ts = self.get_example_ts_discrete_coordinates()\n            ts.site(position=position)\n\n    @pytest.mark.parametrize(\n        \"position\",\n        [\n            np.array([3], dtype=float)[0],\n            np.array([3], dtype=int)[0],\n            decimal.Decimal(3),\n        ],\n    )\n    def test_position_good_type(self, position):\n        ts = self.get_example_ts_discrete_coordinates()\n        ts.site(position=position)\n\n    def test_position_not_scalar(self):\n        with pytest.raises(\n            ValueError, match=\"Position must be provided as a scalar value.\"\n        ):\n            ts = self.get_example_ts_discrete_coordinates()\n            ts.site(position=[1, 4, 8])\n\n    @pytest.mark.parametrize(\"position\", [-1, 10, 11])\n    def test_position_out_of_bounds(self, position):\n        with pytest.raises(\n            ValueError,\n            match=\"Position is beyond the coordinates defined by sequence length.\",\n        ):\n            ts = 
self.get_example_ts_discrete_coordinates()\n            ts.site(position=position)\n\n    def test_query_position_siteless_ts(self):\n        with pytest.raises(ValueError, match=r\"There is no site at position\"):\n            ts = self.get_example_ts_without_sites()\n            ts.site(position=1)\n\n    def test_site_id_and_position_are_none(self):\n        with pytest.raises(TypeError, match=\"Site id or position must be provided.\"):\n            ts = self.get_example_ts_discrete_coordinates()\n            ts.site(None, position=None)\n\n    def test_site_id_and_position_are_specified(self):\n        ts = self.get_example_ts_discrete_coordinates()\n        with pytest.raises(\n            TypeError, match=\"Only one of site id or position needs to be provided.\"\n        ):\n            ts.site(0, position=3)\n\n\ndef num_lineages_definition(tree, t):\n    lineages = 0\n    for u in tree.nodes():\n        v = tree.parent(u)\n        if v != tskit.NULL:\n            if tree.time(u) <= t < tree.time(v):\n                lineages += 1\n    return lineages\n\n\nclass TestNumLineages:\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_tree_midpoint_definition(self, ts):\n        t = 0\n        if ts.num_nodes > 0:\n            t = np.max(ts.tables.nodes.time) / 2\n        tree = ts.first()\n        assert tree.num_lineages(t) == num_lineages_definition(tree, t)\n\n    @pytest.mark.parametrize(\"t\", [-np.inf, np.inf, np.nan])\n    def test_nonfinite_time(self, t):\n        tree = tskit.Tree.generate_balanced(2)\n        with pytest.raises(tskit.LibraryError, match=\"NONFINITE\"):\n            tree.num_lineages(t)\n\n    @pytest.mark.parametrize(\"t\", [1, 1.0, np.array([1.0])[0]])\n    def test_number_types(self, t):\n        tree = tskit.Tree.generate_balanced(2)\n        assert tree.num_lineages(t) == 0\n\n    # 2.00┊        12         ┊\n    #     ┊   ┏━━━━━╋━━━━━┓   ┊\n    # 1.00┊   9    10    11   ┊\n    #     ┊ ┏━╋━┓ 
┏━╋━┓ ┏━╋━┓ ┊\n    # 0.00┊ 0 1 2 3 4 5 6 7 8 ┊\n    #     0                   1\n    @pytest.mark.parametrize(\n        [\"t\", \"expected\"],\n        [\n            (-0.00001, 0),\n            (0, 9),\n            (0.0000001, 9),\n            (0.99999, 9),\n            (1, 3),\n            (1.999999, 3),\n            (2, 0),\n            (2.000001, 0),\n        ],\n    )\n    def test_balanced_ternary(self, t, expected):\n        tree = tskit.Tree.generate_balanced(9, arity=3)\n        assert tree.num_lineages(t) == expected\n\n    # 3.00┊            15     ┊\n    #     ┊          ┏━━┻━┓   ┊\n    # 2.00┊   11     ┃   14   ┊\n    #     ┊  ┏━┻━┓   ┃  ┏━┻┓  ┊\n    # 1.00┊  9  10  12  ┃ 13  ┊\n    #     ┊ ┏┻┓ ┏┻┓ ┏┻┓ ┃ ┏┻┓ ┊\n    # 0.00┊ 0 1 2 3 4 5 6 7 8 ┊\n    #     0                   1\n    @pytest.mark.parametrize(\n        [\"t\", \"expected\"],\n        [\n            (-0.00001, 0),\n            (0, 9),\n            (0.0000001, 9),\n            (0.99999, 9),\n            (1, 5),\n            (1.999999, 5),\n            (2, 2),\n            (2.000001, 2),\n            (3.00000, 0),\n            (5.00000, 0),\n        ],\n    )\n    def test_multiroot_different_times(self, t, expected):\n        tables = tskit.Tree.generate_balanced(9, arity=2).tree_sequence.dump_tables()\n        edges = tables.edges.copy()\n        tables.edges.clear()\n        for edge in edges:\n            if edge.parent != 16:\n                tables.edges.append(edge)\n        ts = tables.tree_sequence()\n        tree = ts.first()\n        assert tree.num_lineages(t) == expected\n\n    # 4.00┊   8       ┊\n    #     ┊ ┏━┻━┓     ┊\n    # 3.00┊ 0   7     ┊\n    #     ┊   ┏━┻━┓   ┊\n    # 2.00┊   1   6   ┊\n    #     ┊     ┏━┻┓  ┊\n    # 1.00┊     2  5  ┊\n    #     ┊       ┏┻┓ ┊\n    # 0.00┊       3 4 ┊\n    #     0           1\n    @pytest.mark.parametrize(\n        [\"t\", \"expected\"],\n        [\n            (-0.00001, 0),\n            (0, 2),\n            (1, 2),\n            (2, 
2),\n            (3, 2),\n            (4, 0),\n        ],\n    )\n    def test_comb_different_leaf_times(self, t, expected):\n        tables = tskit.Tree.generate_comb(5).tree_sequence.dump_tables()\n        time = tables.nodes.time\n        time[2] = 1\n        time[1] = 2\n        time[0] = 3\n        tables.nodes.time = time\n        ts = tables.tree_sequence()\n        tree = ts.first()\n        assert tree.num_lineages(t) == expected\n\n    @pytest.mark.parametrize(\n        [\"t\", \"expected\"],\n        [\n            (-0.00001, 0),\n            (0, 0),\n            (1, 0),\n            (2, 0),\n            (3, 0),\n        ],\n    )\n    def test_missing_data_different_times(self, t, expected):\n        tables = tskit.TableCollection(1)\n        for j in range(3):\n            tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=j)\n        ts = tables.tree_sequence()\n        tree = ts.first()\n        assert tree.num_lineages(t) == expected\n\n\n@pytest.fixture\ndef struct_metadata_ts(ts_fixture):\n    schema = metadata.MetadataSchema(\n        {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"id\": {\"type\": \"integer\", \"binaryFormat\": \"i\"},\n                \"name\": {\"type\": \"string\", \"binaryFormat\": \"10s\"},\n                \"value\": {\"type\": \"number\", \"binaryFormat\": \"d\"},\n                \"active\": {\"type\": \"boolean\", \"binaryFormat\": \"?\"},\n            },\n        }\n    )\n    tables = ts_fixture.dump_tables()\n    for table_name in TestStructuredNumpyMetadata.metadata_tables:\n        table = getattr(tables, table_name)\n        table.metadata_schema = schema\n        table_copy = table.copy()\n        table.clear()\n        for j, row in enumerate(table_copy):\n            table.append(\n                row.replace(\n                    metadata={\"id\": j, \"name\": \"name\", \"value\": 1.0, \"active\": True}\n                )\n           
 )\n    return tables.tree_sequence()\n\n\nclass TestStructuredNumpyMetadata:\n    metadata_tables = [\n        \"nodes\",\n        \"edges\",\n        \"sites\",\n        \"mutations\",\n        \"migrations\",\n        \"individuals\",\n        \"populations\",\n    ]\n\n    @pytest.mark.parametrize(\"table_name\", metadata_tables)\n    def test_not_implemented_json(self, table_name, ts_fixture):\n        with pytest.raises(NotImplementedError):\n            getattr(ts_fixture, f\"{table_name}_metadata\")\n\n    @pytest.mark.parametrize(\"table_name\", metadata_tables)\n    def test_array_attr_properties(self, struct_metadata_ts, table_name):\n        ts = struct_metadata_ts\n        attr_name = f\"{table_name}_metadata\"\n        a = getattr(ts, attr_name)\n        assert isinstance(a, np.ndarray)\n        with pytest.raises(AttributeError):\n            setattr(ts, attr_name, None)\n        with pytest.raises(AttributeError):\n            delattr(ts, attr_name)\n        with pytest.raises(ValueError, match=\"read-only\"):\n            a[:] = 1\n\n    @pytest.mark.parametrize(\"table_name\", metadata_tables)\n    def test_array_contents(self, struct_metadata_ts, table_name):\n        ts = struct_metadata_ts\n        attr_name = f\"{table_name}_metadata\"\n        a = getattr(ts, attr_name)\n        assert len(a) == getattr(ts, f\"num_{table_name}\")\n        for j, row in enumerate(a):\n            assert row[\"id\"] == j\n            assert row[\"name\"] == b\"name\"\n            assert row[\"value\"] == 1.0\n            assert row[\"active\"]\n\n    @pytest.mark.parametrize(\"table_name\", metadata_tables)\n    def test_error_if_no_schema(self, table_name):\n        ts = msprime.simulate(10)\n        with pytest.raises(NotImplementedError):\n            getattr(ts, f\"{table_name}_metadata\")\n\n\nclass TestIndividualsNodes:\n    def test_basic_individuals_nodes(self, tmp_path):\n        # Create a basic tree sequence with two individuals\n        tables = 
tskit.TableCollection(sequence_length=100)\n        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b\"\")\n        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b\"\")\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)\n        ts = tables.tree_sequence()\n\n        result = ts.individuals_nodes\n        assert result.shape == (2, 2)\n        assert_array_equal(result, [[0, 1], [2, 3]])\n\n    def test_variable_ploidy(self, tmp_path):\n        tables = tskit.TableCollection(sequence_length=100)\n        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b\"\")  # Diploid\n        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b\"\")  # Haploid\n        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b\"\")  # Triploid\n\n        # Diploid individual\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)\n\n        # Haploid individual\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)\n\n        # Triploid individual\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=2)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=2)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=2)\n\n        ts = tables.tree_sequence()\n\n        result = ts.individuals_nodes\n\n        assert result.shape == (3, 3)\n\n        expected = np.array(\n            [[0, 1, -1], [2, -1, -1], [3, 4, 5]]  # Diploid  # Haploid  # Triploid\n        )\n        assert_array_equal(result, expected)\n\n    def test_no_individuals(self):\n        tables = 
tskit.TableCollection(sequence_length=100)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        ts = tables.tree_sequence()\n\n        result = ts.individuals_nodes\n        expected = np.array([], dtype=np.int32).reshape(0, 0)\n        assert result.shape == (0, 0)\n        assert_array_equal(result, expected)\n\n    def test_no_nodes_with_individuals(self):\n        tables = tskit.TableCollection(sequence_length=100)\n        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b\"\")\n        # Node without individual reference\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        ts = tables.tree_sequence()\n\n        result = ts.individuals_nodes\n        expected = np.array([[]])\n        assert result.shape == (1, 0)\n        assert_array_equal(result, expected)\n\n    def test_individual_with_no_nodes(self):\n        tables = tskit.TableCollection(sequence_length=100)\n        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b\"\")\n        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b\"\")\n        # Only add nodes for first individual\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)\n        ts = tables.tree_sequence()\n\n        result = ts.individuals_nodes\n        expected = np.array([[0], [-1]])\n        assert result.shape == (2, 1)\n        assert_array_equal(result, expected)\n\n    def test_mixed_sample_status(self):\n        tables = tskit.TableCollection(sequence_length=100)\n        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b\"\")\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)\n        tables.nodes.add_row(flags=0, time=0, individual=0)\n        ts = tables.tree_sequence()\n\n        result = ts.individuals_nodes\n        expected = np.array([[0, 1]])\n        assert result.shape == (1, 2)\n        assert_array_equal(result, expected)\n\n\nclass TestRaggedArrays:\n    
@pytest.mark.skipif(not _tskit.HAS_NUMPY_2, reason=\"Requires NumPy 2.0 or higher\")\n    @pytest.mark.parametrize(\"num_rows\", [0, 1, 100])\n    @pytest.mark.parametrize(\"column\", [\"ancestral_state\", \"derived_state\"])\n    def test_site_ancestral_state(self, num_rows, column):\n        tables = tskit.TableCollection(sequence_length=100)\n        rng = random.Random(42)\n        for i in range(num_rows):\n            state_length = rng.randint(0, 10)\n            state = \"\".join(\n                chr(rng.randint(0x1F300, 0x1F6FF)) for _ in range(state_length)\n            )\n            if column == \"ancestral_state\":\n                tables.sites.add_row(position=i, ancestral_state=state)\n            elif column == \"derived_state\":\n                tables.nodes.add_row()\n                tables.sites.add_row(position=i, ancestral_state=\"A\")\n                tables.mutations.add_row(site=i, node=0, derived_state=state)\n        ts = tables.tree_sequence()\n        a = getattr(\n            ts,\n            (\n                \"sites_ancestral_state\"\n                if column == \"ancestral_state\"\n                else \"mutations_derived_state\"\n            ),\n        )\n        assert isinstance(a, np.ndarray)\n        assert a.shape == (num_rows,)\n        assert a.dtype == np.dtype(\"T\")\n        assert a.size == num_rows\n\n        # Check that the value is cached\n        assert a is getattr(\n            ts,\n            (\n                \"sites_ancestral_state\"\n                if column == \"ancestral_state\"\n                else \"mutations_derived_state\"\n            ),\n        )\n\n        for state, row in itertools.zip_longest(\n            a, ts.sites() if column == \"ancestral_state\" else ts.mutations()\n        ):\n            assert state == getattr(row, column)\n\n    @pytest.mark.skipif(not _tskit.HAS_NUMPY_2, reason=\"Requires NumPy 2.0 or higher\")\n    @pytest.mark.parametrize(\"ts\", 
tsutil.get_example_tree_sequences())\n    def test_equality_sites_ancestral_state(self, ts):\n        assert_array_equal(\n            ts.sites_ancestral_state, [site.ancestral_state for site in ts.sites()]\n        )\n\n    @pytest.mark.skipif(not _tskit.HAS_NUMPY_2, reason=\"Requires NumPy 2.0 or higher\")\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_equality_mutations_derived_state(self, ts):\n        assert_array_equal(\n            ts.mutations_derived_state,\n            [mutation.derived_state for mutation in ts.mutations()],\n        )\n\n    @pytest.mark.skipif(not _tskit.HAS_NUMPY_2, reason=\"Requires NumPy 2.0 or higher\")\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_equality_mutations_inherited_state(self, ts):\n        assert_array_equal(\n            ts.mutations_inherited_state,\n            [mutation.inherited_state for mutation in ts.mutations()],\n        )\n\n    @pytest.mark.skipif(not _tskit.HAS_NUMPY_2, reason=\"Requires NumPy 2.0 or higher\")\n    @pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\n    def test_mutations_inherited_state(self, ts):\n        inherited_state = ts.mutations_inherited_state\n        assert len(inherited_state) == ts.num_mutations\n        assert isinstance(inherited_state, np.ndarray)\n        assert inherited_state.shape == (ts.num_mutations,)\n        assert inherited_state.dtype == np.dtype(\"T\")\n        assert inherited_state.size == ts.num_mutations\n\n        for mut in ts.mutations():\n            state0 = ts.site(mut.site).ancestral_state\n            if mut.parent != -1:\n                state0 = ts.mutation(mut.parent).derived_state\n            assert state0 == inherited_state[mut.id]\n\n        # Test caching - second access should return the same object\n        inherited_state2 = ts.mutations_inherited_state\n        assert inherited_state is inherited_state2\n\n    
@pytest.mark.skipif(_tskit.HAS_NUMPY_2, reason=\"Test only on Numpy 1.X\")\n    @pytest.mark.parametrize(\n        \"column\",\n        [\n            \"sites_ancestral_state\",\n            \"mutations_derived_state\",\n            \"mutations_inherited_state\",\n        ],\n    )\n    def test_ragged_array_not_supported(self, column):\n        tables = tskit.TableCollection(sequence_length=100)\n        ts = tables.tree_sequence()\n\n        with pytest.raises(\n            RuntimeError,\n            match=\"requires numpy 2.0\",\n        ):\n            getattr(ts, column)\n\n    @pytest.mark.skipif(_tskit.HAS_NUMPY_2, reason=\"Test only on Numpy 1.X\")\n    def test_tables_emits_warning(self):\n        tables = tskit.TableCollection(sequence_length=1)\n        ts = tables.tree_sequence()\n\n        with warnings.catch_warnings(record=True) as caught:\n            warnings.simplefilter(\"always\", UserWarning)\n            result = ts.tables\n\n        assert isinstance(result, tskit.TableCollection)\n        assert len(caught) == 1\n        warning = caught[0]\n        assert warning.category is UserWarning\n        assert \"Immutable table views require tskit\" in str(warning.message)\n\n\nclass TestSampleNodesByPloidy:\n    @pytest.mark.parametrize(\n        \"n_samples,ploidy,expected\",\n        [\n            (6, 2, np.array([[0, 1], [2, 3], [4, 5]])),  # Basic diploid\n            (9, 3, np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])),  # Triploid\n            (5, 1, np.array([[0], [1], [2], [3], [4]])),  # Ploidy of 1\n            (4, 4, np.array([[0, 1, 2, 3]])),  # Ploidy equals number of samples\n        ],\n    )\n    def test_various_ploidy_scenarios(self, n_samples, ploidy, expected):\n        tables = tskit.TableCollection(sequence_length=100)\n        for _ in range(n_samples):\n            tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        ts = tables.tree_sequence()\n\n        result = ts.sample_nodes_by_ploidy(ploidy)\n        
expected_shape = (n_samples // ploidy, ploidy)\n        assert result.shape == expected_shape\n        assert_array_equal(result, expected)\n\n    def test_mixed_sample_status(self):\n        tables = tskit.TableCollection(sequence_length=100)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        tables.nodes.add_row(flags=0, time=0)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        tables.nodes.add_row(flags=0, time=0)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        ts = tables.tree_sequence()\n\n        result = ts.sample_nodes_by_ploidy(2)\n        assert result.shape == (2, 2)\n        expected = np.array([[0, 2], [4, 5]])\n        assert_array_equal(result, expected)\n\n    def test_no_sample_nodes(self):\n        tables = tskit.TableCollection(sequence_length=100)\n        tables.nodes.add_row(flags=0, time=0)\n        tables.nodes.add_row(flags=0, time=0)\n        ts = tables.tree_sequence()\n\n        with pytest.raises(ValueError, match=\"No sample nodes in tree sequence\"):\n            ts.sample_nodes_by_ploidy(2)\n\n    def test_not_multiple_of_ploidy(self):\n        tables = tskit.TableCollection(sequence_length=100)\n        for _ in range(5):\n            tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        ts = tables.tree_sequence()\n\n        with pytest.raises(ValueError, match=\"not a multiple of ploidy\"):\n            ts.sample_nodes_by_ploidy(2)\n\n    def test_with_existing_individuals(self):\n        tables = tskit.TableCollection(sequence_length=100)\n        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b\"\")\n        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b\"\")\n        # Add nodes with individual references but in a different order\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)\n        
tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)\n\n        ts = tables.tree_sequence()\n        result = ts.sample_nodes_by_ploidy(2)\n        expected = np.array([[0, 1], [2, 3]])\n        assert_array_equal(result, expected)\n        ind_nodes = ts.individuals_nodes\n        assert not np.array_equal(result, ind_nodes)\n\n    def test_different_node_flags(self):\n        tables = tskit.TableCollection(sequence_length=100)\n        OTHER_FLAG1 = 1 << 1\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        tables.nodes.add_row(flags=OTHER_FLAG1, time=0)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE | OTHER_FLAG1, time=0)\n        tables.nodes.add_row()\n        ts = tables.tree_sequence()\n        result = ts.sample_nodes_by_ploidy(2)\n        assert result.shape == (1, 2)\n        assert_array_equal(result, np.array([[0, 2]]))\n\n\nclass TestMapToVcfModel:\n    def test_no_individuals_default_ploidy(self):\n        ts = tskit.Tree.generate_balanced(4).tree_sequence\n        assert ts.num_individuals == 0\n\n        # Default ploidy should be 1\n        result = ts.map_to_vcf_model()\n        assert isinstance(result, tskit.VcfModelMapping)\n        assert result.individuals_nodes.shape == (4, 1)\n        for i in range(4):\n            assert result.individuals_nodes[i, 0] == i\n        assert result.individuals_name.shape == (4,)\n        for i in range(4):\n            assert result.individuals_name[i] == f\"tsk_{i}\"\n\n        with pytest.raises(\n            ValueError,\n            match=\"Cannot include non-sample nodes when individuals are not present\",\n        ):\n            ts.map_to_vcf_model(include_non_sample_nodes=True)\n\n    def test_no_individuals_custom_ploidy(self):\n        ts = tskit.Tree.generate_balanced(6).tree_sequence\n      
  assert ts.num_individuals == 0\n\n        # Use ploidy = 2\n        result = ts.map_to_vcf_model(ploidy=2)\n        assert isinstance(result, tskit.VcfModelMapping)\n        assert result.individuals_nodes.shape == (3, 2)\n        for i in range(3):\n            assert result.individuals_nodes[i, 0] == i * 2\n            assert result.individuals_nodes[i, 1] == i * 2 + 1\n        assert result.individuals_name.shape == (3,)\n        for i in range(3):\n            assert result.individuals_name[i] == f\"tsk_{i}\"\n\n    def test_no_individuals_uneven_ploidy(self):\n        ts = tskit.Tree.generate_balanced(5).tree_sequence\n        # This tree sequence has no individuals\n        assert ts.num_individuals == 0\n\n        # 5 samples cannot be evenly divided into ploidy=2\n        with pytest.raises(ValueError, match=\"not a multiple\"):\n            ts.map_to_vcf_model(ploidy=2)\n\n    def test_with_individuals(self):\n        ts = msprime.sim_ancestry(\n            5,\n            random_seed=42,\n        )\n        result = ts.map_to_vcf_model()\n        assert isinstance(result, tskit.VcfModelMapping)\n        assert result.individuals_nodes.shape == (5, 2)\n        assert np.array_equal(\n            result.individuals_nodes,\n            np.array([[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]]),\n        )\n        assert result.individuals_name.shape == (5,)\n        for i in range(5):\n            assert result.individuals_name[i] == f\"tsk_{i}\"\n\n    def test_with_individuals_and_ploidy_error(self):\n        tables = tskit.TableCollection(1.0)\n        tables.individuals.add_row()\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)\n        ts = tables.tree_sequence()\n\n        with pytest.raises(ValueError, match=\"Cannot specify ploidy when individuals\"):\n            ts.map_to_vcf_model(ploidy=2)\n\n    def test_specific_individuals(self):\n        tables = tskit.TableCollection(1.0)\n        # Create 5 individuals with varying 
ploidy\n        for i in range(5):\n            tables.individuals.add_row()\n            # Individuals have ploidy i+1\n            for _ in range(i + 1):\n                tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=i)\n        ts = tables.tree_sequence()\n\n        result = ts.map_to_vcf_model(individuals=[1, 3])\n        assert isinstance(result, tskit.VcfModelMapping)\n        # Individual 1 has ploidy 2, individual 3 has ploidy 4\n        assert result.individuals_nodes.shape == (2, 5)\n        assert np.array_equal(result.individuals_nodes[0], [1, 2, -1, -1, -1])\n        assert np.array_equal(result.individuals_nodes[1], [6, 7, 8, 9, -1])\n\n        assert result.individuals_name.shape == (2,)\n        assert result.individuals_name[0] == \"tsk_1\"\n        assert result.individuals_name[1] == \"tsk_3\"\n\n    def test_individual_with_no_nodes(self):\n        tables = tskit.TableCollection(1.0)\n        # Individual with no nodes\n        tables.individuals.add_row()\n        # Individual with nodes\n        tables.individuals.add_row()\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)\n        ts = tables.tree_sequence()\n\n        result = ts.map_to_vcf_model()\n        assert result.individuals_nodes.shape == (2, 1)\n        assert np.array_equal(result.individuals_nodes, [[-1], [0]])\n\n    def test_individual_with_no_nodes_only(self):\n        tables = tskit.TableCollection(1.0)\n        # Individual with no nodes\n        tables.individuals.add_row()\n        # Individual with nodes\n        tables.individuals.add_row()\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)\n        ts = tables.tree_sequence()\n\n        result = ts.map_to_vcf_model(individuals=[0])\n        assert result.individuals_nodes.shape == (1, 1)\n        assert np.array_equal(result.individuals_nodes, [[-1]])\n\n    def test_invalid_individual_id(self):\n        tables = 
tskit.TableCollection(1.0)\n        tables.individuals.add_row()\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)\n        ts = tables.tree_sequence()\n\n        with pytest.raises(ValueError, match=\"Invalid individual ID\"):\n            ts.map_to_vcf_model(individuals=[-1])\n\n        with pytest.raises(ValueError, match=\"Invalid individual ID\"):\n            ts.map_to_vcf_model(individuals=[1])\n\n    def test_mixed_sample_non_sample_ordering(self):\n        tables = tskit.TableCollection(1.0)\n        tables.individuals.add_row()\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)\n        tables.nodes.add_row(flags=0, time=0, individual=0)  # Non-sample node\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)\n        tables.nodes.add_row(flags=0, time=0, individual=0)  # Non-sample node\n        tables.individuals.add_row()\n        tables.nodes.add_row(flags=0, time=0, individual=1)  # Non-sample node\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)\n        ts = tables.tree_sequence()\n\n        result = ts.map_to_vcf_model()\n        assert result.individuals_nodes.shape == (2, 4)\n        assert np.array_equal(\n            result.individuals_nodes,\n            np.array([[0, 2, -1, -1], [5, -1, -1, -1]]),\n        )\n\n        result = ts.map_to_vcf_model(include_non_sample_nodes=True)\n        assert result.individuals_nodes.shape == (2, 4)\n        assert np.array_equal(\n            result.individuals_nodes,\n            np.array([[0, 1, 2, 3], [4, 5, -1, -1]]),\n        )\n\n    def test_samples_without_individuals_warning(self):\n        tables = tskit.TableCollection(1.0)\n        tables.individuals.add_row()\n        # Node with individual\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)\n        # Node without individual\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, 
individual=tskit.NULL)\n        ts = tables.tree_sequence()\n\n        with warnings.catch_warnings(record=True) as w:\n            ts.map_to_vcf_model()\n            assert len(w) == 1\n            assert \"At least one sample node does not have an individual ID\" in str(\n                w[0].message\n            )\n\n    def test_metadata_key_for_names(self):\n        tables = tskit.TableCollection(1.0)\n\n        # Add individuals with metadata\n        tables.individuals.metadata_schema = tskit.MetadataSchema(\n            {\n                \"codec\": \"json\",\n                \"type\": \"object\",\n                \"properties\": {\"name\": {\"type\": \"string\"}},\n            }\n        )\n        tables.individuals.add_row(metadata={\"name\": \"ind1\"})\n        tables.individuals.add_row(metadata={\"name\": \"ind2\"})\n\n        # Add nodes\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)\n        ts = tables.tree_sequence()\n\n        result = ts.map_to_vcf_model(name_metadata_key=\"name\")\n        assert result.individuals_name.shape == (2,)\n        assert result.individuals_name[0] == \"ind1\"\n        assert result.individuals_name[1] == \"ind2\"\n\n    def test_custom_individual_names(self):\n        tables = tskit.TableCollection(1.0)\n        tables.individuals.add_row()\n        tables.individuals.add_row()\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)\n        ts = tables.tree_sequence()\n\n        custom_names = [\"individual_A\", \"individual_B\"]\n        result = ts.map_to_vcf_model(individual_names=custom_names)\n        assert result.individuals_name.shape == (2,)\n        assert result.individuals_name[0] == \"individual_A\"\n        assert result.individuals_name[1] == \"individual_B\"\n\n    def 
test_name_conflict_error(self):\n        tables = tskit.TableCollection(1.0)\n        ts = tables.tree_sequence()\n        with pytest.raises(\n            ValueError,\n            match=\"Cannot specify both name_metadata_key and individual_names\",\n        ):\n            ts.map_to_vcf_model(\n                name_metadata_key=\"name\", individual_names=[\"custom_name\"]\n            )\n\n    def test_name_count_mismatch_error(self):\n        tables = tskit.TableCollection(1.0)\n        tables.individuals.add_row()\n        tables.individuals.add_row()\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)\n        ts = tables.tree_sequence()\n\n        with pytest.raises(\n            ValueError, match=\"number of individuals does not match the number of names\"\n        ):\n            ts.map_to_vcf_model(individual_names=[\"only_one_name\"])\n\n    def test_all_individuals_no_nodes(self):\n        tables = tskit.TableCollection(1.0)\n        tables.individuals.add_row()\n        tables.individuals.add_row()\n        ts = tables.tree_sequence()\n        result = ts.map_to_vcf_model()\n        assert result.individuals_nodes.shape == (2, 0)\n\n    def test_position_transform_default_and_custom(self):\n        tables = tskit.TableCollection(10.6)\n        tables.sites.add_row(position=1.3, ancestral_state=\"A\")\n        tables.sites.add_row(position=5.7, ancestral_state=\"T\")\n        tables.sites.add_row(position=9.9, ancestral_state=\"C\")\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        ts = tables.tree_sequence()\n\n        result = ts.map_to_vcf_model()\n        assert np.array_equal(result.transformed_positions, [1, 6, 10])\n        assert result.contig_length == 11\n\n        def floor_transform(positions):\n            return np.floor(positions).astype(int)\n\n        result = 
ts.map_to_vcf_model(position_transform=floor_transform)\n        assert np.array_equal(result.transformed_positions, [1, 5, 9])\n        assert result.contig_length == 10\n\n    def test_legacy_position_transform(self):\n        # Test legacy transform with duplicate positions\n        tables = tskit.TableCollection(10.0)\n        tables.sites.add_row(position=1.4, ancestral_state=\"A\")\n        tables.sites.add_row(position=1.6, ancestral_state=\"T\")\n        tables.sites.add_row(position=1.7, ancestral_state=\"T\")\n        tables.sites.add_row(position=3.2, ancestral_state=\"C\")\n        tables.sites.add_row(position=3.8, ancestral_state=\"G\")\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        ts = tables.tree_sequence()\n\n        result = ts.map_to_vcf_model(position_transform=\"legacy\")\n        assert np.array_equal(result.transformed_positions, [1, 2, 3, 4, 5])\n        assert result.contig_length == 10\n\n    def test_position_transform_no_sites(self):\n        tables = tskit.TableCollection(5.5)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        ts = tables.tree_sequence()\n\n        result = ts.map_to_vcf_model()\n        assert result.transformed_positions.shape == (0,)\n        assert result.contig_length == 6\n\n    def test_invalid_position_transform_return_shape(self):\n        tables = tskit.TableCollection(10.0)\n        tables.sites.add_row(position=1.0, ancestral_state=\"A\")\n        tables.sites.add_row(position=5.0, ancestral_state=\"T\")\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        ts = tables.tree_sequence()\n\n        def bad_transform(positions):\n            return np.array([1])  # Wrong length\n\n        with pytest.raises(\n            ValueError,\n            match=\"Position transform must return an array of the same length\",\n        ):\n            ts.map_to_vcf_model(position_transform=bad_transform)\n\n    def test_contig_id(self):\n        tables 
= tskit.TableCollection(10.0)\n        tables.sites.add_row(position=1.0, ancestral_state=\"A\")\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        ts = tables.tree_sequence()\n\n        result = ts.map_to_vcf_model(contig_id=\"chr1\")\n        assert result.contig_id == \"chr1\"\n\n        result = ts.map_to_vcf_model()\n        assert result.contig_id == \"1\"\n\n    def test_isolated_as_missing(self):\n        tables = tskit.TableCollection(10.0)\n        tables.sites.add_row(position=1.0, ancestral_state=\"A\")\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        ts = tables.tree_sequence()\n\n        result = ts.map_to_vcf_model(isolated_as_missing=False)\n        assert result.isolated_as_missing is False\n\n        result = ts.map_to_vcf_model()\n        assert result.isolated_as_missing is True\n\n\n@pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\ndef test_mutations_edge(ts):\n    for mut, mut_edge in itertools.zip_longest(ts.mutations(), ts.mutations_edge):\n        assert mut.edge == mut_edge\n"
  },
  {
    "path": "python/tests/test_ibd.py",
    "content": "import collections\nimport io\nimport itertools\n\nimport msprime\nimport numpy as np\nimport pytest\n\nimport tests\nimport tests.ibd as ibd\nimport tests.test_wright_fisher as wf\nimport tskit\nfrom tests.tsutil import get_example_tree_sequences\n\n\"\"\"\nTests of IBD finding algorithms.\n\"\"\"\n\n\n# ↑ See https://github.com/tskit-dev/tskit/issues/1804 for when\n# we can remove this. The example_ts here is intended to be the\n# basic tree sequence which should give a meaningful result for\n# most operations. Probably rename it to ``examples.simple_ts()``\n# or something.\n\n\n@tests.cached_example\ndef example_ts():\n    return [msprime.sim_ancestry(2, random_seed=1)]\n\n\ndef ibd_segments(\n    ts,\n    *,\n    within=None,\n    between=None,\n    min_span=0,\n    max_time=None,\n    compare_lib=True,\n    print_c=False,\n    print_py=False,\n    squash=False,\n):\n    \"\"\"\n    Calculates IBD segments using Python and converts output to lists of segments.\n    Also compares result with C library.\n    \"\"\"\n    ibd_f = ibd.IbdFinder(\n        ts, within=within, between=between, max_time=max_time, min_span=min_span\n    )\n    ibd_segs = ibd_f.run(squash=squash)\n    if print_py:\n        print(\"Python output:\\n\")\n        print(ibd_segs)\n    # ibd_f.print_state()\n    if compare_lib:\n        c_out = ts.ibd_segments(\n            within=within,\n            between=between,\n            max_time=max_time,\n            min_span=min_span,\n            store_segments=True,\n        )\n        if print_c:\n            print(\"C output:\\n\")\n            print(c_out)\n        assert_ibd_equal(ibd_segs, c_out)\n    return ibd_segs\n\n\ndef naive_ibd(ts, a, b):\n    \"\"\"\n    Returns the IBD segments along the genome for a and b.\n    \"\"\"\n\n    tree = ts.first()\n    mrca = tree.mrca(a, b)\n    last_mrca = mrca\n    left = 0.0\n    segs = []\n    while tree.next():\n        mrca = tree.mrca(a, b)\n        if mrca != last_mrca:\n       
     segs.append(tskit.IdentitySegment(left, tree.interval.left, last_mrca))\n            left = tree.interval.left\n            last_mrca = mrca\n    segs.append(tskit.IdentitySegment(left, ts.sequence_length, last_mrca))\n\n    # Filter out segments with no mrca\n    return [seg for seg in segs if seg.node != -1]\n\n\ndef naive_ibd_all_pairs(ts, samples=None):\n    samples = ts.samples() if samples is None else samples\n    all_pairs_map = {\n        (a, b): naive_ibd(ts, a, b) for a, b in itertools.combinations(samples, 2)\n    }\n    # Filter out pairs with empty segment lists\n    return {key: value for key, value in all_pairs_map.items() if len(value) > 0}\n\n\nclass TestIbdDefinition:\n    @pytest.mark.skip(\"help\")\n    @pytest.mark.xfail\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences(custom_max=15))\n    def test_all_pairs(self, ts):\n        if ts.num_samples > 10:\n            samples = ts.samples()[:10]\n            ts = ts.simplify(samples=samples)\n        else:\n            samples = ts.samples()\n        ibd_lib = ts.ibd_segments(within=samples, store_segments=True)\n        ibd_def = naive_ibd_all_pairs(ts, samples=samples)\n        assert_ibd_equal(ibd_lib, ibd_def)\n\n    @pytest.mark.skip(\"help\")\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences(custom_max=15))\n    def test_all_pairs_python_only(self, ts):\n        samples = ts.samples()[:10]\n        ibd_pylib = ibd_segments(ts, within=samples, squash=True, compare_lib=False)\n        ibd_def = naive_ibd_all_pairs(ts, samples=samples)\n        assert_ibd_equal(ibd_pylib, ibd_def)\n\n    @pytest.mark.skip(\"help\")\n    @pytest.mark.parametrize(\"N\", [2, 5, 10])\n    @pytest.mark.parametrize(\"T\", [2, 5, 10])\n    def test_wright_fisher_examples(self, N, T):\n        tables = wf.wf_sim(N, T, deep_history=False, seed=42)\n        tables.sort()\n        # NB this is essential! 
We get spurious breakpoints otherwise\n        tables.edges.squash()\n        tables.sort()\n        ts = tables.tree_sequence()\n        ibd0 = ibd_segments(ts, squash=True, compare_lib=False)\n        ibd1 = naive_ibd_all_pairs(ts)\n        assert_ibd_equal(ibd0, ibd1)\n\n\nclass TestIbdImplementations:\n    @pytest.mark.skip(\"help\")\n    @pytest.mark.xfail\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences(custom_max=15))\n    def test_all_pairs(self, ts):\n        # Automatically compares the two implementations\n        samples = ts.samples()[:10]\n        ts = ts.simplify(samples=samples)\n        ibd_segments(ts, squash=True)\n\n\ndef assert_ibd_equal(dict1, dict2):\n    \"\"\"\n    Verifies that two dictionaries have the same keys, and that\n    the set of items corresponding to each key is identical.\n    Used to check identical IBD output.\n    \"\"\"\n    assert len(dict1) == len(dict2)\n    for key, val in dict1.items():\n        assert key in dict2\n        assert len(val) == len(dict2[key])\n        segs1 = list(sorted(val))\n        segs2 = list(sorted(dict2[key]))\n        assert segs1 == segs2\n\n\nclass TestIbdSingleBinaryTree:\n    @tests.cached_example\n    def ts(self):\n        #\n        # 2        4\n        #         / \\\n        # 1      3   \\\n        #       / \\   \\\n        # 0    0   1   2\n        print(\"evaluating ts\")\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       1           0\n        3       0           1\n        4       0           2\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       1       3       0,1\n        0       1       4       2,3\n        \"\"\"\n        )\n        return tskit.load_text(nodes=nodes, edges=edges, strict=False)\n\n    # Basic test\n    def test_defaults(self):\n        true_segs = 
{\n            (0, 1): [tskit.IdentitySegment(0.0, 1.0, 3)],\n            (0, 2): [tskit.IdentitySegment(0.0, 1.0, 4)],\n            (1, 2): [tskit.IdentitySegment(0.0, 1.0, 4)],\n        }\n        ibd_segs = ibd_segments(self.ts(), within=[0, 1, 2], squash=True)\n        assert_ibd_equal(ibd_segs, true_segs)\n\n    def test_within(self):\n        true_segs = {\n            (0, 1): [tskit.IdentitySegment(0.0, 1.0, 3)],\n        }\n        ibd_segs = ibd_segments(self.ts(), within=[0, 1], squash=True)\n        assert_ibd_equal(ibd_segs, true_segs)\n\n    def test_between_0_1(self):\n        true_segs = {\n            (0, 1): [tskit.IdentitySegment(0.0, 1.0, 3)],\n        }\n        ibd_segs = ibd_segments(self.ts(), between=[[0], [1]], squash=True)\n        assert_ibd_equal(ibd_segs, true_segs)\n\n    def test_between_0_2(self):\n        true_segs = {\n            (0, 2): [tskit.IdentitySegment(0.0, 1.0, 4)],\n        }\n        ibd_segs = ibd_segments(self.ts(), between=[[0], [2]], squash=True)\n        assert_ibd_equal(ibd_segs, true_segs)\n\n    def test_between_0_1_2(self):\n        true_segs = {\n            (0, 1): [tskit.IdentitySegment(0.0, 1.0, 3)],\n            (0, 2): [tskit.IdentitySegment(0.0, 1.0, 4)],\n            (1, 2): [tskit.IdentitySegment(0.0, 1.0, 4)],\n        }\n        ibd_segs = ibd_segments(self.ts(), between=[[0], [1], [2]], squash=True)\n        assert_ibd_equal(ibd_segs, true_segs)\n\n    def test_between_0_12(self):\n        true_segs = {\n            (0, 1): [tskit.IdentitySegment(0.0, 1.0, 3)],\n            (0, 2): [tskit.IdentitySegment(0.0, 1.0, 4)],\n        }\n        ibd_segs = ibd_segments(self.ts(), between=[[0], [1, 2]], squash=True)\n        assert_ibd_equal(ibd_segs, true_segs)\n\n    def test_time(self):\n        ibd_segs = ibd_segments(\n            self.ts(),\n            max_time=1.5,\n            squash=True,\n        )\n        true_segs = {(0, 1): [tskit.IdentitySegment(0.0, 1.0, 3)]}\n        
assert_ibd_equal(ibd_segs, true_segs)\n\n    def test_length(self):\n        ibd_segs = ibd_segments(self.ts(), min_span=2, squash=True)\n        assert_ibd_equal(ibd_segs, {})\n\n\nclass TestIbdInterface:\n    @pytest.mark.parametrize(\"ts\", example_ts())\n    def test_input_errors_within(self, ts):\n        with pytest.raises(tskit.LibraryError, match=\"Node out of bounds\"):\n            ts.ibd_segments(within=[-1])\n        with pytest.raises(tskit.LibraryError, match=\"Duplicate sample value\"):\n            ts.ibd_segments(within=[0, 0])\n\n    @pytest.mark.parametrize(\"ts\", example_ts())\n    def test_input_errors_between(self, ts):\n        with pytest.raises(tskit.LibraryError, match=\"Node out of bounds\"):\n            ts.ibd_segments(between=[[0], [-1]])\n        with pytest.raises(tskit.LibraryError, match=\"Duplicate sample\"):\n            ts.ibd_segments(between=[[0], [0]])\n\n    @pytest.mark.parametrize(\"ts\", example_ts())\n    def test_within_between_mutually_exclusive(self, ts):\n        with pytest.raises(ValueError, match=\"mutually exclusive\"):\n            ts.ibd_segments(within=[0], between=[1])\n\n    @pytest.mark.parametrize(\"ts\", example_ts())\n    def test_tables_interface(self, ts):\n        ibd_tab = ts.dump_tables().ibd_segments(store_segments=True)\n        ibd_ts = ts.ibd_segments(store_segments=True)\n        assert ibd_tab == ibd_ts\n\n    @pytest.mark.parametrize(\"ts\", example_ts())\n    def test_empty_within(self, ts):\n        ibd = ts.ibd_segments(within=[], store_pairs=True)\n        assert len(ibd) == 0\n\n    @pytest.mark.parametrize(\"ts\", example_ts())\n    def test_empty_between(self, ts):\n        ibd = ts.ibd_segments(between=[], store_pairs=True)\n        assert len(ibd) == 0\n\n    @pytest.mark.parametrize(\"ts\", example_ts())\n    def test_empty_in_between(self, ts):\n        ibd = ts.ibd_segments(between=[[1, 2], []], store_pairs=True)\n        assert len(ibd) == 0\n\n\nclass 
TestIbdTwoSamplesTwoTrees:\n    # 2\n    #             |     3\n    # 1      2    |    / \\\n    #       / \\   |   /   \\\n    # 0    0   1  |  0     1\n    # |------------|----------|\n    # 0.0          0.4        1.0\n\n    @tests.cached_example\n    def ts(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       0           1\n        3       0           1.5\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       0.4     2       0,1\n        0.4     1.0     3       0,1\n        \"\"\"\n        )\n        return tskit.load_text(nodes=nodes, edges=edges, strict=False)\n\n    # Basic test\n    def test_basic(self):\n        ibd_segs = ibd_segments(self.ts(), squash=True)\n        true_segs = {\n            (0, 1): [\n                tskit.IdentitySegment(0.0, 0.4, 2),\n                tskit.IdentitySegment(0.4, 1.0, 3),\n            ]\n        }\n        assert_ibd_equal(ibd_segs, true_segs)\n\n    # Max time = 1.2\n    def test_time(self):\n        ibd_segs = ibd_segments(self.ts(), max_time=1.2, squash=True)\n        true_segs = {(0, 1): [tskit.IdentitySegment(0.0, 0.4, 2)]}\n        assert_ibd_equal(ibd_segs, true_segs)\n\n    # Min length = 0.5\n    def test_length(self):\n        ibd_segs = ibd_segments(self.ts(), min_span=0.5, squash=True)\n        true_segs = {(0, 1): [tskit.IdentitySegment(0.4, 1.0, 3)]}\n        assert_ibd_equal(ibd_segs, true_segs)\n\n\nclass TestIbdUnrelatedSamples:\n    #\n    #    2   3\n    #    |   |\n    #    0   1\n\n    @tests.cached_example\n    def ts(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       0           1\n        3       0           1\n        \"\"\"\n        )\n        edges = io.StringIO(\n            
\"\"\"\\\n        left    right   parent  child\n        0       1       2       0\n        0       1       3       1\n        \"\"\"\n        )\n\n        return tskit.load_text(nodes=nodes, edges=edges, strict=False)\n\n    def test_basic(self):\n        ibd_segs = ibd_segments(self.ts(), squash=True)\n        assert len(ibd_segs) == 0\n\n    def test_time(self):\n        ibd_segs = ibd_segments(self.ts(), max_time=1.2, squash=True)\n        assert len(ibd_segs) == 0\n\n    def test_length(self):\n        ibd_segs = ibd_segments(self.ts(), min_span=0.2, squash=True)\n        assert len(ibd_segs) == 0\n\n\nclass TestIbdNoSamples:\n    #\n    #     2\n    #    / \\\n    #   /   \\\n    #  /     \\\n    # (0)   (1)\n\n    @tests.cached_example\n    def ts(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       0           0\n        1       0           0\n        2       0           1\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       1       2       0\n        0       1       2       1\n        \"\"\"\n        )\n        return tskit.load_text(nodes=nodes, edges=edges, strict=False)\n\n    def test_defaults(self):\n        result = ibd_segments(self.ts(), squash=True)\n        assert len(result) == 0\n\n    def test_specified_samples(self):\n        ibd_segs = ibd_segments(self.ts(), within=[0, 1], squash=True)\n        true_segs = {\n            (0, 1): [\n                tskit.IdentitySegment(0.0, 1, 2),\n            ]\n        }\n        assert_ibd_equal(ibd_segs, true_segs)\n\n\nclass TestIbdSamplesAreDescendants:\n    #\n    # 4     5\n    # |     |\n    # 2     3\n    # |     |\n    # 0     1\n    #\n    @tests.cached_example\n    def ts(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       1           1\n   
     3       1           1\n        4       0           2\n        5       0           2\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       1       2       0\n        0       1       3       1\n        0       1       4       2\n        0       1       5       3\n        \"\"\"\n        )\n        return tskit.load_text(nodes=nodes, edges=edges, strict=False)\n\n    def test_basic(self):\n        ibd_segs = ibd_segments(self.ts(), squash=True)\n        true_segs = {\n            (0, 2): [tskit.IdentitySegment(0.0, 1.0, 2)],\n            (1, 3): [tskit.IdentitySegment(0.0, 1.0, 3)],\n        }\n\n        assert_ibd_equal(ibd_segs, true_segs)\n\n    def test_input_within(self):\n        ibd_segs = ibd_segments(self.ts(), within=[0, 2, 3, 5], squash=True)\n        true_segs = {\n            (0, 2): [tskit.IdentitySegment(0.0, 1.0, 2)],\n            (3, 5): [tskit.IdentitySegment(0.0, 1.0, 5)],\n        }\n        assert_ibd_equal(ibd_segs, true_segs)\n\n    def test_all_samples(self):\n        # FIXME\n        ibd_segs = ibd_segments(self.ts(), within=range(6), compare_lib=False)\n        true_segs = {\n            (0, 2): [tskit.IdentitySegment(0.0, 1.0, 2)],\n            (0, 4): [tskit.IdentitySegment(0.0, 1.0, 4)],\n            (2, 4): [tskit.IdentitySegment(0.0, 1.0, 4)],\n            (1, 3): [tskit.IdentitySegment(0.0, 1.0, 3)],\n            (1, 5): [tskit.IdentitySegment(0.0, 1.0, 5)],\n            (3, 5): [tskit.IdentitySegment(0.0, 1.0, 5)],\n        }\n        assert_ibd_equal(ibd_segs, true_segs)\n\n\nclass TestIbdSimpleInternalSampleChain:\n    #\n    # 2\n    # |\n    # 1\n    # |\n    # 0\n    #\n    @tests.cached_example\n    def ts(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           1\n        2       1           2\n        \"\"\"\n        )\n        edges = 
io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       1       1       0\n        0       1       2       1\n        \"\"\"\n        )\n        return tskit.load_text(nodes=nodes, edges=edges, strict=False)\n\n    def test_basic(self):\n        # FIXME\n        ibd_segs = ibd_segments(self.ts(), compare_lib=False, squash=True)\n        true_segs = {\n            (0, 1): [tskit.IdentitySegment(0.0, 1.0, 1)],\n            (0, 2): [tskit.IdentitySegment(0.0, 1.0, 2)],\n            (1, 2): [tskit.IdentitySegment(0.0, 1.0, 2)],\n        }\n\n        assert_ibd_equal(ibd_segs, true_segs)\n\n\nclass TestIbdDifferentPaths:\n    #\n    #        4       |      4       |        4\n    #       / \\      |     / \\      |       / \\\n    #      /   \\     |    /   3     |      /   \\\n    #     /     \\    |   2     \\    |     /     \\\n    #    /       \\   |  /       \\   |    /       \\\n    #   0         1  | 0         1  |   0         1\n    #                |              |\n    #                0.2            0.7\n\n    @tests.cached_example\n    def ts(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       0           1\n        3       0           1.5\n        4       0           2.5\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.2     0.7     2       0\n        0.2     0.7     3       1\n        0.0     0.2     4       0\n        0.0     0.2     4       1\n        0.7     1.0     4       0\n        0.7     1.0     4       1\n        0.2     0.7     4       2\n        0.2     0.7     4       3\n        \"\"\"\n        )\n        return tskit.load_text(nodes=nodes, edges=edges, strict=False)\n\n    def test_defaults(self):\n        ibd_segs = ibd_segments(self.ts(), squash=True, compare_lib=False)\n        true_segs = {\n            (0, 
1): [\n                tskit.IdentitySegment(0.0, 1.0, 4),\n            ]\n        }\n        assert_ibd_equal(ibd_segs, true_segs)\n\n    def test_time(self):\n        ibd_segs = ibd_segments(self.ts(), max_time=1.8)\n        assert len(ibd_segs) == 0\n\n    def test_length(self):\n        ibd_segs = ibd_segments(self.ts(), min_span=0.6, squash=True, compare_lib=False)\n        true_segs = {(0, 1): [tskit.IdentitySegment(0.0, 1.0, 4)]}\n        assert_ibd_equal(ibd_segs, true_segs)\n\n\nclass TestIbdDifferentPaths2:\n    #\n    #        5         |\n    #       / \\        |\n    #      /   4       |      4\n    #     /   / \\      |     / \\\n    #    /   /   \\     |    /   \\\n    #   /   /     \\    |   3     \\\n    #  /   /       \\   |  / \\     \\\n    # 0   1         2  | 0   2     1\n    #                  |\n    #                  0.2\n\n    @tests.cached_example\n    def ts(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       1           0\n        3       0           1\n        4       0           2.5\n        5       0           3.5\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.2     1.0     3       0\n        0.2     1.0     3       2\n        0.0     1.0     4       1\n        0.0     0.2     4       2\n        0.2     1.0     4       3\n        0.0     0.2     5       0\n        0.0     0.2     5       4\n        \"\"\"\n        )\n        return tskit.load_text(nodes=nodes, edges=edges, strict=False)\n\n    def test_defaults(self):\n        ibd_segs = ibd_segments(self.ts(), within=[1, 2], squash=True, compare_lib=False)\n        true_segs = {\n            (1, 2): [\n                tskit.IdentitySegment(0.0, 1.0, 4),\n            ],\n        }\n        assert_ibd_equal(ibd_segs, true_segs)\n\n    def test_length(self):\n        ibd_segs = 
ibd_segments(\n            self.ts(), within=[1, 2], min_span=0.5, squash=True, compare_lib=False\n        )\n        true_segs = {\n            (1, 2): [\n                tskit.IdentitySegment(0.0, 1.0, 4),\n            ],\n        }\n        assert_ibd_equal(ibd_segs, true_segs)\n\n\nclass TestIbdDifferentPaths3:\n    # 2.00┊   4   ┊   4   ┊\n    #     ┊ ┏━╋━┓ ┊ ┏━╋━┓ ┊\n    # 1.00┊ 2 ┃ 3 ┊ 3 ┃ 2 ┊\n    #     ┊ ┃ ┃   ┊ ┃ ┃   ┊\n    # 0.00┊ 0 1   ┊ 0 1   ┊\n    #     0       5      10\n    @tests.cached_example\n    def ts(self):\n        t = tskit.TableCollection(sequence_length=10)\n        t.nodes.add_row(flags=1, time=0)\n        t.nodes.add_row(flags=1, time=0)\n        t.nodes.add_row(flags=0, time=1)\n        t.nodes.add_row(flags=0, time=1)\n        t.nodes.add_row(flags=0, time=2)\n        t.edges.add_row(parent=2, child=0, left=0, right=5)\n        t.edges.add_row(parent=3, child=0, left=5, right=10)\n        t.edges.add_row(parent=4, child=2, left=0, right=10)\n        t.edges.add_row(parent=4, child=3, left=0, right=10)\n        t.edges.add_row(parent=4, child=1, left=0, right=10)\n        t.sort()\n        return t.tree_sequence()\n\n    def test_defaults(self):\n        ibd_segs = ibd_segments(self.ts(), squash=True, compare_lib=False)\n        true_segs = {\n            (0, 1): [tskit.IdentitySegment(0, 10, 4)],\n        }\n        assert_ibd_equal(ibd_segs, true_segs)\n\n    def test_length(self):\n        ibd_segs = ibd_segments(self.ts(), min_span=0.6, squash=True, compare_lib=False)\n        true_segs = {\n            (0, 1): [tskit.IdentitySegment(0, 10, 4)],\n        }\n        assert_ibd_equal(ibd_segs, true_segs)\n\n\nclass TestIbdPolytomies:\n    #\n    #          5         |         5\n    #         / \\        |        / \\\n    #        4   \\       |       4   \\\n    #       /|\\   \\      |      /|\\   \\\n    #      / | \\   \\     |     / | \\   \\\n    #     /  |  \\   \\    |    /  |  \\   \\\n    #    /   |   \\   \\   |   /   |  
 \\   \\\n    #   0    1    2   3  |  0    1    3   2\n    #                    |\n    #                   0.3\n\n    @tests.cached_example\n    def ts(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       1           0\n        3       1           0\n        4       0           2.5\n        5       0           3.5\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.0     1.0     4       0\n        0.0     1.0     4       1\n        0.0     0.3     4       2\n        0.3     1.0     4       3\n        0.3     1.0     5       2\n        0.0     0.3     5       3\n        0.0     1.0     5       4\n        \"\"\"\n        )\n        return tskit.load_text(nodes=nodes, edges=edges, strict=False)\n\n    def test_defaults(self):\n        ibd_segs = ibd_segments(self.ts(), squash=True, compare_lib=False)\n        true_segs = {\n            (0, 1): [tskit.IdentitySegment(0, 1, 4)],\n            (0, 2): [\n                tskit.IdentitySegment(0, 0.3, 4),\n                tskit.IdentitySegment(0.3, 1, 5),\n            ],\n            (0, 3): [\n                tskit.IdentitySegment(0, 0.3, 5),\n                tskit.IdentitySegment(0.3, 1, 4),\n            ],\n            (1, 2): [\n                tskit.IdentitySegment(0, 0.3, 4),\n                tskit.IdentitySegment(0.3, 1, 5),\n            ],\n            (1, 3): [\n                tskit.IdentitySegment(0, 0.3, 5),\n                tskit.IdentitySegment(0.3, 1, 4),\n            ],\n            (2, 3): [\n                tskit.IdentitySegment(0, 1, 5),\n            ],\n        }\n        assert_ibd_equal(ibd_segs, true_segs)\n\n    def test_time(self):\n        ibd_segs = ibd_segments(self.ts(), max_time=3)\n        true_segs = {\n            (0, 1): [tskit.IdentitySegment(0, 1, 4)],\n            (0, 2): 
[tskit.IdentitySegment(0, 0.3, 4)],\n            (0, 3): [tskit.IdentitySegment(0.3, 1, 4)],\n            (1, 2): [tskit.IdentitySegment(0, 0.3, 4)],\n            (1, 3): [tskit.IdentitySegment(0.3, 1, 4)],\n        }\n        assert_ibd_equal(ibd_segs, true_segs)\n\n    def test_length(self):\n        ibd_segs = ibd_segments(self.ts(), min_span=0.5, squash=True, compare_lib=False)\n        true_segs = {\n            (0, 1): [tskit.IdentitySegment(0, 1, 4)],\n            (0, 2): [tskit.IdentitySegment(0.3, 1, 5)],\n            (0, 3): [tskit.IdentitySegment(0.3, 1, 4)],\n            (1, 2): [tskit.IdentitySegment(0.3, 1, 5)],\n            (1, 3): [tskit.IdentitySegment(0.3, 1, 4)],\n            (2, 3): [tskit.IdentitySegment(0, 1, 5)],\n        }\n        assert_ibd_equal(ibd_segs, true_segs)\n\n    def test_input_within(self):\n        ibd_segs = ibd_segments(self.ts(), within=[0, 1, 2])\n        true_segs = {\n            (0, 1): [tskit.IdentitySegment(0, 1, 4)],\n            (0, 2): [\n                tskit.IdentitySegment(0, 0.3, 4),\n                tskit.IdentitySegment(0.3, 1, 5),\n            ],\n            (1, 2): [\n                tskit.IdentitySegment(0, 0.3, 4),\n                tskit.IdentitySegment(0.3, 1, 5),\n            ],\n        }\n        assert_ibd_equal(ibd_segs, true_segs)\n\n\nclass TestIbdInternalSamples:\n    #\n    #\n    #      3\n    #     / \\\n    #    /   2\n    #   /     \\\n    #  0      (1)\n\n    @tests.cached_example\n    def ts(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       0           0\n        2       1           1\n        3       0           2\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.0     1.0     2       1\n        0.0     1.0     3       0\n        0.0     1.0     3       2\n        \"\"\"\n        )\n        return 
tskit.load_text(nodes=nodes, edges=edges, strict=False)\n\n    def test_defaults(self):\n        ibd_segs = ibd_segments(self.ts())\n        true_segs = {\n            (0, 2): [tskit.IdentitySegment(0, 1, 3)],\n        }\n        assert_ibd_equal(ibd_segs, true_segs)\n\n\nclass TestIbdLengthThreshold:\n    \"\"\"\n    Tests the behaviour of the min_span argument in niche cases.\n    \"\"\"\n\n    # 2\n    #             |     3\n    # 1      2    |    / \\\n    #       / \\   |   /   \\\n    # 0    0   1  |  0     1\n    # |------------|----------|\n    # 0.0          0.4        1.0\n\n    @tests.cached_example\n    def ts(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       0           1\n        3       0           1.5\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       0.4     2       0,1\n        0.4     1.0     3       0,1\n        \"\"\"\n        )\n        return tskit.load_text(nodes=nodes, edges=edges, strict=False)\n\n    def test_length_exceeds_segment(self):\n        ibd_segs = ibd_segments(self.ts(), min_span=1.1)\n        assert_ibd_equal(ibd_segs, {})\n\n    def test_length_is_negative(self):\n        with pytest.raises(tskit.LibraryError):\n            ibd_segments(self.ts(), min_span=-0.1)\n\n    def test_equal_to_length(self):\n        ibd_segs = ibd_segments(self.ts(), min_span=0.4)\n        true_segs = {(0, 1): [tskit.IdentitySegment(0.4, 1.0, 3)]}\n        assert_ibd_equal(ibd_segs, true_segs)\n\n\nclass TestIbdProperties:\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_default_within_all_samples(self, ts):\n        segs = ts.ibd_segments(store_pairs=True)\n        for a, b in segs.keys():\n            assert ts.node(a).is_sample()\n            assert ts.node(b).is_sample()\n\n    @pytest.mark.parametrize(\"ts\", 
get_example_tree_sequences())\n    def test_within_subset(self, ts):\n        samples = ts.samples()\n        samples = samples[:3]\n        segs = ts.ibd_segments(store_pairs=True, within=samples)\n        for a, b in segs.keys():\n            assert a in samples\n            assert b in samples\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_between_two_subsets(self, ts):\n        samples = ts.samples()\n        k = len(samples) // 2\n        A = samples[:k]\n        B = samples[k:]\n        segs = ts.ibd_segments(store_pairs=True, between=[A, B])\n        for a, b in segs.keys():\n            assert a in A\n            assert b in B\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_between_same_segments_as_filtered_within_pair(self, ts):\n        samples = ts.samples()[:10]\n        all_segs = ts.ibd_segments(within=samples, store_segments=True)\n        A = samples[1::2]\n        B = samples[::2]\n        between_segs = ts.ibd_segments(store_segments=True, between=[A, B])\n        filtered_segs = collections.defaultdict(list)\n        for (u, v), seglist in all_segs.items():\n            if (u in A and v in B) or (v in A and u in B):\n                filtered_segs[(u, v)] = seglist\n        assert_ibd_equal(between_segs, filtered_segs)\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_between_same_segments_as_filtered_within_triple(self, ts):\n        samples = ts.samples()[:9]  # Limit the number of samples\n        all_segs = ts.ibd_segments(within=samples, store_segments=True)\n        A = samples[1::3]\n        B = samples[2::3]\n        C = samples[0::3]\n        all_pairs = set()\n        for set_pair in itertools.combinations([A, B, C], 2):\n            for pair in itertools.product(*set_pair):\n                all_pairs.add(tuple(sorted(pair)))\n        between_segs = ts.ibd_segments(store_segments=True, between=[A, B, C])\n        filtered_segs = 
collections.defaultdict(list)\n        for pair, seglist in all_segs.items():\n            if pair in all_pairs:\n                filtered_segs[pair] = seglist\n        assert_ibd_equal(between_segs, filtered_segs)\n\n\nclass TestIdentitySegments:\n    \"\"\"\n    Test the IdentitySegments class interface.\n    \"\"\"\n\n    def verify_segments(self, ts, ibd_segments):\n        samples = set(ts.samples())\n        for (a, b), segment_list in ibd_segments.items():\n            assert a < b\n            assert a in samples\n            assert b in samples\n            left = segment_list.left\n            right = segment_list.right\n            node = segment_list.node\n\n            num_segments = 0\n            total_span = 0\n            for j, seg in enumerate(segment_list):\n                assert isinstance(seg, tskit.IdentitySegment)\n                total_span += seg.span\n                num_segments += 1\n                assert seg.span == seg.right - seg.left\n                assert seg.left == left[j]\n                assert seg.right == right[j]\n                assert seg.node == node[j]\n                assert 0 <= seg.node < ts.num_nodes\n            assert total_span == segment_list.total_span\n            assert num_segments == len(segment_list)\n\n        total_span = sum(lst.total_span for lst in ibd_segments.values())\n        np.testing.assert_allclose(ibd_segments.total_span, total_span)\n        num_segments = sum(len(lst) for lst in ibd_segments.values())\n        assert num_segments == ibd_segments.num_segments\n\n    @pytest.mark.parametrize(\"store_segments\", [True, False])\n    @pytest.mark.parametrize(\"store_pairs\", [True, False])\n    def test_str(self, store_segments, store_pairs):\n        ts = msprime.sim_ancestry(2, random_seed=2)\n        result = ts.ibd_segments(store_segments=store_segments, store_pairs=store_pairs)\n        s = str(result)\n        assert \"IdentitySegments\" in s\n        assert \"max_time\" in s\n        
assert \"min_span\" in s\n\n    def test_repr_store_segments(self):\n        ts = msprime.sim_ancestry(2, random_seed=2)\n        result = ts.ibd_segments(store_segments=True)\n        s = repr(result)\n        assert s.startswith(\"IdentitySegments({\")\n        for lst in result.values():\n            s = repr(lst)\n            assert s.startswith(\"IdentitySegmentList([\")\n\n    def test_repr_without_store_segments(self):\n        ts = msprime.sim_ancestry(2, random_seed=2)\n        result = ts.ibd_segments(store_pairs=True)\n        s = repr(result)\n        assert s.startswith(\"<tskit.tables.IdentitySegments\")\n        result = ts.ibd_segments()\n        s = repr(result)\n        assert s.startswith(\"<tskit.tables.IdentitySegments\")\n\n    def test_store_segs_implies_store_pairs(self):\n        ts = msprime.sim_ancestry(2, random_seed=2)\n        result = ts.ibd_segments(store_pairs=True)\n        assert result.num_pairs == 6\n        result = ts.ibd_segments(store_segments=True)\n        assert result.num_pairs == 6\n\n    def test_operations_available_by_default(self):\n        ts = msprime.sim_ancestry(2, random_seed=2)\n        result = ts.ibd_segments()\n        assert result.num_segments == 6\n        assert result.total_span == 6\n        with pytest.raises(tskit.IdentityPairsNotStoredError):\n            _ = result.num_pairs\n        with pytest.raises(tskit.IdentityPairsNotStoredError):\n            _ = len(result)\n        with pytest.raises(tskit.IdentityPairsNotStoredError):\n            _ = result.pairs\n        with pytest.raises(tskit.IdentityPairsNotStoredError):\n            _ = result[0, 1]\n        with pytest.raises(tskit.IdentityPairsNotStoredError):\n            _ = list(result)\n        with pytest.raises(tskit.IdentityPairsNotStoredError):\n            _ = result == result\n        # It's OK when we compare with another type\n        assert result != []\n\n    def test_operations_available_store_pairs(self):\n        ts = 
msprime.sim_ancestry(2, random_seed=2)\n        result = ts.ibd_segments(store_pairs=True)\n        assert result.num_segments == 6\n        assert result.total_span == 6\n        assert result.num_pairs == 6\n        assert len(result) == 6\n        assert result.pairs is not None\n        seglist = result[0, 1]\n        assert seglist.total_span == 1\n        assert len(seglist) == 1\n        with pytest.raises(tskit.IdentitySegmentsNotStoredError):\n            _ = list(seglist)\n        with pytest.raises(tskit.IdentitySegmentsNotStoredError):\n            _ = seglist.left\n        with pytest.raises(tskit.IdentitySegmentsNotStoredError):\n            _ = seglist.right\n        with pytest.raises(tskit.IdentitySegmentsNotStoredError):\n            _ = seglist.node\n        with pytest.raises(tskit.IdentitySegmentsNotStoredError):\n            _ = seglist == seglist\n\n    @pytest.mark.parametrize(\"n\", [1, 2, 3])\n    def test_pairs_all_samples(self, n):\n        ts = msprime.sim_ancestry(n, random_seed=2)\n        result = ts.ibd_segments(store_segments=True)\n        pairs = np.array(list(itertools.combinations(ts.samples(), 2)))\n        np.testing.assert_array_equal(pairs, result.pairs)\n        self.verify_segments(ts, result)\n\n    @pytest.mark.parametrize(\"n\", [3, 4, 5])\n    def test_pairs_subset(self, n):\n        ts = msprime.sim_ancestry(n, random_seed=2)\n        pairs = np.array([(0, 1), (0, 2), (1, 2)])\n        result = ts.ibd_segments(within=[0, 1, 2], store_segments=True)\n        np.testing.assert_array_equal(pairs, result.pairs)\n        self.verify_segments(ts, result)\n\n    @pytest.mark.parametrize(\"max_time\", [0, 1, 10])\n    def test_max_time(self, max_time):\n        ts = msprime.sim_ancestry(2, random_seed=2)\n        result = ts.ibd_segments(max_time=max_time, store_segments=True)\n        assert result.max_time == max_time\n        self.verify_segments(ts, result)\n\n    def test_max_time_default(self):\n        ts = 
msprime.sim_ancestry(2, random_seed=2)\n        result = ts.ibd_segments(store_segments=True)\n        assert np.isinf(result.max_time)\n        self.verify_segments(ts, result)\n\n    @pytest.mark.parametrize(\"min_span\", [0, 1, 10])\n    def test_min_span(self, min_span):\n        ts = msprime.sim_ancestry(2, random_seed=2)\n        result = ts.ibd_segments(min_span=min_span, store_segments=True)\n        assert result.min_span == min_span\n        self.verify_segments(ts, result)\n\n    @pytest.mark.parametrize(\"min_span\", [100, 101, 100000])\n    def test_min_span_longer_than_seq_length(self, min_span):\n        ts = msprime.sim_ancestry(\n            100, recombination_rate=0.1, sequence_length=100, random_seed=2\n        )\n        result = ts.ibd_segments(min_span=min_span, store_segments=True)\n        assert result.min_span == min_span\n        assert result.num_segments == 0\n        self.verify_segments(ts, result)\n\n    def test_recombination_discrete(self):\n        ts = msprime.sim_ancestry(\n            10, sequence_length=100, recombination_rate=0.1, random_seed=2\n        )\n        assert ts.num_trees > 2\n        result = ts.ibd_segments(store_segments=True)\n        self.verify_segments(ts, result)\n\n    def test_recombination_continuous(self):\n        ts = msprime.sim_ancestry(\n            10,\n            recombination_rate=1,\n            random_seed=2,\n            discrete_genome=False,\n            sequence_length=1,\n        )\n        assert ts.num_trees > 2\n        result = ts.ibd_segments(store_segments=True)\n        self.verify_segments(ts, result)\n\n    def test_dict_interface(self):\n        ts = msprime.sim_ancestry(5, random_seed=2)\n        pairs = list(itertools.combinations(ts.samples(), 2))\n        result = ts.ibd_segments(store_segments=True)\n        assert len(result) == len(pairs)\n        for pair in pairs:\n            assert pair in result\n            assert result[pair] is not None\n        for k, v in 
result.items():\n            assert k in pairs\n            assert isinstance(v, tskit.IdentitySegmentList)\n\n\nclass TestIdentitySegmentsList:\n    \"\"\"\n    Tests for the IdentitySegmentList class.\n    \"\"\"\n\n    example_ts = msprime.sim_ancestry(\n        3, sequence_length=100, recombination_rate=0.1, random_seed=2\n    )\n\n    def test_list_semantics(self):\n        result = self.example_ts.ibd_segments(store_segments=True)\n        assert len(result) > 0\n        for seglist in result.values():\n            lst = list(seglist)\n            assert len(lst) == len(seglist)\n            assert lst == list(seglist)\n\n    def test_str(self):\n        result = self.example_ts.ibd_segments(store_segments=True)\n        seglist = list(result.values())[0]\n        assert str(seglist).startswith(\"IdentitySegmentList\")\n\n    def test_repr(self):\n        result = self.example_ts.ibd_segments(store_segments=True)\n        seglist = list(result.values())[0]\n        assert repr(seglist).startswith(\"IdentitySegmentList([IdentitySegment\")\n\n    def test_eq_semantics(self):\n        result = self.example_ts.ibd_segments(store_segments=True)\n        seglists = list(result.values())\n        assert len(result) == len(seglists)\n        assert len(seglists) > 1\n        for seglist1, seglist2 in zip(result.values(), seglists):\n            assert seglist1 == seglist2\n            assert not (seglist1 != seglist2)\n            assert seglist1 != result\n            assert seglist1 != []\n        # The chance of getting two identical seglists is minuscule\n        for seglist in seglists[1:]:\n            assert seglist != seglists[0]\n\n    def test_eq_fails_without_store_segments(self):\n        result = self.example_ts.ibd_segments(store_pairs=True)\n        for seglist in result.values():\n            with pytest.raises(tskit.IdentitySegmentsNotStoredError):\n                _ = seglist == seglist\n            # But it's OK when comparing to another type, 
since we know\n            # it'll be False regardless\n            assert seglist != []\n\n    def test_list_contents(self):\n        result = self.example_ts.ibd_segments(store_segments=True)\n        assert len(result) > 0\n        for seglist in result.values():\n            for seg in seglist:\n                assert isinstance(seg, tskit.IdentitySegment)\n"
  },
  {
    "path": "python/tests/test_immutable_table_collection.py",
    "content": "import inspect\nimport re\n\nimport numpy as np\nimport pytest\n\nimport _tskit\nimport tests.tsutil as tsutil\nimport tskit\n\npytestmark = pytest.mark.skipif(\n    not getattr(_tskit, \"HAS_NUMPY_2\", False),\n    reason=\"ImmutableTableCollection requires NumPy 2 runtime\",\n)\n\n\ndef get_mutable_and_immutable(ts):\n    mutable = ts.dump_tables()\n    immutable = ts.tables\n    assert isinstance(immutable, tskit.tables.ImmutableTableCollection)\n    return mutable, immutable\n\n\n@pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\nclass TestCollectionParity:\n    def test_basic_properties_match(self, ts):\n        mutable, immutable = get_mutable_and_immutable(ts)\n        assert mutable.sequence_length == immutable.sequence_length\n        assert mutable.time_units == immutable.time_units\n        assert mutable.file_uuid == immutable.file_uuid\n        assert mutable.metadata_schema == immutable.metadata_schema\n        assert mutable.metadata == immutable.metadata\n        assert mutable.metadata_schema.encode_row(mutable.metadata) == bytes(\n            immutable.metadata_bytes\n        )\n\n    def test_asdict_equals(self, ts):\n        mutable, immutable = get_mutable_and_immutable(ts)\n\n        d_mut = mutable.asdict()\n        d_imm = immutable.asdict()\n        assert set(d_mut.keys()) == set(d_imm.keys())\n        for key, val in d_mut.items():\n            if isinstance(val, dict):\n                for col, arr in val.items():\n                    arr2 = d_imm[key][col]\n                    assert np.array_equal(arr, arr2) or (\n                        np.all(map(tskit.is_unknown_time, arr))\n                        and np.all(map(tskit.is_unknown_time, arr2))\n                    )\n            else:\n                assert d_imm[key] == val\n\n    def test_equals_bidirectional(self, ts):\n        mutable, immutable = get_mutable_and_immutable(ts)\n        assert mutable.equals(mutable)\n        assert 
mutable.equals(immutable)\n        assert immutable.equals(mutable)\n        assert immutable.equals(immutable)\n        # assert_equals should not raise\n        mutable.assert_equals(mutable)\n        mutable.assert_equals(immutable)\n        immutable.assert_equals(mutable)\n        immutable.assert_equals(immutable)\n\n    def test_equals_ignore_flags(self, ts):\n        # Create two mutable copies and an immutable baseline\n        m1, imm = get_mutable_and_immutable(ts)\n        m2 = m1.copy()\n        # Diverge TS-level metadata\n        m1.metadata_schema = tskit.MetadataSchema({\"codec\": \"json\", \"type\": \"object\"})\n        m1.metadata = {\"x\": 1}\n        assert not imm.equals(m1)\n        assert imm.equals(m1, ignore_ts_metadata=True)\n        # Diverge provenance\n        m1.provenances.add_row(record=\"random stuff\")\n        assert not imm.equals(m1)\n        assert imm.equals(m1, ignore_ts_metadata=True, ignore_provenance=True)\n        # Reset to identical and verify equals again\n        m1 = m2\n        assert imm.equals(m1)\n\n    def test_nbytes_parity(self, ts):\n        mutable, immutable = get_mutable_and_immutable(ts)\n        assert mutable.nbytes == immutable.nbytes\n\n    def test_reference_sequence_and_index_flags(self, ts):\n        mutable, immutable = get_mutable_and_immutable(ts)\n\n        assert bool(mutable.has_reference_sequence()) == bool(\n            immutable.has_reference_sequence()\n        )\n        if mutable.has_reference_sequence():\n            mutable.reference_sequence.assert_equals(immutable.reference_sequence)\n\n        assert mutable.has_index() == immutable.has_index()\n        if mutable.has_index():\n            assert np.array_equal(\n                mutable.indexes.edge_insertion_order,\n                immutable.indexes.edge_insertion_order,\n            )\n            assert np.array_equal(\n                mutable.indexes.edge_removal_order, immutable.indexes.edge_removal_order\n            )\n\n 
   def test_copy_dump_tree_sequence_roundtrip(self, tmp_path, ts):\n        mutable, immutable = get_mutable_and_immutable(ts)\n\n        # copy() returns a mutable TableCollection equal to both\n        copy_tc = immutable.copy()\n        assert isinstance(copy_tc, tskit.TableCollection)\n        copy_tc.assert_equals(mutable)\n        copy_tc.assert_equals(immutable)\n\n        # dump() uses the mutable copy under the hood\n        out = tmp_path / \"tables\"\n        immutable.dump(out)\n        loaded = tskit.load(out)\n        ts.tables.assert_equals(loaded.tables)\n\n        # tree_sequence() identical to original\n        ts2 = immutable.tree_sequence()\n        ts.tables.assert_equals(ts2.tables)\n\n    def test_str_contains_identifier(self, ts):\n        _, immutable = get_mutable_and_immutable(ts)\n        s = str(immutable)\n        assert \"ImmutableTableCollection\" in s\n\n    def test_link_ancestors_parity(self, ts):\n        # Can't link ancestors when edges have metadata.\n        if ts.tables.edges.metadata_schema != tskit.MetadataSchema(schema=None):\n            pytest.skip(\"link_ancestors does not support edges with metadata\")\n\n        mutable, immutable = get_mutable_and_immutable(ts)\n        samples = ts.samples()\n        if len(samples) == 0:\n            pytest.skip(\"Tree sequence has no samples\")\n\n        ancestor_nodes = [u.id for u in ts.nodes() if not u.is_sample()]\n        if len(ancestor_nodes) == 0:\n            ancestor_nodes = list(samples)\n\n        samples = samples[: min(len(samples), 10)]\n        ancestors = ancestor_nodes[: min(len(ancestor_nodes), 10)]\n\n        mutable_result = mutable.link_ancestors(samples, ancestors)\n        immutable_result = immutable.link_ancestors(samples, ancestors)\n        assert mutable_result == immutable_result\n\n\n@pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\nclass TestTablesParity:\n    def test_table_name_map_and_lengths(self, ts):\n        mutable, 
immutable = get_mutable_and_immutable(ts)\n        assert set(mutable.table_name_map.keys()) == set(immutable.table_name_map.keys())\n\n        for name in mutable.table_name_map.keys():\n            mt = mutable.table_name_map[name]\n            it = immutable.table_name_map[name]\n            assert len(mt) == len(it)\n\n    def test_columns_and_rows_equal(self, ts):\n        mutable, immutable = get_mutable_and_immutable(ts)\n        for name in mutable.table_name_map.keys():\n            mt = mutable.table_name_map[name]\n            it = immutable.table_name_map[name]\n\n            for col_name in mt.column_names:\n                a = getattr(mt, col_name)\n                b = getattr(it, col_name)\n                assert np.array_equal(a, b) or (\n                    np.all(map(tskit.is_unknown_time, a))\n                    and np.all(map(tskit.is_unknown_time, b))\n                )\n\n            # Row object equality\n            if len(mt) > 0:\n                for idx in [0, len(mt) - 1]:\n                    assert mt[idx] == it[idx]\n\n        for name in mutable.table_name_map.keys():\n            mt = mutable.table_name_map[name]\n            it = immutable.table_name_map[name]\n            for col_name in mt.column_names:\n                a = getattr(mt, col_name)\n                b = getattr(it, col_name)\n                assert np.array_equal(a, b) or (\n                    np.all(map(tskit.is_unknown_time, a))\n                    and np.all(map(tskit.is_unknown_time, b))\n                )\n\n    def test_slicing_and_boolean_index(self, ts):\n        mutable, immutable = get_mutable_and_immutable(ts)\n        for name in mutable.table_name_map.keys():\n            mt = mutable.table_name_map[name]\n            it = immutable.table_name_map[name]\n            # Slice view\n            sl = slice(0, max(0, len(mt)))\n            it_view = it[sl]\n            mt_view = mt[sl]\n            it_view.assert_equals(mt_view)\n\n            mask = 
np.zeros(len(it), dtype=bool)\n            if len(it) > 0:\n                mask[0] = True\n            it_view2 = it[mask]\n            mt_view2 = mt[mask]\n            it_view2.assert_equals(mt_view2)\n\n    def test_mask_then_slice(self, ts):\n        mutable, immutable = get_mutable_and_immutable(ts)\n        for name in mutable.table_name_map.keys():\n            mt = mutable.table_name_map[name]\n            it = immutable.table_name_map[name]\n\n            if len(it) < 3:\n                continue\n\n            mask = np.zeros(len(it), dtype=bool)\n            mask[0] = True\n            mask[-1] = True\n            mask[len(it) // 2] = True\n\n            it_mask = it[mask]\n            mt_mask = mt[mask]\n\n            if len(it_mask) <= 1:\n                continue\n\n            it_slice = it_mask[1:]\n            mt_slice = mt_mask[1:]\n            it_slice.assert_equals(mt_slice)\n\n    def test_slice_then_mask(self, ts):\n        mutable, immutable = get_mutable_and_immutable(ts)\n        for name in mutable.table_name_map.keys():\n            mt = mutable.table_name_map[name]\n            it = immutable.table_name_map[name]\n\n            if len(it) < 4:\n                continue\n\n            it_slice = it[1:-1]\n            mt_slice = mt[1:-1]\n\n            if len(it_slice) == 0:\n                continue\n\n            mask = np.zeros(len(it_slice), dtype=bool)\n            mask[0] = True\n            mask[-1] = True\n\n            it_mask = it_slice[mask]\n            mt_mask = mt_slice[mask]\n            it_mask.assert_equals(mt_mask)\n\n    def test_slice_view_indexing(self, ts):\n        mutable, immutable = get_mutable_and_immutable(ts)\n        for name in mutable.table_name_map.keys():\n            mt = mutable.table_name_map[name]\n            it = immutable.table_name_map[name]\n\n            if len(it) == 0:\n                continue\n\n            if len(it) >= 10:\n                mt_view = mt[-10:]\n                it_view = 
it[-10:]\n                for i in [0, 5, -1]:\n                    mt_row = mt_view[i]\n                    it_row = it_view[i]\n                    assert mt_row == it_row\n                    if i == 0:\n                        assert it_row == it[-10]\n                        assert mt_row == mt[-10]\n                    elif i == 5:\n                        assert it_row == it[-5]\n                        assert mt_row == mt[-5]\n                    elif i == -1:\n                        assert it_row == it[-1]\n                        assert mt_row == mt[-1]\n\n            if len(it) >= 20:\n                mt_view = mt[5:15]\n                it_view = it[5:15]\n                for i in [0, 4, -1]:\n                    mt_row = mt_view[i]\n                    it_row = it_view[i]\n                    assert mt_row == it_row\n                    if i == 0:\n                        assert it_row == it[5]\n                        assert mt_row == mt[5]\n                    elif i == 4:\n                        assert it_row == it[9]\n                        assert mt_row == mt[9]\n                    elif i == -1:\n                        assert it_row == it[14]\n                        assert mt_row == mt[14]\n\n    def test_slice_view_iteration(self, ts):\n        mutable, immutable = get_mutable_and_immutable(ts)\n        for name in mutable.table_name_map.keys():\n            mt = mutable.table_name_map[name]\n            it = immutable.table_name_map[name]\n\n            if len(it) < 10:\n                continue\n\n            mt_view = mt[-10:]\n            it_view = it[-10:]\n\n            mt_rows = list(mt_view)\n            it_rows = list(it_view)\n\n            assert len(mt_rows) == len(it_rows)\n            for mt_row, it_row in zip(mt_rows, it_rows):\n                assert mt_row == it_row\n\n    def test_slice_view_ragged_column_access(self, ts):\n        mutable, immutable = get_mutable_and_immutable(ts)\n\n        if ts.num_mutations >= 100:\n   
         mt = mutable.mutations\n            it = immutable.mutations\n\n            mt_slice = mt[-100:]\n            it_slice = it[-100:]\n\n            for idx, (mt_mut, it_mut) in enumerate(zip(mt_slice, it_slice)):\n                assert mt_mut.derived_state == it_mut.derived_state\n                assert mt_mut.metadata == it_mut.metadata\n                original_idx = len(mt) - 100 + idx\n                assert it_mut.derived_state == it[original_idx].derived_state\n                assert mt_mut.derived_state == mt[original_idx].derived_state\n\n        if ts.num_sites >= 50:\n            mt = mutable.sites\n            it = immutable.sites\n\n            mt_slice = mt[-50:]\n            it_slice = it[-50:]\n\n            for idx, (mt_site, it_site) in enumerate(zip(mt_slice, it_slice)):\n                assert mt_site.ancestral_state == it_site.ancestral_state\n                assert mt_site.metadata == it_site.metadata\n                original_idx = len(mt) - 50 + idx\n                assert it_site.ancestral_state == it[original_idx].ancestral_state\n                assert mt_site.ancestral_state == mt[original_idx].ancestral_state\n\n    def test_nested_slicing(self, ts):\n        mutable, immutable = get_mutable_and_immutable(ts)\n        for name in mutable.table_name_map.keys():\n            mt = mutable.table_name_map[name]\n            it = immutable.table_name_map[name]\n\n            if len(it) < 50:\n                continue\n\n            # Create nested slices\n            mt_slice1 = mt[-50:]\n            it_slice1 = it[-50:]\n\n            mt_slice2 = mt_slice1[10:30]\n            it_slice2 = it_slice1[10:30]\n\n            mt_row = mt_slice2[5]\n            it_row = it_slice2[5]\n            assert mt_row == it_row\n\n            for mt_r, it_r in zip(mt_slice2, it_slice2):\n                assert mt_r == it_r\n\n            for col_name in mt.column_names:\n                a = getattr(mt_slice2, col_name)\n                b = 
getattr(it_slice2, col_name)\n                assert np.array_equal(a, b) or (\n                    np.all(map(tskit.is_unknown_time, a))\n                    and np.all(map(tskit.is_unknown_time, b))\n                )\n\n    def test_random_access_on_slice(self, ts):\n        mutable, immutable = get_mutable_and_immutable(ts)\n        for name in mutable.table_name_map.keys():\n            mt = mutable.table_name_map[name]\n            it = immutable.table_name_map[name]\n\n            if len(it) < 100:\n                continue\n\n            mt_slice = mt[-100:]\n            it_slice = it[-100:]\n\n            indices = [0, 10, 50, 75, 99, -1, -10]\n            for idx in indices:\n                mt_row = mt_slice[idx]\n                it_row = it_slice[idx]\n                assert mt_row == it_row\n\n            for col_name in mt.column_names:\n                a = getattr(mt_slice, col_name)\n                b = getattr(it_slice, col_name)\n                assert np.array_equal(a, b) or (\n                    np.all(map(tskit.is_unknown_time, a))\n                    and np.all(map(tskit.is_unknown_time, b))\n                )\n\n    def test_table_equals_bidirectional(self, ts):\n        mutable, immutable = get_mutable_and_immutable(ts)\n        for name in mutable.table_name_map.keys():\n            mt = mutable.table_name_map[name]\n            it = immutable.table_name_map[name]\n            assert mt.equals(it)\n            assert it.equals(mt)\n            mt.assert_equals(it)\n            it.assert_equals(mt)\n            if isinstance(mt, tskit.tables.MutableMetadataTable) or isinstance(\n                it, tskit.tables.ImmutableMetadataTable\n            ):\n                mt.assert_equals(it, ignore_metadata=True)\n                it.assert_equals(mt, ignore_metadata=True)\n\n\n@pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\nclass TestImmutableErrors:\n    def test_collection_mutators_raise(self, ts):\n        _, immutable 
= get_mutable_and_immutable(ts)\n        for name in type(immutable)._MUTATOR_METHODS:\n            with pytest.raises(tskit.ImmutableTableError):\n                getattr(immutable, name)\n\n    def test_collection_property_setters_raise(self, ts):\n        _, immutable = get_mutable_and_immutable(ts)\n        with pytest.raises(tskit.ImmutableTableError):\n            immutable.metadata_schema = tskit.MetadataSchema({\"codec\": \"json\"})\n        with pytest.raises(tskit.ImmutableTableError):\n            immutable.metadata = {}\n        with pytest.raises(tskit.ImmutableTableError):\n            immutable.metadata_bytes = b\"\"\n\n    def test_table_mutators_raise(self, ts):\n        _, immutable = get_mutable_and_immutable(ts)\n        for it in immutable.table_name_map.values():\n            for name in tskit.tables.ImmutableBaseTable._MUTATION_METHODS:\n                with pytest.raises(tskit.ImmutableTableError):\n                    getattr(it, name)\n\n    def test_table_metadata_schema_setter_raises(self, ts):\n        _, immutable = get_mutable_and_immutable(ts)\n        for _, itab in immutable.table_name_map.items():\n            if isinstance(itab, tskit.tables.ImmutableMetadataTable):\n                with pytest.raises(tskit.ImmutableTableError):\n                    itab.metadata_schema = tskit.MetadataSchema(None)\n\n    def test_table_attribute_assignment_raises(self, ts):\n        _, immutable = get_mutable_and_immutable(ts)\n        for _, itab in immutable.table_name_map.items():\n            # Try setting a known column if any exist\n            col_names = [\n                c for c in getattr(itab, \"column_names\", []) if not c.endswith(\"_schema\")\n            ]\n            if col_names:\n                col = col_names[0]\n                current = getattr(itab, col)\n                with pytest.raises(tskit.ImmutableTableError):\n                    setattr(itab, col, current)\n            # Setting an unknown public attribute 
should also raise\n            with pytest.raises(tskit.ImmutableTableError):\n                itab.not_a_real_column = 123\n\n    def test_collection_attribute_assignment_raises(self, ts):\n        _, immutable = get_mutable_and_immutable(ts)\n        # Try to set table attributes\n        with pytest.raises(tskit.ImmutableTableError):\n            immutable.nodes = immutable.nodes\n        with pytest.raises(tskit.ImmutableTableError):\n            immutable.edges = immutable.edges\n        with pytest.raises(tskit.ImmutableTableError):\n            immutable.sites = immutable.sites\n        # Try to set arbitrary public attributes\n        with pytest.raises(tskit.ImmutableTableError):\n            immutable.new_attribute = 123\n        with pytest.raises(tskit.ImmutableTableError):\n            immutable.sequence_length = 100\n\n\nclass TestMethodParity:\n    def test_immutable_has_method_or_mutator(self, ts_fixture):\n        tc = ts_fixture.dump_tables()\n        it = ts_fixture.tables\n        # Collect instance-bound public methods of TableCollection\n        tc_methods = []\n        for name in dir(tc):\n            if name.startswith(\"_\"):\n                continue\n            # Use getattr_static first so we don't trigger properties like\n            # the deprecated ``name_map`` attribute, which emits warnings.\n            attr = inspect.getattr_static(tc, name)\n            if isinstance(attr, property):\n                continue\n            attr = getattr(tc, name)\n            if inspect.ismethod(attr) and getattr(attr, \"__self__\", None) is tc:\n                tc_methods.append(name)\n\n        missing = []\n        for name in tc_methods:\n            try:\n                inspect.getattr_static(it, name)\n                present = True\n            except AttributeError:\n                present = False\n            if present:\n                continue\n            if name in type(it)._MUTATOR_METHODS:\n                continue\n           
 missing.append(name)\n\n        assert missing == [], (\n            f\"ImmutableTableCollection missing non-mutator methods: {missing}\"\n        )\n\n    def test_immutable_tables_have_method_or_mutator(self, ts_fixture):\n        tc = ts_fixture.dump_tables()\n        itc = ts_fixture.tables\n\n        for table_name, mt in tc.table_name_map.items():\n            it = itc.table_name_map[table_name]\n            # Collect instance-bound public methods on the mutable table\n            mt_methods = []\n            for name in dir(mt):\n                if name.startswith(\"_\"):\n                    continue\n                attr = getattr(mt, name)\n                if inspect.ismethod(attr) and getattr(attr, \"__self__\", None) is mt:\n                    mt_methods.append(name)\n\n            missing = []\n            for name in mt_methods:\n                # Use getattr_static to avoid triggering __getattr__ on immutable tables\n                try:\n                    inspect.getattr_static(it, name)\n                    present = True\n                except AttributeError:\n                    present = False\n                if present:\n                    continue\n                if name in tskit.tables.ImmutableBaseTable._MUTATION_METHODS:\n                    continue\n                missing.append(name)\n\n            assert missing == [], (\n                f\"Immutable {table_name} table missing non-mutator methods: {missing}\"\n            )\n\n\nclass TestImmutableTimestampHandling:\n    def test_assert_equals_ignore_timestamps_roundtrip(self):\n        tables = tskit.TableCollection(sequence_length=1.0)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        tables.provenances.add_row(record=\"{}\", timestamp=\"2024-01-01T00:00:00Z\")\n\n        ts = tables.tree_sequence()\n        immutable_prov = ts.tables.provenances\n\n        mutable_tables = ts.dump_tables()\n        mutable_tables.provenances.clear()\n        
mutable_tables.provenances.add_row(record=\"{}\", timestamp=\"2024-02-01T00:00:00Z\")\n        mutable_prov = mutable_tables.provenances\n\n        with pytest.raises(AssertionError, match=\"timestamp\"):\n            immutable_prov.assert_equals(mutable_prov)\n        immutable_prov.assert_equals(mutable_prov, ignore_timestamps=True)\n        mutable_prov.assert_equals(immutable_prov, ignore_timestamps=True)\n\n    def test_assert_equals_ignore_timestamps_guard(self, ts_fixture, monkeypatch):\n        immutable_prov = ts_fixture.tables.provenances\n        monkeypatch.setattr(immutable_prov.__class__, \"table_name\", \"not_provenances\")\n        with pytest.raises(ValueError, match=\"only valid for Provenance tables\"):\n            immutable_prov.assert_equals(immutable_prov, ignore_timestamps=True)\n\n    def test_assert_equals_ignore_timestamps_other_difference(self):\n        tables = tskit.TableCollection(sequence_length=1.0)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        tables.provenances.add_row(record=\"{}\", timestamp=\"2024-01-01T00:00:00Z\")\n\n        ts = tables.tree_sequence()\n        immutable = ts.tables\n        mutable = ts.dump_tables()\n        mutable.provenances.clear()\n        mutable.provenances.add_row(record=\"different\", timestamp=\"2024-02-01T00:00:00Z\")\n\n        immutable_msg = re.escape(\n            \"ImmutableProvenanceTable row 0 differs:\\n\"\n            \"self.record={} other.record=different\"\n        )\n\n        with pytest.raises(AssertionError, match=immutable_msg):\n            immutable.assert_equals(mutable, ignore_timestamps=True)\n        with pytest.raises(AssertionError, match=immutable_msg):\n            mutable.assert_equals(immutable, ignore_timestamps=True)\n\n\nclass TestImmutableIndexViews:\n    def test_index_view_ragged_columns(self, ts_fixture):\n        immutable_tables = ts_fixture.tables\n        mutations = immutable_tables.mutations\n        if mutations.num_rows < 
3:\n            pytest.skip(\"Need mutations with metadata for this test\")\n\n        indices = np.array([0, mutations.num_rows - 1, 1], dtype=np.int64)\n        view = mutations[indices]\n\n        expected_rows = [mutations[i] for i in indices]\n        assert list(view) == expected_rows\n\n        base_ds = mutations.derived_state\n        base_ds_offset = mutations.derived_state_offset\n        expected_ds = []\n        expected_ds_lengths = []\n        for idx in indices:\n            start = base_ds_offset[idx]\n            end = base_ds_offset[idx + 1]\n            expected_ds.extend(base_ds[start:end])\n            expected_ds_lengths.append(end - start)\n        assert np.array_equal(view.derived_state, np.array(expected_ds, dtype=np.int8))\n        derived_offsets = view.derived_state_offset\n        assert list(derived_offsets[1:] - derived_offsets[:-1]) == expected_ds_lengths\n\n        base_md = mutations.metadata\n        base_md_offset = mutations.metadata_offset\n        expected_md = []\n        expected_md_lengths = []\n        for idx in indices:\n            start = base_md_offset[idx]\n            end = base_md_offset[idx + 1]\n            expected_md.extend(base_md[start:end])\n            expected_md_lengths.append(end - start)\n        assert np.array_equal(view.metadata, np.array(expected_md, dtype=np.int8))\n        metadata_offsets = view.metadata_offset\n        assert list(metadata_offsets[1:] - metadata_offsets[:-1]) == expected_md_lengths\n\n    def test_index_view_offset_columns(self):\n        tables = tskit.TableCollection(sequence_length=1.0)\n        n0 = tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE)\n        n1 = tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE)\n        site = tables.sites.add_row(position=0.1, ancestral_state=\"A\")\n        tables.mutations.add_row(site=site, node=n0, derived_state=\"AA\")\n        tables.mutations.add_row(site=site, node=n1, derived_state=\"BBB\")\n\n        ts = 
tables.tree_sequence()\n        mutations = ts.tables.mutations\n        indices = np.array([1, 0], dtype=np.int64)\n        view = mutations[indices]\n\n        expected_offsets = np.array([0, 3, 5], dtype=np.uint32)\n        assert np.array_equal(view.derived_state_offset, expected_offsets)\n        assert view[0] == mutations[1]\n\n    def test_index_view_empty_selection(self):\n        tables = tskit.TableCollection(sequence_length=1.0)\n        tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE)\n        site = tables.sites.add_row(position=0.1, ancestral_state=\"A\")\n        tables.mutations.add_row(site=site, node=0, derived_state=\"A\")\n        ts = tables.tree_sequence()\n\n        mutations = ts.tables.mutations\n        indices = np.array([], dtype=np.int64)\n        view = mutations[indices]\n\n        assert view.num_rows == 0\n        assert np.array_equal(view.derived_state_offset, np.array([0], dtype=np.uint32))\n        assert np.array_equal(view.metadata_offset, np.array([0], dtype=np.uint32))\n\n    def test_index_out_of_bounds(self, ts_fixture):\n        nodes = ts_fixture.tables.nodes\n        with pytest.raises(IndexError, match=\"Index out of bounds\"):\n            nodes[nodes.num_rows]\n        with pytest.raises(IndexError, match=\"Index out of bounds\"):\n            nodes[-nodes.num_rows - 1]\n\n    def test_boolean_index_length_mismatch(self, ts_fixture):\n        nodes = ts_fixture.tables.nodes\n        mask = np.zeros(nodes.num_rows + 1, dtype=bool)\n        with pytest.raises(\n            IndexError, match=\"Boolean index must be same length as table\"\n        ):\n            nodes[mask]\n\n\ndef test_immutable_table_metadata_schema_difference():\n    tables = tskit.TableCollection(sequence_length=1.0)\n    tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE)\n\n    tables_with_schema = tables.copy()\n    tables_with_schema.nodes.metadata_schema = tskit.MetadataSchema(\n        {\"codec\": \"json\", \"type\": 
\"object\"}\n    )\n\n    ts_plain = tables.tree_sequence()\n    ts_schema = tables_with_schema.tree_sequence()\n\n    plain_nodes = ts_plain.tables.nodes\n    schema_nodes = ts_schema.tables.nodes\n\n    assert not plain_nodes.equals(schema_nodes)\n    assert not schema_nodes.equals(plain_nodes)\n    with pytest.raises(AssertionError, match=\"metadata schemas differ\"):\n        plain_nodes.assert_equals(schema_nodes)\n"
  },
  {
    "path": "python/tests/test_intervals.py",
    "content": "# MIT License\n#\n# Copyright (c) 2023-2024 Tskit Developers\n# Copyright (C) 2020-2022 University of Oxford\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n#\n\"\"\"\nTest cases for the intervals module.\n\"\"\"\n\nimport decimal\nimport fractions\nimport gzip\nimport io\nimport os\nimport pickle\nimport textwrap\nimport xml\n\nimport numpy as np\nimport pytest\nfrom numpy.testing import assert_array_equal\n\nimport tskit\n\n\nclass TestRateMapErrors:\n    @pytest.mark.parametrize(\n        (\"position\", \"rate\"),\n        [\n            ([], []),\n            ([0], []),\n            ([0], [0]),\n            ([1, 2], [0]),\n            ([0, -1], [0]),\n            ([0, 1], [-1]),\n        ],\n    )\n    def test_bad_input(self, position, rate):\n        with pytest.raises(ValueError):\n            tskit.RateMap(position=position, rate=rate)\n\n    def test_zero_length_interval(self):\n        with pytest.raises(ValueError, match=r\"at 
indexes \\[2 4\\]\"):\n            tskit.RateMap(position=[0, 1, 1, 2, 2, 3], rate=[0, 0, 0, 0, 0])\n\n    def test_bad_length(self):\n        positions = np.array([0, 1, 2])\n        rates = np.array([0, 1, 2])\n        with pytest.raises(ValueError, match=\"one less entry\"):\n            tskit.RateMap(position=positions, rate=rates)\n\n    def test_bad_first_pos(self):\n        positions = np.array([1, 2, 3])\n        rates = np.array([1, 1])\n        with pytest.raises(ValueError, match=\"First position\"):\n            tskit.RateMap(position=positions, rate=rates)\n\n    def test_bad_rate(self):\n        positions = np.array([0, 1, 2])\n        rates = np.array([1, -1])\n        with pytest.raises(ValueError, match=\"negative.*1\"):\n            tskit.RateMap(position=positions, rate=rates)\n\n    def test_bad_rate_with_missing(self):\n        positions = np.array([0, 1, 2])\n        rates = np.array([np.nan, -1])\n        with pytest.raises(ValueError, match=\"negative.*1\"):\n            tskit.RateMap(position=positions, rate=rates)\n\n    def test_read_only(self):\n        positions = np.array([0, 0.25, 0.5, 0.75, 1])\n        rates = np.array([0.125, 0.25, 0.5, 0.75])  # 1 shorter than positions\n        rate_map = tskit.RateMap(position=positions, rate=rates)\n        assert np.all(rates == rate_map.rate)\n        assert np.all(positions == rate_map.position)\n        with pytest.raises(AttributeError):\n            rate_map.rate = 2 * rate_map.rate\n        with pytest.raises(AttributeError):\n            rate_map.position = 2 * rate_map.position\n        with pytest.raises(AttributeError):\n            rate_map.left = 1234\n        with pytest.raises(AttributeError):\n            rate_map.right = 1234\n        with pytest.raises(AttributeError):\n            rate_map.mid = 1234\n        with pytest.raises(ValueError):\n            rate_map.rate[0] = 1\n        with pytest.raises(ValueError):\n            rate_map.position[0] = 1\n        with 
pytest.raises(ValueError):\n            rate_map.left[0] = 1\n        with pytest.raises(ValueError):\n            rate_map.mid[0] = 1\n        with pytest.raises(ValueError):\n            rate_map.right[0] = 1\n\n\nclass TestGetRateAllKnown:\n    examples = [\n        tskit.RateMap(position=[0, 1], rate=[0]),\n        tskit.RateMap(position=[0, 1], rate=[0.1]),\n        tskit.RateMap(position=[0, 1, 2], rate=[0.1, 0.2]),\n        tskit.RateMap(position=[0, 1, 2], rate=[0, 0.2]),\n        tskit.RateMap(position=[0, 1, 2], rate=[0.1, 1e-6]),\n        tskit.RateMap(position=range(100), rate=range(99)),\n    ]\n\n    @pytest.mark.parametrize(\"rate_map\", examples)\n    def test_get_rate_mid(self, rate_map):\n        rate = rate_map.get_rate(rate_map.mid)\n        assert len(rate) == len(rate_map)\n        for j in range(len(rate_map)):\n            assert rate[j] == rate_map[rate_map.mid[j]]\n\n    @pytest.mark.parametrize(\"rate_map\", examples)\n    def test_get_rate_left(self, rate_map):\n        rate = rate_map.get_rate(rate_map.left)\n        assert len(rate) == len(rate_map)\n        for j in range(len(rate_map)):\n            assert rate[j] == rate_map[rate_map.left[j]]\n\n    @pytest.mark.parametrize(\"rate_map\", examples)\n    def test_get_rate_right(self, rate_map):\n        rate = rate_map.get_rate(rate_map.right[:-1])\n        assert len(rate) == len(rate_map) - 1\n        for j in range(len(rate_map) - 1):\n            assert rate[j] == rate_map[rate_map.right[j]]\n\n\nclass TestOperations:\n    examples = [\n        tskit.RateMap.uniform(sequence_length=1, rate=0),\n        tskit.RateMap.uniform(sequence_length=1, rate=0.1),\n        tskit.RateMap(position=[0, 1, 2], rate=[0.1, 0.2]),\n        tskit.RateMap(position=[0, 1, 2], rate=[0, 0.2]),\n        tskit.RateMap(position=[0, 1, 2], rate=[0.1, 1e-6]),\n        tskit.RateMap(position=range(100), rate=range(99)),\n        # Missing data\n        tskit.RateMap(position=[0, 1, 2], rate=[np.nan, 0]),\n    
    tskit.RateMap(position=[0, 1, 2], rate=[0, np.nan]),\n        tskit.RateMap(position=[0, 1, 2, 3], rate=[0, np.nan, 1]),\n    ]\n\n    @pytest.mark.parametrize(\"rate_map\", examples)\n    def test_num_intervals(self, rate_map):\n        assert rate_map.num_intervals == len(rate_map.rate)\n        assert rate_map.num_missing_intervals == np.sum(np.isnan(rate_map.rate))\n        assert rate_map.num_non_missing_intervals == np.sum(~np.isnan(rate_map.rate))\n\n    @pytest.mark.parametrize(\"rate_map\", examples)\n    def test_mask_arrays(self, rate_map):\n        assert_array_equal(rate_map.missing, np.isnan(rate_map.rate))\n        assert_array_equal(rate_map.non_missing, ~np.isnan(rate_map.rate))\n\n    @pytest.mark.parametrize(\"rate_map\", examples)\n    def test_missing_intervals(self, rate_map):\n        missing = []\n        for left, right, rate in zip(rate_map.left, rate_map.right, rate_map.rate):\n            if np.isnan(rate):\n                missing.append([left, right])\n        if len(missing) == 0:\n            assert len(rate_map.missing_intervals()) == 0\n        else:\n            assert_array_equal(missing, rate_map.missing_intervals())\n\n    @pytest.mark.parametrize(\"rate_map\", examples)\n    def test_mean_rate(self, rate_map):\n        total_span = 0\n        total_mass = 0\n        for span, mass in zip(rate_map.span, rate_map.mass):\n            if not np.isnan(mass):\n                total_span += span\n                total_mass += mass\n        assert total_mass / total_span == rate_map.mean_rate\n\n    @pytest.mark.parametrize(\"rate_map\", examples)\n    def test_total_mass(self, rate_map):\n        assert rate_map.total_mass == np.nansum(rate_map.mass)\n\n    @pytest.mark.parametrize(\"rate_map\", examples)\n    def test_get_cumulative_mass(self, rate_map):\n        assert list(rate_map.get_cumulative_mass([0])) == [0]\n        assert list(rate_map.get_cumulative_mass([rate_map.sequence_length])) == [\n            
rate_map.total_mass\n        ]\n        assert_array_equal(\n            rate_map.get_cumulative_mass(rate_map.right), np.nancumsum(rate_map.mass)\n        )\n\n    @pytest.mark.parametrize(\"rate_map\", examples)\n    def test_get_rate(self, rate_map):\n        assert_array_equal(rate_map.get_rate([0]), rate_map.rate[0])\n        assert_array_equal(\n            rate_map.get_rate([rate_map.sequence_length - 1e-9]), rate_map.rate[-1]\n        )\n        assert_array_equal(rate_map.get_rate(rate_map.left), rate_map.rate)\n\n    @pytest.mark.parametrize(\"rate_map\", examples)\n    def test_map_semantics(self, rate_map):\n        assert len(rate_map) == rate_map.num_non_missing_intervals\n        assert_array_equal(list(rate_map.keys()), rate_map.mid[rate_map.non_missing])\n        for x in rate_map.left[rate_map.missing]:\n            assert x not in rate_map\n        for x in rate_map.mid[rate_map.missing]:\n            assert x not in rate_map\n\n    def test_asdict(self):\n        rate_map = tskit.RateMap.uniform(sequence_length=2, rate=4)\n        d = rate_map.asdict()\n        assert_array_equal(d[\"position\"], np.array([0.0, 2.0]))\n        assert_array_equal(d[\"rate\"], np.array([4.0]))\n\n\nclass TestFindIndex:\n    def test_one_interval(self):\n        rate_map = tskit.RateMap(position=[0, 10], rate=[0.1])\n        for j in range(10):\n            assert rate_map.find_index(j) == 0\n        assert rate_map.find_index(0.0001) == 0\n        assert rate_map.find_index(9.999) == 0\n\n    def test_two_intervals(self):\n        rate_map = tskit.RateMap(position=[0, 5, 10], rate=[0.1, 0.1])\n        assert rate_map.find_index(0) == 0\n        assert rate_map.find_index(0.0001) == 0\n        assert rate_map.find_index(4.9999) == 0\n        assert rate_map.find_index(5) == 1\n        assert rate_map.find_index(5.1) == 1\n        assert rate_map.find_index(7) == 1\n        assert rate_map.find_index(9.999) == 1\n\n    def test_three_intervals(self):\n        
rate_map = tskit.RateMap(position=[0, 5, 10, 15], rate=[0.1, 0.1, 0.1])\n        assert rate_map.find_index(0) == 0\n        assert rate_map.find_index(0.0001) == 0\n        assert rate_map.find_index(4.9999) == 0\n        assert rate_map.find_index(5) == 1\n        assert rate_map.find_index(5.1) == 1\n        assert rate_map.find_index(7) == 1\n        assert rate_map.find_index(9.999) == 1\n        assert rate_map.find_index(10) == 2\n        assert rate_map.find_index(10.1) == 2\n        assert rate_map.find_index(12) == 2\n        assert rate_map.find_index(14.9999) == 2\n\n    def test_out_of_bounds(self):\n        rate_map = tskit.RateMap(position=[0, 10], rate=[0.1])\n        for bad_value in [-1, -0.0001, 10, 10.0001, 1e9]:\n            with pytest.raises(KeyError, match=\"out of bounds\"):\n                rate_map.find_index(bad_value)\n\n    def test_input_types(self):\n        rate_map = tskit.RateMap(position=[0, 10], rate=[0.1])\n        assert rate_map.find_index(0) == 0\n        assert rate_map.find_index(0.0) == 0\n        assert rate_map.find_index(np.zeros(1)[0]) == 0\n\n\nclass TestSimpleExamples:\n    def test_all_missing_one_interval(self):\n        with pytest.raises(ValueError, match=\"missing data\"):\n            tskit.RateMap(position=[0, 10], rate=[np.nan])\n\n    def test_all_missing_two_intervals(self):\n        with pytest.raises(ValueError, match=\"missing data\"):\n            tskit.RateMap(position=[0, 5, 10], rate=[np.nan, np.nan])\n\n    def test_count(self):\n        rate_map = tskit.RateMap(position=[0, 5, 10], rate=[np.nan, 1])\n        assert rate_map.num_intervals == 2\n        assert rate_map.num_missing_intervals == 1\n        assert rate_map.num_non_missing_intervals == 1\n\n    def test_missing_arrays(self):\n        rate_map = tskit.RateMap(position=[0, 5, 10], rate=[np.nan, 1])\n        assert list(rate_map.missing) == [True, False]\n        assert list(rate_map.non_missing) == [False, True]\n\n    def 
test_missing_at_start_mean_rate(self):\n        positions = np.array([0, 0.5, 1, 2])\n        rates = np.array([np.nan, 0, 1])\n        rate_map = tskit.RateMap(position=positions, rate=rates)\n        assert np.isclose(rate_map.mean_rate, 1 / (1 + 0.5))\n\n    def test_missing_at_end_mean_rate(self):\n        positions = np.array([0, 1, 1.5, 2])\n        rates = np.array([1, 0, np.nan])\n        rate_map = tskit.RateMap(position=positions, rate=rates)\n        assert np.isclose(rate_map.mean_rate, 1 / (1 + 0.5))\n\n    def test_interval_properties_all_known(self):\n        rate_map = tskit.RateMap(position=[0, 1, 2, 3], rate=[0.1, 0.2, 0.3])\n        assert list(rate_map.left) == [0, 1, 2]\n        assert list(rate_map.right) == [1, 2, 3]\n        assert list(rate_map.mid) == [0.5, 1.5, 2.5]\n        assert list(rate_map.span) == [1, 1, 1]\n        assert list(rate_map.mass) == [0.1, 0.2, 0.3]\n\n    def test_pickle_non_missing(self):\n        r1 = tskit.RateMap(position=[0, 1, 2, 3], rate=[0.1, 0.2, 0.3])\n        r2 = pickle.loads(pickle.dumps(r1))\n        assert r1 == r2\n\n    def test_pickle_missing(self):\n        r1 = tskit.RateMap(position=[0, 1, 2, 3], rate=[0.1, np.nan, 0.3])\n        r2 = pickle.loads(pickle.dumps(r1))\n        assert r1 == r2\n\n    def test_get_cumulative_mass_all_known(self):\n        rate_map = tskit.RateMap(position=[0, 10, 20, 30], rate=[0.1, 0.2, 0.3])\n        assert list(rate_map.mass) == [1, 2, 3]\n        assert list(rate_map.get_cumulative_mass([10, 20, 30])) == [1, 3, 6]\n\n    def test_cumulative_mass_missing(self):\n        rate_map = tskit.RateMap(position=[0, 10, 20, 30], rate=[0.1, np.nan, 0.3])\n        assert list(rate_map.get_cumulative_mass([10, 20, 30])) == [1, 1, 4]\n\n\nclass TestDisplay:\n    def test_str(self):\n        rate_map = tskit.RateMap(position=[0, 10], rate=[0.1])\n        s = \"\"\"\\\n        ╔════╤═════╤═══╤════╤════╗\n        ║left│right│mid│span│rate║\n        ╠════╪═════╪═══╪════╪════╣\n       
 ║0   │10   │  5│  10│ 0.1║\n        ╚════╧═════╧═══╧════╧════╝\n        \"\"\"\n        assert textwrap.dedent(s) == str(rate_map)\n\n    def test_str_scinot(self):\n        rate_map = tskit.RateMap(position=[0, 10], rate=[0.000001])\n        s = \"\"\"\\\n        ╔════╤═════╤═══╤════╤═════╗\n        ║left│right│mid│span│rate ║\n        ╠════╪═════╪═══╪════╪═════╣\n        ║0   │10   │  5│  10│1e-06║\n        ╚════╧═════╧═══╧════╧═════╝\n        \"\"\"\n        assert textwrap.dedent(s) == str(rate_map)\n\n    def test_repr(self):\n        rate_map = tskit.RateMap(position=[0, 10], rate=[0.1])\n        s = \"RateMap(position=array([ 0., 10.]), rate=array([0.1]))\"\n        assert repr(rate_map) == s\n\n    def test_repr_html(self):\n        rate_map = tskit.RateMap(position=[0, 10], rate=[0.1])\n        html = rate_map._repr_html_()\n        root = xml.etree.ElementTree.fromstring(html)\n        assert root.tag == \"div\"\n        table = root.find(\"table\")\n        rows = list(table.find(\"tbody\"))\n        assert len(rows) == 1\n\n    def test_long_table(self):\n        n = 100\n        rate_map = tskit.RateMap(position=range(n + 1), rate=[0.1] * n)\n        headers, data = rate_map._text_header_and_rows(limit=20)\n        assert len(headers) == 5\n        assert len(data) == 21\n        # check some left values\n        assert int(data[0][0]) == 0\n        assert int(data[-1][0]) == n - 1\n\n    def test_short_table(self):\n        n = 10\n        rate_map = tskit.RateMap(position=range(n + 1), rate=[0.1] * n)\n        headers, data = rate_map._text_header_and_rows(limit=20)\n        assert len(headers) == 5\n        assert len(data) == n\n        # check some left values.\n        assert int(data[0][0]) == 0\n        assert int(data[-1][0]) == n - 1\n\n\nclass TestRateMapIsMapping:\n    def test_items(self):\n        rate_map = tskit.RateMap(position=[0, 1, 2, 3], rate=[0.1, 0.2, 0.3])\n        items = list(rate_map.items())\n        assert items[0] == 
(0.5, 0.1)\n        assert items[1] == (1.5, 0.2)\n        assert items[2] == (2.5, 0.3)\n\n    def test_keys(self):\n        rate_map = tskit.RateMap(position=[0, 1, 2, 3], rate=[0.1, 0.2, 0.3])\n        assert list(rate_map.keys()) == [0.5, 1.5, 2.5]\n\n    def test_values(self):\n        rate_map = tskit.RateMap(position=[0, 1, 2, 3], rate=[0.1, 0.2, 0.3])\n        assert list(rate_map.values()) == [0.1, 0.2, 0.3]\n\n    def test_in_points(self):\n        rate_map = tskit.RateMap(position=[0, 1, 2, 3], rate=[0.1, 0.2, 0.3])\n        # Any point within the map are True\n        for x in [0, 0.5, 1, 2.9999]:\n            assert x in rate_map\n        # Points outside the map are False\n        for x in [-1, -0.0001, 3, 3.1]:\n            assert x not in rate_map\n\n    def test_in_slices(self):\n        rate_map = tskit.RateMap(position=[0, 1, 2, 3], rate=[0.1, 0.2, 0.3])\n        # slices that are within the map are \"in\"\n        for x in [slice(0, 0.5), slice(0, 1), slice(0, 2), slice(2, 3), slice(0, 3)]:\n            assert x in rate_map\n        # Any slice that doesn't fully intersect with the map \"not in\"\n        assert slice(-0.001, 1) not in rate_map\n        assert slice(0, 3.0001) not in rate_map\n        assert slice(2.9999, 3.0001) not in rate_map\n        assert slice(3, 4) not in rate_map\n        assert slice(-2, -1) not in rate_map\n\n    def test_other_types_not_in(self):\n        rate_map = tskit.RateMap(position=[0, 1, 2, 3], rate=[0.1, 0.2, 0.3])\n        for other_type in [None, \"sdf\", \"123\", {}, [], Exception]:\n            assert other_type not in rate_map\n\n    def test_len(self):\n        rate_map = tskit.RateMap(position=[0, 1], rate=[0.1])\n        assert len(rate_map) == 1\n        rate_map = tskit.RateMap(position=[0, 1, 2], rate=[0.1, 0.2])\n        assert len(rate_map) == 2\n        rate_map = tskit.RateMap(position=[0, 1, 2, 3], rate=[0.1, 0.2, 0.3])\n        assert len(rate_map) == 3\n\n    def test_immutable(self):\n     
   rate_map = tskit.RateMap(position=[0, 1], rate=[0.1])\n        with pytest.raises(TypeError, match=\"item assignment\"):\n            rate_map[0] = 1\n        with pytest.raises(TypeError, match=\"item deletion\"):\n            del rate_map[0]\n\n    def test_eq(self):\n        r1 = tskit.RateMap(position=[0, 1, 2], rate=[0.1, 0.2])\n        r2 = tskit.RateMap(position=[0, 1, 2], rate=[0.1, 0.2])\n        assert r1 == r1\n        assert r1 == r2\n        r2 = tskit.RateMap(position=[0, 1, 3], rate=[0.1, 0.2])\n        assert r1 != r2\n        assert tskit.RateMap(position=[0, 1], rate=[0.1]) != tskit.RateMap(\n            position=[0, 1], rate=[0.2]\n        )\n        assert tskit.RateMap(position=[0, 1], rate=[0.1]) != tskit.RateMap(\n            position=[0, 10], rate=[0.1]\n        )\n\n    def test_getitem_value(self):\n        rate_map = tskit.RateMap(position=[0, 1, 2], rate=[0.1, 0.2])\n        assert rate_map[0] == 0.1\n        assert rate_map[0.5] == 0.1\n        assert rate_map[1] == 0.2\n        assert rate_map[1.5] == 0.2\n        assert rate_map[1.999] == 0.2\n        # Try other types\n        assert rate_map[np.array([1], dtype=np.float32)[0]] == 0.2\n        assert rate_map[np.array([1], dtype=np.int32)[0]] == 0.2\n        assert rate_map[np.array([1], dtype=np.float64)[0]] == 0.2\n        assert rate_map[1 / 2] == 0.1\n        assert rate_map[fractions.Fraction(1, 3)] == 0.1\n        assert rate_map[decimal.Decimal(1)] == 0.2\n\n    def test_getitem_slice(self):\n        r1 = tskit.RateMap(position=[0, 1, 2], rate=[0.1, 0.2])\n        # The semantics of the slice() function are tested elsewhere.\n        assert r1[:] == r1.copy()\n        assert r1[:] is not r1\n        assert r1[1:] == r1.slice(left=1)\n        assert r1[:1.5] == r1.slice(right=1.5)\n        assert r1[0.5:1.5] == r1.slice(left=0.5, right=1.5)\n\n    def test_getitem_slice_step(self):\n        r1 = tskit.RateMap(position=[0, 1, 2], rate=[0.1, 0.2])\n        # Trying to set a 
\"step\" is a error\n        with pytest.raises(TypeError, match=\"interval slicing\"):\n            r1[0:3:1]\n\n\nclass TestMappingMissingData:\n    def test_get_missing(self):\n        rate_map = tskit.RateMap(position=[0, 1, 2], rate=[np.nan, 0.2])\n        with pytest.raises(KeyError, match=\"within a missing interval\"):\n            rate_map[0]\n        with pytest.raises(KeyError, match=\"within a missing interval\"):\n            rate_map[0.999]\n\n    def test_in_missing(self):\n        rate_map = tskit.RateMap(position=[0, 1, 2], rate=[np.nan, 0.2])\n        assert 0 not in rate_map\n        assert 0.999 not in rate_map\n        assert 1 in rate_map\n\n    def test_keys_missing(self):\n        rate_map = tskit.RateMap(position=[0, 1, 2], rate=[np.nan, 0.2])\n        assert list(rate_map.keys()) == [1.5]\n\n\nclass TestGetIntermediates:\n    def test_get_rate(self):\n        positions = np.array([0, 1, 2])\n        rates = np.array([1, 4])\n        rate_map = tskit.RateMap(position=positions, rate=rates)\n        assert np.all(rate_map.get_rate([0.5, 1.5]) == rates)\n\n    def test_get_rate_out_of_bounds(self):\n        positions = np.array([0, 1, 2])\n        rates = np.array([1, 4])\n        rate_map = tskit.RateMap(position=positions, rate=rates)\n        with pytest.raises(ValueError, match=\"out of bounds\"):\n            rate_map.get_rate([1, -0.1])\n        with pytest.raises(ValueError, match=\"out of bounds\"):\n            rate_map.get_rate([2])\n\n    def test_get_cumulative_mass(self):\n        positions = np.array([0, 1, 2])\n        rates = np.array([1, 4])\n        rate_map = tskit.RateMap(position=positions, rate=rates)\n        assert np.allclose(rate_map.get_cumulative_mass([0.5, 1.5]), np.array([0.5, 3]))\n        assert rate_map.get_cumulative_mass([2]) == rate_map.total_mass\n\n    def test_get_bad_cumulative_mass(self):\n        positions = np.array([0, 1, 2])\n        rates = np.array([1, 4])\n        rate_map = 
tskit.RateMap(position=positions, rate=rates)\n        with pytest.raises(ValueError, match=\"positions\"):\n            rate_map.get_cumulative_mass([1, -0.1])\n        with pytest.raises(ValueError, match=\"positions\"):\n            rate_map.get_cumulative_mass([1, 2.1])\n\n\nclass TestSlice:\n    def test_slice_no_params(self):\n        # test RateMap.slice(..., trim=False)\n        a = tskit.RateMap(position=[0, 100, 200, 300, 400], rate=[0, 1, 2, 3])\n        b = a.slice()\n        assert a.sequence_length == b.sequence_length\n        assert_array_equal(a.position, b.position)\n        assert_array_equal(a.rate, b.rate)\n        assert a == b\n\n    def test_slice_left_examples(self):\n        a = tskit.RateMap(position=[0, 100, 200, 300, 400], rate=[0, 1, 2, 3])\n        b = a.slice(left=50)\n        assert a.sequence_length == b.sequence_length\n        assert_array_equal([0, 50, 100, 200, 300, 400], b.position)\n        assert_array_equal([np.nan, 0, 1, 2, 3], b.rate)\n\n        b = a.slice(left=100)\n        assert a.sequence_length == b.sequence_length\n        assert_array_equal([0, 100, 200, 300, 400], b.position)\n        assert_array_equal([np.nan, 1, 2, 3], b.rate)\n\n        b = a.slice(left=150)\n        assert a.sequence_length == b.sequence_length\n        assert_array_equal([0, 150, 200, 300, 400], b.position)\n        assert_array_equal([np.nan, 1, 2, 3], b.rate)\n\n    def test_slice_right_examples(self):\n        a = tskit.RateMap(position=[0, 100, 200, 300, 400], rate=[0, 1, 2, 3])\n        b = a.slice(right=300)\n        assert a.sequence_length == b.sequence_length\n        assert_array_equal([0, 100, 200, 300, 400], b.position)\n        assert_array_equal([0, 1, 2, np.nan], b.rate)\n\n        b = a.slice(right=250)\n        assert a.sequence_length == b.sequence_length\n        assert_array_equal([0, 100, 200, 250, 400], b.position)\n        assert_array_equal([0, 1, 2, np.nan], b.rate)\n\n    def test_slice_left_right_examples(self):\n 
       a = tskit.RateMap(position=[0, 100, 200, 300, 400], rate=[0, 1, 2, 3])\n        b = a.slice(left=50, right=300)\n        assert a.sequence_length == b.sequence_length\n        assert_array_equal([0, 50, 100, 200, 300, 400], b.position)\n        assert_array_equal([np.nan, 0, 1, 2, np.nan], b.rate)\n\n        b = a.slice(left=150, right=250)\n        assert a.sequence_length == b.sequence_length\n        assert_array_equal([0, 150, 200, 250, 400], b.position)\n        assert_array_equal([np.nan, 1, 2, np.nan], b.rate)\n\n        b = a.slice(left=150, right=300)\n        assert a.sequence_length == b.sequence_length\n        assert_array_equal([0, 150, 200, 300, 400], b.position)\n        assert_array_equal([np.nan, 1, 2, np.nan], b.rate)\n\n        b = a.slice(left=150, right=160)\n        assert a.sequence_length == b.sequence_length\n        assert_array_equal([0, 150, 160, 400], b.position)\n        assert_array_equal([np.nan, 1, np.nan], b.rate)\n\n    def test_slice_right_missing(self):\n        # If we take a right-slice into a trailing missing region,\n        # we should recover the same map.\n        a = tskit.RateMap(position=[0, 100, 200, 300, 400], rate=[0, 1, 2, np.nan])\n        b = a.slice(right=350)\n        assert a.sequence_length == b.sequence_length\n        assert_array_equal(a.position, b.position)\n        assert_array_equal(a.rate, b.rate)\n\n        b = a.slice(right=300)\n        assert a.sequence_length == b.sequence_length\n        assert_array_equal(a.position, b.position)\n        assert_array_equal(a.rate, b.rate)\n\n    def test_slice_left_missing(self):\n        a = tskit.RateMap(position=[0, 100, 200, 300, 400], rate=[np.nan, 1, 2, 3])\n        b = a.slice(left=50)\n        assert a.sequence_length == b.sequence_length\n        assert_array_equal(a.position, b.position)\n        assert_array_equal(a.rate, b.rate)\n\n        b = a.slice(left=100)\n        assert a.sequence_length == b.sequence_length\n        
assert_array_equal(a.position, b.position)\n        assert_array_equal(a.rate, b.rate)\n\n    def test_slice_with_floats(self):\n        #  test RateMap.slice(..., trim=False) with floats\n        a = tskit.RateMap(\n            position=[np.pi * x for x in [0, 100, 200, 300, 400]], rate=[0, 1, 2, 3]\n        )\n        b = a.slice(left=50 * np.pi)\n        assert a.sequence_length == b.sequence_length\n        assert_array_equal([0, 50 * np.pi] + list(a.position[1:]), b.position)\n        assert_array_equal([np.nan] + list(a.rate), b.rate)\n\n    def test_slice_trim_left(self):\n        a = tskit.RateMap(position=[0, 100, 200, 300, 400], rate=[1, 2, 3, 4])\n        b = a.slice(left=100, trim=True)\n        assert b == tskit.RateMap(position=[0, 100, 200, 300], rate=[2, 3, 4])\n        b = a.slice(left=50, trim=True)\n        assert b == tskit.RateMap(position=[0, 50, 150, 250, 350], rate=[1, 2, 3, 4])\n\n    def test_slice_trim_right(self):\n        a = tskit.RateMap(position=[0, 100, 200, 300, 400], rate=[1, 2, 3, 4])\n        b = a.slice(right=300, trim=True)\n        assert b == tskit.RateMap(position=[0, 100, 200, 300], rate=[1, 2, 3])\n        b = a.slice(right=350, trim=True)\n        assert b == tskit.RateMap(position=[0, 100, 200, 300, 350], rate=[1, 2, 3, 4])\n\n    def test_slice_error(self):\n        recomb_map = tskit.RateMap(position=[0, 100], rate=[1])\n        with pytest.raises(KeyError):\n            recomb_map.slice(left=-1)\n        with pytest.raises(KeyError):\n            recomb_map.slice(right=-1)\n        with pytest.raises(KeyError):\n            recomb_map.slice(left=200)\n        with pytest.raises(KeyError):\n            recomb_map.slice(right=200)\n        with pytest.raises(KeyError):\n            recomb_map.slice(left=20, right=10)\n\n\nclass TestReadHapmap:\n    def test_read_hapmap_simple(self):\n        hapfile = io.StringIO(\n            \"\"\"\\\n            HEADER\n            chr1 1 x 0\n            chr1 2 x 0.000001 x\n       
     chr1 3 x 0.000006 x x\"\"\"\n        )\n        rm = tskit.RateMap.read_hapmap(hapfile)\n        assert_array_equal(rm.position, [0, 1, 2, 3])\n        assert np.allclose(rm.rate, [np.nan, 1e-8, 5e-8], equal_nan=True)\n\n    def test_read_hapmap_from_filename(self, tmp_path):\n        with open(tmp_path / \"hapfile.txt\", \"w\") as hapfile:\n            hapfile.write(\n                \"\"\"\\\n                HEADER\n                chr1 1 x 0\n                chr1 2 x 0.000001 x\n                chr1 3 x 0.000006 x x\"\"\"\n            )\n        rm = tskit.RateMap.read_hapmap(tmp_path / \"hapfile.txt\")\n        assert_array_equal(rm.position, [0, 1, 2, 3])\n        assert np.allclose(rm.rate, [np.nan, 1e-8, 5e-8], equal_nan=True)\n\n    @pytest.mark.filterwarnings(\"ignore:loadtxt\")\n    def test_read_hapmap_empty(self):\n        hapfile = io.StringIO(\n            \"\"\"\\\n            HEADER\"\"\"\n        )\n        with pytest.raises(ValueError, match=\"Empty\"):\n            tskit.RateMap.read_hapmap(hapfile)\n\n    def test_read_hapmap_col_pos(self):\n        hapfile = io.StringIO(\n            \"\"\"\\\n            HEADER\n            0 0\n            0.000001 1 x\n            0.000006 2 x x\"\"\"\n        )\n        rm = tskit.RateMap.read_hapmap(hapfile, position_col=1, map_col=0)\n        assert_array_equal(rm.position, [0, 1, 2])\n        assert np.allclose(rm.rate, [1e-8, 5e-8])\n\n    def test_read_hapmap_map_and_rate(self):\n        hapfile = io.StringIO(\n            \"\"\"\\\n            HEADER\n            chr1 0 0 0\n            chr1 1 1 0.000001 x\n            chr1 2 2 0.000006 x x\"\"\"\n        )\n        with pytest.raises(ValueError, match=\"both rate_col and map_col\"):\n            tskit.RateMap.read_hapmap(hapfile, rate_col=2, map_col=3)\n\n    def test_read_hapmap_duplicate_pos(self):\n        hapfile = io.StringIO(\n            \"\"\"\\\n            HEADER\n            0 0\n            0.000001 1 x\n            0.000006 2 x 
x\"\"\"\n        )\n        with pytest.raises(ValueError, match=\"same columns\"):\n            tskit.RateMap.read_hapmap(hapfile, map_col=1)\n\n    def test_read_hapmap_nonzero_rate_start(self):\n        hapfile = io.StringIO(\n            \"\"\"\\\n            HEADER\n            chr1 1 5 x\n            chr1 2 0 x x x\"\"\"\n        )\n        rm = tskit.RateMap.read_hapmap(hapfile, rate_col=2)\n        assert_array_equal(rm.position, [0, 1, 2])\n        assert_array_equal(rm.rate, [np.nan, 5e-8])\n\n    def test_read_hapmap_nonzero_rate_end(self):\n        hapfile = io.StringIO(\n            \"\"\"\\\n            HEADER\n            chr1 0 5 x\n            chr1 2 1 x x x\"\"\"\n        )\n        with pytest.raises(ValueError, match=\"last entry.*must be zero\"):\n            tskit.RateMap.read_hapmap(hapfile, rate_col=2)\n\n    def test_read_hapmap_gzipped(self, tmp_path):\n        hapfile = os.path.join(tmp_path, \"hapmap.txt.gz\")\n        with gzip.GzipFile(hapfile, \"wb\") as gzfile:\n            gzfile.write(b\"HEADER\\n\")\n            gzfile.write(b\"chr1 0 1\\n\")\n            gzfile.write(b\"chr1 1 5.5\\n\")\n            gzfile.write(b\"chr1 2 0\\n\")\n        rm = tskit.RateMap.read_hapmap(hapfile, rate_col=2)\n        assert_array_equal(rm.position, [0, 1, 2])\n        assert_array_equal(rm.rate, [1e-8, 5.5e-8])\n\n    def test_read_hapmap_nonzero_map_start(self):\n        hapfile = io.StringIO(\n            \"\"\"\\\n            HEADER\n            chr1 1 x 0.000001\n            chr1 2 x 0.000001 x\n            chr1 3 x 0.000006 x x x\"\"\"\n        )\n        rm = tskit.RateMap.read_hapmap(hapfile)\n        assert_array_equal(rm.position, [0, 1, 2, 3])\n        assert np.allclose(rm.rate, [1e-8, 0, 5e-8])\n\n    def test_read_hapmap_bad_nonzero_map_start(self):\n        hapfile = io.StringIO(\n            \"\"\"\\\n            HEADER\n            chr1 0 x 0.0000005\n            chr1 1 x 0.000001 x\n            chr1 2 x 0.000006 x x x\"\"\"\n       
 )\n        with pytest.raises(ValueError, match=\"start.*must be zero\"):\n            tskit.RateMap.read_hapmap(hapfile)\n\n    def test_sequence_length(self):\n        hapfile = io.StringIO(\n            \"\"\"\\\n            HEADER\n            chr1 0 x 0\n            chr1 1 x 0.000001 x\n            chr1 2 x 0.000006 x x x\"\"\"\n        )\n        # test identical seq len\n        rm = tskit.RateMap.read_hapmap(hapfile, sequence_length=2)\n        assert_array_equal(rm.position, [0, 1, 2])\n        assert np.allclose(rm.rate, [1e-8, 5e-8])\n\n        hapfile.seek(0)\n        rm = tskit.RateMap.read_hapmap(hapfile, sequence_length=10)\n        assert_array_equal(rm.position, [0, 1, 2, 10])\n        assert np.allclose(rm.rate, [1e-8, 5e-8, np.nan], equal_nan=True)\n\n    def test_bad_sequence_length(self):\n        hapfile = io.StringIO(\n            \"\"\"\\\n            HEADER\n            chr1 0 x 0\n            chr1 1 x 0.000001 x\n            chr1 2 x 0.000006 x x x\"\"\"\n        )\n        with pytest.raises(ValueError, match=\"sequence_length\"):\n            tskit.RateMap.read_hapmap(hapfile, sequence_length=1.999)\n\n    def test_no_header(self):\n        data = \"\"\"\\\n            chr1 0 x 0\n            chr1 1 x 0.000001 x\n            chr1 2 x 0.000006 x x x\"\"\"\n        hapfile_noheader = io.StringIO(data)\n        hapfile_header = io.StringIO(\"chr pos rate cM\\n\" + data)\n        with pytest.raises(ValueError):\n            tskit.RateMap.read_hapmap(hapfile_header, has_header=False)\n        rm1 = tskit.RateMap.read_hapmap(hapfile_header)\n        rm2 = tskit.RateMap.read_hapmap(hapfile_noheader, has_header=False)\n        assert_array_equal(rm1.rate, rm2.rate)\n        assert_array_equal(rm1.position, rm2.position)\n\n    def test_hapmap_fragment(self):\n        hapfile = io.StringIO(\n            \"\"\"\\\n            chr pos        rate                    cM\n            1   4283592    3.79115663174456        0\n            1   4361401   
 0.0664276817058413      0.294986106359414\n            1   7979763   10.9082897515584         0.535345505591925\n            1   8007051    0.0976780648822495      0.833010916332456\n            1   8762788    0.0899929572085616      0.906829844052373\n            1   9477943    0.0864382908650907      0.971188757364862\n            1   9696341    4.76495005895746        0.990066707213216\n            1   9752154    0.0864316558730679      1.25601286485381\n            1   9881751    0.0                     1.26721414815999\"\"\"\n        )\n        rm1 = tskit.RateMap.read_hapmap(hapfile)\n        hapfile.seek(0)\n        rm2 = tskit.RateMap.read_hapmap(hapfile, rate_col=2)\n        assert np.allclose(rm1.position, rm2.position)\n        assert np.allclose(rm1.rate, rm2.rate, equal_nan=True)\n"
  },
  {
    "path": "python/tests/test_jit.py",
    "content": "import itertools\nimport sys\n\nimport msprime\nimport numba\nimport numpy as np\nimport numpy.testing as nt\nimport pytest\n\nimport tests.tsutil as tsutil\nimport tskit\nimport tskit.jit.numba as jit_numba\n\n\ndef test_numba_import_error():\n    # Make the modules unavailable temporarily\n    original_numba = sys.modules.get(\"numba\")\n    original_jit_numba = sys.modules.get(\"tskit.jit.numba\")\n    try:\n        if \"numba\" in sys.modules:\n            del sys.modules[\"numba\"]\n        if \"tskit.jit.numba\" in sys.modules:\n            del sys.modules[\"tskit.jit.numba\"]\n\n        # Mock numba as not available at all\n        sys.modules[\"numba\"] = None\n        with pytest.raises(ImportError, match=\"pip install numba\"):\n            import tskit.jit.numba  # noqa: F401\n    finally:\n        # Restore original modules\n        sys.modules[\"numba\"] = original_numba\n        sys.modules[\"tskit.jit.numba\"] = original_jit_numba\n\n\ndef _verify_tree_index_state(tree_index, edge_diff, tree, reverse=False):\n    assert edge_diff.interval == tree_index.interval\n\n    if reverse:\n        edge_range = range(tree_index.in_range.start, tree_index.in_range.stop, -1)\n    else:\n        edge_range = range(tree_index.in_range.start, tree_index.in_range.stop)\n\n    for edge_in_index, edge in itertools.zip_longest(edge_range, edge_diff.edges_in):\n        assert edge.id == tree_index.in_range.order[edge_in_index]\n\n    if reverse:\n        edge_range = range(tree_index.out_range.start, tree_index.out_range.stop, -1)\n    else:\n        edge_range = range(tree_index.out_range.start, tree_index.out_range.stop)\n\n    for edge_out_index, edge in itertools.zip_longest(edge_range, edge_diff.edges_out):\n        assert edge.id == tree_index.out_range.order[edge_out_index]\n\n    sites = [s.id for s in tree.sites()]\n    if len(sites) > 0:\n        assert tree_index.site_range == (min(sites), max(sites) + 1)\n    else:\n        assert 
tree_index.site_range[0] == tree_index.site_range[1]\n\n    muts = [m.id for m in tree.mutations()]\n    if len(muts) > 0:\n        assert tree_index.mutation_range == (min(muts), max(muts) + 1)\n    else:\n        assert tree_index.mutation_range[0] == tree_index.mutation_range[1]\n\n\n@pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\ndef test_correct_trees_forward(ts):\n    numba_ts = jit_numba.jitwrap(ts)\n    tree_index = numba_ts.tree_index()\n    ts_edge_diffs = ts.edge_diffs()\n    tree = ts.first()\n    while tree_index.next():\n        edge_diff = next(ts_edge_diffs)\n        _verify_tree_index_state(tree_index, edge_diff, tree, reverse=False)\n        last_tree = not tree.next()\n    assert last_tree\n\n\n@pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\ndef test_correct_trees_backwards(ts):\n    numba_ts = jit_numba.jitwrap(ts)\n    tree_index = numba_ts.tree_index()\n    ts_edge_diffs = ts.edge_diffs(direction=tskit.REVERSE)\n    tree = ts.last()\n    while tree_index.prev():\n        edge_diff = next(ts_edge_diffs)\n        _verify_tree_index_state(tree_index, edge_diff, tree, reverse=True)\n        last_tree = not tree.prev()\n    assert last_tree\n\n\n@pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\ndef test_correct_trees_backwards_and_forwards(ts):\n    numba_ts = jit_numba.jitwrap(ts)\n    tree_index = numba_ts.tree_index()\n    ts_edge_diffs = ts.edge_diffs(direction=tskit.REVERSE)\n    tree = ts.last()\n    while tree_index.prev():\n        edge_diff = next(ts_edge_diffs)\n        _verify_tree_index_state(tree_index, edge_diff, tree, reverse=True)\n        last_tree = not tree.prev()\n    assert last_tree\n    tree = ts.first()\n    ts_edge_diffs = ts.edge_diffs()\n    while tree_index.next():\n        edge_diff = next(ts_edge_diffs)\n        _verify_tree_index_state(tree_index, edge_diff, tree, reverse=False)\n        last_tree = not tree.next()\n    assert last_tree\n    tree = 
ts.last()\n    ts_edge_diffs = ts.edge_diffs(direction=tskit.REVERSE)\n    while tree_index.prev():\n        edge_diff = next(ts_edge_diffs)\n        _verify_tree_index_state(tree_index, edge_diff, tree, reverse=True)\n        last_tree = not tree.prev()\n    assert last_tree\n\n\n@pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\ndef test_child_index_correctness(ts):\n    numba_ts = jit_numba.jitwrap(ts)\n    child_index = numba_ts.child_index()\n    for node in range(ts.num_nodes):\n        start, stop = child_index[node]\n\n        expected_children = []\n        for edge_id in range(ts.num_edges):\n            if ts.edges_parent[edge_id] == node:\n                expected_children.append(edge_id)\n\n        if len(expected_children) == 0:\n            assert start == -1 and stop == -1\n        else:\n            assert stop > start\n            actual_children = list(range(start, stop))\n            for edge_id in actual_children:\n                assert ts.edges_parent[edge_id] == node\n            assert actual_children == expected_children\n\n\n@pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\ndef test_parent_index_correctness(ts):\n    numba_ts = jit_numba.jitwrap(ts)\n    parent_index = numba_ts.parent_index()\n    for node in range(ts.num_nodes):\n        start, stop = parent_index.index_range[node]\n\n        expected_parents = []\n        for edge_id in range(ts.num_edges):\n            if ts.edges_child[edge_id] == node:\n                expected_parents.append(edge_id)\n\n        if len(expected_parents) == 0:\n            assert start == stop\n        else:\n            assert stop > start\n            actual_parent_edge_ids = []\n            for j in range(start, stop):\n                edge_id = parent_index.edge_index[j]\n                actual_parent_edge_ids.append(edge_id)\n                assert ts.edges_child[edge_id] == node\n            assert set(actual_parent_edge_ids) == 
set(expected_parents)\n\n\n@pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\ndef test_parent_index_tree_reconstruction(ts):\n    numba_ts = jit_numba.jitwrap(ts)\n    parent_index = numba_ts.parent_index()\n\n    # Test tree reconstruction at all breakpoints\n    for tree in ts.trees():\n        position = tree.interval.left + 0.5 * tree.span\n        reconstructed_parent = np.full(ts.num_nodes, -1, dtype=np.int32)\n        for node in range(ts.num_nodes):\n            start, stop = parent_index.index_range[node]\n            if start != -1:\n                for j in range(start, stop):\n                    edge_id = parent_index.edge_index[j]\n                    if ts.edges_left[edge_id] <= position < ts.edges_right[edge_id]:\n                        reconstructed_parent[node] = ts.edges_parent[edge_id]\n                        break\n        expected_parent = tree.parent_array\n\n        # Compare parent arrays (excluding virtual root)\n        nt.assert_array_equal(\n            reconstructed_parent,\n            expected_parent[:-1],\n        )\n\n\ndef test_child_parent_index_from_jit_function():\n    ts = msprime.sim_ancestry(\n        samples=10, sequence_length=100, recombination_rate=1, random_seed=42\n    )\n\n    @numba.njit\n    def _count_children_parents_numba(numba_ts):\n        child_index = numba_ts.child_index()\n        parent_index = numba_ts.parent_index()\n\n        total_child_edges = 0\n        total_parent_edges = 0\n\n        for node in range(numba_ts.num_nodes):\n            # Count child edges\n            child_start, child_stop = child_index[node]\n            if child_start != -1:\n                total_child_edges += child_stop - child_start\n\n            # Count parent edges\n            parent_start, parent_stop = parent_index.index_range[node]\n            if parent_start != -1:\n                total_parent_edges += parent_stop - parent_start\n\n        return total_child_edges, total_parent_edges\n\n    
def count_children_parents_python(ts):\n        total_child_edges = 0\n        total_parent_edges = 0\n\n        for node in range(ts.num_nodes):\n            # Count child edges\n            for edge in ts.edges():\n                if edge.parent == node:\n                    total_child_edges += 1\n                if edge.child == node:\n                    total_parent_edges += 1\n\n        return total_child_edges, total_parent_edges\n\n    numba_ts = jit_numba.jitwrap(ts)\n    numba_result = _count_children_parents_numba(numba_ts)\n    python_result = count_children_parents_python(ts)\n\n    assert numba_result == python_result\n\n\ndef test_using_tree_index_from_jit_function():\n    # Test we can use from a numba jitted function\n\n    ts = msprime.sim_ancestry(\n        samples=10, sequence_length=100, recombination_rate=1, random_seed=42\n    )\n\n    @numba.njit\n    def _coalescent_nodes_numba(numba_ts, num_nodes, edges_parent):\n        is_coalescent = np.zeros(num_nodes, dtype=np.int8)\n        num_children = np.zeros(num_nodes, dtype=np.int64)\n        tree_index = numba_ts.tree_index()\n        while tree_index.next():\n            for j in range(tree_index.out_range.start, tree_index.out_range.stop):\n                e = tree_index.out_range.order[j]\n                num_children[edges_parent[e]] -= 1\n            for j in range(tree_index.in_range.start, tree_index.in_range.stop):\n                e = tree_index.in_range.order[j]\n                p = edges_parent[e]\n                num_children[p] += 1\n                if num_children[p] == 2:\n                    is_coalescent[p] = True\n        return is_coalescent\n\n    def coalescent_nodes_python(ts):\n        is_coalescent = np.zeros(ts.num_nodes, dtype=bool)\n        num_children = np.zeros(ts.num_nodes, dtype=int)\n        for _, edges_out, edges_in in ts.edge_diffs():\n            for e in edges_out:\n                num_children[e.parent] -= 1\n            for e in edges_in:\n             
   num_children[e.parent] += 1\n                if num_children[e.parent] == 2:\n                    # Num_children will always be exactly two once, even arity is greater\n                    is_coalescent[e.parent] = True\n        return is_coalescent\n\n    numba_ts = jit_numba.jitwrap(ts)\n    C1 = coalescent_nodes_python(ts)\n    C2 = _coalescent_nodes_numba(numba_ts, ts.num_nodes, ts.edges_parent)\n\n    nt.assert_array_equal(C1, C2)\n\n\n@pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\ndef test_jit_diversity(ts):\n    if ts.num_samples < 1:\n        pytest.skip(\n            \"Tree sequence must have at least one sample for diversity calculation\"\n        )\n\n    @numba.njit\n    def diversity(numba_ts):\n        # Cache arrays to avoid repeated attribute access in\n        # tight loops\n        edge_child = numba_ts.edges_child\n        edge_parent = numba_ts.edges_parent\n        node_times = numba_ts.nodes_time\n        node_flags = numba_ts.nodes_flags\n\n        if numba_ts.num_samples <= 1:\n            return 0.0\n\n        parent = np.full(numba_ts.num_nodes, -1, dtype=np.int32)\n        branch_length = np.zeros(numba_ts.num_nodes, dtype=np.float64)\n        state = np.zeros(numba_ts.num_nodes, dtype=np.int32)\n        summary = np.zeros(numba_ts.num_nodes, dtype=np.float64)\n\n        n = float(numba_ts.num_samples)\n        two_over_denom = 2.0 / (n * (n - 1.0))\n        sample_summary = 2.0 / n\n\n        # Retrieve this constant outside the loop\n        # to avoid repeated attribute access\n        NODE_IS_SAMPLE = tskit.NODE_IS_SAMPLE\n        # Find the sample nodes and initialize their states\n        for node in range(numba_ts.num_nodes):\n            if node_flags[node] & NODE_IS_SAMPLE:\n                state[node] = 1.0\n                summary[node] = sample_summary\n\n        result = 0.0\n        running_sum = 0.0\n        tree_index = numba_ts.tree_index()\n\n        # Now iterate through the trees\n        
while tree_index.next():\n            # Process the outgoing edges\n            for j in range(tree_index.out_range.start, tree_index.out_range.stop):\n                h = tree_index.out_range.order[j]\n                child = edge_child[h]\n                child_parent = edge_parent[h]\n\n                running_sum -= branch_length[child] * summary[child]\n                parent[child] = -1\n                branch_length[child] = 0.0\n\n                u = child_parent\n                parent_u = parent[u]\n                while u != -1:\n                    running_sum -= branch_length[u] * summary[u]\n                    state[u] -= state[child]\n                    summary[u] = state[u] * (n - state[u]) * two_over_denom\n                    running_sum += branch_length[u] * summary[u]\n                    u = parent_u\n                    if u != -1:\n                        parent_u = parent[u]\n\n            # Process the incoming edges\n            for j in range(tree_index.in_range.start, tree_index.in_range.stop):\n                h = tree_index.in_range.order[j]\n                child = edge_child[h]\n                child_parent = edge_parent[h]\n\n                parent[child] = child_parent\n                branch_length[child] = node_times[child_parent] - node_times[child]\n                running_sum += branch_length[child] * summary[child]\n\n                u = child_parent\n                parent_u = parent[u]\n                while u != -1:\n                    running_sum -= branch_length[u] * summary[u]\n                    state[u] += state[child]\n                    summary[u] = state[u] * (n - state[u]) * two_over_denom\n                    running_sum += branch_length[u] * summary[u]\n                    u = parent_u\n                    if u != -1:\n                        parent_u = parent[u]\n\n            result += running_sum * (tree_index.interval[1] - tree_index.interval[0])\n\n        return result / numba_ts.sequence_length\n\n   
 numba_ts = jit_numba.jitwrap(ts)\n    diversity_numba = diversity(numba_ts)\n    diversity_python = ts.diversity(mode=\"branch\")\n\n    assert diversity_numba == pytest.approx(diversity_python, rel=1e-5)\n\n\n@pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\ndef test_jitwrap_properties(ts):\n    numba_ts = jit_numba.jitwrap(ts)\n\n    assert numba_ts.num_trees == ts.num_trees\n    assert numba_ts.num_edges == ts.num_edges\n    assert numba_ts.sequence_length == ts.sequence_length\n    assert numba_ts.num_nodes == ts.num_nodes\n    assert numba_ts.num_samples == ts.num_samples\n    assert numba_ts.num_sites == ts.num_sites\n    assert numba_ts.num_mutations == ts.num_mutations\n\n    nt.assert_array_equal(numba_ts.edges_left, ts.edges_left)\n    nt.assert_array_equal(numba_ts.edges_right, ts.edges_right)\n    nt.assert_array_equal(numba_ts.edges_parent, ts.edges_parent)\n    nt.assert_array_equal(numba_ts.edges_child, ts.edges_child)\n    assert numba_ts.edges_left.dtype == np.float64\n    assert numba_ts.edges_right.dtype == np.float64\n    assert numba_ts.edges_parent.dtype == np.int32\n    assert numba_ts.edges_child.dtype == np.int32\n    nt.assert_array_equal(numba_ts.nodes_time, ts.nodes_time)\n    nt.assert_array_equal(numba_ts.nodes_flags, ts.nodes_flags)\n    nt.assert_array_equal(numba_ts.nodes_population, ts.nodes_population)\n    nt.assert_array_equal(numba_ts.nodes_individual, ts.nodes_individual)\n    assert numba_ts.nodes_time.dtype == np.float64\n    assert numba_ts.nodes_flags.dtype == np.uint32\n    assert numba_ts.nodes_population.dtype == np.int32\n    assert numba_ts.nodes_individual.dtype == np.int32\n    nt.assert_array_equal(numba_ts.individuals_flags, ts.individuals_flags)\n    assert numba_ts.individuals_flags.dtype == np.uint32\n    nt.assert_array_equal(numba_ts.sites_position, ts.sites_position)\n    assert numba_ts.sites_position.dtype == np.float64\n    nt.assert_array_equal(numba_ts.sites_ancestral_state, 
ts.sites_ancestral_state)\n    assert numba_ts.sites_ancestral_state.dtype.kind == \"U\"  # Unicode string\n    nt.assert_array_equal(numba_ts.mutations_site, ts.mutations_site)\n    nt.assert_array_equal(numba_ts.mutations_node, ts.mutations_node)\n    nt.assert_array_equal(numba_ts.mutations_parent, ts.mutations_parent)\n    nt.assert_array_equal(numba_ts.mutations_time, ts.mutations_time)\n    assert numba_ts.mutations_site.dtype == np.int32\n    assert numba_ts.mutations_node.dtype == np.int32\n    assert numba_ts.mutations_parent.dtype == np.int32\n    assert numba_ts.mutations_time.dtype == np.float64\n    nt.assert_array_equal(numba_ts.mutations_derived_state, ts.mutations_derived_state)\n    assert numba_ts.mutations_derived_state.dtype.kind == \"U\"  # Unicode string\n    nt.assert_array_equal(\n        numba_ts.mutations_inherited_state, ts.mutations_inherited_state\n    )\n    assert numba_ts.mutations_inherited_state.dtype.kind == \"U\"  # Unicode string\n    nt.assert_array_equal(\n        numba_ts.indexes_edge_insertion_order, ts.indexes_edge_insertion_order\n    )\n    nt.assert_array_equal(\n        numba_ts.indexes_edge_removal_order, ts.indexes_edge_removal_order\n    )\n    assert numba_ts.indexes_edge_insertion_order.dtype == np.int32\n    assert numba_ts.indexes_edge_removal_order.dtype == np.int32\n    assert numba_ts.breakpoints.dtype == np.float64\n    nt.assert_array_equal(numba_ts.breakpoints, ts.breakpoints(as_array=True))\n\n\ndef test_numba_edge_range():\n\n    order = np.array([1, 3, 2, 0], dtype=np.int32)\n    edge_range = jit_numba.EdgeRange(start=1, stop=3, order=order)\n\n    assert edge_range.start == 1\n    assert edge_range.stop == 3\n    nt.assert_array_equal(edge_range.order, order)\n\n\ndef test_numba_tree_index_set_null(ts_fixture):\n\n    numba_ts = jit_numba.jitwrap(ts_fixture)\n    tree_index = numba_ts.tree_index()\n\n    # Move to a valid position first\n    tree_index.next()\n    initial_interval = 
tree_index.interval\n    assert tree_index.index != -1\n    assert initial_interval != (0, 0)\n\n    # Test set_null\n    tree_index.set_null()\n    assert tree_index.index == -1\n    assert tree_index.interval == (0, 0)\n\n\ndef test_numba_tree_index_constants(ts_fixture):\n\n    numba_ts = jit_numba.jitwrap(ts_fixture)\n    tree_index = numba_ts.tree_index()\n\n    # Initial direction should be 0\n    assert tree_index.direction == tskit.NULL\n\n    # After next(), direction should be FORWARD\n    tree_index.next()\n    assert tree_index.direction == jit_numba.FORWARD\n    assert tree_index.direction == 1\n\n    # After prev(), direction should be REVERSE\n    tree_index.prev()\n    assert tree_index.direction == jit_numba.REVERSE\n    assert tree_index.direction == -1\n\n    # Test mixed direction\n    tree_index.set_null()\n    tree_index.prev()\n    assert tree_index.direction == jit_numba.REVERSE\n    tree_index.next()\n    assert tree_index.direction == jit_numba.FORWARD\n\n\ndef test_numba_tree_index_edge_cases():\n\n    # Test with empty tree sequence\n    tables = tskit.TableCollection(sequence_length=1.0)\n    empty_ts = tables.tree_sequence()\n    numba_ts = jit_numba.jitwrap(empty_ts)\n    tree_index = numba_ts.tree_index()\n\n    # Should have exactly one tree\n    assert tree_index.next()\n    assert tree_index.index == 0\n    assert tree_index.interval == (0.0, 1.0)\n    assert not tree_index.next()  # No more trees\n    assert tree_index.index == -1\n\n    # Test with single tree (with edges)\n    ts = msprime.sim_ancestry(samples=2, random_seed=42)  # No recombination\n    numba_ts = jit_numba.jitwrap(ts)\n    tree_index = numba_ts.tree_index()\n\n    # Should have exactly one tree\n    assert tree_index.next()\n    assert tree_index.index == 0\n    assert not tree_index.next()  # No more trees\n    assert tree_index.index == -1\n\n\n@pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\ndef test_jit_descendant_span(ts):\n    if 
ts.num_nodes == 0:\n        pytest.skip(\"Tree sequence must have at least one node\")\n\n    @numba.njit\n    def descendant_span(numba_ts, u):\n        child_index = numba_ts.child_index()\n        edges_left = numba_ts.edges_left\n        edges_right = numba_ts.edges_right\n        edges_child = numba_ts.edges_child\n\n        total_descending = np.zeros(numba_ts.num_nodes)\n        stack = [(u, 0.0, numba_ts.sequence_length)]\n\n        while len(stack) > 0:\n            node, left, right = stack.pop()\n            total_descending[node] += right - left\n\n            # Find all child edges for this node\n            for e in range(child_index[node, 0], child_index[node, 1]):\n                e_left = edges_left[e]\n                e_right = edges_right[e]\n\n                # Check if edge overlaps with current interval\n                if e_right > left and right > e_left:\n                    inter_left = max(e_left, left)\n                    inter_right = min(e_right, right)\n                    e_child = edges_child[e]\n                    stack.append((e_child, inter_left, inter_right))\n\n        return total_descending\n\n    def descendant_span_tree(ts, u):\n        total_descending = np.zeros(ts.num_nodes)\n        for tree in ts.trees():\n            descendants = tree.preorder(u)\n            total_descending[descendants] += tree.span\n        return total_descending\n\n    numba_ts = jit_numba.jitwrap(ts)\n    for u in range(ts.num_nodes):\n        d1 = descendant_span(numba_ts, u)\n        d2 = descendant_span_tree(ts, u)\n        nt.assert_array_almost_equal(d1, d2, decimal=10)\n\n\n@pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\ndef test_jit_descendant_edges(ts):\n    if ts.num_nodes == 0:\n        pytest.skip(\"Tree sequence must have at least one node\")\n\n    @numba.njit\n    def descendant_edges(numba_ts, u):\n        \"\"\"\n        Returns a boolean array which is only True for edges that\n        are descendants 
of node u.\n        \"\"\"\n        edge_select = np.zeros(numba_ts.num_edges, dtype=np.bool_)\n        child_index = numba_ts.child_index()\n        edges_left = numba_ts.edges_left\n        edges_right = numba_ts.edges_right\n        edges_child = numba_ts.edges_child\n\n        # The stack stores (node_id, left_coord, right_coord)\n        stack = [(u, 0.0, numba_ts.sequence_length)]\n\n        while len(stack) > 0:\n            node, left, right = stack.pop()\n\n            start, stop = child_index[node]\n            for e in range(start, stop):\n                e_left = edges_left[e]\n                e_right = edges_right[e]\n\n                if e_right > left and right > e_left:\n                    edge_select[e] = True\n                    inter_left = max(e_left, left)\n                    inter_right = min(e_right, right)\n                    e_child = edges_child[e]\n                    stack.append((e_child, inter_left, inter_right))\n\n        return edge_select\n\n    def descendant_edges_tskit(ts, start_node):\n        D = np.zeros(ts.num_edges, dtype=bool)\n        for tree in ts.trees():\n            for v in tree.preorder(start_node):\n                # We want the edges *below* the start_node, so we skip the node itself.\n                if v != start_node:\n                    D[tree.edge(v)] = True\n        return D\n\n    numba_ts = jit_numba.jitwrap(ts)\n    for u in range(ts.num_nodes):\n        d1 = descendant_edges(numba_ts, u)\n        d2 = descendant_edges_tskit(ts, u)\n        nt.assert_array_equal(d1, d2)\n\n\n@pytest.mark.parametrize(\"ts\", tsutil.get_example_tree_sequences())\ndef test_jit_ancestral_edges(ts):\n    if ts.num_nodes == 0:\n        pytest.skip(\"Tree sequence must have at least one node\")\n\n    @numba.njit\n    def ancestral_edges(numba_ts, u):\n        \"\"\"\n        Returns a boolean array which is only True for edges that are\n        ancestors of node u.\n        \"\"\"\n        edge_select = 
np.zeros(numba_ts.num_edges, dtype=np.bool_)\n        parent_index = numba_ts.parent_index()\n        edges_left = numba_ts.edges_left\n        edges_right = numba_ts.edges_right\n        edges_parent = numba_ts.edges_parent\n\n        # The stack stores (node_id, left_coord, right_coord)\n        stack = [(u, 0.0, numba_ts.sequence_length)]\n\n        while len(stack) > 0:\n            node, left, right = stack.pop()\n\n            start, stop = parent_index.index_range[node]\n            for i in range(start, stop):\n                e = parent_index.edge_index[i]\n                e_left = edges_left[e]\n                e_right = edges_right[e]\n\n                if e_right > left and right > e_left:\n                    edge_select[e] = True\n                    inter_left = max(e_left, left)\n                    inter_right = min(e_right, right)\n                    e_parent = edges_parent[e]\n                    stack.append((e_parent, inter_left, inter_right))\n\n        return edge_select\n\n    def ancestral_edges_tskit(ts, start_node):\n        A = np.zeros(ts.num_edges, dtype=bool)\n        for tree in ts.trees():\n            curr_node = start_node\n            parent = tree.parent(curr_node)\n            while parent != tskit.NULL:\n                edge_id = tree.edge(curr_node)\n                A[edge_id] = True\n                curr_node = parent\n                parent = tree.parent(curr_node)\n        return A\n\n    numba_ts = jit_numba.jitwrap(ts)\n    for u in range(ts.num_nodes):\n        a1 = ancestral_edges(numba_ts, u)\n        a2 = ancestral_edges_tskit(ts, u)\n        nt.assert_array_equal(a1, a2)\n"
  },
  {
    "path": "python/tests/test_ld_matrix.py",
    "content": "# MIT License\n#\n# Copyright (c) 2023-2024 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTest cases for two-locus statistics\n\"\"\"\n\nimport contextlib\nimport io\nfrom collections.abc import Callable, Generator\nfrom dataclasses import dataclass\nfrom itertools import combinations_with_replacement, permutations, product\nfrom typing import Any\n\nimport msprime\nimport numpy as np\nimport pytest\n\nimport tskit\nimport tskit.util as util\nfrom tests import tsutil\nfrom tests.tsutil import get_example_tree_sequences\n\n\n@contextlib.contextmanager\ndef suppress_overflow_div0_warning():\n    with np.errstate(over=\"ignore\", invalid=\"ignore\", divide=\"ignore\"):\n        yield\n\n\nclass BitSet:\n    \"\"\"BitSet object, which stores values in arrays of unsigned integers.\n    The rows represent all possible values a bit can take, and the rows\n    represent each item that can be stored in the array.\n\n    
:param num_bits: The number of values that a single row can contain.\n    :param length: The number of rows.\n    \"\"\"\n\n    DTYPE = np.uint32  # Data type to be stored in the bitset\n    CHUNK_SIZE = DTYPE(32)  # Size of integer field to store the data in\n\n    def __init__(self: \"BitSet\", num_bits: int, length: int) -> None:\n        self.row_len = num_bits // self.CHUNK_SIZE\n        self.row_len += 1 if num_bits % self.CHUNK_SIZE else 0\n        self.row_len = int(self.row_len)\n        self.data = np.zeros(self.row_len * length, dtype=self.DTYPE)\n\n    def intersect(\n        self: \"BitSet\", self_row: int, other: \"BitSet\", other_row: int, out: \"BitSet\"\n    ) -> None:\n        \"\"\"Intersect a row from the current array instance with a row from\n        another BitSet and store it in an output bit array of length 1.\n\n        NB: we don't specify the row in the output array, it is expected\n        to be length 1.\n\n        :param self_row: Row from the current array instance to be intersected.\n        :param other: Other BitSet to intersect with.\n        :param other_row: Row from the other BitSet instance.\n        :param out: BitArray to store the result.\n        \"\"\"\n        self_offset = self_row * self.row_len\n        other_offset = other_row * self.row_len\n\n        for i in range(self.row_len):\n            out.data[i] = self.data[i + self_offset] & other.data[i + other_offset]\n\n    def difference(\n        self: \"BitSet\", self_row: int, other: \"BitSet\", other_row: int\n    ) -> None:\n        \"\"\"Take the difference between the current array instance and another\n        array instance. 
Store the result in the specified row of the current\n        instance.\n\n        :param self_row: Row from the current array from which to subtract.\n        :param other: Other BitSet to subtract from the current instance.\n        :param other_row: Row from the other BitSet instance.\n        \"\"\"\n        self_offset = self_row * self.row_len\n        other_offset = other_row * self.row_len\n\n        for i in range(self.row_len):\n            self.data[i + self_offset] &= ~(other.data[i + other_offset])\n\n    def union(self: \"BitSet\", self_row: int, other: \"BitSet\", other_row: int) -> None:\n        \"\"\"Take the union between the current array instance and another\n        array instance. Store the result in the specified row of the current\n        instance.\n\n        :param self_row: Row from the current array with which to union.\n        :param other: Other BitSet to union with the current instance.\n        :param other_row: Row from the other BitSet instance.\n        \"\"\"\n        self_offset = self_row * self.row_len\n        other_offset = other_row * self.row_len\n\n        for i in range(self.row_len):\n            self.data[i + self_offset] |= other.data[i + other_offset]\n\n    def add(self: \"BitSet\", row: int, bit: int) -> None:\n        \"\"\"Add a single bit to the row of a bit array\n\n        :param row: Row to be modified.\n        :param bit: Bit to be added.\n        \"\"\"\n        offset = row * self.row_len\n        i = bit // self.CHUNK_SIZE\n        self.data[i + offset] |= self.DTYPE(1) << (bit - (self.CHUNK_SIZE * i))\n\n    def get_items(self: \"BitSet\", row: int) -> Generator[int, None, None]:\n        \"\"\"Get the items stored in the row of a bitset\n        Uses a de Bruijn sequence lookup table to determine the lowest bit set.\n        See the wikipedia article for more info: https://w.wiki/BYiF\n\n        :param row: Row from the array to list from.\n        :returns: A generator of integers stored in the 
array.\n        \"\"\"\n        lookup = [0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 31, 27,\n                  13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9]  # fmt: skip\n        m = np.uint32(125613361)\n        offset = row * self.row_len\n        for i in range(self.row_len):\n            v = self.data[i + offset]\n            if v == 0:\n                continue\n            else:\n                # v & -v operations rely on integer overflow\n                with np.errstate(over=\"ignore\"):\n                    lsb = v & -v  # isolate the least significant bit\n                    while lsb:  # while there are bits remaining\n                        yield lookup[(lsb * m) >> 27] + (i * self.CHUNK_SIZE)\n                        v ^= lsb  # unset the lsb\n                        lsb = v & -v\n\n    def contains(self: \"BitSet\", row: int, bit: int) -> bool:\n        \"\"\"Test if a bit is contained within a bit array row\n\n        :param row: Row to test.\n        :param bit: Bit to check.\n        :returns: True if the bit is set in the row, else false.\n        \"\"\"\n        i = bit // self.CHUNK_SIZE\n        offset = row * self.row_len\n        return bool(\n            self.data[i + offset] & (self.DTYPE(1) << (bit - (self.CHUNK_SIZE * i)))\n        )\n\n    def count(self: \"BitSet\", row: int) -> int:\n        \"\"\"Count all of the set bits in a specified row. 
Uses a SWAR\n        algorithm to count in parallel with a constant number (12) of operations.\n\n        NB: we have to cast all values to our unsigned dtype to avoid type promotion\n\n        Details here:\n        # https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel\n\n        :param row: Row to count.\n        :returns: Count of all of the set bits.\n        \"\"\"\n        count = 0\n        offset = row * self.row_len\n        D = self.DTYPE\n\n        for i in range(offset, offset + self.row_len):\n            v = self.data[i]\n            v = v - ((v >> D(1)) & D(0x55555555))\n            v = (v & D(0x33333333)) + ((v >> D(2)) & D(0x33333333))\n            # this operation relies on integer overflow\n            with np.errstate(over=\"ignore\"):\n                count += ((v + (v >> D(4)) & D(0xF0F0F0F)) * D(0x1010101)) >> D(24)\n\n        return count\n\n    def count_naive(self: \"BitSet\", row: int) -> int:\n        \"\"\"Naive counting algorithm implementing the same functionality as the count\n        method. Useful for testing correctness, uses the same number of operations\n        as set bits.\n\n        Details here:\n        # https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetNaive\n\n        :param row: Row to count.\n        :returns: Count of all of the set bits.\n        \"\"\"\n        count = 0\n        offset = row * self.row_len\n\n        for i in range(offset, offset + self.row_len):\n            v = self.data[i]\n            while v:\n                v &= v - self.DTYPE(1)\n                count += self.DTYPE(1)\n        return int(count)\n\n\ndef norm_hap_weighted(\n    result_dim: int,\n    hap_weights: np.ndarray,\n    n_a: int,\n    n_b: int,\n    result: np.ndarray,\n    params: dict[str, Any],\n) -> None:\n    \"\"\"Create a vector of normalizing coefficients, length of the number of\n    sample sets. 
In this normalization strategy, we weight each allele's\n    statistic by the proportion of the haplotype present.\n\n    :param result_dim: Number of dimensions in output. Dependent on arity of stat.\n    :param hap_weights: Proportion of each two-locus haplotype.\n    :param n_a: Number of alleles at the A locus.\n    :param n_b: Number of alleles at the B locus.\n    :param result: Result vector to store the normalizing coefficients in.\n    :param params: Params of summary function.\n    \"\"\"\n    del n_a, n_b  # handle unused params\n    sample_set_sizes = params[\"sample_set_sizes\"]\n    for k in range(result_dim):\n        n = sample_set_sizes[k]\n        result[k] = hap_weights[0, k] / n\n\n\ndef norm_hap_weighted_ij(\n    result_dim: int,\n    hap_weights: np.ndarray,\n    n_a: int,\n    n_b: int,\n    result: np.ndarray,\n    params: dict[str, Any],\n) -> None:\n    \"\"\"\n    Create a vector of normalizing coefficients, length of the number of\n    index tuples. Each allele's statistic will be weighted by the average\n    of the proportion of AB haplotypes in each population present in the\n    index tuple.\n\n    :param result_dim: Number of dimensions in output. 
Dependent on arity of stat.\n    :param hap_weights: Proportion of each two-locus haplotype.\n    :param n_a: Number of alleles at the A locus.\n    :param n_b: Number of alleles at the B locus.\n    :param result: Result vector to store the normalizing coefficients in.\n    :param params: Params of summary function.\n    \"\"\"\n    del n_a, n_b  # handle unused params\n    sample_set_sizes = params[\"sample_set_sizes\"]\n    set_indexes = params[\"set_indexes\"]\n\n    for k in range(result_dim):\n        i = set_indexes[k][0]\n        j = set_indexes[k][1]\n        ni = sample_set_sizes[i]\n        nj = sample_set_sizes[j]\n        wAB_i = hap_weights[0, i]\n        wAB_j = hap_weights[0, j]\n        result[k] = (wAB_i + wAB_j) / (ni + nj)\n        # result[k] = (wAB_i / ni / 2) + (wAB_j / nj / 2)\n\n\ndef norm_total_weighted(\n    result_dim: int,\n    hap_weights: np.ndarray,\n    n_a: int,\n    n_b: int,\n    result: np.ndarray,\n    params: dict[str, Any],\n) -> None:\n    \"\"\"Create a vector of normalizing coefficients, length of the number of\n    sample sets. In this normalization strategy, we weight each allele's\n    statistic by the product of the allele frequencies\n\n    :param result_dim: Number of dimensions in output. 
Dependent on arity of stat.\n    :param hap_weights: Proportion of each two-locus haplotype.\n    :param n_a: Number of alleles at the A locus.\n    :param n_b: Number of alleles at the B locus.\n    :param result: Result vector to store the normalizing coefficients in.\n    :param params: Params of summary function.\n    \"\"\"\n    del hap_weights, params  # handle unused params\n    for k in range(result_dim):\n        result[k] = 1 / (n_a * n_b)\n\n\ndef check_order_bounds_dups(values, max_value):\n    \"\"\"Validate the specified values.\n\n    We require that values are:\n\n    1) Within the boundaries of the max value in the tree sequence\n    2) Sorted\n    3) Non-repeating\n\n    Raises an exception if any error is found.\n\n    :param values: 1d array of values to validate.\n    :param max_value: The upper bound for the provided values.\n    \"\"\"\n    if len(values) == 0:\n        return\n    i = 0\n    for i in range(len(values) - 1):\n        if values[i] < 0 or values[i] >= max_value:\n            raise ValueError(f\"Value out of bounds: {values[i]}\")\n        if values[i] >= values[i + 1]:\n            raise ValueError(f\"Value not sorted: {values[i], values[i + 1]}\")\n    if values[-1] < 0 or values[-1] >= max_value:\n        raise ValueError(f\"Value out of bounds: {values[i + 1]}\")\n\n\ndef get_site_row_col_indices(\n    row_sites: np.ndarray, col_sites: np.ndarray\n) -> tuple[list[int], list[int], list[int]]:\n    \"\"\"Co-iterate over the row and column sites, keeping a sorted union of\n    site values and an index into the unique list of sites for both the row\n    and column sites. 
This function produces a list of sites of interest and\n    row and column indexes into this list of sites.\n\n    NB: This routine requires that the site lists are sorted and deduplicated.\n\n    :param row_sites: List of sites that will be represented in the output\n                      matrix rows.\n    :param col_sites: List of sites that will be represented in the output\n                      matrix columns.\n    :returns: Tuple of lists of sites, row, and column indices.\n    \"\"\"\n    r = 0\n    c = 0\n    s = 0\n    sites = []\n    col_idx = []\n    row_idx = []\n\n    while r < len(row_sites) and c < len(col_sites):\n        if row_sites[r] < col_sites[c]:\n            sites.append(row_sites[r])\n            row_idx.append(s)\n            s += 1\n            r += 1\n        elif row_sites[r] > col_sites[c]:\n            sites.append(col_sites[c])\n            col_idx.append(s)\n            s += 1\n            c += 1\n        else:\n            sites.append(row_sites[r])\n            row_idx.append(s)\n            col_idx.append(s)\n            s += 1\n            r += 1\n            c += 1\n    while r < len(row_sites):\n        sites.append(row_sites[r])\n        row_idx.append(s)\n        s += 1\n        r += 1\n    while c < len(col_sites):\n        sites.append(col_sites[c])\n        col_idx.append(s)\n        s += 1\n        c += 1\n\n    return sites, row_idx, col_idx\n\n\ndef get_all_samples_bits(num_samples: int) -> BitSet:\n    \"\"\"Get the bits for all samples in the tree sequence. 
This is achieved\n    by creating a length 1 bitset and adding every sample's bit to it.\n\n    :param num_samples: Number of samples contained in the tree sequence.\n    :returns: Length 1 BitSet containing all samples in the tree sequence.\n    \"\"\"\n    all_samples = BitSet(num_samples, 1)\n    for i in range(num_samples):\n        all_samples.add(0, i)\n    return all_samples\n\n\ndef get_allele_samples(\n    site: tskit.Site, site_offset: int, mut_samples: BitSet, allele_samples: BitSet\n) -> int:\n    \"\"\"Given a BitSet that has been arranged so that we have every sample under\n    a given mutation's node, create the final output where we know which samples\n    should belong under each mutation, considering the mutation's parentage,\n    back mutations, and ancestral state.\n\n    To this end, we iterate over each mutation and store the samples under the\n    focal mutation in the output BitSet (allele_samples). Then, we check the\n    parent of the focal mutation (either a mutation or the ancestral allele),\n    and we subtract the samples in the focal mutation from the parent allele's\n    samples.\n\n    :param site: Focal site for which to adjust mutation data.\n    :param site_offset: Offset into allele_samples for our focal site.\n    :param mut_samples: BitSet containing the samples under each mutation in the\n                        focal site.\n    :param allele_samples: Output BitSet, initially passed in with all of the\n                           tree sequence samples set in the ancestral allele\n                           state.\n    :returns: number of alleles actually encountered (adjusting for back-mutation).\n    \"\"\"\n    alleles = []\n    num_alleles = 1\n    alleles.append(site.ancestral_state)\n\n    for m, mut in enumerate(site.mutations):\n        try:\n            allele = alleles.index(mut.derived_state)\n        except ValueError:\n            allele = len(alleles)\n            alleles.append(mut.derived_state)\n            
num_alleles += 1\n        allele_samples.union(allele + site_offset, mut_samples, m)\n        # now to find the parent allele from which we must subtract\n        alt_allele_state = site.ancestral_state\n        if mut.parent != tskit.NULL:\n            parent_mut = site.mutations[mut.parent - site.mutations[0].id]\n            alt_allele_state = parent_mut.derived_state\n        alt_allele = alleles.index(alt_allele_state)\n        # subtract focal allele's samples from the alt allele\n        allele_samples.difference(\n            alt_allele + site_offset, allele_samples, allele + site_offset\n        )\n\n    return num_alleles\n\n\ndef get_mutation_samples(\n    ts: tskit.TreeSequence, sites: list[int], sample_index_map: np.ndarray\n) -> tuple[np.ndarray, np.ndarray, BitSet]:\n    \"\"\"For a given set of sites, generate a BitSet of all samples possessing\n    each allelic state for each site. This includes the ancestral state, along\n    with any mutations contained in the site.\n\n    We achieve this goal by starting at the tree containing the first site in\n    our list, then we walk along each tree until we've encountered the last\n    tree containing the last site in our list. Along the way, we perform a\n    preorder traversal from the node of each mutation in a given site, storing\n    the samples under that particular node. After we've stored all of the samples\n    for each allele at a site, we adjust each allele's samples by removing\n    samples that have a different allele at a child mutation down the tree (see\n    get_allele_samples for more details).\n\n    We also gather some ancillary data while we iterate over the sites: the\n    number of alleles for each site, and the offset of each site. 
The number of\n    alleles at each site includes the count of mutations + the ancestral allele.\n    The offset for each site indicates how many array entries we must skip (ie\n    how many alleles exist before a specific site's entry) in order to address\n    the data for a given site.\n\n    :param ts: Tree sequence to gather data from.\n    :param sites: Subset of sites to consider when gathering data.\n    :param sample_index_map: Mapping from node id to sample id\n    :returns: Tuple of the number of alleles per site, site offsets, and the\n              BitSet of all samples in each allelic state.\n    \"\"\"\n    num_alleles = np.zeros(len(sites), dtype=np.uint64)\n    site_offsets = np.zeros(len(sites), dtype=np.uint64)\n    all_samples = get_all_samples_bits(ts.num_samples)\n    allele_samples = BitSet(\n        ts.num_samples, sum(len(ts.site(i).mutations) + 1 for i in sites)\n    )\n\n    site_offset = 0\n    site_idx = 0\n    for site_idx, site_id in enumerate(sites):\n        site = ts.site(site_id)\n        tree = ts.at(site.position)\n        # initialize the ancestral allele with all samples\n        allele_samples.union(site_offset, all_samples, 0)\n        # store samples for each mutation in mut_samples\n        mut_samples = BitSet(ts.num_samples, len(site.mutations))\n        for m, mut in enumerate(site.mutations):\n            for node in tree.preorder(mut.node):\n                if ts.node(node).is_sample():\n                    mut_samples.add(m, sample_index_map[node])\n        # account for mutation parentage, subtract samples from mutation parents\n        num_alleles[site_idx] = get_allele_samples(\n            site, site_offset, mut_samples, allele_samples\n        )\n        # increment the offset for ancestral + mutation alleles\n        site_offsets[site_idx] = site_offset\n        site_offset += len(site.mutations) + 1\n\n    return num_alleles, site_offsets, allele_samples\n\n\nSummaryFunc = Callable[[int, np.ndarray, int, 
np.ndarray, dict[str, Any]], None]\nNormFunc = Callable[[int, np.ndarray, int, int, np.ndarray, dict[str, Any]], None]\n\n\ndef compute_general_two_site_stat_result(\n    row_site_offset: int,\n    col_site_offset: int,\n    num_row_alleles: int,\n    num_col_alleles: int,\n    num_samples: int,\n    allele_samples: BitSet,\n    state_dim: int,\n    sample_sets: BitSet,\n    result_dim: int,\n    func: SummaryFunc,\n    norm_func: NormFunc,\n    params: dict[str, Any],\n    polarised: bool,\n    result: np.ndarray,\n) -> None:\n    \"\"\"For a given pair of sites, compute the summary statistic for the allele\n    frequencies for each allelic state of the two pairs.\n\n    :param row_site_offset: Offset of the row site's data in the allele_samples.\n    :param col_site_offset: Offset of the col site's data in the allele_samples.\n    :param num_row_alleles: Number of alleles in the row site.\n    :param num_col_alleles: Number of alleles in the col site.\n    :param num_samples: Number of samples in tree sequence.\n    :param allele_samples: BitSet containing the samples with each allelic state\n                           for each site of interest.\n    :param state_dim: Number of sample sets.\n    :param sample_sets: BitSet of sample sets to be intersected with the samples\n                        contained within each allele.\n    :param result_dim: Number of dimensions in output. Dependent on arity of stat.\n    :param func: Summary function used to compute each two-locus statistic.\n    :param norm_func: Function used to generate the normalization coefficients\n                      for each statistic.\n    :param params: Parameters to pass to the norm and summary function.\n    :param polarised: If true, skip the computation of the statistic for the\n                      ancestral state.\n    :param result: Vector of the results matrix to populate. 
We will produce one\n                   value per sample set, hence the vector of length state_dim.\n    \"\"\"\n    ss_A_samples = BitSet(num_samples, 1)\n    ss_B_samples = BitSet(num_samples, 1)\n    ss_AB_samples = BitSet(num_samples, 1)\n    AB_samples = BitSet(num_samples, 1)\n    weights = np.zeros((3, state_dim), np.float64)\n    norm = np.zeros(result_dim, np.float64)\n    result_tmp = np.zeros(result_dim, np.float64)\n\n    polarised_val = 1 if polarised else 0\n    for mut_a in range(polarised_val, num_row_alleles):\n        a = int(mut_a + row_site_offset)\n        for mut_b in range(polarised_val, num_col_alleles):\n            b = int(mut_b + col_site_offset)\n            allele_samples.intersect(a, allele_samples, b, AB_samples)\n            for k in range(state_dim):\n                allele_samples.intersect(a, sample_sets, k, ss_A_samples)\n                allele_samples.intersect(b, sample_sets, k, ss_B_samples)\n                AB_samples.intersect(0, sample_sets, k, ss_AB_samples)\n\n                w_AB = ss_AB_samples.count(0)\n                w_A = ss_A_samples.count(0)\n                w_B = ss_B_samples.count(0)\n\n                weights[0, k] = w_AB\n                weights[1, k] = w_A - w_AB  # w_Ab\n                weights[2, k] = w_B - w_AB  # w_aB\n\n            func(state_dim, weights, result_dim, result_tmp, params)\n\n            norm_func(\n                result_dim,\n                weights,\n                num_row_alleles - polarised_val,\n                num_col_alleles - polarised_val,\n                norm,\n                params,\n            )\n\n            for k in range(result_dim):\n                result[k] += result_tmp[k] * norm[k]\n\n\ndef two_site_count_stat(\n    ts: tskit.TreeSequence,\n    func: SummaryFunc,\n    norm_func: NormFunc,\n    result_dim: int,\n    num_sample_sets: int,\n    sample_set_sizes: np.ndarray,\n    sample_sets: BitSet,\n    sample_index_map: np.ndarray,\n    row_sites: np.ndarray,\n    
col_sites: np.ndarray,\n    indexes: np.ndarray,\n    polarised: bool,\n) -> np.ndarray:\n    \"\"\"Outer function that generates the high-level intermediates used in the\n    computation of our two-locus statistics. First, we compute the row and\n    column indices for our unique list of sites, then we get each sample for\n    each allele in our list of specified sites.\n\n    With those intermediates in hand, we iterate over the row and column indices\n    to compute comparisons between each of the specified lists of sites. We pass\n    a vector of results to the computation, which will compute a single result\n    for each sample set, inserting that into our result matrix.\n\n    :param ts: Tree sequence to gather data from.\n    :param func: Function used to compute each two-locus statistic.\n    :param norm_func: Function used to generate the normalization coefficients\n                      for each statistic.\n    :param result_dim: The dimensions of the output array. For one-way stats,\n                       this will be the number of sample sets. For two-way stats,\n                       the number of index tuples.\n    :param num_sample_sets: Number of sample sets that we will consider.\n    :param sample_set_sizes: Number of samples in each sample set.\n    :param sample_sets: BitSet of samples to compute stats for. We will only\n                        consider these samples in our computations, resulting\n                        in stats that are computed on subsets of the samples\n                        on the tree sequence.\n    :param sample_index_map: Mapping from node id to sample id\n    :param row_sites: Sites contained in the rows of the output matrix.\n    :param col_sites: Sites contained in the columns of the output matrix.\n    :param indexes: List of sample set indexes on which to compute statistics. 
The\n                    arity (and hence the length of each index group) is dictated\n                    by the summary function.\n    :param polarised: If true, skip the computation of the statistic for the\n                      ancestral state.\n    :returns: 3D array of results, dimensions (sample_sets, row_sites, col_sites).\n    \"\"\"\n    params = {\"sample_set_sizes\": sample_set_sizes, \"set_indexes\": indexes}\n    result = np.zeros((result_dim, len(row_sites), len(col_sites)), dtype=np.float64)\n\n    state_dim = num_sample_sets\n\n    sites, row_idx, col_idx = get_site_row_col_indices(row_sites, col_sites)\n    num_alleles, site_offsets, allele_samples = get_mutation_samples(\n        ts, sites, sample_index_map\n    )\n\n    for row, row_site in enumerate(row_idx):\n        for col, col_site in enumerate(col_idx):\n            compute_general_two_site_stat_result(\n                site_offsets[row_site],\n                site_offsets[col_site],\n                num_alleles[row_site],\n                num_alleles[col_site],\n                ts.num_samples,\n                allele_samples,\n                state_dim,\n                sample_sets,\n                result_dim,\n                func,\n                norm_func,\n                params,\n                polarised,\n                result[:, row, col],\n            )\n\n    return result\n\n\ndef get_index_repeats(indices):\n    \"\"\"In a list of indices, find the repeat values. 
The first value\n    is offset by the first index and ranges to the last index.\n    For instance, [4, 4, 5, 6, 8] becomes [2, 1, 1, 0, 1].\n    The list must be sorted and ordered.\n\n    :param indices: List of indices to count\n    :returns: Counts of index repeats\n    \"\"\"\n    counts = np.zeros(indices[-1] - indices[0] + 1, dtype=np.int32)\n    idx = indices[0]\n    count = 1\n    for i in range(1, len(indices)):\n        if indices[i] == indices[i - 1]:\n            count += 1\n        else:\n            counts[idx - indices[0]] = count\n            count = 1\n            idx = indices[i]\n    counts[idx - indices[0]] = count\n    return counts\n\n\ndef two_branch_count_stat(\n    ts: tskit.TreeSequence,\n    func: SummaryFunc,\n    norm_func,\n    state_dim: int,\n    result_dim: int,\n    sample_set_sizes: np.ndarray,\n    sample_sets: BitSet,\n    sample_index_map: np.ndarray,\n    row_trees: np.ndarray,\n    col_trees: np.ndarray,\n    indexes: np.ndarray,\n    polarised: bool,\n) -> np.ndarray:\n    \"\"\"\n    Compute a tree X tree LD matrix by walking along the tree sequence and\n    computing haplotype counts. This method incrementally adds and removes\n    branches from a tree sequence and updates the stat based on sample additions\n    and removals. We bifurcate the tree with a given branch on each locus and\n    intersect the samples under each branch to produce haplotype counts. It\n    is possible to subset the output matrix with genomic positions. Positions\n    lying on the same tree will receive the same LD value in the output matrix.\n\n    :param ts: Tree sequence to gather data from.\n    :param func: Function used to compute each two-locus statistic.\n    :param norm_func: Not (YET) applicable for branch stats: TODO?\n    :param state_dim: Number of sample sets.\n    :param result_dim: The dimensions of the output array. For one-way stats,\n                       this will be the number of sample sets. 
For two-way stats,\n                       the number of index tuples.\n    :param sample_set_sizes: Number of samples in each sample set.\n    :param sample_sets: BitSet of samples to compute stats for. We will only\n                        consider these samples in our computations, resulting\n                        in stats that are computed on subsets of the samples\n                        on the tree sequence.\n    :param sample_index_map: Mapping from node id to sample id\n    :param row_trees: Trees contained in the rows of the output matrix (repeats ok)\n    :param col_trees: Trees contained in the columns of the output matrix (repeats ok)\n    :param indexes: List of sample set indexes on which to compute statistics. The\n                    arity (and hence the length of each index group) is dictated\n                    by the summary function.\n    :param polarised: If true, skip the computation of the statistic for the\n                      ancestral state.\n    :returns: 3D array of results, dimensions (sample_sets, row_sites, col_sites).\n    \"\"\"\n    params = {\"sample_set_sizes\": sample_set_sizes, \"set_indexes\": indexes}\n    result = np.zeros((result_dim, len(row_trees), len(col_trees)), dtype=np.float64)\n    row_repeats = get_index_repeats(row_trees)\n    col_repeats = get_index_repeats(col_trees)\n\n    stat = np.zeros(result_dim, dtype=np.float64)\n    # State is initialized at tree -1\n    l_state = TreeState(ts, sample_sets, state_dim, sample_index_map)\n    r_state = TreeState(ts, sample_sets, state_dim, sample_index_map)\n\n    # Even if we're skipping trees, we must iterate over the range to keep the\n    # running total of the statistic consistent.\n    row = 0\n    for r in range(row_trees[-1] + 1 - row_trees[0]):\n        # zero out stat and r_state at the beginning of each row\n        stat = np.zeros_like(stat)\n        r_state = TreeState(ts, sample_sets, state_dim, sample_index_map)\n        l_state.advance(r + 
row_trees[0])\n        # use null TreeState to advance l_state, conveniently we just zeroed r_state\n        _, l_state = compute_branch_stat(\n            ts, func, stat, params, state_dim, result_dim, r_state, l_state\n        )\n        col = 0\n        for c in range(col_trees[-1] + 1 - col_trees[0]):\n            r_state.advance(c + col_trees[0])\n            stat, r_state = compute_branch_stat(\n                ts, func, stat, params, state_dim, result_dim, l_state, r_state\n            )\n            # Fill in repeated values for all sample sets\n            for i in range(row_repeats[r]):\n                for j in range(col_repeats[c]):\n                    result[:, i + row, j + col] = stat\n            col += col_repeats[c]\n        row += row_repeats[r]\n    return result\n\n\ndef sample_sets_to_bit_array(\n    ts: tskit.TreeSequence, sample_sets: list[list[int]] | list[np.ndarray]\n) -> tuple[np.ndarray, np.ndarray, BitSet]:\n    \"\"\"Convert the list of sample ids to a bit array. This function takes\n    sample identifiers and maps them to their enumerated integer values, then\n    stores these values in a bit array. 
We produce a BitArray and a numpy\n    array of integers that specify how many samples there are in each sample set.\n\n    NB: this function's type signature is of type integer, but I believe this\n        could be expanded to Any, currently untested so the integer\n        specification remains.\n\n    :param ts: Tree sequence to gather data from.\n    :param sample_sets: List of sample identifiers to store in bit array.\n    :returns: Tuple of the node id to sample index map, the numpy array of\n              sample set sizes, and the sample set BitSet.\n    \"\"\"\n    sample_sets_bits = BitSet(ts.num_samples, len(sample_sets))\n    sample_index_map = -np.ones(ts.num_nodes, dtype=np.int32)\n    sample_set_sizes = np.zeros(len(sample_sets), dtype=np.uint64)\n\n    sample_count = 0\n    for node in ts.nodes():\n        if node.flags & tskit.NODE_IS_SAMPLE:\n            sample_index_map[node.id] = sample_count\n            sample_count += 1\n\n    for k, sample_set in enumerate(sample_sets):\n        sample_set_sizes[k] = len(sample_set)\n        for sample in sample_set:\n            sample_index = sample_index_map[sample]\n            if sample_index == tskit.NULL:\n                raise ValueError(f\"Sample out of bounds: {sample}\")\n            if sample_sets_bits.contains(k, sample_index):\n                raise ValueError(f\"Duplicate sample detected: {sample}\")\n            sample_sets_bits.add(k, sample_index)\n\n    return sample_index_map, sample_set_sizes, sample_sets_bits\n\n\ndef positions_to_tree_indices(bp, positions):\n    \"\"\"Given a set of breakpoints and positions, provide an array of tree\n    indices that correspond with positions. 
We have already validated that the\n    bounds of the positions are correct and that they are sorted, and\n    deduplicated.\n\n    :param bp: Breakpoints of the tree sequence\n    :param positions: Positions to search over\n    :returns: Array of tree indices\n    \"\"\"\n    tree_idx = 0\n    tree_indices = -np.ones_like(positions, dtype=np.int32)\n\n    for i in range(len(positions)):\n        while bp[tree_idx + 1] <= positions[i]:\n            tree_idx += 1\n        tree_indices[i] = tree_idx\n\n    return tree_indices\n\n\ndef two_locus_count_stat(\n    ts,\n    summary_func,\n    norm_func,\n    polarised,\n    mode,\n    sites=None,\n    positions=None,\n    sample_sets=None,\n    indexes=None,\n):\n    \"\"\"Outer wrapper for two site general stat functionality. Perform some input\n    validation, get the site index and allele state, then compute the LD matrix.\n\n    :param ts: Tree sequence to gather data from.\n    :param summary_func: Function used to compute each two-locus statistic.\n    :param norm_func: Function used to generate the normalization coefficients\n                      for each statistic.\n    :param polarised: If true, skip the computation of the statistic for the\n                      ancestral state.\n    :param mode: Whether or not to compute \"site\" or \"branch\" statistics.\n    :param sites: List of two lists containing [row_sites, column_sites].\n    :param positions: List of two lists containing [row_positions, col_positions],\n                      which are genomic positions to compute LD on.\n    :param sample_sets: List of lists of samples to compute stats for. We will\n                        only consider these samples in our computations,\n                        resulting in stats that are computed on subsets of the\n                        samples on the tree sequence.\n    :param indexes: List of sample set indexes on which to compute statistics. 
The\n                    arity (and hence the length of each index group) is dictated\n                    by the summary function.\n    :returns: 3d numpy array containing LD for (sample_set,row_site,column_site)\n              unless one or no sample sets are specified, then 2d array\n              containing LD for (row_site,column_site).\n    \"\"\"\n    if sample_sets is None:\n        sample_sets = ts.samples()\n\n    drop_dim = False\n    if indexes is None:\n        try:\n            sample_sets = np.array(sample_sets, dtype=np.uint64)\n        except ValueError:\n            pass\n        else:\n            if sample_sets.ndim == 1:\n                sample_sets = [sample_sets]\n                drop_dim = True\n        result_dim = len(sample_sets)\n    else:\n        indexes = util.safe_np_int_cast(indexes, np.int32)\n        if len(indexes.shape) == 1:\n            indexes = indexes.reshape((1, indexes.shape[0]))\n            drop_dim = True\n        if len(indexes.shape) != 2 or indexes.shape[1] != 2:\n            raise ValueError(\n                \"Indexes must be convertable to a 2D numpy array with 2 columns\"\n            )\n        result_dim = len(indexes)\n\n    sample_index_map, ss_sizes, ss_bits = sample_sets_to_bit_array(ts, sample_sets)\n    num_sample_sets = len(ss_sizes)\n    # If indexes are specified, we are using two-way statistics\n    if indexes is not None:\n        indexes = tskit.util.safe_np_int_cast(indexes, np.int32)\n        idx_lens = {len(i) for i in indexes}\n        if idx_lens != {2}:\n            raise ValueError(f\"Sample set indexes must be length 2, lengths: {idx_lens}\")\n        check_sample_stat_inputs(num_sample_sets, 2, result_dim, indexes)\n    if mode == \"site\":\n        if positions is not None:\n            raise ValueError(\"Cannot specify positions in site mode\")\n        if sites is None:\n            row_sites = np.arange(ts.num_sites, dtype=np.int32)\n            col_sites = np.arange(ts.num_sites, 
dtype=np.int32)\n        elif len(sites) == 2:\n            row_sites = np.asarray(sites[0])\n            col_sites = np.asarray(sites[1])\n        elif len(sites) == 1:\n            row_sites = np.asarray(sites[0])\n            col_sites = row_sites\n        else:\n            raise ValueError(\n                f\"Sites must be a length 1 or 2 list, got a length {len(sites)} list\"\n            )\n        check_order_bounds_dups(row_sites, ts.num_sites)\n        check_order_bounds_dups(col_sites, ts.num_sites)\n        result = two_site_count_stat(\n            ts,\n            summary_func,\n            norm_func,\n            result_dim,\n            num_sample_sets,\n            ss_sizes,\n            ss_bits,\n            sample_index_map,\n            row_sites,\n            col_sites,\n            indexes,\n            polarised,\n        )\n    elif mode == \"branch\":\n        if sites is not None:\n            raise ValueError(\"Cannot specify sites in branch mode\")\n        if positions is None:\n            row_trees = np.arange(ts.num_trees, dtype=np.int32)\n            col_trees = np.arange(ts.num_trees, dtype=np.int32)\n        elif len(positions) == 2:\n            breakpoints = ts.breakpoints(as_array=True)\n            row_positions = np.asarray(positions[0])\n            col_positions = np.asarray(positions[1])\n            check_order_bounds_dups(row_positions, breakpoints[-1])\n            check_order_bounds_dups(col_positions, breakpoints[-1])\n            row_trees = positions_to_tree_indices(breakpoints, row_positions)\n            col_trees = positions_to_tree_indices(breakpoints, col_positions)\n        elif len(positions) == 1:\n            breakpoints = ts.breakpoints(as_array=True)\n            row_positions = np.asarray(positions[0])\n            col_positions = row_positions\n            check_order_bounds_dups(row_positions, breakpoints[-1])\n            row_trees = positions_to_tree_indices(breakpoints, row_positions)\n            
col_trees = row_trees\n        else:\n            raise ValueError(\n                \"Positions must be a length 1 or 2 list, \"\n                f\"got a length {len(positions)} list\"\n            )\n        result = two_branch_count_stat(\n            ts,\n            summary_func,\n            None,\n            num_sample_sets,\n            result_dim,\n            ss_sizes,\n            ss_bits,\n            sample_index_map,\n            row_trees,\n            col_trees,\n            indexes,\n            False,\n        )\n    else:\n        raise ValueError(f\"Unknown mode: {mode}\")\n\n    # If there is one result dimension, return a 2d array\n    if drop_dim is True:\n        return result.reshape(result.shape[1:3])\n    return result\n\n\ndef r2_summary_func(\n    state_dim: int,\n    state: np.ndarray,\n    result_dim: int,\n    result: np.ndarray,\n    params: dict[str, Any],\n) -> None:\n    \"\"\"Summary function for the r2 statistic. We first compute the proportion of\n    AB, A, and B haplotypes, then we compute the r2 statistic, storing the outputs\n    in the result vector, one entry per sample set.\n\n    :param state_dim: Number of sample sets.\n    :param state: Counts of 3 haplotype configurations for each sample set.\n    :param result_dim: Number of dimensions in output. 
Dependent on arity of stat.\n    :param result: Vector of length state_dim to store the results in.\n    :param params: Parameters for the summary function.\n    \"\"\"\n    sample_set_sizes = params[\"sample_set_sizes\"]\n    for k in range(state_dim):\n        n = sample_set_sizes[k]\n        p_AB = state[0, k] / n\n        p_Ab = state[1, k] / n\n        p_aB = state[2, k] / n\n\n        p_A = p_AB + p_Ab\n        p_B = p_AB + p_aB\n\n        D = p_AB - (p_A * p_B)\n        denom = p_A * p_B * (1 - p_A) * (1 - p_B)\n\n        with suppress_overflow_div0_warning():\n            result[k] = (D * D) / denom\n\n\ndef r2_ij_summary_func(\n    state_dim: int,\n    state: np.ndarray,\n    result_dim: int,\n    result: np.ndarray,\n    params: dict[str, Any],\n) -> None:\n    sample_set_sizes = params[\"sample_set_sizes\"]\n    set_indexes = params[\"set_indexes\"]\n    for k in range(result_dim):\n        i = set_indexes[k][0]\n        j = set_indexes[k][1]\n        n = sample_set_sizes[i]\n        pAB = state[0, i] / n\n        pAb = state[1, i] / n\n        paB = state[2, i] / n\n        pA = pAB + pAb\n        pB = pAB + paB\n        D_i = pAB - pA * pB\n        denom_i = np.sqrt(pA * (1 - pA) * pB * (1 - pB))\n\n        n = sample_set_sizes[j]\n        pAB = state[0, j] / n\n        pAb = state[1, j] / n\n        paB = state[2, j] / n\n        pA = pAB + pAb\n        pB = pAB + paB\n        D_j = pAB - pA * pB\n        denom_j = np.sqrt(pA * (1 - pA) * pB * (1 - pB))\n\n        with suppress_overflow_div0_warning():\n            result[k] = (D_i * D_j) / (denom_i * denom_j)\n\n\ndef D_summary_func(\n    state_dim: int,\n    state: np.ndarray,\n    result_dim: int,\n    result: np.ndarray,\n    params: dict[str, Any],\n) -> None:\n    sample_set_sizes = params[\"sample_set_sizes\"]\n    for k in range(state_dim):\n        n = sample_set_sizes[k]\n        p_AB = state[0, k] / float(n)\n        p_Ab = state[1, k] / float(n)\n        p_aB = state[2, k] / float(n)\n\n   
     p_A = p_AB + p_Ab\n        p_B = p_AB + p_aB\n\n        result[k] = p_AB - (p_A * p_B)\n\n\ndef D2_summary_func(\n    state_dim: int,\n    state: np.ndarray,\n    result_dim: int,\n    result: np.ndarray,\n    params: dict[str, Any],\n) -> None:\n    sample_set_sizes = params[\"sample_set_sizes\"]\n    for k in range(state_dim):\n        n = sample_set_sizes[k]\n        p_AB = state[0, k] / float(n)\n        p_Ab = state[1, k] / float(n)\n        p_aB = state[2, k] / float(n)\n\n        p_A = p_AB + p_Ab\n        p_B = p_AB + p_aB\n\n        result[k] = p_AB - (p_A * p_B)\n        result[k] = result[k] * result[k]\n\n\ndef D_prime_summary_func(\n    state_dim: int,\n    state: np.ndarray,\n    result_dim: int,\n    result: np.ndarray,\n    params: dict[str, Any],\n) -> None:\n    sample_set_sizes = params[\"sample_set_sizes\"]\n    for k in range(state_dim):\n        n = sample_set_sizes[k]\n        p_AB = state[0, k] / float(n)\n        p_Ab = state[1, k] / float(n)\n        p_aB = state[2, k] / float(n)\n\n        p_A = p_AB + p_Ab\n        p_B = p_AB + p_aB\n\n        D = p_AB - (p_A * p_B)\n        with suppress_overflow_div0_warning():\n            if D >= 0:\n                result[k] = D / min(p_A * (1 - p_B), (1 - p_A) * p_B)\n            else:\n                result[k] = D / min(p_A * p_B, (1 - p_A) * (1 - p_B))\n\n\ndef r_summary_func(\n    state_dim: int,\n    state: np.ndarray,\n    result_dim: int,\n    result: np.ndarray,\n    params: dict[str, Any],\n) -> None:\n    sample_set_sizes = params[\"sample_set_sizes\"]\n    for k in range(state_dim):\n        n = sample_set_sizes[k]\n        p_AB = state[0, k] / n\n        p_Ab = state[1, k] / n\n        p_aB = state[2, k] / n\n\n        p_A = p_AB + p_Ab\n        p_B = p_AB + p_aB\n\n        D = p_AB - (p_A * p_B)\n        denom = p_A * p_B * (1 - p_A) * (1 - p_B)\n\n        with suppress_overflow_div0_warning():\n            result[k] = D / np.sqrt(denom)\n\n\ndef Dz_summary_func(\n    state_dim: 
int,\n    state: np.ndarray,\n    result_dim: int,\n    result: np.ndarray,\n    params: dict[str, Any],\n) -> None:\n    sample_set_sizes = params[\"sample_set_sizes\"]\n    for k in range(state_dim):\n        n = sample_set_sizes[k]\n        p_AB = state[0, k] / n\n        p_Ab = state[1, k] / n\n        p_aB = state[2, k] / n\n\n        p_A = p_AB + p_Ab\n        p_B = p_AB + p_aB\n\n        D = p_AB - (p_A * p_B)\n\n        result[k] = D * (1 - 2 * p_A) * (1 - 2 * p_B)\n\n\ndef pi2_summary_func(\n    state_dim: int,\n    state: np.ndarray,\n    result_dim: int,\n    result: np.ndarray,\n    params: dict[str, Any],\n) -> None:\n    sample_set_sizes = params[\"sample_set_sizes\"]\n    for k in range(state_dim):\n        n = sample_set_sizes[k]\n        p_AB = state[0, k] / n\n        p_Ab = state[1, k] / n\n        p_aB = state[2, k] / n\n\n        p_A = p_AB + p_Ab\n        p_B = p_AB + p_aB\n\n        result[k] = p_A * (1 - p_A) * p_B * (1 - p_B)\n\n\n# Unbiased estimators of pi2, dz, and d2. These are derived in Ragsdale 2019\n# (https://doi.org/10.1093/molbev/msz265) and can be used in place of the method\n# outlined by McVean 2002. The reason for using haplotype counts in the branch\n# methods is that we can compute statistics that cannot be represented by tMRCA\n# covariance. 
With these unbiased estimators, we still reproduce the values\n# estimated with tMRCA covariance.\n\n# TODO: update these summary functions to have the same function signature as\n#       the summary functions defined above.\n\n\ndef pi2_unbiased_summary_func(\n    state_dim: int,\n    state: np.ndarray,\n    result_dim: int,\n    result: np.ndarray,\n    params: dict[str, Any],\n):\n    sample_set_sizes = params[\"sample_set_sizes\"]\n    for k in range(state_dim):\n        n = sample_set_sizes[k]\n        w_AB = state[0, k]\n        w_Ab = state[1, k]\n        w_aB = state[2, k]\n        w_ab = n - (w_AB + w_Ab + w_aB)\n        with suppress_overflow_div0_warning():\n            result[k] = (1 / (n * (n - 1) * (n - 2) * (n - 3))) * (\n                ((w_AB + w_Ab) * (w_aB + w_ab) * (w_AB + w_aB) * (w_Ab + w_ab))\n                - ((w_AB * w_ab) * (w_AB + w_ab + (3 * w_Ab) + (3 * w_aB) - 1))\n                - ((w_Ab * w_aB) * (w_Ab + w_aB + (3 * w_AB) + (3 * w_ab) - 1))\n            )\n\n\ndef Dz_unbiased_summary_func(\n    state_dim: int,\n    state: np.ndarray,\n    result_dim: int,\n    result: np.ndarray,\n    params: dict[str, Any],\n):\n    sample_set_sizes = params[\"sample_set_sizes\"]\n    for k in range(state_dim):\n        n = sample_set_sizes[k]\n        w_AB = state[0, k]\n        w_Ab = state[1, k]\n        w_aB = state[2, k]\n        w_ab = n - (w_AB + w_Ab + w_aB)\n        with suppress_overflow_div0_warning():\n            result[k] = (1 / (n * (n - 1) * (n - 2) * (n - 3))) * (\n                (\n                    ((w_AB * w_ab) - (w_Ab * w_aB))\n                    * (w_aB + w_ab - w_AB - w_Ab)\n                    * (w_Ab + w_ab - w_AB - w_aB)\n                )\n                - ((w_AB * w_ab) * (w_AB + w_ab - w_Ab - w_aB - 2))\n                - ((w_Ab * w_aB) * (w_Ab + w_aB - w_AB - w_ab - 2))\n            )\n\n\ndef D2_unbiased_summary_func(\n    state_dim: int,\n    state: np.ndarray,\n    result_dim: int,\n    result: np.ndarray,\n  
  params: dict[str, Any],\n):\n    sample_set_sizes = params[\"sample_set_sizes\"]\n    for k in range(state_dim):\n        n = sample_set_sizes[k]\n        w_AB = state[0, k]\n        w_Ab = state[1, k]\n        w_aB = state[2, k]\n        w_ab = n - (w_AB + w_Ab + w_aB)\n        with suppress_overflow_div0_warning():\n            result[k] = (1 / (n * (n - 1) * (n - 2) * (n - 3))) * (\n                ((w_aB**2) * (w_Ab - 1) * w_Ab)\n                + ((w_ab - 1) * w_ab * (w_AB - 1) * w_AB)\n                - (w_aB * w_Ab * (w_Ab + (2 * w_ab * w_AB) - 1))\n            )\n\n\ndef D2_ij_summary_func(\n    state_dim: int,\n    state: np.ndarray,\n    result_dim: int,\n    result: np.ndarray,\n    params: dict[str, Any],\n):\n    sample_set_sizes = params[\"sample_set_sizes\"]\n    set_indexes = params[\"set_indexes\"]\n    for k in range(result_dim):\n        i = set_indexes[k][0]\n        j = set_indexes[k][1]\n\n        n = sample_set_sizes[i]\n        p_AB = state[0, i] / n\n        p_Ab = state[1, i] / n\n        p_aB = state[2, i] / n\n        p_A = p_AB + p_Ab\n        p_B = p_AB + p_aB\n        D_i = p_AB - (p_A * p_B)\n\n        n = sample_set_sizes[j]\n        p_AB = state[0, j] / n\n        p_Ab = state[1, j] / n\n        p_aB = state[2, j] / n\n        p_A = p_AB + p_Ab\n        p_B = p_AB + p_aB\n        D_j = p_AB - (p_A * p_B)\n\n        result[k] = D_i * D_j\n\n\ndef D2_ij_unbiased_summary_func(\n    state_dim: int,\n    state: np.ndarray,\n    result_dim: int,\n    result: np.ndarray,\n    params: dict[str, Any],\n):\n    sample_set_sizes = params[\"sample_set_sizes\"]\n    set_indexes = params[\"set_indexes\"]\n\n    for k in range(result_dim):\n        i = set_indexes[k][0]\n        j = set_indexes[k][1]\n        # We require disjoint sample sets because we test equality here\n        if i == j:\n            n = sample_set_sizes[i]\n            w_AB = state[0, i]\n            w_Ab = state[1, i]\n            w_aB = state[2, i]\n            w_ab = n 
- (w_AB + w_Ab + w_aB)\n            with suppress_overflow_div0_warning():\n                result[k] = (\n                    (\n                        w_AB * (w_AB - 1) * w_ab * (w_ab - 1)\n                        + w_Ab * (w_Ab - 1) * w_aB * (w_aB - 1)\n                        - 2 * w_AB * w_Ab * w_aB * w_ab\n                    )\n                    / n\n                    / (n - 1)\n                    / (n - 2)\n                    / (n - 3)\n                )\n        else:\n            n_i = sample_set_sizes[i]\n            w_AB_i = state[0, i]\n            w_Ab_i = state[1, i]\n            w_aB_i = state[2, i]\n            w_ab_i = n_i - (w_AB_i + w_Ab_i + w_aB_i)\n\n            n_j = sample_set_sizes[j]\n            w_AB_j = state[0, j]\n            w_Ab_j = state[1, j]\n            w_aB_j = state[2, j]\n            w_ab_j = n_j - (w_AB_j + w_Ab_j + w_aB_j)\n\n            with suppress_overflow_div0_warning():\n                result[k] = (\n                    (w_Ab_i * w_aB_i - w_AB_i * w_ab_i)\n                    * (w_Ab_j * w_aB_j - w_AB_j * w_ab_j)\n                    / n_i\n                    / (n_i - 1)\n                    / n_j\n                    / (n_j - 1)\n                )\n\n\nSUMMARY_FUNCS = {\n    \"r\": r_summary_func,\n    \"r2\": r2_summary_func,\n    \"D\": D_summary_func,\n    \"D2\": D2_summary_func,\n    \"D_prime\": D_prime_summary_func,\n    \"pi2\": pi2_summary_func,\n    \"Dz\": Dz_summary_func,\n    \"D2_unbiased\": D2_unbiased_summary_func,\n    \"Dz_unbiased\": Dz_unbiased_summary_func,\n    \"pi2_unbiased\": pi2_unbiased_summary_func,\n}\n\nTWO_WAY_SUMMARY_FUNCS = {\n    \"r2\": r2_ij_summary_func,\n    \"D2\": D2_ij_summary_func,\n    \"D2_unbiased\": D2_ij_unbiased_summary_func,\n}\n\nNORM_METHOD = {\n    D_summary_func: norm_total_weighted,\n    D_prime_summary_func: norm_total_weighted,\n    D2_summary_func: norm_total_weighted,\n    Dz_summary_func: norm_total_weighted,\n    pi2_summary_func: 
norm_total_weighted,\n    r_summary_func: norm_total_weighted,\n    r2_summary_func: norm_hap_weighted,\n    D2_unbiased_summary_func: norm_total_weighted,\n    Dz_unbiased_summary_func: norm_total_weighted,\n    pi2_unbiased_summary_func: norm_total_weighted,\n    r2_ij_summary_func: norm_hap_weighted_ij,\n    D2_ij_summary_func: norm_total_weighted,\n    D2_ij_unbiased_summary_func: norm_total_weighted,\n}\n\nPOLARIZATION = {\n    D_summary_func: True,\n    D_prime_summary_func: True,\n    D2_summary_func: False,\n    Dz_summary_func: False,\n    pi2_summary_func: False,\n    r_summary_func: True,\n    r2_summary_func: False,\n    D2_unbiased_summary_func: False,\n    Dz_unbiased_summary_func: False,\n    pi2_unbiased_summary_func: False,\n    r2_ij_summary_func: None,\n    D2_ij_summary_func: None,\n    D2_ij_unbiased_summary_func: None,\n}\n\n\ndef check_set_indexes(\n    num_sets: int, num_set_indexes: int, tuple_size: int, set_indexes: np.ndarray\n):\n    for i in range(num_set_indexes):\n        for j in range(tuple_size):\n            if set_indexes[i, j] < 0 or set_indexes[i, j] >= num_sets:\n                raise ValueError(f\"Bad sample set index: {set_indexes[i, j]}\")\n\n\ndef check_sample_stat_inputs(\n    num_sample_sets: int,\n    tuple_size: int,\n    num_index_tuples: int,\n    index_tuples: np.ndarray,\n):\n    if num_sample_sets < tuple_size:\n        raise ValueError(\n            \"Insufficient number of sample sets: \"\n            f\"num_sample_sets: {num_sample_sets} tuple_size: {tuple_size}\"\n        )\n    if num_index_tuples < 1:\n        raise ValueError(f\"Insufficient number of index tuples: {num_index_tuples}\")\n    check_set_indexes(num_sample_sets, num_index_tuples, tuple_size, index_tuples)\n\n\ndef ld_matrix(\n    ts,\n    sample_sets=None,\n    sites=None,\n    positions=None,\n    stat=\"r2\",\n    indexes=None,\n    mode=\"site\",\n):\n    if indexes is not None:\n        summary_func = TWO_WAY_SUMMARY_FUNCS[stat]\n    
else:\n        summary_func = SUMMARY_FUNCS[stat]\n    return two_locus_count_stat(\n        ts,\n        summary_func,\n        NORM_METHOD[summary_func],\n        POLARIZATION[summary_func],\n        mode,\n        sites=sites,\n        positions=positions,\n        indexes=indexes,\n        sample_sets=sample_sets,\n    )\n\n\ndef get_paper_ex_ts():\n    \"\"\"Generate the tree sequence example from the tskit paper\n\n    Data taken from the tests:\n    https://github.com/tskit-dev/tskit/blob/61a844a/c/tests/testlib.c#L55-L96\n\n    :returns: Tree sequence\n    \"\"\"\n    nodes = \"\"\"\\\n    is_sample time population individual\n    1  0       -1   0\n    1  0       -1   0\n    1  0       -1   1\n    1  0       -1   1\n    0  0.071   -1   -1\n    0  0.090   -1   -1\n    0  0.170   -1   -1\n    0  0.202   -1   -1\n    0  0.253   -1   -1\n    \"\"\"\n\n    edges = \"\"\"\\\n    left   right   parent  child\n    2 10 4 2\n    2 10 4 3\n    0 10 5 1\n    0 2  5 3\n    2 10 5 4\n    0 7  6 0,5\n    7 10 7 0,5\n    0 2  8 2,6\n    \"\"\"\n\n    sites = \"\"\"\\\n    position ancestral_state\n    1      0\n    4.5    0\n    8.5    0\n    \"\"\"\n\n    mutations = \"\"\"\\\n    site node derived_state\n    0      2   1\n    1      0   1\n    2      5   1\n    \"\"\"\n\n    individuals = \"\"\"\\\n    flags  location   parents\n    0      0.2,1.5    -1,-1\n    0      0.0,0.0    -1,-1\n    \"\"\"\n\n    return tskit.load_text(\n        nodes=io.StringIO(nodes),\n        edges=io.StringIO(edges),\n        sites=io.StringIO(sites),\n        individuals=io.StringIO(individuals),\n        mutations=io.StringIO(mutations),\n        strict=False,\n    )\n\n\n# fmt:off\n# true r2 values for the tree sequence from the tskit paper\nPAPER_EX_TRUTH_MATRIX = np.array(\n    [[1.0,        0.11111111, 0.11111111],  # noqa: E241\n     [0.11111111, 1.0,        1.0],  # noqa: E241\n     [0.11111111, 1.0,        1.0]]  # noqa: E241\n)\nPAPER_EX_BRANCH_TRUTH_MATRIX = np.array(\n    [[ 
1.06666667e-03, -1.26666667e-04, -1.26666667e-04],  # noqa: E241,E201\n     [-1.26666667e-04,  6.01666667e-05,  6.01666667e-05],  # noqa: E241\n     [-1.26666667e-04,  6.01666667e-05,  6.01666667e-05]]  # noqa: E241\n)\n# fmt:on\n\n\ndef get_matrix_partitions(n):\n    \"\"\"Generate all partitions for square matrices, then combine with replacement\n    and return all possible pairs of all partitions.\n\n    TODO: only works for square matrices, would need to generate two lists of\n    partitions to get around this\n\n    :param n: length of one dimension of the !square! matrix.\n    :returns: combinations of partitions.\n    \"\"\"\n    parts = []\n    for part in tskit.combinatorics.rule_asc(n):\n        for g in set(permutations(part, len(part))):\n            p = []\n            i = iter(range(n))\n            for item in g:\n                p.append([next(i) for _ in range(item)])\n            parts.append(p)\n    combos = []\n    for a, b in combinations_with_replacement({tuple(j) for i in parts for j in i}, 2):\n        combos.append((a, b))\n        combos.append((b, a))\n    combos = [[list(a), list(b)] for a, b in set(combos)]\n    return combos\n\n\n# Generate all partitions of the LD matrix, then pass into test_subset\n@pytest.mark.parametrize(\"partition\", get_matrix_partitions(len(PAPER_EX_TRUTH_MATRIX)))\ndef test_subset_sites(partition):\n    \"\"\"Given a partition of the truth matrix, check that we can successfully\n    compute the LD matrix for that given partition, effectively ensuring that\n    our handling of site subsets is correct.\n\n    :param partition: length 2 list of [row_sites, column_sites].\n    \"\"\"\n    a, b = partition\n    ts = get_paper_ex_ts()\n    np.testing.assert_allclose(\n        ld_matrix(ts, sites=partition),\n        PAPER_EX_TRUTH_MATRIX[a[0] : a[-1] + 1, b[0] : b[-1] + 1],\n    )\n    np.testing.assert_equal(\n        ld_matrix(ts, sites=partition), ts.ld_matrix(sites=partition)\n    
)\n\n\n@pytest.mark.parametrize(\n    \"partition\", get_matrix_partitions(len(PAPER_EX_BRANCH_TRUTH_MATRIX))\n)\ndef test_subset_positions(partition):\n    \"\"\"Given a partition of the truth matrix, check that we can successfully\n    compute the LD matrix for that given partition, effectively ensuring that\n    our handling of positions is correct. We use the midpoint inside of the\n    tree interval as the position for a particular tree.\n\n    :param partition: length 2 list of [row_positions, column_positions].\n    \"\"\"\n    a, b = partition\n    ts = get_paper_ex_ts()\n    bp = ts.breakpoints(as_array=True)\n    mid = (bp[1:] + bp[:-1]) / 2\n    np.testing.assert_allclose(\n        ld_matrix(ts, mode=\"branch\", stat=\"D2_unbiased\", positions=[mid[a], mid[b]]),\n        PAPER_EX_BRANCH_TRUTH_MATRIX[a[0] : a[-1] + 1, b[0] : b[-1] + 1],\n    )\n    np.testing.assert_allclose(\n        ts.ld_matrix(mode=\"branch\", stat=\"D2_unbiased\", positions=[mid[a], mid[b]]),\n        PAPER_EX_BRANCH_TRUTH_MATRIX[a[0] : a[-1] + 1, b[0] : b[-1] + 1],\n    )\n\n\n@pytest.mark.parametrize(\n    \"positions,truth\",\n    [\n        ([0, 1, 2, 3, 4, 5, 6, 7, 8], [0, 1, 2, 3, 4, 5, 6, 7, 8]),\n        ([0], [0]),\n        ([8], [8]),\n        ([1], [1]),\n        ([1, 2, 3], [1, 2, 3]),\n        ([], []),\n    ],\n)\ndef test_positions_to_tree_indices(positions, truth):\n    breakpoints = np.arange(10, dtype=np.float64)\n    np.testing.assert_equal(positions_to_tree_indices(breakpoints, positions), truth)\n\n\ndef test_bad_positions():\n    with pytest.raises(IndexError, match=\"out of bounds\"):\n        breakpoints = np.arange(10, dtype=np.float64)\n        positions_to_tree_indices(breakpoints, breakpoints)\n\n\n@pytest.mark.parametrize(\"sites\", [[0, 1, 2], [1, 2], [0, 1], [0], [1]])\ndef test_subset_sites_one_list(sites):\n    \"\"\"Test the case where we pass only one list of sites to compute. 
This\n    should return a square matrix comparing the sites to themselves.\n    \"\"\"\n    ts = get_paper_ex_ts()\n    np.testing.assert_equal(ld_matrix(ts, sites=[sites]), ts.ld_matrix(sites=[sites]))\n\n\n@pytest.mark.parametrize(\"tree_index\", [[0, 1, 2], [1, 2], [0, 1], [0], [1]])\ndef test_subset_positions_one_list(tree_index):\n    \"\"\"Test the case where we pass only one list of positions to compute. This\n    should return a square matrix comparing the positions to themselves.\n    \"\"\"\n    ts = get_paper_ex_ts()\n    bp = ts.breakpoints(as_array=True)\n    mid = (bp[1:] + bp[:-1]) / 2\n    np.testing.assert_allclose(\n        ld_matrix(ts, mode=\"branch\", stat=\"D2_unbiased\", positions=[mid[tree_index]]),\n        PAPER_EX_BRANCH_TRUTH_MATRIX[\n            tree_index[0] : tree_index[-1] + 1, tree_index[0] : tree_index[-1] + 1\n        ],\n    )\n    np.testing.assert_allclose(\n        ts.ld_matrix(mode=\"branch\", stat=\"D2_unbiased\", positions=[mid[tree_index]]),\n        PAPER_EX_BRANCH_TRUTH_MATRIX[\n            tree_index[0] : tree_index[-1] + 1, tree_index[0] : tree_index[-1] + 1\n        ],\n    )\n\n\n@pytest.mark.parametrize(\n    \"tree_index\",\n    [\n        ([0, 0, 1, 2], [1, 2]),\n        ([0, 0, 0, 2], [0, 2]),\n        ([1, 1, 1], [1]),\n        ([2, 2], [1]),\n        ([0, 2, 2, 2], [0, 0, 0]),\n        ([0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 0, 0, 1, 1, 1, 2, 2, 2]),\n    ],\n)\ndef test_repeated_position_elements(tree_index):\n    \"\"\"Test that we repeat positions in the LD matrix when we have multiple positions\n    that overlap the same tree when specifying positions in branch mode.\n    \"\"\"\n    ts = get_paper_ex_ts()\n    l, r = tree_index\n    bp = ts.breakpoints(as_array=True)\n    val, count = np.unique(l, return_counts=True)\n    l_pos = np.hstack(\n        [np.linspace(bp[v], bp[v + 1], count[i] + 2)[1:-1] for i, v in enumerate(val)]\n    )\n    val, count = np.unique(r, return_counts=True)\n    r_pos = 
np.hstack(\n        [np.linspace(bp[v], bp[v + 1], count[i] + 2)[1:-1] for i, v in enumerate(val)]\n    )\n    assert (positions_to_tree_indices(bp, l_pos) == l).all()\n    assert (positions_to_tree_indices(bp, r_pos) == r).all()\n\n    truth = PAPER_EX_BRANCH_TRUTH_MATRIX[\n        [i for i, _ in product(l, r)], [i for _, i in product(l, r)]\n    ].reshape(len(l), len(r))\n\n    np.testing.assert_allclose(\n        truth,\n        ld_matrix(ts, mode=\"branch\", stat=\"D2_unbiased\", positions=[l_pos, r_pos]),\n    )\n    np.testing.assert_allclose(\n        truth,\n        ts.ld_matrix(mode=\"branch\", stat=\"D2_unbiased\", positions=[l_pos, r_pos]),\n    )\n\n\n# Generate all partitions of the samples, producing pairs of sample sets\n@pytest.mark.parametrize(\n    \"partition\", get_matrix_partitions(get_paper_ex_ts().num_samples)\n)\ndef test_sample_sets(partition):\n    \"\"\"Test all partitions of sample sets, ensuring that we are correctly\n    computing stats for various subsets of the samples in a given tree.\n\n    :param partition: length 2 list of [ss_1, ss_2].\n    \"\"\"\n    ts = get_paper_ex_ts()\n    np.testing.assert_allclose(\n        ld_matrix(ts, sample_sets=partition), ts.ld_matrix(sample_sets=partition)\n    )\n\n\ndef test_compare_to_ld_calculator():\n    ts = msprime.sim_ancestry(\n        samples=4, recombination_rate=0.2, sequence_length=10, random_seed=1\n    )\n    ts = msprime.sim_mutations(ts, rate=0.5, random_seed=1, discrete_genome=False)\n    ld_calc = tskit.LdCalculator(ts)\n    np.testing.assert_array_almost_equal(ld_calc.get_r2_matrix(), ts.ld_matrix())\n\n\n@pytest.mark.parametrize(\n    \"stat\",\n    sorted(SUMMARY_FUNCS.keys()),\n)\ndef test_multiallelic_with_back_mutation(stat):\n    ts = msprime.sim_ancestry(\n        samples=4, recombination_rate=0.2, sequence_length=10, random_seed=1\n    )\n    ts = msprime.sim_mutations(ts, rate=0.5, random_seed=1)\n    np.testing.assert_array_almost_equal(\n        ld_matrix(ts, 
stat=stat), ts.ld_matrix(stat=stat)\n    )\n\n\n@pytest.mark.slow\n@pytest.mark.parametrize(\n    \"ts\",\n    [\n        ts\n        for ts in get_example_tree_sequences()\n        if ts.id not in {\"no_samples\", \"empty_ts\"}\n    ],\n)\n# TODO: port unbiased summary functions\n@pytest.mark.parametrize(\n    \"stat\",\n    sorted(SUMMARY_FUNCS.keys()),\n)\ndef test_ld_matrix(ts, stat):\n    np.testing.assert_array_almost_equal(\n        ld_matrix(ts, stat=stat), ts.ld_matrix(stat=stat)\n    )\n\n\n@pytest.mark.parametrize(\n    \"ts\",\n    [ts for ts in get_example_tree_sequences() if ts.id in {\"no_samples\", \"empty_ts\"}],\n)\ndef test_ld_empty_examples(ts):\n    with pytest.raises(ValueError, match=\"at least one element\"):\n        ts.ld_matrix()\n    with pytest.raises(ValueError, match=\"at least one element\"):\n        ts.ld_matrix(mode=\"branch\")\n\n\ndef test_input_validation():\n    ts = get_paper_ex_ts()\n    with pytest.raises(ValueError, match=\"Unknown two-locus statistic\"):\n        ts.ld_matrix(stat=\"bad_stat\")\n\n    with pytest.raises(ValueError, match=\"must be a list of\"):\n        ts.ld_matrix(sites=[\"abc\"])\n    with pytest.raises(ValueError, match=\"must be a list of\"):\n        ts.ld_matrix(sites=[1, 2, 3])\n    with pytest.raises(ValueError, match=\"must be a length 1 or 2 list\"):\n        ts.ld_matrix(sites=[[1, 2], [2, 3], [3, 4]])\n    with pytest.raises(ValueError, match=\"must be a length 1 or 2 list\"):\n        ts.ld_matrix(sites=[[1, 2], [2, 3], [3, 4]])\n    with pytest.raises(ValueError, match=\"must be a length 1 or 2 list\"):\n        ts.ld_matrix(sites=[])\n\n    with pytest.raises(ValueError, match=\"must be a list of\"):\n        ts.ld_matrix(positions=[\"abc\"], mode=\"branch\")\n    with pytest.raises(ValueError, match=\"must be a list of\"):\n        ts.ld_matrix(positions=[1.0, 2.0, 3.0], mode=\"branch\")\n    with pytest.raises(ValueError, match=\"must be a length 1 or 2 list\"):\n        
ts.ld_matrix(positions=[[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]], mode=\"branch\")\n    with pytest.raises(ValueError, match=\"must be a length 1 or 2 list\"):\n        ts.ld_matrix(positions=[], mode=\"branch\")\n\n    with pytest.raises(\n        ValueError, match=\"Sample sets must contain at least one element\"\n    ):\n        ts.ld_matrix(sample_sets=[[1, 2, 3], []], indexes=[])\n    with pytest.raises(\n        ValueError, match=\"Indexes must be convertable to a 2D numpy array\"\n    ):\n        ts.ld_matrix(\n            sample_sets=[ts.samples(), ts.samples()], indexes=[[1, 2, 3], [2, 3, 4]]\n        )\n\n\n@dataclass\nclass TreeState:\n    \"\"\"\n    Class for storing tree state from one iteration to the next. This object\n    enables easy copying of the state for computing a matrix.\n    \"\"\"\n\n    pos: tsutil.TreeIndexes  # current position in the tree sequence\n    parent: np.ndarray  # parent node of a given node (connected by an edge)\n    branch_len: np.ndarray  # length of the branch above a particular child node\n    node_samples: BitSet  # samples that exist under a given node, this is a\n    # bitset with a row for each node and sample set. 
Rows are grouped by node,\n    # for example:\n    # node sample_set\n    # 0    0\n    # 0    1\n    # 1    0\n    # 1    1\n    edges_out: list[int]  # list of edges removed during iteration\n    edges_in: list[int]  # list of edges added during iteration\n\n    def __init__(self, ts, sample_sets, num_sample_sets, sample_index_map):\n        self.pos = tsutil.TreeIndexes(ts)\n        self.parent = -np.ones(ts.num_nodes, dtype=np.int64)\n        self.branch_len = np.zeros(ts.num_nodes, dtype=np.float64)\n        self.node_samples = BitSet(ts.num_samples, ts.num_nodes * num_sample_sets)\n        # Create a bit array to store all samples under each node for each sample set.\n        # We initialize with the samples under the sample nodes.\n        for n in range(ts.num_nodes):\n            for k in range(num_sample_sets):\n                if sample_sets.contains(k, sample_index_map[n]):\n                    self.node_samples.add((num_sample_sets * n) + k, sample_index_map[n])\n        # these are empty for the uninitialized state (index = -1)\n        self.edges_in = []\n        self.edges_out = []\n\n    def advance(self, index):\n        \"\"\"\n        Advance tree to next tree position. If the tree is still uninitialized,\n        seeks may be performed to an arbitrary position. Since we need to\n        compute stats over contiguous ranges of trees, once we've seeked to a\n        position, we step forward by one tree. 
Finally, we set `edges_in` and\n        `edges_out` to be consumed by the downstream stats function.\n\n        :param index: Tree index to advance to\n        \"\"\"\n\n        # if initialized or seeking to the first position from the beginning, jump\n        # forward one tree\n        if self.pos.index != tskit.NULL or index == 0:\n            if index != 0:\n                assert index == self.pos.index + 1, \"only one step allowed\"\n            assert self.pos.next(), \"out of bounds\"\n            edges_out = [\n                self.pos.out_range.order[j]\n                for j in range(self.pos.out_range.start, self.pos.out_range.stop)\n            ]\n            edges_in = [\n                self.pos.in_range.order[j]\n                for j in range(self.pos.in_range.start, self.pos.in_range.stop)\n            ]\n            self.edges_out = edges_out\n            self.edges_in = edges_in\n            return\n\n        # if uninitialized (no current position), and seeking to an arbitrary point\n        # in the tree, use seek_forward\n        edges_out, edges_in = [], []\n        self.pos.seek_forward(index)\n        left = self.pos.interval.left\n        # since we're starting from an uninitialized tree, we only add edges\n        for j in range(self.pos.in_range.start, self.pos.in_range.stop):\n            e = self.pos.in_range.order[j]\n            # skip over edges that are not in the current tree\n            if self.pos.ts.edges_left[e] <= left < self.pos.ts.edges_right[e]:\n                edges_in.append(e)\n\n        self.edges_out = edges_out\n        self.edges_in = edges_in\n        return\n\n\ndef compute_branch_stat_update(\n    c,\n    A_state,\n    B_state,\n    state_dim,\n    result_dim,\n    sign,\n    stat_func,\n    num_samples,\n    result,\n    params,\n):\n    \"\"\"Compute an update to the two-locus statistic for a single subset of the\n    tree being modified, relative to all subsets of the fixed tree.\n\n    :param c: Child 
node of the edge we're modifying\n    :param A_state: State for the tree contributing to the A samples (fixed)\n    :param B_state: State for the tree contributing to the B samples (modified)\n    :param state_dim: Number of sample sets.\n    :param result_dim: Number of dimensions in output. Dependent on arity of stat.\n    :param sign: The sign of the update\n    :param stat_func: Function used to compute the two-locus statistic\n    :param num_samples: Number of samples in the tree sequence\n    :param result: Vector of LD results, length of number of sample sets\n    :param params: Params of summary function.\n    \"\"\"\n    b_len = B_state.branch_len[c] * sign\n    if b_len == 0:\n        return result\n\n    AB_samples = BitSet(num_samples, 1)\n    weights = np.zeros((3, state_dim), dtype=np.int64)\n    result_tmp = np.zeros(result_dim, np.float64)\n\n    for n in np.where(A_state.branch_len > 0)[0]:\n        a_len = A_state.branch_len[n]\n        for k in range(state_dim):\n            row = (state_dim * n) + k\n            c_row = (state_dim * c) + k\n            # Samples under the modified edge and the current fixed tree node are AB\n            A_state.node_samples.intersect(row, B_state.node_samples, c_row, AB_samples)\n\n            w_AB = AB_samples.count(0)\n            w_A = A_state.node_samples.count(row)\n            w_B = B_state.node_samples.count(c_row)\n\n            weights[0, k] = w_AB\n            weights[1, k] = w_A - w_AB  # w_Ab\n            weights[2, k] = w_B - w_AB  # w_aB\n\n        stat_func(state_dim, weights, result_dim, result_tmp, params)\n        for k in range(result_dim):\n            result[k] += result_tmp[k] * a_len * b_len\n\n\ndef compute_branch_stat(\n    ts: tskit.TreeSequence,\n    stat_func,\n    stat,\n    params,\n    state_dim,\n    result_dim,\n    l_state: TreeState,\n    r_state: TreeState,\n):\n    \"\"\"Step between trees in a tree sequence, updating our two-locus statistic\n    as we add or remove edges. 
Since we're computing statistics for two loci, we\n    have a focal tree that remains constant, and a tree that is updated to\n    represent the tree we're comparing to. The lefthand tree is held constant\n    and the righthand tree is modified. The statistic is updated as we add and\n    remove branches, and when we reach the point where the righthand tree is\n    fully updated, the statistic will have been updated to the two-locus\n    statistic between both trees.\n\n    For instance, if we pass in the l_state for tree 0 and the r_state for tree\n    0, we will update the r_state until r_state contains the information for\n    tree 1. Then, the statistic will represent the LD between tree 0 and tree 1.\n\n    Currently, iteration happens in the forward direction.\n\n    :param ts: The underlying tree sequence object that we're iterating across.\n    :param stat_func: A function that computes the two locus statistic, given\n                      haplotype counts.\n    :param stat: The two-locus statistic computed between two trees.\n    :param params: Params of summary function.\n    :param state_dim: Number of sample sets.\n    :param result_dim: Number of dimensions in output. 
Dependent on arity of stat.\n    :param l_state: The lefthand constant state\n    :param r_state: The righthand state to be updated\n    :returns: A tuple containing the statistic between the two trees after\n              branch updates and the righthand tree state.\n    \"\"\"\n    num_samples = ts.num_samples\n    time = ts.tables.nodes.time\n    updates = BitSet(ts.num_nodes, 1)\n\n    # Identify modified nodes\n    for e in r_state.edges_out + r_state.edges_in:\n        p = ts.edges_parent[e]\n        c = ts.edges_child[e]\n        # identify affected nodes above child\n        while p != tskit.NULL:\n            updates.add(0, c)\n            c = p\n            p = r_state.parent[p]\n\n    # Subtract the whole contribution from child node\n    for c in updates.get_items(0):\n        compute_branch_stat_update(\n            c,\n            l_state,\n            r_state,\n            state_dim,\n            result_dim,\n            -1,\n            stat_func,\n            num_samples,\n            stat,\n            params,\n        )\n\n    # Sample Removal\n    for e in r_state.edges_out:\n        p = ts.edges_parent[e]\n        ec = ts.edges_child[e]\n        # update samples under nodes, propagate upwards\n        while p != tskit.NULL:\n            for k in range(state_dim):\n                r_state.node_samples.difference(\n                    state_dim * p + k, r_state.node_samples, state_dim * ec + k\n                )\n            p = r_state.parent[p]\n        # set the parent to prevent upwards iteration\n        r_state.branch_len[ec] = 0\n        r_state.parent[ec] = tskit.NULL\n\n    # Sample Addition\n    for e in r_state.edges_in:\n        p = ts.edges_parent[e]\n        ec = c = ts.edges_child[e]\n        r_state.branch_len[c] = time[p] - time[c]\n        r_state.parent[c] = p\n        # update samples under nodes, store modified node, propagate upwards\n        while p != tskit.NULL:\n            updates.add(0, c)\n            for k in 
range(state_dim):\n                r_state.node_samples.union(\n                    state_dim * p + k, r_state.node_samples, state_dim * ec + k\n                )\n            c = p\n            p = r_state.parent[p]\n\n    # Update all affected child nodes (fully subtracted, deferred from addition)\n    for c in updates.get_items(0):\n        compute_branch_stat_update(\n            c,\n            l_state,\n            r_state,\n            state_dim,\n            result_dim,\n            +1,\n            stat_func,\n            num_samples,\n            stat,\n            params,\n        )\n\n    return stat, r_state\n\n\n@pytest.mark.parametrize(\n    \"ts\",\n    [\n        ts\n        for ts in get_example_tree_sequences()\n        if ts.id\n        not in {\n            \"no_samples\",\n            \"empty_ts\",\n            # We must skip these cases so that tests run in a reasonable\n            # amount of time. To get more complete testing, these filters\n            # can be commented out. 
(runtime ~1hr)\n            \"gap_0\",\n            \"gap_0.1\",\n            \"gap_0.5\",\n            \"gap_0.75\",\n            \"n=2_m=32_rho=0\",\n            \"n=10_m=1_rho=0\",\n            \"n=10_m=1_rho=0.1\",\n            \"n=10_m=2_rho=0\",\n            \"n=10_m=2_rho=0.1\",\n            \"n=10_m=32_rho=0\",\n            \"n=10_m=32_rho=0.1\",\n            \"n=10_m=32_rho=0.5\",\n            # we keep one n=100 case to ensure bit arrays are working\n            \"n=100_m=1_rho=0.1\",\n            \"n=100_m=1_rho=0.5\",\n            \"n=100_m=2_rho=0\",\n            \"n=100_m=2_rho=0.1\",\n            \"n=100_m=2_rho=0.5\",\n            \"n=100_m=32_rho=0\",\n            \"n=100_m=32_rho=0.1\",\n            \"n=100_m=32_rho=0.5\",\n            \"all_fields\",\n            \"back_mutations\",\n            \"multichar\",\n            \"multichar_no_metadata\",\n            \"bottleneck_n=100_mutated\",\n        }\n    ],\n)\n@pytest.mark.parametrize(\"stat\", sorted(SUMMARY_FUNCS.keys()))\ndef test_branch_ld_matrix(ts, stat):\n    np.testing.assert_array_almost_equal(\n        ts.ld_matrix(stat=stat, mode=\"branch\"), ld_matrix(ts, stat=stat, mode=\"branch\")\n    )\n\n\ndef get_test_branch_sample_set_test_cases():\n    p_dict = {ps.id: ps for ps in get_example_tree_sequences()}\n    return [\n        pytest.param(\n            p_dict[\"n=100_m=1_rho=0\"].values[0],\n            [[51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67]],\n            id=\"n=100_m=1_rho=0\",\n        ),\n        pytest.param(\n            p_dict[\"all_nodes_samples\"].values[0],\n            [[2, 4, 5, 6]],\n            id=\"all_nodes_samples\",\n        ),\n        pytest.param(\n            p_dict[\"bottleneck_n=10_mutated\"].values[0],\n            [[1, 2, 4, 9]],\n            id=\"bottleneck_n=10_mutated\",\n        ),\n        pytest.param(p_dict[\"gap_at_end\"].values[0], [[1, 3, 5, 8]], id=\"gap_at_end\"),\n    
]\n\n\n@pytest.mark.parametrize(\"ts,sample_set\", get_test_branch_sample_set_test_cases())\n@pytest.mark.parametrize(\"stat\", sorted(SUMMARY_FUNCS.keys()))\ndef test_branch_ld_matrix_sample_sets(ts, sample_set, stat):\n    np.testing.assert_array_almost_equal(\n        ld_matrix(ts, stat=stat, mode=\"branch\", sample_sets=sample_set),\n        ts.ld_matrix(stat=stat, mode=\"branch\", sample_sets=sample_set),\n    )\n\n\ndef get_test_branch_2pop_test_cases():\n    p_dict = {ps.id: ps for ps in get_example_tree_sequences()}\n    return [\n        pytest.param(\n            p_dict[\"n=100_m=1_rho=0\"].values[0],\n            [\n                [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67],\n                [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67],\n            ],\n            id=\"n=100_m=1_rho=0\",\n        ),\n        pytest.param(\n            p_dict[\"all_nodes_samples\"].values[0],\n            [[2, 4, 5, 6], [2, 4, 5, 6]],\n            id=\"all_nodes_samples\",\n        ),\n        pytest.param(\n            p_dict[\"bottleneck_n=10_mutated\"].values[0],\n            [[1, 2, 4, 9], [1, 2, 4, 9]],\n            id=\"bottleneck_n=10_mutated\",\n        ),\n        pytest.param(\n            p_dict[\"gap_at_end\"].values[0],\n            [[1, 3, 5, 8], [1, 3, 5, 8]],\n            id=\"gap_at_end\",\n        ),\n    ]\n\n\n@pytest.mark.parametrize(\"ts,sample_set\", get_test_branch_2pop_test_cases())\n@pytest.mark.parametrize(\n    \"stat\", sorted([f for f in TWO_WAY_SUMMARY_FUNCS.keys() if \"unbiased\" not in f])\n)\ndef test_branch_ld_matrix_2pop_sample_sets(ts, sample_set, stat):\n    oneway_result = ts.ld_matrix(stat=stat, mode=\"branch\", sample_sets=sample_set[0])\n    # biased two-way statistics between two identical sample sets are equal to\n    # results from the one-way statistic.\n    np.testing.assert_array_almost_equal(\n        ld_matrix(ts, stat=stat, mode=\"branch\", sample_sets=sample_set, 
indexes=(0, 1)),\n        oneway_result,\n    )\n\n\n@pytest.mark.parametrize(\"ts,sample_set\", get_test_branch_2pop_test_cases())\n@pytest.mark.parametrize(\n    \"stat\", sorted([f for f in TWO_WAY_SUMMARY_FUNCS.keys() if \"unbiased\" in f])\n)\ndef test_branch_ld_matrix_2pop_sample_sets_unbiased(ts, sample_set, stat):\n    oneway_result = ts.ld_matrix(stat=stat, mode=\"branch\", sample_sets=sample_set[0])\n    # If the indexes are the same between two identical sample sets, we recover\n    # the one-way statistic. We do not make any assertions about sample disjointedness\n    np.testing.assert_array_almost_equal(\n        ld_matrix(ts, stat=stat, mode=\"branch\", sample_sets=sample_set, indexes=(0, 0)),\n        oneway_result,\n    )\n\n\ndef gen_dims_test_cases(ts, mode):\n    ss = ts.samples()\n    dim = ts.num_sites if mode == \"site\" else ts.num_trees\n    base = (dim, dim)\n    return [\n        {\"name\": f\"{mode}_default\", \"ld_params\": {\"mode\": mode}, \"shape\": base},\n        {\n            \"name\": f\"{mode}_dim_drop\",\n            \"ld_params\": {\"mode\": mode, \"sample_sets\": ss},\n            \"shape\": base,\n        },\n        {\n            \"name\": f\"{mode}_no_dim_drop\",\n            \"ld_params\": {\"mode\": mode, \"sample_sets\": [ss]},\n            \"shape\": (1, *base),\n        },\n        {\n            \"name\": f\"{mode}_two_sample_sets\",\n            \"ld_params\": {\"mode\": mode, \"sample_sets\": [ss, ss]},\n            \"shape\": (2, *base),\n        },\n        {\n            \"name\": f\"{mode}_two_way_dim_drop\",\n            \"ld_params\": {\"mode\": mode, \"sample_sets\": [ss, ss], \"indexes\": (0, 1)},\n            \"shape\": base,\n        },\n        {\n            \"name\": f\"{mode}_two_way_no_dim_drop\",\n            \"ld_params\": {\"mode\": mode, \"sample_sets\": [ss, ss], \"indexes\": [(0, 1)]},\n            \"shape\": (1, *base),\n        },\n        {\n            \"name\": 
f\"{mode}_two_way_three_set_indexes\",\n            \"ld_params\": {\n                \"mode\": mode,\n                \"sample_sets\": [ss, ss],\n                \"indexes\": [(0, 0), (0, 1), (1, 1)],\n            },\n            \"shape\": (3, *base),\n        },\n    ]\n\n\ndef get_test_dims_test_cases():\n    test_cases = {\n        \"empty_tree\",\n        \"all_nodes_samples\",\n        \"n=3_m=32_rho=0.5\",\n        \"rev_node_order\",\n        \"internal_nodes_samples\",\n        \"mixed_internal_leaf_samples\",\n    }\n    for ts_case in [t for t in get_example_tree_sequences() if t.id in test_cases]:\n        ts = ts_case.values[0]\n        for dim_case in gen_dims_test_cases(ts, \"site\"):\n            name = \"_\".join([dim_case[\"name\"], ts_case.id])\n            yield pytest.param(ts, dim_case[\"ld_params\"], dim_case[\"shape\"], id=name)\n        for dim_case in gen_dims_test_cases(ts, \"branch\"):\n            name = \"_\".join([dim_case[\"name\"], ts_case.id])\n            yield pytest.param(ts, dim_case[\"ld_params\"], dim_case[\"shape\"], id=name)\n\n\n@pytest.mark.parametrize(\"ts,params,shape\", get_test_dims_test_cases())\ndef test_dims(ts, params, shape):\n    assert ts.ld_matrix(**params).shape == ld_matrix(ts, **params).shape == shape\n\n\n@pytest.mark.parametrize(\"ts,sample_sets\", get_test_branch_2pop_test_cases())\n@pytest.mark.parametrize(\"stat\", sorted(TWO_WAY_SUMMARY_FUNCS.keys()))\ndef test_two_way_branch_ld_matrix(ts, sample_sets, stat):\n    np.testing.assert_array_almost_equal(\n        ld_matrix(ts, sample_sets=sample_sets, indexes=[(0, 0), (0, 1), (1, 1)]),\n        ts.ld_matrix(sample_sets=sample_sets, indexes=[(0, 0), (0, 1), (1, 1)]),\n    )\n\n\n@pytest.mark.slow\n@pytest.mark.parametrize(\n    \"ts\",\n    [\n        ts\n        for ts in get_example_tree_sequences()\n        if ts.id not in {\"no_samples\", \"empty_ts\"}\n    ],\n)\n@pytest.mark.parametrize(\n    \"stat\",\n    
sorted(TWO_WAY_SUMMARY_FUNCS.keys()),\n)\ndef test_two_way_site_ld_matrix(ts, stat):\n    np.testing.assert_array_almost_equal(\n        ld_matrix(ts, stat=stat), ts.ld_matrix(stat=stat)\n    )\n    ss = [ts.samples()] * 3\n    np.testing.assert_array_almost_equal(\n        ld_matrix(ts, stat=stat, sample_sets=ss, indexes=[(0, 0), (0, 1), (1, 1)]),\n        ts.ld_matrix(stat=stat, sample_sets=ss, indexes=[(0, 0), (0, 1), (1, 1)]),\n    )\n\n\n@pytest.mark.parametrize(\n    \"genotypes,sample_sets,expected\",\n    [\n        (\n            # these genotypes are rows from a genotype matrix (sites x samples)\n            correlated := np.array(\n                [\n                    [0, 1, 1, 0, 2, 2, 1, 0, 2, 0, 1, 2],\n                    [1, 2, 2, 1, 0, 0, 2, 1, 0, 1, 2, 0],\n                ],\n            ),\n            (np.array([0, 1, 2, 3, 4, 5]), np.array([6, 7, 8, 9, 10, 11])),\n            np.float64(1.0),\n        ),\n        (\n            correlated,\n            (np.array([0, 1, 2, 3, 4, 5]), np.array([6, 7, 8, 9, 10])),\n            np.float64(1.0),\n        ),\n        (\n            correlated,\n            (np.array([0, 1, 2, 3, 4, 5]), np.array([6, 7, 8, 9])),\n            np.float64(1.0),\n        ),\n        (\n            correlated,\n            (np.array([0, 1, 2, 3, 4, 5]), np.array([6, 7, 8])),\n            np.float64(1.0),\n        ),\n        (\n            correlated,\n            (np.array([0, 1, 2, 3, 4, 5]), np.array([6, 7])),\n            np.float64(np.nan),\n        ),\n        (\n            correlated,\n            (np.array([0, 1, 2, 3, 4, 5]), np.array([6])),\n            np.float64(np.nan),\n        ),\n        (\n            anticorrelated := np.array(\n                [\n                    [0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, 3, 3, 3, 3],\n                    [1, 1, 1, 1, 3, 3, 3, 3, 0, 0, 0, 0, 2, 2, 2, 2],\n                ]\n            ),\n            (\n                np.array([0, 2, 4, 6, 8, 10, 12, 14]),\n           
     np.array([1, 3, 5, 7, 9, 11, 13, 15]),\n            ),\n            np.float64(1.0),\n        ),\n        (\n            anticorrelated,\n            (np.array([0, 2, 4, 6, 8, 10, 12, 14]), np.array([1, 3, 5, 7, 9, 11, 13])),\n            np.float64(1.0),\n        ),\n        (\n            anticorrelated,\n            (np.array([0, 2, 4, 6, 8, 10, 12, 14]), np.array([1, 3, 5, 7, 9, 11])),\n            np.float64(np.nan),\n        ),\n        (\n            anticorrelated,\n            (np.array([0, 2, 4, 6, 8, 10, 12, 14]), np.array([1, 3, 5, 7, 9])),\n            np.float64(np.nan),\n        ),\n        (\n            anticorrelated,\n            (np.array([0, 2, 4, 6, 8, 10, 12, 14]), np.array([1, 3, 5, 7])),\n            np.float64(np.nan),\n        ),\n        (\n            anticorrelated,\n            (np.array([0, 2, 4, 6, 8, 10, 12, 14]), np.array([1, 3, 5])),\n            np.float64(np.nan),\n        ),\n        (\n            anticorrelated,\n            (np.array([0, 2, 4, 6, 8, 10, 12, 14]), np.array([1, 3])),\n            np.float64(np.nan),\n        ),\n        (\n            anticorrelated,\n            (np.array([0, 2, 4, 6, 8, 10, 12, 14]), np.array([1])),\n            np.float64(np.nan),\n        ),\n    ],\n)\ndef test_multipopulation_r2_varying_unequal_set_sizes(genotypes, sample_sets, expected):\n    a, b = genotypes\n    state_dim = len(sample_sets)\n    state = np.zeros((3, state_dim), dtype=int)\n    result = np.zeros((max(a) + 1, max(b) + 1, 1))\n    norm = np.zeros_like(result)\n    params = dict(sample_set_sizes=list(map(len, sample_sets)), set_indexes=[(0, 1)])\n    for i, j in np.ndindex(result.shape[:2]):\n        for k, ss in enumerate(sample_sets):\n            A = a[ss] == i\n            B = b[ss] == j\n            state[:, k] = (A & B).sum(), (A & ~B).sum(), (~A & B).sum()\n        r2_ij_summary_func(state_dim, state, 1, result[i, j], params)\n        norm_hap_weighted_ij(1, state, max(a) + 1, max(b) + 1, norm[i, j], 
params)\n\n    np.testing.assert_allclose((result * norm).sum(), expected)\n"
  },
  {
    "path": "python/tests/test_metadata.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2025 Tskit Developers\n# Copyright (c) 2017 University of Oxford\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTests for metadata handling.\n\"\"\"\n\nimport collections\nimport io\nimport json\nimport os\nimport pickle\nimport pprint\nimport struct\nimport tempfile\nimport unittest\nfrom unittest.mock import patch\n\nimport msgpack\nimport msprime\nimport numpy as np\nimport pytest\n\nimport tskit\nimport tskit.exceptions as exceptions\nimport tskit.metadata as metadata\n\n\nclass TestMetadataRoundTrip(unittest.TestCase):\n    \"\"\"\n    Tests that we can encode metadata under various formats.\n    \"\"\"\n\n    def setUp(self):\n        fd, self.temp_file = tempfile.mkstemp(prefix=\"msp_meta_test_\")\n        os.close(fd)\n\n    def tearDown(self):\n        os.unlink(self.temp_file)\n\n    def test_json(self):\n        ts = msprime.simulate(10, random_seed=1)\n        tables = 
ts.dump_tables()\n        nodes = tables.nodes\n        # For each node, we create some Python metadata that can be JSON encoded.\n        metadata = [\n            {\"one\": j, \"two\": 2 * j, \"three\": list(range(j))} for j in range(len(nodes))\n        ]\n        encoded, offset = tskit.pack_strings(map(json.dumps, metadata))\n        nodes.set_columns(\n            flags=nodes.flags,\n            time=nodes.time,\n            population=nodes.population,\n            metadata_offset=offset,\n            metadata=encoded,\n        )\n        assert np.array_equal(nodes.metadata_offset, offset)\n        assert np.array_equal(nodes.metadata, encoded)\n        ts1 = tables.tree_sequence()\n        for j, node in enumerate(ts1.nodes()):\n            decoded_metadata = json.loads(node.metadata.decode())\n            assert decoded_metadata == metadata[j]\n        ts1.dump(self.temp_file)\n        ts2 = tskit.load(self.temp_file)\n        assert ts1.tables.nodes == ts2.tables.nodes\n\n    def test_pickle(self):\n        ts = msprime.simulate(10, random_seed=1)\n        tables = ts.dump_tables()\n        # For each node, we create some Python metadata that can be pickled\n        metadata = [\n            {\"one\": j, \"two\": 2 * j, \"three\": list(range(j))}\n            for j in range(ts.num_nodes)\n        ]\n        encoded, offset = tskit.pack_bytes(list(map(pickle.dumps, metadata)))\n        tables.nodes.set_columns(\n            flags=tables.nodes.flags,\n            time=tables.nodes.time,\n            population=tables.nodes.population,\n            metadata_offset=offset,\n            metadata=encoded,\n        )\n        assert np.array_equal(tables.nodes.metadata_offset, offset)\n        assert np.array_equal(tables.nodes.metadata, encoded)\n        ts1 = tables.tree_sequence()\n        for j, node in enumerate(ts1.nodes()):\n            decoded_metadata = pickle.loads(node.metadata)\n            assert decoded_metadata == metadata[j]\n        
ts1.dump(self.temp_file)\n        ts2 = tskit.load(self.temp_file)\n        assert ts1.tables.nodes == ts2.tables.nodes\n\n\nclass ExampleMetadata:\n    \"\"\"\n    Simple class that we can pickle/unpickle in metadata.\n    \"\"\"\n\n    def __init__(self, one=None, two=None):\n        self.one = one\n        self.two = two\n\n\nclass TestMetadataPickleDecoding:\n    \"\"\"\n    Tests in which we use pickle.loads to decode metadata in nodes, sites and mutations.\n    \"\"\"\n\n    def test_nodes(self):\n        tables = tskit.TableCollection(sequence_length=1)\n        metadata = ExampleMetadata(one=\"node1\", two=\"node2\")\n        pickled = pickle.dumps(metadata)\n        tables.nodes.add_row(time=0.125, metadata=pickled)\n        ts = tables.tree_sequence()\n        node = ts.node(0)\n        assert node.time == 0.125\n        assert node.metadata == pickled\n        unpickled = pickle.loads(node.metadata)\n        assert unpickled.one == metadata.one\n        assert unpickled.two == metadata.two\n\n    def test_sites(self):\n        tables = tskit.TableCollection(sequence_length=1)\n        metadata = ExampleMetadata(one=\"node1\", two=\"node2\")\n        pickled = pickle.dumps(metadata)\n        tables.sites.add_row(position=0.1, ancestral_state=\"A\", metadata=pickled)\n        ts = tables.tree_sequence()\n        site = ts.site(0)\n        assert site.position == 0.1\n        assert site.ancestral_state == \"A\"\n        assert site.metadata == pickled\n        unpickled = pickle.loads(site.metadata)\n        assert unpickled.one == metadata.one\n        assert unpickled.two == metadata.two\n\n    def test_mutations(self):\n        tables = tskit.TableCollection(sequence_length=1)\n        metadata = ExampleMetadata(one=\"node1\", two=\"node2\")\n        pickled = pickle.dumps(metadata)\n        tables.nodes.add_row(time=0)\n        tables.sites.add_row(position=0.1, ancestral_state=\"A\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"T\", 
metadata=pickled)\n        ts = tables.tree_sequence()\n        mutation = ts.site(0).mutations[0]\n        assert mutation.site == 0\n        assert mutation.node == 0\n        assert mutation.derived_state == \"T\"\n        assert mutation.metadata == pickled\n        unpickled = pickle.loads(mutation.metadata)\n        assert unpickled.one == metadata.one\n        assert unpickled.two == metadata.two\n\n\nclass TestLoadTextMetadata:\n    \"\"\"\n    Tests that use the load_text interface.\n    \"\"\"\n\n    def test_individuals(self):\n        individuals = io.StringIO(\n            \"\"\"\\\n        id  flags location     parents  metadata\n        0   1     0.0,1.0,0.0  -1,-1    abc\n        1   1     1.0,2.0      0,0      XYZ+\n        2   0     2.0,3.0,0.0  0,1      !@#$%^&*()\n        \"\"\"\n        )\n        i = tskit.parse_individuals(\n            individuals, strict=False, encoding=\"utf8\", base64_metadata=False\n        )\n        expected = [\n            (1, [0.0, 1.0, 0.0], [-1, -1], \"abc\"),\n            (1, [1.0, 2.0], [0, 0], \"XYZ+\"),\n            (0, [2.0, 3.0, 0.0], [0, 1], \"!@#$%^&*()\"),\n        ]\n        for a, b in zip(expected, i):\n            assert a[0] == b.flags\n            assert len(a[1]) == len(b.location)\n            for x, y in zip(a[1], b.location):\n                assert x == y\n            assert len(a[2]) == len(b.parents)\n            for x, y in zip(a[2], b.parents):\n                assert x == y\n        assert a[3].encode(\"utf8\") == b.metadata\n\n    def test_nodes(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time    metadata\n        0   1           0   abc\n        1   1           0   XYZ+\n        2   0           1   !@#$%^&*()\n        \"\"\"\n        )\n        n = tskit.parse_nodes(\n            nodes, strict=False, encoding=\"utf8\", base64_metadata=False\n        )\n        expected = [\"abc\", \"XYZ+\", \"!@#$%^&*()\"]\n        for a, b in zip(expected, n):\n  
          assert a.encode(\"utf8\") == b.metadata\n\n    @pytest.mark.parametrize(\n        \"base64_metadata,metadata_text,expected\",\n        [(True, \"YWJj\", b\"abc\"), (False, \"plain\", b\"plain\")],\n    )\n    def test_edges_metadata(self, base64_metadata, metadata_text, expected):\n        edges = io.StringIO(\n            f\"\"\"\\\n        left    right    parent    child    metadata\n        0.0     1.0      2         0,1      {metadata_text}\n        \"\"\"\n        )\n        table = tskit.parse_edges(\n            edges, strict=False, encoding=\"utf8\", base64_metadata=base64_metadata\n        )\n        assert len(table) == 2\n        for row in table:\n            assert row.metadata == expected\n\n    def test_edges_without_metadata_column(self):\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right    parent    child\n        0.0     1.0      2         3\n        \"\"\"\n        )\n        table = tskit.parse_edges(edges, strict=False, encoding=\"utf8\")\n        assert len(table) == 1\n        assert table[0].metadata == b\"\"\n\n    def test_sites(self):\n        sites = io.StringIO(\n            \"\"\"\\\n        position    ancestral_state metadata\n        0.1 A   abc\n        0.5 C   XYZ+\n        0.8 G   !@#$%^&*()\n        \"\"\"\n        )\n        s = tskit.parse_sites(\n            sites, strict=False, encoding=\"utf8\", base64_metadata=False\n        )\n        expected = [\"abc\", \"XYZ+\", \"!@#$%^&*()\"]\n        for a, b in zip(expected, s):\n            assert a.encode(\"utf8\") == b.metadata\n\n    def test_mutations(self):\n        mutations = io.StringIO(\n            \"\"\"\\\n        site    node    derived_state   metadata\n        0   2   C   mno\n        0   3   G   )(*&^%$#@!\n        \"\"\"\n        )\n        m = tskit.parse_mutations(\n            mutations, strict=False, encoding=\"utf8\", base64_metadata=False\n        )\n        expected = [\"mno\", \")(*&^%$#@!\"]\n        for a, b in 
zip(expected, m):\n            assert a.encode(\"utf8\") == b.metadata\n\n    def test_populations(self):\n        populations = io.StringIO(\n            \"\"\"\\\n        id    metadata\n        0     mno\n        1     )(*&^%$#@!\n        \"\"\"\n        )\n        p = tskit.parse_populations(\n            populations, strict=False, encoding=\"utf8\", base64_metadata=False\n        )\n        expected = [\"mno\", \")(*&^%$#@!\"]\n        for a, b in zip(expected, p):\n            assert a.encode(\"utf8\") == b.metadata\n\n    @pytest.mark.parametrize(\n        \"base64_metadata,expected\", [(True, [\"pop\", \"gen\"]), (False, [\"cG9w\", \"Z2Vu\"])]\n    )\n    def test_migrations(self, base64_metadata, expected):\n        migrations = io.StringIO(\n            \"\"\"\\\n        left    right    node    source    dest    time    metadata\n        10    100    0    3    4    123.0    cG9w\n        150    360    1    1    2    307.0    Z2Vu\n        \"\"\"\n        )\n        m = tskit.parse_migrations(\n            migrations, strict=False, encoding=\"utf8\", base64_metadata=base64_metadata\n        )\n        for a, b in zip(expected, m):\n            assert a.encode(\"utf8\") == b.metadata\n\n\nclass TestMetadataModule:\n    \"\"\"\n    Tests that use the metadata module\n    \"\"\"\n\n    def test_metadata_schema(self):\n        # Bad jsonschema\n        with pytest.raises(exceptions.MetadataSchemaValidationError):\n            metadata.MetadataSchema(\n                {\"codec\": \"json\", \"additionalProperties\": \"THIS ISN'T RIGHT\"},\n            )\n        # Bad codec\n        with pytest.raises(exceptions.MetadataSchemaValidationError):\n            metadata.MetadataSchema({\"codec\": \"morse-code\"})\n        # Missing codec\n        with pytest.raises(exceptions.MetadataSchemaValidationError):\n            metadata.MetadataSchema({})\n        schema = {\n            \"codec\": \"json\",\n            \"title\": \"Example Metadata\",\n            
\"type\": \"object\",\n            \"properties\": {\"one\": {\"type\": \"string\"}, \"two\": {\"type\": \"number\"}},\n            \"required\": [\"one\", \"two\"],\n            \"additionalProperties\": False,\n        }\n        ms = metadata.MetadataSchema(schema)\n        assert repr(ms) == tskit.canonical_json(schema)\n        # Missing required properties\n        with pytest.raises(exceptions.MetadataValidationError):\n            ms.validate_and_encode_row({})\n\n    def test_schema_str(self):\n        schema = {\n            \"codec\": \"json\",\n            \"title\": \"Example Metadata\",\n            \"type\": \"object\",\n            \"properties\": {\"one\": {\"type\": \"string\"}, \"two\": {\"type\": \"number\"}},\n            \"required\": [\"one\", \"two\"],\n            \"additionalProperties\": False,\n        }\n        assert (\n            str(metadata.MetadataSchema(schema))\n            == f\"tskit.MetadataSchema(\\n{pprint.pformat(schema)}\\n)\"\n        )\n\n    def test_register_codec(self):\n        class TestCodec(metadata.AbstractMetadataCodec):\n            pass\n\n        metadata.register_metadata_codec(TestCodec, \"test\")\n        assert TestCodec == metadata.codec_registry[\"test\"]\n\n    def test_parse(self):\n        # Empty string gives MetaDataSchema with None codec\n        ms = metadata.parse_metadata_schema(\"\")\n        assert isinstance(ms, metadata.MetadataSchema)\n        assert ms.schema is None\n        assert ms.asdict() is None\n\n        # json gives MetaDataSchema with json codec\n        ms = metadata.parse_metadata_schema(json.dumps({\"codec\": \"json\"}))\n        assert isinstance(ms, metadata.MetadataSchema)\n        assert ms.schema == {\"codec\": \"json\"}\n        assert ms.asdict() == {\"codec\": \"json\"}\n        # check we get a copy\n        assert ms.asdict() is not ms._schema\n\n        # Bad JSON gives error\n        with pytest.raises(ValueError):\n            
metadata.parse_metadata_schema(json.dumps({\"codec\": \"json\"})[:-1])\n\n    def test_canonical_string(self):\n        schema = collections.OrderedDict(\n            codec=\"json\",\n            title=\"Example Metadata\",\n            type=\"object\",\n            properties=collections.OrderedDict(\n                one={\"type\": \"string\"}, two={\"type\": \"number\"}\n            ),\n            required=[\"one\", \"two\"],\n            additionalProperties=False,\n        )\n        schema2 = collections.OrderedDict(\n            type=\"object\",\n            properties=collections.OrderedDict(\n                two={\"type\": \"number\"}, one={\"type\": \"string\"}\n            ),\n            required=[\"one\", \"two\"],\n            additionalProperties=False,\n            title=\"Example Metadata\",\n            codec=\"json\",\n        )\n        assert json.dumps(schema) != json.dumps(schema2)\n        assert repr(metadata.MetadataSchema(schema)) == repr(\n            metadata.MetadataSchema(schema2)\n        )\n\n    def test_equality(self):\n        schema = metadata.MetadataSchema(\n            {\n                \"codec\": \"json\",\n                \"title\": \"Example Metadata\",\n                \"type\": \"object\",\n                \"properties\": {\"one\": {\"type\": \"string\"}, \"two\": {\"type\": \"number\"}},\n                \"required\": [\"one\", \"two\"],\n                \"additionalProperties\": False,\n            }\n        )\n        schema_same = metadata.MetadataSchema(\n            collections.OrderedDict(\n                type=\"object\",\n                properties=collections.OrderedDict(\n                    two={\"type\": \"number\"}, one={\"type\": \"string\"}\n                ),\n                required=[\"one\", \"two\"],\n                additionalProperties=False,\n                title=\"Example Metadata\",\n                codec=\"json\",\n            )\n        )\n        schema_diff = metadata.MetadataSchema(\n    
        {\n                \"codec\": \"json\",\n                \"title\": \"Example Metadata\",\n                \"type\": \"object\",\n                \"properties\": {\"one\": {\"type\": \"string\"}, \"two\": {\"type\": \"string\"}},\n                \"required\": [\"one\", \"two\"],\n                \"additionalProperties\": False,\n            }\n        )\n        assert schema == schema\n        assert not (schema != schema)\n        assert schema == schema_same\n        assert not (schema != schema_same)\n        assert schema != schema_diff\n        assert not (schema == schema_diff)\n\n    def test_bad_top_level_type(self):\n        for bad_type in [\"array\", \"boolean\", \"integer\", \"null\", \"number\", \"string\"]:\n            schema = {\n                \"codec\": \"json\",\n                \"type\": bad_type,\n            }\n            with pytest.raises(exceptions.MetadataSchemaValidationError):\n                metadata.MetadataSchema(schema)\n\n    @pytest.mark.parametrize(\"codec\", [\"struct\", \"json\"])\n    def test_null_union_top_level(self, codec):\n        schema = {\n            \"codec\": f\"{codec}\",\n            \"type\": [\"object\", \"null\"],\n            \"properties\": {\n                \"one\": {\n                    \"type\": \"string\",\n                    \"binaryFormat\": \"1024s\",\n                    \"nullTerminated\": True,\n                },\n                \"two\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n            },\n        }\n        ms = metadata.MetadataSchema(schema)\n        row_data = {\"one\": \"tree\", \"two\": 5}\n        assert ms.decode_row(ms.validate_and_encode_row(row_data)) == row_data\n        assert ms.decode_row(ms.validate_and_encode_row(None)) is None\n\n    def test_null_codec(self):\n        ms = metadata.MetadataSchema(None)\n        assert repr(ms) == \"\"\n        row = b\"Some binary data that tskit can't interpret \"\n        # Encode/decode are no-ops\n        assert 
row == ms.validate_and_encode_row(row)\n        assert row == ms.decode_row(row)\n        # Only bytes validate\n        with pytest.raises(TypeError):\n            ms.validate_and_encode_row({})\n\n    def test_json_codec(self):\n        schema = {\n            \"codec\": \"json\",\n            \"title\": \"Example Metadata\",\n            \"type\": \"object\",\n            \"properties\": {\"one\": {\"type\": \"string\"}, \"two\": {\"type\": \"number\"}},\n            \"required\": [\"one\", \"two\"],\n            \"additionalProperties\": False,\n        }\n        ms = metadata.MetadataSchema(schema)\n        # Valid row data\n        row_data = {\"one\": \"tree\", \"two\": 5}\n        assert (\n            ms.validate_and_encode_row(row_data)\n            == tskit.canonical_json(row_data).encode()\n        )\n        assert ms.decode_row(json.dumps(row_data).encode()) == row_data\n        # Round trip\n        assert ms.decode_row(ms.validate_and_encode_row(row_data)) == row_data\n        # Test canonical encoding\n        row_data = collections.OrderedDict(one=\"tree\", two=5)\n        row_data2 = collections.OrderedDict(two=5, one=\"tree\")\n        assert json.dumps(row_data) != json.dumps(row_data2)\n        assert ms.validate_and_encode_row(row_data) == ms.validate_and_encode_row(\n            row_data2\n        )\n\n    def test_msgpack_codec(self):\n        class MsgPackCodec(metadata.AbstractMetadataCodec):\n            def __init__(self, schema):\n                pass\n\n            def encode(self, obj):\n                return msgpack.dumps(obj)\n\n            def decode(self, encoded):\n                return msgpack.loads(encoded)\n\n        metadata.register_metadata_codec(MsgPackCodec, \"msgpack\")\n\n        schema = {\n            \"codec\": \"msgpack\",\n            \"title\": \"Example Metadata\",\n            \"type\": \"object\",\n            \"properties\": {\"one\": {\"type\": \"string\"}, \"two\": {\"type\": \"number\"}},\n            
\"required\": [\"one\", \"two\"],\n            \"additionalProperties\": False,\n        }\n        ms = metadata.MetadataSchema(schema)\n        # Valid row data\n        row_data = {\"one\": \"tree\", \"two\": 5}\n        assert ms.validate_and_encode_row(row_data) == msgpack.dumps(row_data)\n        assert ms.decode_row(msgpack.dumps(row_data)) == row_data\n        # Round trip\n        assert ms.decode_row(ms.validate_and_encode_row(row_data)) == row_data\n\n\nclass TestJSONCodec:\n    def test_simple_default(self):\n        schema = {\n            \"codec\": \"json\",\n            \"type\": \"object\",\n            \"properties\": {\"number\": {\"type\": \"number\", \"default\": 5}},\n        }\n        ms = tskit.MetadataSchema(schema)\n        assert ms.decode_row(b\"\") == {\"number\": 5}\n        assert ms.decode_row(ms.validate_and_encode_row({})) == {\"number\": 5}\n        assert ms.decode_row(ms.validate_and_encode_row({\"number\": 42})) == {\n            \"number\": 42\n        }\n\n    def test_nested_default_error(self):\n        schema = {\n            \"codec\": \"json\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"obj\": {\n                    \"type\": \"object\",\n                    \"properties\": {\n                        \"nested_obj_no_default\": {\n                            \"type\": \"object\",\n                            \"properties\": {},\n                        },\n                        \"nested_obj\": {\n                            \"type\": \"object\",\n                            \"properties\": {},\n                            \"default\": {\"foo\": \"bar\"},\n                        },\n                    },\n                }\n            },\n        }\n        with pytest.raises(\n            tskit.MetadataSchemaValidationError,\n            match=\"Defaults can only be specified at the top level for JSON codec\",\n        ):\n            tskit.MetadataSchema(schema)\n\n    def 
test_bad_type_error(self):\n        ms = tskit.MetadataSchema({\"codec\": \"json\"})\n        with pytest.raises(\n            exceptions.MetadataEncodingError,\n            match=\"Could not encode metadata of type TableCollection\",\n        ):\n            ms.validate_and_encode_row(tskit.TableCollection(1))\n\n    def test_skip_validation(self):\n        ms = tskit.MetadataSchema({\"codec\": \"json\"})\n        assert ms._bypass_validation\n        with patch.object(ms, \"_validate_row\", return_value=True) as mocked_validate:\n            ms.validate_and_encode_row({})\n            assert mocked_validate.call_count == 0\n\n    def test_dont_skip_validation(self):\n        ms = tskit.MetadataSchema({\"codec\": \"json\", \"properties\": {\"foo\": {}}})\n        assert not ms._bypass_validation\n        with patch.object(ms, \"_validate_row\", return_value=True) as mocked_validate:\n            ms.validate_and_encode_row({})\n            assert mocked_validate.call_count == 1\n\n    def test_dont_skip_validation_other_codecs(self):\n        ms = tskit.MetadataSchema(\n            {\n                \"codec\": \"struct\",\n                \"type\": \"object\",\n                \"properties\": {\n                    \"int\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n                },\n            }\n        )\n        assert not ms._bypass_validation\n        with patch.object(ms, \"_validate_row\", return_value=True) as mocked_validate:\n            ms.validate_and_encode_row({\"int\": 1})\n            assert mocked_validate.call_count == 1\n\n    def test_zero_length(self):\n        ms = tskit.MetadataSchema({\"codec\": \"json\"})\n        assert ms.decode_row(b\"\") == {}\n\n\nclass TestJSONStructCodec:\n    def test_requires_subschemas(self):\n        with pytest.raises(\n            tskit.MetadataSchemaValidationError,\n            match=\"requires 'json' and 'struct' schema mappings\",\n        ):\n            tskit.MetadataSchema({\"codec\": 
\"json+struct\"})\n\n    def test_disallow_duplicate_keys(self):\n        schema = {\n            \"codec\": \"json+struct\",\n            \"json\": {\"type\": \"object\", \"properties\": {\"x\": {\"type\": \"number\"}}},\n            \"struct\": {\n                \"type\": \"object\",\n                \"properties\": {\"x\": {\"type\": \"number\", \"binaryFormat\": \"i\"}},\n            },\n        }\n        with pytest.raises(\n            tskit.MetadataSchemaValidationError, match=\"must not share property names\"\n        ):\n            tskit.MetadataSchema(schema)\n\n    def test_round_trip_with_struct_and_json(self):\n        schema = {\n            \"codec\": \"json+struct\",\n            \"json\": {\n                \"type\": \"object\",\n                \"properties\": {\n                    \"label\": {\"type\": \"string\"},\n                    \"count\": {\"type\": \"number\"},\n                    \"stuff\": {\"type\": \"array\"},\n                },\n                \"required\": [\"label\"],\n            },\n            \"struct\": {\n                \"type\": \"object\",\n                \"properties\": {\n                    \"b0\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n                    \"b1\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n                    \"xyz\": {\n                        \"type\": \"array\",\n                        \"arrayLengthFormat\": \"H\",\n                        \"items\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n                    },\n                },\n            },\n        }\n        ms = tskit.MetadataSchema(schema)\n        for v in [[], [0, 2, 12], [5] * 1000]:\n            row = {\n                \"label\": \"abcdef xyz\",\n                \"count\": 7,\n                \"b0\": 123,\n                \"b1\": 0,\n                \"stuff\": [1, 3, 2, -1.5, \"abc\", None],\n                \"xyz\": v,\n                \"another_thing\": \"since JSON is permissive this is 
allowed\",\n            }\n            encoded = ms.validate_and_encode_row(row)\n            out = ms.decode_row(encoded)\n            assert out == row\n\n    def schema_with_binary(self, num_binary_ints):\n        # produces a json+struct schema having num_binary_ints integers\n        # encoded in binary, labeled b0, ... bX\n        schema = {\n            \"codec\": \"json+struct\",\n            \"json\": {\n                \"type\": \"object\",\n                \"properties\": {\n                    \"label\": {\"type\": \"string\"},\n                    \"count\": {\"type\": \"number\"},\n                },\n                \"required\": [\"label\"],\n            },\n            \"struct\": {\n                \"type\": \"object\",\n                \"properties\": {\n                    f\"b{j}\": {\n                        \"type\": \"integer\",\n                        \"binaryFormat\": \"i\",\n                    }\n                    for j in range(num_binary_ints)\n                },\n            },\n        }\n        return tskit.MetadataSchema(schema)\n\n    @pytest.mark.parametrize(\"k\", (0, 1, 5, 1001))\n    def test_byte_alignment(self, k):\n        # We want to test whether the binary portion begins byte-aligned.\n        # To verify this, we (somewhat pedantically) let:\n        # X = (bytes to encode the json without any binary)\n        # Y = (bytes to encode the same json with k ints in binary)\n        # and then:\n        # (a) if padding is correct, X should be divisible by 8;\n        # but to make sure that in fact the binary portion starts after X bytes,\n        # we also check that:\n        # (b) Y-X is equal to k * (bytes per int)\n        ms = self.schema_with_binary(k)\n        ms0 = self.schema_with_binary(0)\n        bytes_per_int = len(struct.pack(\"i\", 0))\n        for s in [\n            \"\",\n            \"a\",\n            \"ab\",\n            \"abc\",\n            \"abcd\",\n            \"abcde\",\n            
\"abcdef\",\n            \"abcdefg\",\n            \"abcdefgh\",\n            \" \" * 1000 + \"foo\" + \" \" * 1000,\n        ]:\n            row = {\"label\": s, \"count\": 7}\n            encoded0 = ms0.validate_and_encode_row(row)\n            row.update({f\"b{j}\": j for j in range(k)})\n            encoded = ms.validate_and_encode_row(row)\n            out = ms.decode_row(encoded)\n            assert out == row\n            # validate byte alignment\n            assert len(encoded0) % 8 == 0\n            assert len(encoded) - len(encoded0) == k * bytes_per_int\n\n    def test_json_defaults_applied(self):\n        schema = {\n            \"codec\": \"json+struct\",\n            \"json\": {\n                \"type\": \"object\",\n                \"properties\": {\"number\": {\"type\": \"number\", \"default\": 5}},\n            },\n            \"struct\": {\"type\": \"object\", \"properties\": {}},\n        }\n        ms = tskit.MetadataSchema(schema)\n        assert ms.decode_row(ms.validate_and_encode_row({})) == {\"number\": 5}\n        assert ms.decode_row(ms.validate_and_encode_row({\"number\": 9})) == {\"number\": 9}\n\n    def test_nested_default_error(self):\n        schema = {\n            \"codec\": \"json+struct\",\n            \"json\": {\n                \"type\": \"object\",\n                \"properties\": {\n                    \"obj\": {\n                        \"type\": \"object\",\n                        \"properties\": {\n                            \"nested_obj_no_default\": {\n                                \"type\": \"object\",\n                                \"properties\": {},\n                            },\n                            \"nested_obj\": {\n                                \"type\": \"object\",\n                                \"properties\": {},\n                                \"default\": {\"foo\": \"bar\"},\n                            },\n                        },\n                    }\n                },\n        
    },\n            \"struct\": {\"type\": \"object\", \"properties\": {}},\n        }\n        with pytest.raises(\n            tskit.MetadataSchemaValidationError,\n            match=\"Defaults can only be specified at the top level for JSON codec\",\n        ):\n            tskit.MetadataSchema(schema)\n\n    def test_decode_without_magic_errors(self):\n        ms = tskit.MetadataSchema(\n            {\n                \"codec\": \"json+struct\",\n                \"json\": {\"type\": \"object\", \"properties\": {}},\n                \"struct\": {\"type\": \"object\", \"properties\": {}},\n            }\n        )\n        with pytest.raises(ValueError, match=\"missing magic header\"):\n            ms.decode_row(b\"{}\")\n\n    def test_decode_version_mismatch(self):\n        ms = tskit.MetadataSchema(\n            {\n                \"codec\": \"json+struct\",\n                \"json\": {\"type\": \"object\", \"properties\": {}},\n                \"struct\": {\"type\": \"object\", \"properties\": {}},\n            }\n        )\n        header = metadata.JSONStructCodec._HDR.pack(\n            metadata.JSONStructCodec.MAGIC,\n            metadata.JSONStructCodec.VERSION + 1,\n            len(b\"{}\"),\n            0,\n        )\n        with pytest.raises(\n            ValueError,\n            match=\"Unsupported json\\\\+struct version\",\n        ):\n            ms.decode_row(header + b\"{}\")\n\n    def test_decode_truncated_lengths(self):\n        schema = {\n            \"codec\": \"json+struct\",\n            \"json\": {\"type\": \"object\", \"properties\": {}},\n            \"struct\": {\"type\": \"object\", \"properties\": {}},\n        }\n        ms = tskit.MetadataSchema(schema)\n        header = metadata.JSONStructCodec._HDR.pack(\n            metadata.JSONStructCodec.MAGIC, metadata.JSONStructCodec.VERSION, 5, 0\n        )\n        with pytest.raises(ValueError, match=\"declared lengths exceed buffer size\"):\n            ms.decode_row(header + 
b\"abc\")\n\n        header = metadata.JSONStructCodec._HDR.pack(\n            metadata.JSONStructCodec.MAGIC, metadata.JSONStructCodec.VERSION, 1, 3\n        )\n        with pytest.raises(ValueError, match=\"declared lengths exceed buffer size\"):\n            ms.decode_row(header + b\"a\")\n\n    def test_missing_struct_property_fails_validation(self):\n        schema = {\n            \"codec\": \"json+struct\",\n            \"json\": {\"type\": \"object\", \"properties\": {}},\n            \"struct\": {\n                \"type\": \"object\",\n                \"properties\": {\"payload\": {\"type\": \"integer\", \"binaryFormat\": \"i\"}},\n            },\n        }\n        ms = tskit.MetadataSchema(schema)\n        with pytest.raises(tskit.MetadataValidationError, match=\"required property\"):\n            ms.validate_and_encode_row({})\n\n\nclass TestStructCodec:\n    def encode_decode(self, method_name, sub_schema, obj, buffer):\n        assert (\n            getattr(metadata.StructCodec, f\"{method_name}_encode\")(sub_schema)(obj)\n            == buffer\n        )\n        assert (\n            getattr(metadata.StructCodec, f\"{method_name}_decode\")(sub_schema)(\n                iter(buffer)\n            )\n            == obj\n        )\n\n    def test_order_schema(self):\n        # Make a guaranteed-unordered nested, schema\n        schema = {\n            \"codec\": \"struct\",\n            \"title\": \"Example Struct-encoded Metadata\",\n            \"type\": \"object\",\n            \"properties\": collections.OrderedDict(\n                [\n                    (\"d\", {\"type\": \"number\", \"binaryFormat\": \"L\"}),\n                    (\"a\", {\"type\": \"string\", \"binaryFormat\": \"10s\"}),\n                    (\n                        \"f\",\n                        {\n                            \"type\": \"array\",\n                            \"items\": {\n                                \"type\": \"object\",\n                               
 \"properties\": collections.OrderedDict(\n                                    [\n                                        (\n                                            \"m\",\n                                            {\n                                                \"type\": \"number\",\n                                                \"index\": 0,\n                                                \"binaryFormat\": \"L\",\n                                            },\n                                        ),\n                                        (\n                                            \"n\",\n                                            {\n                                                \"type\": \"string\",\n                                                \"index\": -1000,\n                                                \"binaryFormat\": \"10s\",\n                                            },\n                                        ),\n                                        (\n                                            \"l\",\n                                            {\n                                                \"type\": \"string\",\n                                                \"index\": 1000,\n                                                \"binaryFormat\": \"10s\",\n                                            },\n                                        ),\n                                    ]\n                                ),\n                            },\n                        },\n                    ),\n                    (\"c\", {\"type\": \"string\", \"binaryFormat\": \"10s\"}),\n                    (\n                        \"h\",\n                        {\n                            \"type\": \"object\",\n                            \"properties\": collections.OrderedDict(\n                                [\n                                    (\n                                        \"i\",\n                
                        {\n                                            \"type\": \"string\",\n                                            \"index\": 1000,\n                                            \"binaryFormat\": \"10s\",\n                                        },\n                                    ),\n                                    (\n                                        \"j\",\n                                        {\n                                            \"type\": \"string\",\n                                            \"index\": 567,\n                                            \"binaryFormat\": \"10s\",\n                                        },\n                                    ),\n                                    (\n                                        \"k\",\n                                        {\n                                            \"type\": \"number\",\n                                            \"index\": 567.5,\n                                            \"binaryFormat\": \"L\",\n                                        },\n                                    ),\n                                ]\n                            ),\n                        },\n                    ),\n                    (\"e\", {\"type\": \"string\", \"binaryFormat\": \"10s\"}),\n                    (\"g\", {\"type\": \"string\", \"binaryFormat\": \"10s\"}),\n                    (\"b\", {\"type\": \"number\", \"binaryFormat\": \"L\"}),\n                ]\n            ),\n            \"required\": [\"one\", \"two\"],\n            \"additionalProperties\": False,\n        }\n        schema_sorted = {\n            \"codec\": \"struct\",\n            \"title\": \"Example Struct-encoded Metadata\",\n            \"type\": \"object\",\n            \"properties\": collections.OrderedDict(\n                [\n                    (\"a\", {\"type\": \"string\", \"binaryFormat\": \"10s\"}),\n                    (\"b\", {\"type\": 
\"number\", \"binaryFormat\": \"L\"}),\n                    (\"c\", {\"type\": \"string\", \"binaryFormat\": \"10s\"}),\n                    (\"d\", {\"type\": \"number\", \"binaryFormat\": \"L\"}),\n                    (\"e\", {\"type\": \"string\", \"binaryFormat\": \"10s\"}),\n                    (\n                        \"f\",\n                        {\n                            \"type\": \"array\",\n                            \"items\": {\n                                \"type\": \"object\",\n                                \"properties\": collections.OrderedDict(\n                                    [\n                                        (\n                                            \"n\",\n                                            {\n                                                \"type\": \"string\",\n                                                \"index\": -1000,\n                                                \"binaryFormat\": \"10s\",\n                                            },\n                                        ),\n                                        (\n                                            \"m\",\n                                            {\n                                                \"type\": \"number\",\n                                                \"index\": 0,\n                                                \"binaryFormat\": \"L\",\n                                            },\n                                        ),\n                                        (\n                                            \"l\",\n                                            {\n                                                \"type\": \"string\",\n                                                \"index\": 1000,\n                                                \"binaryFormat\": \"10s\",\n                                            },\n                                        ),\n                                    ]\n 
                               ),\n                            },\n                        },\n                    ),\n                    (\"g\", {\"type\": \"string\", \"binaryFormat\": \"10s\"}),\n                    (\n                        \"h\",\n                        {\n                            \"type\": \"object\",\n                            \"properties\": collections.OrderedDict(\n                                [\n                                    (\n                                        \"j\",\n                                        {\n                                            \"type\": \"string\",\n                                            \"index\": 567,\n                                            \"binaryFormat\": \"10s\",\n                                        },\n                                    ),\n                                    (\n                                        \"k\",\n                                        {\n                                            \"type\": \"number\",\n                                            \"index\": 567.5,\n                                            \"binaryFormat\": \"L\",\n                                        },\n                                    ),\n                                    (\n                                        \"i\",\n                                        {\n                                            \"type\": \"string\",\n                                            \"index\": 1000,\n                                            \"binaryFormat\": \"10s\",\n                                        },\n                                    ),\n                                ]\n                            ),\n                        },\n                    ),\n                ]\n            ),\n            \"required\": [\"one\", \"two\"],\n            \"additionalProperties\": False,\n        }\n        assert metadata.StructCodec.order_by_index(schema) 
== schema_sorted\n\n    def test_make_encode_and_decode(self):\n        self.encode_decode(\n            \"make\",\n            {\n                \"type\": \"array\",\n                \"arrayLengthFormat\": \"B\",\n                \"items\": {\"type\": \"number\", \"binaryFormat\": \"b\"},\n            },\n            list(range(5)),\n            b\"\\x05\\x00\\x01\\x02\\x03\\x04\",\n        )\n        self.encode_decode(\n            \"make\",\n            {\n                \"type\": \"object\",\n                \"properties\": {\n                    \"a\": {\"type\": \"number\", \"binaryFormat\": \"b\"},\n                    \"b\": {\"type\": \"string\", \"binaryFormat\": \"5p\"},\n                },\n            },\n            {\"a\": 5, \"b\": \"FOO\"},\n            b\"\\x05\\x03FOO\\x00\",\n        )\n        self.encode_decode(\n            \"make\",\n            {\"type\": \"string\", \"binaryFormat\": \"10p\"},\n            \"FOOBAR\",\n            b\"\\x06FOOBAR\\x00\\x00\\x00\",\n        )\n        self.encode_decode(\"make\", {\"type\": \"null\"}, None, b\"\")\n        self.encode_decode(\n            \"make\", {\"type\": \"boolean\", \"binaryFormat\": \"?\"}, True, b\"\\x01\"\n        )\n        self.encode_decode(\n            \"make\", {\"type\": \"integer\", \"binaryFormat\": \"b\"}, -128, b\"\\x80\"\n        )\n        self.encode_decode(\n            \"make\",\n            {\"type\": \"number\", \"binaryFormat\": \"f\"},\n            42.424198150634766,\n            b\"a\\xb2)B\",\n        )\n\n    def test_make_array_encode_and_decode(self):\n        # Default array length format is 'L'\n        self.encode_decode(\n            \"make_array\",\n            {\"type\": \"array\", \"items\": {\"type\": \"number\", \"binaryFormat\": \"b\"}},\n            list(range(5)),\n            b\"\\x05\\x00\\x00\\x00\\x00\\x01\\x02\\x03\\x04\",\n        )\n        self.encode_decode(\n            \"make_array\",\n            {\n                \"type\": 
\"array\",\n                \"arrayLengthFormat\": \"H\",\n                \"items\": {\"type\": \"number\", \"binaryFormat\": \"b\"},\n            },\n            list(range(6)),\n            b\"\\x06\\x00\\x00\\x01\\x02\\x03\\x04\\x05\",\n        )\n        self.encode_decode(\n            \"make_array\",\n            {\n                \"type\": \"array\",\n                \"arrayLengthFormat\": \"B\",\n                \"items\": {\"type\": \"number\", \"binaryFormat\": \"b\"},\n            },\n            [],\n            b\"\\x00\",\n        )\n        sub_schema = {\n            \"type\": \"array\",\n            \"arrayLengthFormat\": \"B\",\n            \"items\": {\n                \"type\": \"array\",\n                \"arrayLengthFormat\": \"B\",\n                \"items\": {\"type\": \"number\", \"binaryFormat\": \"b\"},\n            },\n        }\n        self.encode_decode(\"make_array\", sub_schema, [], b\"\\x00\")\n        self.encode_decode(\"make_array\", sub_schema, [[]], b\"\\x01\\x00\")\n        self.encode_decode(\n            \"make_array\", sub_schema, [[3, 4], [5]], b\"\\x02\\x02\\x03\\x04\\x01\\x05\"\n        )\n\n    def test_make_array_no_length_encoding_exhaust_buffer(self):\n        self.encode_decode(\n            \"make_array\",\n            {\n                \"type\": \"array\",\n                \"noLengthEncodingExhaustBuffer\": True,\n                \"items\": {\"type\": \"number\", \"binaryFormat\": \"b\"},\n            },\n            list(range(5)),\n            b\"\\x00\\x01\\x02\\x03\\x04\",\n        )\n\n        self.encode_decode(\n            \"make_array\",\n            {\n                \"type\": \"array\",\n                \"noLengthEncodingExhaustBuffer\": True,\n                \"items\": {\n                    \"type\": \"object\",\n                    \"properties\": {\n                        \"a\": {\"type\": \"number\", \"binaryFormat\": \"b\"},\n                        \"b\": {\"type\": \"number\", 
\"binaryFormat\": \"Q\"},\n                        \"c\": {\"type\": \"number\", \"binaryFormat\": \"?\"},\n                        \"d\": {\"type\": \"string\", \"binaryFormat\": \"5p\"},\n                    },\n                },\n            },\n            [\n                {\n                    \"a\": 5 + i,\n                    \"b\": 18446744073709551615 - i,\n                    \"c\": (i // 2) == 0,\n                    \"d\": \"FOO\",\n                }\n                for i in range(10)\n            ],\n            b\"\\x05\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\x01\\x03FOO\\x00\"\n            b\"\\x06\\xfe\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\x01\\x03FOO\\x00\"\n            b\"\\x07\\xfd\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\x00\\x03FOO\\x00\"\n            b\"\\x08\\xfc\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\x00\\x03FOO\\x00\"\n            b\"\\x09\\xfb\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\x00\\x03FOO\\x00\"\n            b\"\\x0a\\xfa\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\x00\\x03FOO\\x00\"\n            b\"\\x0b\\xf9\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\x00\\x03FOO\\x00\"\n            b\"\\x0c\\xf8\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\x00\\x03FOO\\x00\"\n            b\"\\x0d\\xf7\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\x00\\x03FOO\\x00\"\n            b\"\\x0e\\xf6\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\x00\\x03FOO\\x00\",\n        )\n\n        # Other struct errors should still be raised\n        schema = {\n            \"type\": \"array\",\n            \"noLengthEncodingExhaustBuffer\": True,\n            \"items\": {\"type\": \"number\", \"binaryFormat\": \"I'M NOT VALID\"},\n        }\n        with pytest.raises(struct.error):\n            metadata.StructCodec.make_array_encode(schema)(5)\n        with pytest.raises(struct.error):\n            metadata.StructCodec.make_array_decode(schema)(5)\n\n    def test_make_object_encode_and_decode(self):\n        self.encode_decode(\"make_object\", {\"type\": \"object\", \"properties\": {}}, {}, b\"\")\n        
self.encode_decode(\n            \"make_object\",\n            {\n                \"type\": \"object\",\n                \"properties\": {\n                    \"a\": {\"type\": \"number\", \"binaryFormat\": \"b\"},\n                    \"b\": {\"type\": \"number\", \"binaryFormat\": \"Q\"},\n                    \"c\": {\"type\": \"number\", \"binaryFormat\": \"?\"},\n                    \"d\": {\"type\": \"string\", \"binaryFormat\": \"5p\"},\n                },\n            },\n            {\"a\": 5, \"b\": 18446744073709551615, \"c\": True, \"d\": \"FOO\"},\n            b\"\\x05\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\x01\\x03FOO\\x00\",\n        )\n        self.encode_decode(\n            \"make_object\",\n            {\n                \"type\": \"object\",\n                \"properties\": {\n                    \"obj\": {\n                        \"type\": \"object\",\n                        \"properties\": {\n                            \"a\": {\"type\": \"number\", \"binaryFormat\": \"b\"},\n                            \"b\": {\"type\": \"number\", \"binaryFormat\": \"Q\"},\n                            \"c\": {\"type\": \"number\", \"binaryFormat\": \"?\"},\n                            \"d\": {\"type\": \"string\", \"binaryFormat\": \"5p\"},\n                        },\n                    },\n                },\n            },\n            {\"obj\": {\"a\": 5, \"b\": 18446744073709551615, \"c\": True, \"d\": \"FOO\"}},\n            b\"\\x05\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\x01\\x03FOO\\x00\",\n        )\n\n    def test_make_string_encode_and_decode(self):\n        # Single byte\n        self.encode_decode(\n            \"make_string\", {\"type\": \"string\", \"binaryFormat\": \"c\"}, \"a\", b\"a\"\n        )\n        # With \"s\" encoding exactly the right size comes back fine\n        self.encode_decode(\n            \"make_string\", {\"type\": \"string\", \"binaryFormat\": \"4s\"}, \"abcd\", b\"abcd\"\n        )\n        # If too small gets 
truncated\n        assert (\n            metadata.StructCodec.make_string_encode(\n                {\"type\": \"string\", \"binaryFormat\": \"2s\"}\n            )(\"abcd\")\n            == b\"ab\"\n        )\n        # If too large gets padded - have to test separately as encode and decode are not\n        # inverse of each other in this case\n        assert (\n            metadata.StructCodec.make_string_encode(\n                {\"type\": \"string\", \"binaryFormat\": \"6s\"}\n            )(\"abcd\")\n            == b\"abcd\\x00\\x00\"\n        )\n        # Too large getting decoded returns padding\n        assert (\n            metadata.StructCodec.make_string_decode(\n                {\"type\": \"string\", \"binaryFormat\": \"6s\"}\n            )(b\"abcd\\x00\\x00\")\n            == \"abcd\\x00\\x00\"\n        )\n        assert (\n            metadata.StructCodec.make_string_decode(\n                {\"type\": \"string\", \"binaryFormat\": \"6s\", \"nullTerminated\": False}\n            )(b\"abcd\\x00\\x00\")\n            == \"abcd\\x00\\x00\"\n        )\n        # Unless we specify that the field is null-terminated\n        self.encode_decode(\n            \"make_string\",\n            {\"type\": \"string\", \"binaryFormat\": \"6s\", \"nullTerminated\": True},\n            \"abcd\",\n            b\"abcd\\x00\\x00\",\n        )\n        # For \"p\" the padding is not returned, even if nullTerminated is False\n        self.encode_decode(\n            \"make_string\",\n            {\"type\": \"string\", \"binaryFormat\": \"8p\"},\n            \"abcd\",\n            b\"\\x04abcd\\x00\\x00\\x00\",\n        )\n\n        # Unicode\n        self.encode_decode(\n            \"make_string\",\n            {\"type\": \"string\", \"binaryFormat\": \"6s\", \"nullTerminated\": True},\n            \"💩\",\n            b\"\\xf0\\x9f\\x92\\xa9\\x00\\x00\",\n        )\n        self.encode_decode(\n            \"make_string\",\n            {\n                \"type\": \"string\",\n 
               \"binaryFormat\": \"8s\",\n                \"nullTerminated\": True,\n                \"stringEncoding\": \"utf-16\",\n            },\n            \"💩\",\n            b\"\\xff\\xfe=\\xd8\\xa9\\xdc\\x00\\x00\",\n        )\n        self.encode_decode(\n            \"make_string\",\n            {\"type\": \"string\", \"binaryFormat\": \"9p\", \"stringEncoding\": \"utf-32\"},\n            \"💩\",\n            b\"\\x08\\xff\\xfe\\x00\\x00\\xa9\\xf4\\x01\\x00\",\n        )\n\n    def test_make_null_encode_and_decode(self):\n        self.encode_decode(\"make_null\", {\"type\": \"null\"}, None, b\"\")\n        self.encode_decode(\n            \"make_null\", {\"type\": \"null\", \"binaryFormat\": \"x\"}, None, b\"\\x00\"\n        )\n        self.encode_decode(\n            \"make_null\", {\"type\": \"null\", \"binaryFormat\": \"3x\"}, None, b\"\\x00\\x00\\x00\"\n        )\n\n    def test_make_numeric_encode_and_decode(self):\n        self.encode_decode(\n            \"make_numeric\",\n            {\"type\": \"number\", \"binaryFormat\": \"f\"},\n            42.424198150634766,\n            b\"a\\xb2)B\",\n        )\n        self.encode_decode(\n            \"make_numeric\", {\"type\": \"integer\", \"binaryFormat\": \"b\"}, 42, b\"*\"\n        )\n\n    def test_null_union_top_level(self):\n        # This nested with multiple values tests that the buffer length check has not\n        # caused a list to be passed to sub-decoders\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": [\"object\", \"null\"],\n            \"properties\": {\n                \"o\": {\n                    \"type\": \"object\",\n                    \"properties\": {\"x\": {\"type\": \"number\", \"binaryFormat\": \"d\"}},\n                },\n                \"a\": {\"type\": \"number\", \"binaryFormat\": \"d\"},\n                \"b\": {\"type\": \"number\", \"binaryFormat\": \"d\"},\n            },\n        }\n        ms = metadata.MetadataSchema(schema)\n        
row_data = {\"o\": {\"x\": 5.5}, \"a\": 4, \"b\": 7}\n        assert ms.decode_row(ms.validate_and_encode_row(row_data)) == row_data\n        assert ms.decode_row(ms.validate_and_encode_row(None)) is None\n\n    def test_default_values(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"int\": {\"type\": \"number\", \"binaryFormat\": \"b\", \"default\": 42},\n                \"float\": {\"type\": \"number\", \"binaryFormat\": \"d\"},\n            },\n        }\n        ms = metadata.MetadataSchema(schema)\n        row_data = {\"float\": 5.5}\n        assert ms.validate_and_encode_row(row_data) == b\"\\x00\\x00\\x00\\x00\\x00\\x00\\x16@*\"\n        assert ms.decode_row(ms.validate_and_encode_row(row_data)) == {\n            \"float\": 5.5,\n            \"int\": 42,\n        }\n\n    def test_defaults_object_or_null(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": [\"object\", \"null\"],\n            \"properties\": {\n                \"int\": {\"type\": \"number\", \"binaryFormat\": \"b\", \"default\": 42},\n                \"float\": {\"type\": \"number\", \"binaryFormat\": \"d\"},\n            },\n        }\n        ms = metadata.MetadataSchema(schema)\n        row_data = {\"float\": 5.5}\n        assert ms.validate_and_encode_row(row_data) == b\"\\x00\\x00\\x00\\x00\\x00\\x00\\x16@*\"\n        assert ms.decode_row(ms.validate_and_encode_row(row_data)) == {\n            \"float\": 5.5,\n            \"int\": 42,\n        }\n        assert ms.validate_and_encode_row(None) == b\"\"\n        assert ms.decode_row(b\"\") is None\n\n    def test_add_property_to_schema(self):\n        schema = metadata.MetadataSchema(\n            {\n                \"codec\": \"struct\",\n                \"type\": [\"object\", \"null\"],\n                \"name\": \"Mutation metadata\",\n                \"properties\": {\n                    \"s\": 
{\"type\": \"number\", \"binaryFormat\": \"d\"},\n                },\n                \"additionalProperties\": False,\n            }\n        )\n        schema_with_additional = schema.schema\n        schema_with_additional[\"properties\"][\"a\"] = {\n            \"type\": \"number\",\n            \"binaryFormat\": \"d\",\n        }\n        metadata.MetadataSchema(schema_with_additional)\n\n\nclass TestStructCodecRoundTrip:\n    def round_trip(self, schema, row_data):\n        ms = metadata.MetadataSchema(schema)\n        assert ms.decode_row(ms.validate_and_encode_row(row_data)) == row_data\n\n    def test_simple_types(self):\n        for type_, binaryFormat, value in (\n            (\"number\", \"i\", 5),\n            (\"number\", \"d\", 5.5),\n            (\"string\", \"10p\", \"foobar\"),\n            (\"boolean\", \"?\", True),\n            (\"boolean\", \"?\", False),\n            (\"null\", \"10x\", None),\n        ):\n            schema = {\n                \"codec\": \"struct\",\n                \"type\": \"object\",\n                \"properties\": {type_: {\"type\": type_, \"binaryFormat\": binaryFormat}},\n            }\n            self.round_trip(schema, {type_: value})\n\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\"null\": {\"type\": \"null\"}},\n        }\n        self.round_trip(schema, {\"null\": None})\n\n    def test_flat_object(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"int\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n                \"float\": {\"type\": \"number\", \"binaryFormat\": \"d\"},\n                \"null\": {\"type\": \"null\", \"binaryFormat\": \"3x\"},\n                \"str\": {\"type\": \"string\", \"binaryFormat\": \"10p\"},\n                \"bool\": {\"type\": \"boolean\", \"binaryFormat\": \"?\"},\n            },\n        }\n        
self.round_trip(\n            schema, {\"null\": None, \"bool\": True, \"float\": 5.5, \"int\": 5, \"str\": \"42\"}\n        )\n\n    def test_nested_object(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"int\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n                \"float\": {\"type\": \"number\", \"binaryFormat\": \"d\"},\n                \"str\": {\"type\": \"string\", \"binaryFormat\": \"10p\"},\n                \"bool\": {\"type\": \"boolean\", \"binaryFormat\": \"?\"},\n                \"obj\": {\n                    \"index\": 5,\n                    \"type\": \"object\",\n                    \"properties\": {\n                        \"int\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n                        \"float\": {\"type\": \"number\", \"binaryFormat\": \"d\"},\n                        \"str\": {\"type\": \"string\", \"binaryFormat\": \"5p\"},\n                        \"bool\": {\"type\": \"boolean\", \"binaryFormat\": \"?\"},\n                    },\n                },\n            },\n        }\n        self.round_trip(\n            schema,\n            {\n                \"bool\": True,\n                \"float\": 5.5,\n                \"int\": 5,\n                \"str\": \"42\",\n                \"obj\": {\"float\": 5.78, \"int\": 9, \"bool\": False, \"str\": \"41\"},\n            },\n        )\n\n    def test_flat_array(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"array\": {\n                    \"type\": \"array\",\n                    \"items\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n                }\n            },\n        }\n        self.round_trip(schema, {\"array\": []})\n        self.round_trip(schema, {\"array\": [1]})\n        self.round_trip(schema, {\"array\": [1, 6, -900]})\n\n        schema = {\n       
     \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"array\": {\n                    \"type\": \"array\",\n                    \"items\": {\"type\": \"number\", \"binaryFormat\": \"d\"},\n                }\n            },\n        }\n        self.round_trip(schema, {\"array\": []})\n        self.round_trip(schema, {\"array\": [1.5]})\n        self.round_trip(schema, {\"array\": [1.5, 6.7, -900.00001]})\n\n    def test_nested_array(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"array\": {\n                    \"type\": \"array\",\n                    \"items\": {\n                        \"type\": \"array\",\n                        \"items\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n                    },\n                }\n            },\n        }\n        self.round_trip(schema, {\"array\": [[]]})\n        self.round_trip(schema, {\"array\": [[], []]})\n        self.round_trip(schema, {\"array\": [[1]]})\n        self.round_trip(schema, {\"array\": [[1, 6, -900]]})\n        self.round_trip(schema, {\"array\": [[0, 987, 234903], [1, 6, -900]]})\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"array\": {\n                    \"type\": \"array\",\n                    \"items\": {\n                        \"type\": \"array\",\n                        \"items\": {\"type\": \"number\", \"binaryFormat\": \"d\"},\n                    },\n                }\n            },\n        }\n        self.round_trip(schema, {\"array\": [[]]})\n        self.round_trip(schema, {\"array\": [[], []]})\n        self.round_trip(schema, {\"array\": [[1.67]]})\n        self.round_trip(schema, {\"array\": [[1.34, 6.56422, -900.0000006]]})\n        self.round_trip(\n            schema, {\"array\": [[0.0, 987.123, 234903.123], 
[1.1235, 6, -900]]}\n        )\n\n    def test_array_of_objects(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"array\": {\n                    \"type\": \"array\",\n                    \"items\": {\n                        \"type\": \"object\",\n                        \"properties\": {\n                            \"int\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n                            \"float\": {\"type\": \"number\", \"binaryFormat\": \"d\"},\n                            \"padding\": {\"type\": \"null\", \"binaryFormat\": \"5x\"},\n                            \"str\": {\"type\": \"string\", \"binaryFormat\": \"10p\"},\n                            \"bool\": {\"type\": \"boolean\", \"binaryFormat\": \"?\"},\n                        },\n                    },\n                }\n            },\n        }\n        self.round_trip(schema, {\"array\": []})\n        self.round_trip(\n            schema,\n            {\n                \"array\": [\n                    {\n                        \"padding\": None,\n                        \"float\": 5.78,\n                        \"int\": 9,\n                        \"bool\": False,\n                        \"str\": \"41\",\n                    }\n                ]\n            },\n        )\n        self.round_trip(\n            schema,\n            {\n                \"array\": [\n                    {\n                        \"padding\": None,\n                        \"float\": 5.78,\n                        \"int\": 9,\n                        \"bool\": False,\n                        \"str\": \"41\",\n                    },\n                    {\n                        \"str\": \"FOO\",\n                        \"int\": 7,\n                        \"bool\": True,\n                        \"float\": 45.7,\n                        \"padding\": None,\n                    },\n                ],\n       
     },\n        )\n\n    def test_object_with_array(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"int\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n                \"arr\": {\n                    \"index\": 2,\n                    \"type\": \"array\",\n                    \"items\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n                },\n            },\n        }\n        self.round_trip(schema, {\"int\": 5, \"arr\": []})\n        self.round_trip(schema, {\"int\": 5, \"arr\": [5]})\n        self.round_trip(schema, {\"arr\": [5, 6, 7], \"int\": 5})\n\n    def test_array_length_format(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"array\": {\n                    \"type\": \"array\",\n                    \"arrayLengthFormat\": \"B\",\n                    \"items\": {\"type\": \"number\", \"binaryFormat\": \"H\"},\n                }\n            },\n        }\n        self.round_trip(schema, {\"array\": []})\n        self.round_trip(schema, {\"array\": [1]})\n        self.round_trip(schema, {\"array\": list(range(255))})\n\n    def test_string_encoding(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"string\": {\n                    \"type\": \"string\",\n                    \"stringEncoding\": \"utf-16\",\n                    \"binaryFormat\": \"40p\",\n                }\n            },\n        }\n        self.round_trip(schema, {\"string\": \"Test string\"})\n\n    def test_ordering_of_fields(self):\n        row_data = {\n            \"null\": None,\n            \"bool\": True,\n            \"float\": -1.8440714901698642e18,\n            \"int\": 5,\n            \"str\": \"foo\",\n        }\n        alpha_ordered_encoded = 
b\"\\x01\\xaa\\xbb\\xcc\\xdd\\x05\\x00\\x00\\x00\\x03foo\"\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"null\": {\"type\": \"null\", \"binaryFormat\": \"3x\"},\n                \"float\": {\"type\": \"number\", \"binaryFormat\": \"f\"},\n                \"bool\": {\"type\": \"boolean\", \"binaryFormat\": \"?\"},\n                \"int\": {\"type\": \"number\", \"binaryFormat\": \"b\"},\n                \"str\": {\"type\": \"string\", \"binaryFormat\": \"4p\"},\n            },\n        }\n        alpha_ordered_encoded = b\"\\x01\\xaa\\xbb\\xcc\\xdd\\x05\\x00\\x00\\x00\\x03foo\"\n        ms = metadata.MetadataSchema(schema)\n        assert ms.validate_and_encode_row(row_data) == alpha_ordered_encoded\n        assert ms.decode_row(alpha_ordered_encoded) == row_data\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"null\": {\"type\": \"null\", \"binaryFormat\": \"3x\", \"index\": 0},\n                \"float\": {\"type\": \"number\", \"binaryFormat\": \"f\", \"index\": 1},\n                \"bool\": {\"type\": \"boolean\", \"binaryFormat\": \"?\", \"index\": 2},\n                \"int\": {\"type\": \"number\", \"binaryFormat\": \"b\", \"index\": 3},\n                \"str\": {\"type\": \"string\", \"binaryFormat\": \"4p\", \"index\": 4},\n            },\n        }\n        index_order_encoded = b\"\\x00\\x00\\x00\\xaa\\xbb\\xcc\\xdd\\x01\\x05\\x03foo\"\n        ms = metadata.MetadataSchema(schema)\n        assert ms.validate_and_encode_row(row_data) == index_order_encoded\n        assert ms.decode_row(index_order_encoded) == row_data\n\n    def test_fixed_length_array(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"array\": {\n                    \"type\": \"array\",\n                    
\"length\": 3,\n                    \"items\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n                }\n            },\n        }\n        self.round_trip(schema, {\"array\": [1, 2, 3]})\n\n        # Test with complex fixed-length arrays\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"array\": {\n                    \"type\": \"array\",\n                    \"length\": 2,\n                    \"items\": {\n                        \"type\": \"object\",\n                        \"properties\": {\n                            \"int\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n                            \"float\": {\"type\": \"number\", \"binaryFormat\": \"d\"},\n                        },\n                    },\n                }\n            },\n        }\n        self.round_trip(\n            schema, {\"array\": [{\"int\": 1, \"float\": 1.1}, {\"int\": 2, \"float\": 2.2}]}\n        )\n\n        # Test fixed-length nested arrays\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"array\": {\n                    \"type\": \"array\",\n                    \"length\": 2,\n                    \"items\": {\n                        \"type\": \"array\",\n                        \"length\": 3,\n                        \"items\": {\"type\": \"number\", \"binaryFormat\": \"d\"},\n                    },\n                }\n            },\n        }\n        self.round_trip(schema, {\"array\": [[1.1, 1.2, 1.3], [2.1, 2.2, 2.3]]})\n\n    def test_mixed_fixed_and_variable_arrays(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"fixed_array\": {\n                    \"type\": \"array\",\n                    \"length\": 3,\n                    \"items\": {\"type\": \"number\", 
\"binaryFormat\": \"i\"},\n                },\n                \"variable_array\": {\n                    \"type\": \"array\",\n                    \"items\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n                },\n            },\n        }\n        self.round_trip(\n            schema, {\"fixed_array\": [1, 2, 3], \"variable_array\": [4, 5, 6, 7]}\n        )\n        self.round_trip(schema, {\"fixed_array\": [1, 2, 3], \"variable_array\": []})\n\n        # Nested case - array of objects where each object has\n        # both fixed and variable-length arrays\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"objects\": {\n                    \"type\": \"array\",\n                    \"items\": {\n                        \"type\": \"object\",\n                        \"properties\": {\n                            \"fixed\": {\n                                \"type\": \"array\",\n                                \"length\": 2,\n                                \"items\": {\"type\": \"number\", \"binaryFormat\": \"d\"},\n                            },\n                            \"variable\": {\n                                \"type\": \"array\",\n                                \"items\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n                            },\n                        },\n                    },\n                }\n            },\n        }\n        self.round_trip(\n            schema,\n            {\n                \"objects\": [\n                    {\"fixed\": [1.1, 2.2], \"variable\": [1, 2, 3]},\n                    {\"fixed\": [3.3, 4.4], \"variable\": [4]},\n                    {\"fixed\": [5.5, 6.6], \"variable\": []},\n                ]\n            },\n        )\n\n    def test_edge_case_zero_length_array(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": 
{\n                \"empty_fixed\": {\n                    \"type\": \"array\",\n                    \"length\": 0,\n                    \"items\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n                }\n            },\n        }\n        self.round_trip(schema, {\"empty_fixed\": []})\n\n        # Can't provide non-empty array when length=0\n        ms = metadata.MetadataSchema(schema)\n        with pytest.raises(\n            ValueError, match=\"Array length 1 does not match schema fixed length 0\"\n        ):\n            ms.validate_and_encode_row({\"empty_fixed\": [1]})\n\n        # Complex object with zero-length array\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"name\": {\"type\": \"string\", \"binaryFormat\": \"10p\"},\n                \"empty_fixed\": {\n                    \"type\": \"array\",\n                    \"length\": 0,\n                    \"items\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n                },\n                \"value\": {\"type\": \"number\", \"binaryFormat\": \"d\"},\n            },\n        }\n        self.round_trip(schema, {\"name\": \"test\", \"empty_fixed\": [], \"value\": 42.0})\n\n\nclass TestStructCodecErrors:\n    def encode(self, schema, row_data):\n        ms = metadata.MetadataSchema(schema)\n        ms.validate_and_encode_row(row_data)\n\n    def test_missing_and_extra_property(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"int\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n                \"float\": {\"type\": \"number\", \"binaryFormat\": \"d\"},\n            },\n        }\n        with pytest.raises(\n            exceptions.MetadataValidationError, match=\"'int' is a required property\"\n        ):\n            self.encode(schema, {\"float\": 5.5})\n        with pytest.raises(\n            
exceptions.MetadataValidationError,\n            match=\"Additional properties are not allowed\",\n        ):\n            self.encode(\n                schema, {\"float\": 5.5, \"int\": 9, \"extra\": \"I really shouldn't be here\"}\n            )\n\n    def test_bad_schema_union_type(self):\n        schema = {\"codec\": \"struct\", \"type\": [\"object\", \"number\"], \"binaryFormat\": \"d\"}\n        with pytest.raises(\n            exceptions.MetadataSchemaValidationError, match=\"is not one of\"\n        ):\n            metadata.MetadataSchema(schema)\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\"test\": {\"type\": [\"number\", \"string\"], \"binaryFormat\": \"d\"}},\n        }\n        with pytest.raises(\n            exceptions.MetadataSchemaValidationError, match=\"is not one of\"\n        ):\n            metadata.MetadataSchema(schema)\n\n    def test_bad_schema_hetrogeneous_array(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"array\": {\n                    \"type\": \"array\",\n                    \"items\": [{\"type\": \"number\"}, {\"type\": \"string\"}],\n                }\n            },\n        }\n        with pytest.raises(\n            exceptions.MetadataSchemaValidationError, match=\"is not of type 'object'\"\n        ):\n            metadata.MetadataSchema(schema)\n\n    def test_bad_binary_format(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\"int\": {\"type\": \"number\", \"binaryFormat\": \"int\"}},\n        }\n        with pytest.raises(\n            exceptions.MetadataSchemaValidationError, match=\"does not match\"\n        ):\n            metadata.MetadataSchema(schema)\n        # Can't specify endianness\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": 
\"object\",\n            \"properties\": {\"int\": {\"type\": \"number\", \"binaryFormat\": \">b\"}},\n        }\n        with pytest.raises(\n            exceptions.MetadataSchemaValidationError, match=\"does not match\"\n        ):\n            metadata.MetadataSchema(schema)\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\"null\": {\"type\": \"null\", \"binaryFormat\": \"l\"}},\n        }\n        with pytest.raises(\n            exceptions.MetadataSchemaValidationError,\n            match=\"null type binaryFormat must be padding\",\n        ):\n            metadata.MetadataSchema(schema)\n\n    def test_bad_array_length_format(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\"array\": {\"type\": \"array\", \"arrayLengthFormat\": \"b\"}},\n        }\n        with pytest.raises(\n            exceptions.MetadataSchemaValidationError, match=\"does not match\"\n        ):\n            metadata.MetadataSchema(schema)\n\n    def test_missing_binary_format(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\"int\": {\"type\": \"number\"}},\n        }\n        with pytest.raises(\n            exceptions.MetadataSchemaValidationError,\n            match=\"number type must have binaryFormat set\",\n        ):\n            metadata.MetadataSchema(schema)\n\n    def test_bad_string_encoding(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"string\": {\n                    \"type\": \"string\",\n                    \"binaryFormat\": \"5s\",\n                    \"stringEncoding\": 58,\n                }\n            },\n        }\n        with pytest.raises(\n            exceptions.MetadataSchemaValidationError, match=\"is not of type\"\n        ):\n   
         metadata.MetadataSchema(schema)\n\n    def test_bad_null_terminated(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"string\": {\n                    \"type\": \"string\",\n                    \"binaryFormat\": \"5s\",\n                    \"nullTerminated\": 58,\n                }\n            },\n        }\n        with pytest.raises(\n            exceptions.MetadataSchemaValidationError, match=\"is not of type\"\n        ):\n            metadata.MetadataSchema(schema)\n\n    def test_bad_no_length_encoding_exhaust_buffer(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"string\": {\n                    \"type\": \"string\",\n                    \"binaryFormat\": \"5s\",\n                    \"noLengthEncodingExhaustBuffer\": 58,\n                }\n            },\n        }\n        with pytest.raises(\n            exceptions.MetadataSchemaValidationError, match=\"is not of type\"\n        ):\n            metadata.MetadataSchema(schema)\n\n    def test_too_long_array(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"array\": {\n                    \"type\": \"array\",\n                    \"arrayLengthFormat\": \"B\",\n                    \"items\": {\"type\": \"number\", \"binaryFormat\": \"I\"},\n                },\n            },\n        }\n        data = {\"array\": list(range(255))}\n        metadata.MetadataSchema(schema).validate_and_encode_row(data)\n        data2 = {\"array\": list(range(256))}\n        with pytest.raises(\n            ValueError,\n            match=\"Couldn't pack array size - it is likely too long for the\"\n            \" specified arrayLengthFormat\",\n        ):\n            
metadata.MetadataSchema(schema).validate_and_encode_row(data2)\n\n    def test_additional_properties(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"additional_properties\": True,\n            \"properties\": {},\n        }\n        with pytest.raises(\n            ValueError, match=\"Struct codec does not support additional_properties\"\n        ):\n            metadata.MetadataSchema(schema)\n\n    def test_unrequired_property_needs_default(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"int\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n                \"float\": {\"type\": \"number\", \"binaryFormat\": \"d\"},\n            },\n            \"required\": [\"float\"],\n        }\n        with pytest.raises(\n            exceptions.MetadataSchemaValidationError,\n            match=\"Optional property 'int' must have a default value\",\n        ):\n            metadata.MetadataSchema(schema)\n\n    def test_no_default_implies_required(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"int\": {\"type\": \"number\", \"binaryFormat\": \"i\", \"default\": 5},\n                \"float\": {\"type\": \"number\", \"binaryFormat\": \"d\"},\n            },\n        }\n        self.encode(schema, {\"float\": 5.5})\n        with pytest.raises(\n            exceptions.MetadataValidationError, match=\"'float' is a required property\"\n        ):\n            self.encode(schema, {})\n\n    def test_fixed_length_array_wrong_length(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"array\": {\n                    \"type\": \"array\",\n                    \"length\": 3,\n                    \"items\": {\"type\": \"number\", 
\"binaryFormat\": \"i\"},\n                },\n            },\n        }\n        ms = metadata.MetadataSchema(schema)\n\n        with pytest.raises(\n            ValueError, match=\"Array length 2 does not match schema fixed length 3\"\n        ):\n            ms.validate_and_encode_row({\"array\": [1, 2]})\n\n        with pytest.raises(\n            ValueError, match=\"Array length 4 does not match schema fixed length 3\"\n        ):\n            ms.validate_and_encode_row({\"array\": [1, 2, 3, 4]})\n\n    def test_fixed_length_array_conflicts(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"test\": {\n                    \"type\": \"array\",\n                    \"length\": 3,\n                    \"noLengthEncodingExhaustBuffer\": True,\n                    \"items\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n                },\n            },\n        }\n        with pytest.raises(\n            exceptions.MetadataSchemaValidationError,\n            match=\"test array cannot have both 'length' and \"\n            \"'noLengthEncodingExhaustBuffer' set\",\n        ):\n            metadata.MetadataSchema(schema)\n\n    def test_fixed_length_with_length_format(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"array\": {\n                    \"type\": \"array\",\n                    \"length\": 3,\n                    \"arrayLengthFormat\": \"B\",\n                    \"items\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n                },\n            },\n        }\n        with pytest.raises(\n            exceptions.MetadataSchemaValidationError,\n            match=\"fixed-length array should not specify 'arrayLengthFormat'\",\n        ):\n            metadata.MetadataSchema(schema)\n\n    def test_negative_fixed_length(self):\n        \"\"\"Test that 
negative fixed-length values are rejected.\"\"\"\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"array\": {\n                    \"type\": \"array\",\n                    \"length\": -5,\n                    \"items\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n                },\n            },\n        }\n        with pytest.raises(exceptions.MetadataSchemaValidationError):\n            metadata.MetadataSchema(schema)\n\n\nclass TestSLiMDecoding:\n    \"\"\"\n    Test with byte strings copied from a SLiM tree sequence\n    \"\"\"\n\n    def test_node(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"genomeID\": {\"type\": \"integer\", \"binaryFormat\": \"q\", \"index\": 0},\n                \"isNull\": {\"type\": \"boolean\", \"binaryFormat\": \"?\", \"index\": 1},\n                \"genomeType\": {\"type\": \"integer\", \"binaryFormat\": \"B\", \"index\": 2},\n            },\n        }\n        for example, expected in [\n            (\n                b\"E,\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x01\",\n                {\"genomeID\": 11333, \"genomeType\": 1, \"isNull\": False},\n            ),\n            (\n                b\"\\xdd.\\x00\\x00\\x00\\x00\\x00\\x00\\x01\\x00\",\n                {\"genomeID\": 11997, \"genomeType\": 0, \"isNull\": True},\n            ),\n        ]:\n            assert metadata.MetadataSchema(schema).decode_row(example) == expected\n\n    def test_individual(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": [\"object\", \"null\"],\n            \"properties\": {\n                \"pedigreeID\": {\"type\": \"integer\", \"binaryFormat\": \"q\", \"index\": 1},\n                \"age\": {\"type\": \"integer\", \"binaryFormat\": \"i\", \"index\": 2},\n                \"subpopulationID\": {\n                   
 \"type\": \"integer\",\n                    \"binaryFormat\": \"i\",\n                    \"index\": 3,\n                },\n                \"sex\": {\"type\": \"integer\", \"binaryFormat\": \"i\", \"index\": 4},\n                \"flags\": {\"type\": \"integer\", \"binaryFormat\": \"I\", \"index\": 5},\n            },\n        }\n        for example, expected in [\n            (\n                b\"\\x17\\x99\\x07\\x00\\x00\\x00\\x00\\x00\\x05\\x00\\x01\\x00\\x03\\x00\\x00\\x00\\x01\"\n                b\"\\x00\\x00\\x00\\x00\\x10\\x00\\x00\",\n                {\n                    \"age\": 65541,\n                    \"flags\": 4096,\n                    \"pedigreeID\": 497943,\n                    \"sex\": 1,\n                    \"subpopulationID\": 3,\n                },\n            ),\n            (b\"\", None),\n            (\n                b\"\\x18\\x99\\x07\\x00\\x00\\x00\\x00\\x00\\x05\\x00\\x00\\x00\\x01\\x00\\x00\\x00\\x01\"\n                b\"\\x00\\x00\\x00\\x00\\x00\\x00\\x00\",\n                {\n                    \"age\": 5,\n                    \"flags\": 0,\n                    \"pedigreeID\": 497944,\n                    \"sex\": 1,\n                    \"subpopulationID\": 1,\n                },\n            ),\n        ]:\n            assert metadata.MetadataSchema(schema).decode_row(example) == expected\n\n    def test_mutation(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"stacked_mutation_array\": {\n                    \"type\": \"array\",\n                    \"noLengthEncodingExhaustBuffer\": True,\n                    \"items\": {\n                        \"type\": \"object\",\n                        \"properties\": {\n                            \"mutationTypeID\": {\n                                \"type\": \"integer\",\n                                \"binaryFormat\": \"i\",\n                                \"index\": 1,\n 
                           },\n                            \"selectionCoeff\": {\n                                \"type\": \"number\",\n                                \"binaryFormat\": \"f\",\n                                \"index\": 2,\n                            },\n                            \"subpopulationID\": {\n                                \"type\": \"integer\",\n                                \"binaryFormat\": \"i\",\n                                \"index\": 3,\n                            },\n                            \"originGeneration\": {\n                                \"type\": \"integer\",\n                                \"binaryFormat\": \"i\",\n                                \"index\": 4,\n                            },\n                            \"nucleotide\": {\n                                \"type\": \"integer\",\n                                \"binaryFormat\": \"b\",\n                                \"index\": 5,\n                            },\n                        },\n                    },\n                }\n            },\n        }\n\n        for example, expected in [\n            (\n                b\"\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x01\\x00\\x00\\x00\\xd8\\x03\\x00\\x00\\xff\",\n                [\n                    {\n                        \"mutationTypeID\": 1,\n                        \"selectionCoeff\": 0.0,\n                        \"subpopulationID\": 1,\n                        \"originGeneration\": 984,\n                        \"nucleotide\": -1,\n                    }\n                ],\n            ),\n            (\n                b\"\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x01\\x00\\x00\\x00\\xc8\\x03\\x00\\x00\\xff\"\n                b\"\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x01\\x00\\x00\\x00\\x94\\x01\\x00\\x00\\xff\",\n                [\n                    {\n                        \"mutationTypeID\": 1,\n                        \"selectionCoeff\": 0.0,\n                  
      \"subpopulationID\": 1,\n                        \"originGeneration\": 968,\n                        \"nucleotide\": -1,\n                    },\n                    {\n                        \"mutationTypeID\": 1,\n                        \"selectionCoeff\": 0.0,\n                        \"subpopulationID\": 1,\n                        \"originGeneration\": 404,\n                        \"nucleotide\": -1,\n                    },\n                ],\n            ),\n            (\n                b\"\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x01\\x00\\x00\\x00\\xd1\\x03\\x00\\x00\\xff\"\n                b\"\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x01\\x00\\x00\\x00\\xb1\\x02\\x00\\x00\\xff\"\n                b\"\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x01\\x00\\x00\\x00\\xdf\\x01\\x00\\x00\\xff\"\n                b\"\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x01\\x00\\x00\\x00\\xbc\\x00\\x00\\x00\\xff\",\n                [\n                    {\n                        \"mutationTypeID\": 1,\n                        \"selectionCoeff\": 0.0,\n                        \"subpopulationID\": 1,\n                        \"originGeneration\": 977,\n                        \"nucleotide\": -1,\n                    },\n                    {\n                        \"mutationTypeID\": 1,\n                        \"selectionCoeff\": 0.0,\n                        \"subpopulationID\": 1,\n                        \"originGeneration\": 689,\n                        \"nucleotide\": -1,\n                    },\n                    {\n                        \"mutationTypeID\": 1,\n                        \"selectionCoeff\": 0.0,\n                        \"subpopulationID\": 1,\n                        \"originGeneration\": 479,\n                        \"nucleotide\": -1,\n                    },\n                    {\n                        \"mutationTypeID\": 1,\n                        \"selectionCoeff\": 0.0,\n                        \"subpopulationID\": 1,\n           
             \"originGeneration\": 188,\n                        \"nucleotide\": -1,\n                    },\n                ],\n            ),\n        ]:\n            assert (\n                metadata.MetadataSchema(schema).decode_row(example)[\n                    \"stacked_mutation_array\"\n                ]\n                == expected\n            )\n\n    def test_population(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"subpopulationID\": {\n                    \"type\": \"integer\",\n                    \"binaryFormat\": \"i\",\n                    \"index\": 0,\n                },\n                \"femaleCloneFraction\": {\n                    \"type\": \"number\",\n                    \"binaryFormat\": \"d\",\n                    \"index\": 1,\n                },\n                \"maleCloneFraction\": {\n                    \"type\": \"number\",\n                    \"binaryFormat\": \"d\",\n                    \"index\": 2,\n                },\n                \"sexRatio\": {\"type\": \"number\", \"binaryFormat\": \"d\", \"index\": 3},\n                \"boundsX0\": {\"type\": \"number\", \"binaryFormat\": \"d\", \"index\": 4},\n                \"boundsX1\": {\"type\": \"number\", \"binaryFormat\": \"d\", \"index\": 5},\n                \"boundsY0\": {\"type\": \"number\", \"binaryFormat\": \"d\", \"index\": 6},\n                \"boundsY1\": {\"type\": \"number\", \"binaryFormat\": \"d\", \"index\": 7},\n                \"boundsZ0\": {\"type\": \"number\", \"binaryFormat\": \"d\", \"index\": 8},\n                \"boundsZ1\": {\"type\": \"number\", \"binaryFormat\": \"d\", \"index\": 9},\n                \"migrationRecCount\": {\n                    \"type\": \"integer\",\n                    \"binaryFormat\": \"d\",\n                    \"index\": 10,\n                },\n            },\n        }\n        example = (\n            
b\"\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\"\n            b\"\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\"\n            b\"\\x00\\x00\\xe0?\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\"\n            b\"\\x00\\xf0?\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\"\n            b\"\\xf0?\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\xf0\"\n            b\"?\\x00\\x00\\x00\\x00\"\n        )\n        expected = {\n            \"boundsX0\": 0.5,\n            \"boundsX1\": 0.0,\n            \"boundsY0\": 1.0,\n            \"boundsY1\": 0.0,\n            \"boundsZ0\": 1.0,\n            \"boundsZ1\": 0.0,\n            \"femaleCloneFraction\": 0.0,\n            \"maleCloneFraction\": 0.0,\n            \"migrationRecCount\": 1.0,\n            \"sexRatio\": 0.0,\n            \"subpopulationID\": 1,\n        }\n        assert metadata.MetadataSchema(schema).decode_row(example) == expected\n\n\nclass TestTableCollectionEquality:\n    def test_equality(self):\n        ts = msprime.simulate(10, random_seed=42)\n        tables = ts.dump_tables()\n        tables2 = ts.dump_tables()\n        schema = collections.OrderedDict(\n            codec=\"json\",\n            title=\"Example Metadata\",\n            type=\"object\",\n            properties=collections.OrderedDict(\n                one={\"type\": \"string\"}, two={\"type\": \"number\"}\n            ),\n            required=[\"one\", \"two\"],\n            additionalProperties=False,\n        )\n        schema2 = collections.OrderedDict(\n            type=\"object\",\n            properties=collections.OrderedDict(\n                two={\"type\": \"number\"}, one={\"type\": \"string\"}\n            ),\n            required=[\"one\", \"two\"],\n            additionalProperties=False,\n            title=\"Example Metadata\",\n            codec=\"json\",\n        )\n        
tables.metadata_schema = metadata.MetadataSchema(schema)\n        assert tables != tables2\n        tables2.metadata_schema = metadata.MetadataSchema(schema2)\n        tables.assert_equals(tables2)\n        tables.metadata = collections.OrderedDict(one=\"tree\", two=5)\n        assert tables != tables2\n        tables2.metadata = collections.OrderedDict(two=5, one=\"tree\")\n        tables.assert_equals(tables2)\n\n    def test_fixing_uncanonical(self):\n        ts = msprime.simulate(10, random_seed=42)\n        tables = ts.dump_tables()\n        schema = collections.OrderedDict(\n            codec=\"json\",\n            title=\"Example Metadata\",\n            type=\"object\",\n            properties=collections.OrderedDict(\n                one={\"type\": \"string\"}, two={\"type\": \"number\"}\n            ),\n            required=[\"one\", \"two\"],\n            additionalProperties=False,\n        )\n        # Set with low-level to emulate loading.\n        tables._ll_tables.metadata_schema = json.dumps(schema)\n        assert tables._ll_tables.metadata_schema != tskit.canonical_json(schema)\n        tables.metadata_schema = tables.metadata_schema\n        assert tables._ll_tables.metadata_schema == tskit.canonical_json(schema)\n\n\nclass TestStructuredArrays:\n    \"\"\"\n    Tests for the get_numpy_dtype method in StructCodec\n    \"\"\"\n\n    def test_not_implemented_json(self):\n        schema = {\"codec\": \"json\"}\n        with pytest.raises(NotImplementedError):\n            metadata.MetadataSchema(schema).numpy_dtype()\n        with pytest.raises(NotImplementedError):\n            metadata.MetadataSchema(schema).structured_array_from_buffer(b\"\")\n\n    @pytest.mark.parametrize(\n        \"type_name, format_code, numpy_type\",\n        [\n            (\"integer\", \"b\", \"<i1\"),\n            (\"integer\", \"B\", \"u1\"),\n            (\"integer\", \"h\", \"<i2\"),\n            (\"integer\", \"H\", \"<u2\"),\n            (\"integer\", \"i\", 
\"<i4\"),\n            (\"integer\", \"I\", \"<u4\"),\n            (\"integer\", \"q\", \"<i8\"),\n            (\"integer\", \"Q\", \"<u8\"),\n            (\"number\", \"f\", \"<f4\"),\n            (\"number\", \"d\", \"<f8\"),\n            (\"boolean\", \"?\", \"?\"),\n            (\"string\", \"c\", \"S1\"),\n            (\"string\", \"s\", \"S1\"),\n            (\"string\", \"10s\", \"S10\"),\n            (\"null\", \"x\", \"V1\"),\n            (\"null\", \"5x\", \"V5\"),\n        ],\n    )\n    def test_types(self, type_name, format_code, numpy_type):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\"value\": {\"type\": type_name, \"binaryFormat\": format_code}},\n        }\n\n        schema = metadata.MetadataSchema(schema)\n        dtype = schema.numpy_dtype()\n        assert dtype.names == (\"value\",)\n        assert dtype[\"value\"] == np.dtype(numpy_type)\n\n        test_arrays = {\n            \"integer\": [{\"value\": i} for i in range(3)],\n            \"number\": [{\"value\": i + 0.5} for i in range(3)],\n            \"boolean\": [{\"value\": i} for i in [True, False, True, True]],\n            \"string\": [\n                {\"value\": str(i) * (1 if format_code in \"cs\" else 3)} for i in range(3)\n            ],\n            \"null\": [{\"value\": None}, {\"value\": None}, {\"value\": None}],\n        }\n        test_array = test_arrays[type_name]\n        encoded = b\"\".join(schema.validate_and_encode_row(row) for row in test_array)\n        struct_array = schema.structured_array_from_buffer(encoded)\n\n        if \"S\" not in numpy_type and \"V\" not in numpy_type:\n            assert np.array_equal(\n                struct_array[\"value\"], [i[\"value\"] for i in test_array]\n            )\n        elif \"S\" in numpy_type:\n            assert np.array_equal(\n                struct_array[\"value\"], [i[\"value\"].encode() for i in test_array]\n            )\n        
else:\n            for val in struct_array[\"value\"]:\n                assert (\n                    str(val) == \"b'\\\\x00'\"\n                    if numpy_type == \"V1\"\n                    else \"b'\\\\x00\\\\x00\\\\x00\\\\x00\\\\x00'\"\n                )\n\n    def test_object_with_multiple_fields(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"id\": {\"type\": \"integer\", \"binaryFormat\": \"i\"},\n                \"name\": {\"type\": \"string\", \"binaryFormat\": \"10s\"},\n                \"value\": {\"type\": \"number\", \"binaryFormat\": \"d\"},\n                \"active\": {\"type\": \"boolean\", \"binaryFormat\": \"?\"},\n            },\n        }\n\n        schema = metadata.MetadataSchema(schema)\n        dtype = schema.numpy_dtype()\n        assert dtype.names == (\"active\", \"id\", \"name\", \"value\")  # Note reordering!\n        assert dtype[\"id\"] == np.dtype(\"<i4\")\n        assert dtype[\"name\"] == np.dtype(\"S10\")\n        assert dtype[\"value\"] == np.dtype(\"<f8\")\n        assert dtype[\"active\"] == np.dtype(\"?\")\n\n        # Test array of objects with multiple fields\n        test_array = [\n            {\"id\": 1, \"name\": \"test1\", \"value\": 1.5, \"active\": True},\n            {\"id\": 2, \"name\": \"test2\", \"value\": 2.5, \"active\": False},\n            {\"id\": 3, \"name\": \"test3\", \"value\": 3.5, \"active\": True},\n        ]\n        encoded = b\"\".join(schema.validate_and_encode_row(row) for row in test_array)\n        struct_array = schema.structured_array_from_buffer(encoded)\n\n        assert np.array_equal(struct_array[\"id\"], [1, 2, 3])\n        assert np.array_equal(struct_array[\"value\"], [1.5, 2.5, 3.5])\n        assert np.array_equal(struct_array[\"active\"], [True, False, True])\n        assert np.array_equal(struct_array[\"name\"], [b\"test1\", b\"test2\", b\"test3\"])\n\n    def 
test_nested_objects(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"id\": {\"type\": \"integer\", \"binaryFormat\": \"i\"},\n                \"nested\": {\n                    \"type\": \"object\",\n                    \"properties\": {\n                        \"x\": {\"type\": \"number\", \"binaryFormat\": \"d\"},\n                        \"y\": {\"type\": \"number\", \"binaryFormat\": \"d\"},\n                    },\n                },\n            },\n        }\n\n        schema = metadata.MetadataSchema(schema)\n        dtype = schema.numpy_dtype()\n        assert dtype.names == (\"id\", \"nested\")\n        assert dtype[\"id\"] == np.dtype(\"<i4\")\n        assert dtype[\"nested\"].names == (\"x\", \"y\")\n        assert dtype[\"nested\"][\"x\"] == np.dtype(\"<f8\")\n        assert dtype[\"nested\"][\"y\"] == np.dtype(\"<f8\")\n\n        # Test array of objects with nested objects\n        test_array = [\n            {\"id\": 1, \"nested\": {\"x\": 1.0, \"y\": 2.0}},\n            {\"id\": 2, \"nested\": {\"x\": 3.0, \"y\": 4.0}},\n            {\"id\": 3, \"nested\": {\"x\": 5.0, \"y\": 6.0}},\n        ]\n        encoded = b\"\".join(schema.validate_and_encode_row(row) for row in test_array)\n        struct_array = schema.structured_array_from_buffer(encoded)\n\n        assert np.array_equal(struct_array[\"id\"], [1, 2, 3])\n        assert np.array_equal(struct_array[\"nested\"][\"x\"], [1.0, 3.0, 5.0])\n        assert np.array_equal(struct_array[\"nested\"][\"y\"], [2.0, 4.0, 6.0])\n\n    def test_fixed_length_arrays(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"vector\": {\n                    \"type\": \"array\",\n                    \"length\": 3,\n                    \"items\": {\"type\": \"number\", \"binaryFormat\": \"d\"},\n                },\n           
     \"matrix\": {\n                    \"type\": \"array\",\n                    \"length\": 2,\n                    \"items\": {\n                        \"type\": \"array\",\n                        \"length\": 2,\n                        \"items\": {\"type\": \"integer\", \"binaryFormat\": \"i\"},\n                    },\n                },\n            },\n        }\n\n        schema = metadata.MetadataSchema(schema)\n        dtype = schema.numpy_dtype()\n        assert dtype.names == (\"matrix\", \"vector\")  # Note reordering\n        assert dtype[\"vector\"].shape == (3,)\n        assert dtype[\"vector\"].base == np.dtype(\"<f8\")\n        assert dtype[\"matrix\"].shape == (2,)\n        assert dtype[\"matrix\"].base == (np.dtype(\"<i4\"), (2,))\n\n        # Test array with fixed-length arrays\n        test_array = [\n            {\"vector\": [1.1, 2.2, 3.3], \"matrix\": [[1, 2], [3, 4]]},\n            {\"vector\": [4.4, 5.5, 6.6], \"matrix\": [[5, 6], [7, 8]]},\n            {\"vector\": [7.7, 8.8, 9.9], \"matrix\": [[9, 10], [11, 12]]},\n        ]\n        encoded = b\"\".join(schema.validate_and_encode_row(row) for row in test_array)\n        struct_array = schema.structured_array_from_buffer(encoded)\n\n        expected_vectors = np.array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6], [7.7, 8.8, 9.9]])\n        assert np.allclose(struct_array[\"vector\"], expected_vectors)\n\n        expected_matrices = np.array(\n            [[[1, 2], [3, 4]], [[5, 6], [7, 8]], [[9, 10], [11, 12]]]\n        )\n        assert np.array_equal(struct_array[\"matrix\"], expected_matrices)\n\n    def test_complex_nested_structure(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"id\": {\"type\": \"integer\", \"binaryFormat\": \"i\"},\n                \"data\": {\n                    \"type\": \"array\",\n                    \"length\": 2,\n                    \"items\": {\n                    
    \"type\": \"object\",\n                        \"properties\": {\n                            \"name\": {\"type\": \"string\", \"binaryFormat\": \"10s\"},\n                            \"coords\": {\n                                \"type\": \"array\",\n                                \"length\": 3,\n                                \"items\": {\"type\": \"number\", \"binaryFormat\": \"f\"},\n                            },\n                        },\n                    },\n                },\n            },\n        }\n\n        schema = metadata.MetadataSchema(schema)\n        dtype = schema.numpy_dtype()\n        assert dtype.names == (\"data\", \"id\")  # Note reordering\n        assert dtype[\"id\"] == np.dtype(\"<i4\")\n        assert dtype[\"data\"].shape == (2,)\n        assert dtype[\"data\"].base.names == (\"coords\", \"name\")  # Note reordering\n        assert dtype[\"data\"].base[\"name\"] == np.dtype(\"S10\")\n        assert dtype[\"data\"].base[\"coords\"].shape == (3,)\n        assert dtype[\"data\"].base[\"coords\"].base == np.dtype(\"<f4\")\n\n        test_array = [\n            {\n                \"id\": 1,\n                \"data\": [\n                    {\"name\": \"point1\", \"coords\": [1.0, 2.0, 3.0]},\n                    {\"name\": \"point2\", \"coords\": [4.0, 5.0, 6.0]},\n                ],\n            },\n            {\n                \"id\": 2,\n                \"data\": [\n                    {\"name\": \"point3\", \"coords\": [7.0, 8.0, 9.0]},\n                    {\"name\": \"point4\", \"coords\": [10.0, 11.0, 12.0]},\n                ],\n            },\n            {\n                \"id\": 3,\n                \"data\": [\n                    {\"name\": \"point5\", \"coords\": [13.0, 14.0, 15.0]},\n                    {\"name\": \"point6\", \"coords\": [16.0, 17.0, 18.0]},\n                ],\n            },\n        ]\n        encoded = b\"\".join(schema.validate_and_encode_row(row) for row in test_array)\n        
struct_array = schema.structured_array_from_buffer(encoded)\n\n        assert np.array_equal(struct_array[\"id\"], [1, 2, 3])\n\n        expected_names = np.array(\n            [[b\"point1\", b\"point2\"], [b\"point3\", b\"point4\"], [b\"point5\", b\"point6\"]]\n        )\n        assert np.array_equal(struct_array[\"data\"][\"name\"], expected_names)\n\n        expected_coords = np.array(\n            [\n                [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],\n                [[7.0, 8.0, 9.0], [10.0, 11.0, 12.0]],\n                [[13.0, 14.0, 15.0], [16.0, 17.0, 18.0]],\n            ]\n        )\n        assert np.allclose(struct_array[\"data\"][\"coords\"], expected_coords)\n\n    def test_unsupported_formats(self):\n        # Pascal strings not supported\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\"pascal_string\": {\"type\": \"string\", \"binaryFormat\": \"10p\"}},\n        }\n\n        with pytest.raises(ValueError, match=\"Pascal string format\"):\n            metadata.MetadataSchema(schema).numpy_dtype()\n\n    def test_variable_length_arrays_not_supported(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"var_array\": {\n                    \"type\": \"array\",\n                    \"items\": {\"type\": \"integer\", \"binaryFormat\": \"i\"},\n                }\n            },\n        }\n\n        with pytest.raises(ValueError, match=\"Only fixed-length arrays\"):\n            metadata.MetadataSchema(schema).numpy_dtype()\n\n    def test_null_union_top_level_not_supported(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": [\"object\", \"null\"],\n            \"properties\": {\n                \"id\": {\"type\": \"integer\", \"binaryFormat\": \"i\"},\n                \"name\": {\"type\": \"string\", \"binaryFormat\": \"10s\"},\n            },\n        
}\n\n        with pytest.raises(\n            ValueError, match=\"Top level object/null union not supported\"\n        ):\n            metadata.MetadataSchema(schema).numpy_dtype()\n\n    def test_explicit_ordering(self):\n        schema = {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"id\": {\"type\": \"integer\", \"binaryFormat\": \"i\", \"index\": 1},\n                \"name\": {\"type\": \"string\", \"binaryFormat\": \"10s\", \"index\": 2},\n                \"age\": {\"type\": \"integer\", \"binaryFormat\": \"i\", \"index\": 3},\n            },\n            \"required\": [\"id\", \"name\", \"age\"],\n        }\n\n        dtype = metadata.MetadataSchema(schema).numpy_dtype()\n        assert dtype.names == (\"id\", \"name\", \"age\")\n"
  },
  {
    "path": "python/tests/test_ms.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2022 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTest cases for ms output in tskit. All of these tests have separate versions\nfor the cases of single replicate and multiple replicates. 
This is because\nmsprime.simulate generates a tree_sequence object if the num_replicates argument\nis not used but an iterator over tree_sequences if the num_replicates argument\nis used.\n\"\"\"\n\nimport collections\nimport itertools\nimport os\nimport tempfile\nimport unittest\n\nimport msprime\n\nimport tskit as ts\n\nlength = 1e2\nmutation_rate = 1e-2\nnum_replicates = 3\n\n\ndef get_ms_file_quantity(ms_file, quantity):\n    quantities = {}\n    num_replicates = 0\n    num_sites = []\n    num_positions = []\n    num_haplotypes = []\n    genotypes = []\n    positions = []\n    gens = []\n    for line in ms_file:\n        if len(line.split()) > 0:\n            if line.split()[0] == \"//\":\n                num_replicates = num_replicates + 1\n                num_haplotypes.append(0)\n                if len(gens) > 0:\n                    genotypes.append(gens)\n                    gens = []\n            if line.split()[0] == \"segsites:\":\n                num_sites.append(int(line.split()[1]))\n            if line.split()[0] == \"positions:\":\n                num_positions.append(len(line.split()) - 1)\n                positions.append(line[11:].rstrip())\n            if (\n                line[0:2] == \"00\"\n                or line[0:2] == \"01\"\n                or line[0:2] == \"10\"\n                or line[0:2] == \"11\"\n            ):\n                num_haplotypes[-1] = num_haplotypes[-1] + 1\n                gens.append(line.rstrip())\n    genotypes.append(gens)\n    quantities[\"num_replicates\"] = num_replicates\n    quantities[\"num_sites\"] = num_sites\n    quantities[\"num_positions\"] = num_positions\n    quantities[\"num_haplotypes\"] = num_haplotypes\n    quantities[\"genotypes\"] = genotypes\n    quantities[\"positions\"] = positions\n\n    return quantities[quantity]\n\n\nclass TestNumReplicates(unittest.TestCase):\n    \"\"\"\n    Tests that the number of replicates written out is the same as\n    the number of replicates simulated\n    
\"\"\"\n\n    def verify_num_replicates(self, tree_seq, num_replicates):\n        if isinstance(tree_seq, collections.abc.Iterable):\n            with tempfile.TemporaryDirectory() as temp_dir:\n                ms_file_path = os.path.join(temp_dir, \"testing_ms_file.txt\")\n                with open(ms_file_path, \"w\") as f:\n                    ts.write_ms(\n                        tree_seq,\n                        f,\n                        num_replicates=num_replicates,\n                    )\n                with open(ms_file_path) as handle:\n                    num_replicates_file = get_ms_file_quantity(handle, \"num_replicates\")\n        else:\n            with tempfile.TemporaryDirectory() as temp_dir:\n                ms_file_path = os.path.join(temp_dir, \"testing_ms_file.txt\")\n                with open(ms_file_path, \"w\") as f:\n                    ts.write_ms(tree_seq, f)\n                with open(ms_file_path) as handle:\n                    num_replicates_file = get_ms_file_quantity(handle, \"num_replicates\")\n        self.assertEqual(num_replicates, num_replicates_file)\n\n    def test_num_replicates(self):\n        tree_seq = msprime.simulate(\n            25, length=length, mutation_rate=mutation_rate, random_seed=123\n        )\n        self.verify_num_replicates(tree_seq, 1)\n\n    def test_num_replicates_multiple(self):\n        tree_seq = msprime.simulate(\n            25,\n            length=length,\n            mutation_rate=mutation_rate,\n            random_seed=123,\n            num_replicates=num_replicates,\n        )\n        self.verify_num_replicates(tree_seq, num_replicates)\n\n\nclass TestNumHaplotypes(unittest.TestCase):\n    \"\"\"\n    Tests that the number of haplotypes output are the same as the\n    number of individuals simulated.\n    \"\"\"\n\n    def verify_num_haplotypes(self, tree_seq, tree_seq2, num_replicates):\n        if isinstance(tree_seq, collections.abc.Iterable):\n            with 
tempfile.TemporaryDirectory() as temp_dir:\n                ms_file_path = os.path.join(temp_dir, \"testing_ms_file.txt\")\n                with open(ms_file_path, \"w\") as f:\n                    ts.write_ms(\n                        tree_seq,\n                        f,\n                        num_replicates=num_replicates,\n                    )\n                with open(ms_file_path) as handle:\n                    num_haplotypes = get_ms_file_quantity(handle, \"num_haplotypes\")\n            j = 0\n            for ts_indv in tree_seq2:\n                self.assertEqual(ts_indv.num_samples, num_haplotypes[j])\n                j = j + 1\n        else:\n            with tempfile.TemporaryDirectory() as temp_dir:\n                ms_file_path = os.path.join(temp_dir, \"testing_ms_file.txt\")\n                with open(ms_file_path, \"w\") as f:\n                    ts.write_ms(tree_seq, f)\n                with open(ms_file_path) as handle:\n                    num_haplotypes = get_ms_file_quantity(handle, \"num_haplotypes\")\n            self.assertEqual(tree_seq.num_samples, num_haplotypes[0])\n\n    def test_num_haplotypes(self):\n        tree_seq = msprime.simulate(\n            25, length=length, mutation_rate=mutation_rate, random_seed=123\n        )\n        self.verify_num_haplotypes(tree_seq, tree_seq, 1)\n\n    def test_num_haplotypes_replicates(self):\n        tree_seq = msprime.simulate(\n            25,\n            length=length,\n            mutation_rate=mutation_rate,\n            random_seed=123,\n            num_replicates=num_replicates,\n        )\n        tree_seq, tree_seq2 = itertools.tee(tree_seq)\n        self.verify_num_haplotypes(tree_seq, tree_seq2, num_replicates)\n\n\nclass TestNumSites(unittest.TestCase):\n    \"\"\"\n    Tests that the number of sites written out as well as the length\n    of the positions list match the number of variants in the tree sequence\n    \"\"\"\n\n    def verify_num_sites(self, tree_seq, tree_seq2, 
num_replicates):\n        if isinstance(tree_seq, collections.abc.Iterable):\n            with tempfile.TemporaryDirectory() as temp_dir:\n                ms_file_path = os.path.join(temp_dir, \"testing_ms_file.txt\")\n                with open(ms_file_path, \"w\") as f:\n                    ts.write_ms(\n                        tree_seq,\n                        f,\n                        num_replicates=num_replicates,\n                    )\n                with open(ms_file_path) as handle:\n                    num_sites = get_ms_file_quantity(handle, \"num_sites\")\n                with open(ms_file_path) as handle:\n                    num_positions = get_ms_file_quantity(handle, \"num_positions\")\n            j = 0\n            for ts_indv in tree_seq2:\n                self.assertEqual(ts_indv.num_sites, num_sites[j])\n                self.assertEqual(ts_indv.num_sites, num_positions[j])\n                j = j + 1\n        else:\n            with tempfile.TemporaryDirectory() as temp_dir:\n                ms_file_path = os.path.join(temp_dir, \"testing_ms_file.txt\")\n                with open(ms_file_path, \"w\") as f:\n                    ts.write_ms(tree_seq, f)\n                with open(ms_file_path) as handle:\n                    num_sites = get_ms_file_quantity(handle, \"num_sites\")\n                with open(ms_file_path) as handle:\n                    num_positions = get_ms_file_quantity(handle, \"num_positions\")\n            self.assertEqual(tree_seq.num_sites, num_sites[0])\n            self.assertEqual(tree_seq.num_sites, num_positions[0])\n\n    def test_num_sites(self):\n        tree_seq = msprime.simulate(\n            25, length=length, mutation_rate=mutation_rate, random_seed=123\n        )\n        self.verify_num_sites(tree_seq, tree_seq, 1)\n\n    def test_num_sites_replicates(self):\n        tree_seq = msprime.simulate(\n            25,\n            length=length,\n            mutation_rate=mutation_rate,\n            
random_seed=123,\n            num_replicates=num_replicates,\n        )\n        tree_seq, tree_seq2 = itertools.tee(tree_seq)\n        self.verify_num_sites(tree_seq, tree_seq2, num_replicates)\n\n\nclass TestGenotypes(unittest.TestCase):\n    \"\"\"\n    Tests that the haplotypes written out are the same as the haplotypes generated.\n    \"\"\"\n\n    def get_genotypes(self, tree_seq):\n        genotypes = tree_seq.genotype_matrix()\n        gens_array = []\n        for k in range(tree_seq.num_samples):\n            tmp_str = \"\".join(map(str, genotypes[:, k]))\n            gens_array.append(tmp_str)\n        return gens_array\n\n    def verify_genotypes(self, tree_seq, tree_seq2, num_replicates):\n        if isinstance(tree_seq, collections.abc.Iterable):\n            with tempfile.TemporaryDirectory() as temp_dir:\n                ms_file_path = os.path.join(temp_dir, \"testing_ms_file.txt\")\n                with open(ms_file_path, \"w\") as f:\n                    ts.write_ms(\n                        tree_seq,\n                        f,\n                        num_replicates=num_replicates,\n                    )\n                with open(ms_file_path) as handle:\n                    genotypes = get_ms_file_quantity(handle, \"genotypes\")\n            j = 0\n            for ts_indv in tree_seq2:\n                self.assertEqual(self.get_genotypes(ts_indv), genotypes[j])\n                j = j + 1\n        else:\n            with tempfile.TemporaryDirectory() as temp_dir:\n                ms_file_path = os.path.join(temp_dir, \"testing_ms_file.txt\")\n                with open(ms_file_path, \"w\") as f:\n                    ts.write_ms(tree_seq, f)\n                with open(ms_file_path) as handle:\n                    genotypes = get_ms_file_quantity(handle, \"genotypes\")\n                self.assertEqual(self.get_genotypes(tree_seq), genotypes[0])\n\n    def test_genotypes(self):\n        tree_seq = msprime.simulate(\n            25, length=length, 
mutation_rate=mutation_rate, random_seed=123\n        )\n        self.verify_genotypes(tree_seq, tree_seq, 1)\n\n    def test_genotypes_replicates(self):\n        tree_seq = msprime.simulate(\n            25,\n            length=length,\n            mutation_rate=mutation_rate,\n            random_seed=123,\n            num_replicates=num_replicates,\n        )\n        tree_seq, tree_seq2 = itertools.tee(tree_seq)\n        self.verify_genotypes(tree_seq, tree_seq2, num_replicates)\n\n\nclass TestPositions(unittest.TestCase):\n    \"\"\"\n    Tests that the positions for the mutations written out are the same as the\n    positions generated.\n    \"\"\"\n\n    def get_positions(self, tree_seq):\n        positions = []\n        for i in range(tree_seq.num_sites):\n            positions.append(\n                f\"{tree_seq.site(i).position / tree_seq.sequence_length:.4f}\"\n            )\n        positions = \" \".join(positions)\n        return positions\n\n    def verify_positions(self, tree_seq, tree_seq2, num_replicates):\n        if isinstance(tree_seq, collections.abc.Iterable):\n            with tempfile.TemporaryDirectory() as temp_dir:\n                ms_file_path = os.path.join(temp_dir, \"testing_ms_file.txt\")\n                with open(ms_file_path, \"w\") as f:\n                    ts.write_ms(\n                        tree_seq,\n                        f,\n                        num_replicates=num_replicates,\n                    )\n                with open(ms_file_path) as handle:\n                    positions = get_ms_file_quantity(handle, \"positions\")\n            j = 0\n            for ts_indv in tree_seq2:\n                self.assertEqual(self.get_positions(ts_indv), positions[j])\n                j = j + 1\n        else:\n            with tempfile.TemporaryDirectory() as temp_dir:\n                ms_file_path = os.path.join(temp_dir, \"testing_ms_file.txt\")\n                with open(ms_file_path, \"w\") as f:\n                    
ts.write_ms(tree_seq, f)\n                with open(ms_file_path) as handle:\n                    positions = get_ms_file_quantity(handle, \"positions\")\n            self.assertEqual(self.get_positions(tree_seq), positions[0])\n\n    def test_positions(self):\n        tree_seq = msprime.simulate(\n            25, length=length, mutation_rate=mutation_rate, random_seed=123\n        )\n        self.verify_positions(tree_seq, tree_seq, 1)\n\n    def test_positions_replicates(self):\n        tree_seq = msprime.simulate(\n            25,\n            length=length,\n            mutation_rate=mutation_rate,\n            random_seed=123,\n            num_replicates=num_replicates,\n        )\n        tree_seq, tree_seq2 = itertools.tee(tree_seq)\n        self.verify_positions(tree_seq, tree_seq2, num_replicates)\n"
  },
  {
    "path": "python/tests/test_parsimony.py",
    "content": "# MIT License\n#\n# Copyright (c) 2019-2022 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTests for the tree parsimony methods.\n\"\"\"\n\nimport dataclasses\nimport io\nimport itertools\n\nimport Bio.Phylo.TreeConstruction\nimport msprime\nimport numpy as np\nimport pytest\n\nimport tests.tsutil as tsutil\nimport tskit\n\nINF = np.inf\n\n\ndef bp_sankoff_score(tree, genotypes, cost_matrix):\n    \"\"\"\n    Returns the sankoff score matrix computed by BioPython.\n    \"\"\"\n    ts = tree.tree_sequence\n    bp_tree = Bio.Phylo.read(io.StringIO(tree.as_newick()), \"newick\")\n    records = [\n        Bio.SeqRecord.SeqRecord(Bio.Seq.Seq(str(genotypes[j])), id=f\"n{u}\")\n        for j, u in enumerate(ts.samples())\n    ]\n    alignment = Bio.Align.MultipleSeqAlignment(records)\n    lower_triangular = []\n    for j in range(cost_matrix.shape[0]):\n        lower_triangular.append(list(cost_matrix[j, : j + 1]))\n   
 bp_matrix = Bio.Phylo.TreeConstruction._Matrix(\n        list(map(str, range(cost_matrix.shape[0]))), lower_triangular\n    )\n    ps = Bio.Phylo.TreeConstruction.ParsimonyScorer(bp_matrix)\n    return ps.get_score(bp_tree, alignment)\n\n\ndef bp_fitch_score(tree, genotypes):\n    \"\"\"\n    Returns the Fitch parsimony score computed by BioPython.\n    \"\"\"\n    ts = tree.tree_sequence\n    bp_tree = Bio.Phylo.read(io.StringIO(tree.as_newick()), \"newick\")\n    records = [\n        Bio.SeqRecord.SeqRecord(Bio.Seq.Seq(str(genotypes[j])), id=f\"n{u}\")\n        for j, u in enumerate(ts.samples())\n    ]\n    alignment = Bio.Align.MultipleSeqAlignment(records)\n    ps = Bio.Phylo.TreeConstruction.ParsimonyScorer()\n    return ps.get_score(bp_tree, alignment)\n\n\ndef sankoff_score(tree, genotypes, cost_matrix):\n    \"\"\"\n    Returns a num_nodes * num_alleles numpy array giving the minimum cost\n    scores for the specified genotypes on the specified tree. If a cost\n    matrix is provided, it must be a num_alleles * num_alleles array giving\n    the cost of transitioning from each allele to every other allele.\n    \"\"\"\n    num_alleles = cost_matrix.shape[0]\n    S = np.zeros((tree.tree_sequence.num_nodes, num_alleles))\n    for allele, u in zip(genotypes, tree.tree_sequence.samples()):\n        S[u, :] = INF\n        S[u, allele] = 0\n    for parent in tree.nodes(order=\"postorder\"):\n        for child in tree.children(parent):\n            for j in range(num_alleles):\n                S[parent, j] += np.min(cost_matrix[:, j] + S[child])\n    return S\n\n\ndef fitch_score(tree, genotypes):\n    \"\"\"\n    Returns the Fitch parsimony score for the specified set of genotypes.\n    \"\"\"\n    # Use the simplest set operation encoding of the set operations.\n    A = {}\n    for allele, u in zip(genotypes, tree.tree_sequence.samples()):\n        A[u] = {allele}\n    score = 0\n    for u in tree.nodes(order=\"postorder\"):\n        if tree.is_internal(u):\n   
         A[u] = set.intersection(*[A[v] for v in tree.children(u)])\n            if len(A[u]) == 0:\n                A[u] = set.union(*[A[v] for v in tree.children(u)])\n                score += 1\n    return score\n\n\ndef fitch_map_mutations(tree, genotypes, alleles):\n    \"\"\"\n    Returns the Fitch parsimony reconstruction for the specified set of genotypes.\n    The reconstruction is specified by returning the ancestral state and a\n    list of mutations on the tree. Each mutation is a (node, parent, state)\n    triple, where node is the node over which the transition occurs, the\n    parent is the index of the parent transition above it on the tree (or -1\n    if there is none) and state is the new state.\n    \"\"\"\n    genotypes = np.array(genotypes)\n    # Encode the set operations using a numpy array.\n    not_missing = genotypes != -1\n    if np.sum(not_missing) == 0:\n        raise ValueError(\"Must have at least one non-missing genotype\")\n    num_alleles = np.max(genotypes[not_missing]) + 1\n    A = np.zeros((tree.tree_sequence.num_nodes, num_alleles), dtype=np.int8)\n    for allele, u in zip(genotypes, tree.tree_sequence.samples()):\n        if allele != -1:\n            A[u, allele] = 1\n        else:\n            A[u] = 1\n    for u in tree.nodes(order=\"postorder\"):\n        if tree.num_children(u) > 2:\n            raise ValueError(\"Fitch parsimony is for binary trees only\")\n        if not tree.is_sample(u):\n            A[u] = 1\n            for v in tree.children(u):\n                A[u] = np.logical_and(A[u], A[v])\n            if np.sum(A[u]) == 0:\n                for v in tree.children(u):\n                    A[u] = np.logical_or(A[u], A[v])\n\n    root_states = np.zeros_like(A[0])\n    for root in tree.roots:\n        root_states = np.logical_or(root_states, A[root])\n    ancestral_state = np.where(root_states == 1)[0][0]\n\n    mutations = []\n    state = {}\n    for root in tree.roots:\n        state[root] = ancestral_state\n   
     parent = tskit.NULL\n        if A[root, ancestral_state] != 1:\n            state[root] = np.where(A[root] == 1)[0][0]\n            mutations.append(\n                tskit.Mutation(\n                    node=root, parent=tskit.NULL, derived_state=alleles[state[root]]\n                )\n            )\n            parent = len(mutations) - 1\n        stack = [(root, parent)]\n        while len(stack) > 0:\n            u, parent_mutation = stack.pop()\n            for v in tree.children(u):\n                state[v] = state[u]\n                if A[v, state[u]] != 1:\n                    state[v] = np.where(A[v] == 1)[0][0]\n                    mutations.append(\n                        tskit.Mutation(\n                            node=v,\n                            parent=parent_mutation,\n                            derived_state=alleles[state[v]],\n                        )\n                    )\n                    stack.append((v, len(mutations) - 1))\n                else:\n                    stack.append((v, parent_mutation))\n    return alleles[ancestral_state], mutations\n\n\ndef hartigan_map_mutations(tree, genotypes, alleles, ancestral_state=None):\n    \"\"\"\n    Returns a Hartigan parsimony reconstruction for the specified set of genotypes.\n    The reconstruction is specified by returning the ancestral state and a\n    list of mutations on the tree. 
Each mutation is a (node, parent, state)\n    triple, where node is the node over which the transition occurs, the\n    parent is the index of the parent transition above it on the tree (or -1\n    if there is none) and state is the new state.\n    \"\"\"\n    # The python version of map_mutations allows the ancestral_state to be a string\n    # from the alleles list, so we implement this at the top of this function although\n    # it doesn't need to be in the C equivalent of this function\n    if isinstance(ancestral_state, str):\n        ancestral_state = alleles.index(ancestral_state)\n\n    # equivalent C implementation can start here\n    genotypes = np.array(genotypes)\n    not_missing = genotypes != -1\n    if np.sum(not_missing) == 0:\n        raise ValueError(\"Must have at least one non-missing genotype\")\n    num_alleles = np.max(genotypes[not_missing]) + 1\n    if ancestral_state is not None:\n        if ancestral_state < 0 or ancestral_state >= len(alleles):\n            raise ValueError(\"ancestral_state must be a number from 0..(num_alleles-1)\")\n        if ancestral_state >= num_alleles:\n            num_alleles = ancestral_state + 1\n    num_nodes = tree.tree_sequence.num_nodes\n\n    # use a numpy array of 0/1 values to represent the set of states\n    # to make the code as similar as possible to the C implementation.\n    optimal_set = np.zeros((num_nodes + 1, num_alleles), dtype=np.int8)\n    for allele, u in zip(genotypes, tree.tree_sequence.samples()):\n        if allele != -1:\n            optimal_set[u, allele] = 1\n        else:\n            optimal_set[u] = 1\n\n    allele_count = np.zeros(num_alleles, dtype=int)\n    for u in tree.nodes(tree.virtual_root, order=\"postorder\"):\n        allele_count[:] = 0\n        for v in tree.children(u):\n            for j in range(num_alleles):\n                allele_count[j] += optimal_set[v, j]\n        if not tree.is_sample(u):\n            max_allele_count = np.max(allele_count)\n            
optimal_set[u, allele_count == max_allele_count] = 1\n\n    if ancestral_state is None:\n        ancestral_state = np.argmax(optimal_set[tree.virtual_root])\n    else:\n        optimal_set[tree.virtual_root] = 1\n\n    @dataclasses.dataclass\n    class StackElement:\n        node: int\n        state: int\n        mutation_parent: int\n\n    mutations = []\n    stack = [StackElement(tree.virtual_root, ancestral_state, -1)]\n    while len(stack) > 0:\n        s = stack.pop()\n        if optimal_set[s.node, s.state] == 0:\n            s.state = np.argmax(optimal_set[s.node])\n            mutation = tskit.Mutation(\n                node=s.node,\n                derived_state=alleles[s.state],\n                parent=s.mutation_parent,\n            )\n            s.mutation_parent = len(mutations)\n            mutations.append(mutation)\n        for v in tree.children(s.node):\n            stack.append(StackElement(v, s.state, s.mutation_parent))\n    return alleles[ancestral_state], mutations\n\n\ndef reconstruct_states(tree, genotypes, S, cost_matrix):\n    \"\"\"\n    Given the specified observations for the samples and tree score\n    matrix computed by sankoff_score and the transition cost matrix,\n    return the ancestral_state and state transitions on the tree.\n    \"\"\"\n    root_cost = np.zeros_like(S[0])\n    for root in tree.roots:\n        for j in range(S.shape[1]):\n            root_cost[j] += np.min(cost_matrix[:, j] + S[root])\n    ancestral_state = np.argmin(root_cost)\n\n    transitions = {}\n    A = {}\n    for root in tree.roots:\n        A[root] = ancestral_state\n        for u in tree.nodes(order=\"preorder\"):\n            for v in tree.children(u):\n                cost = cost_matrix[A[u]] + S[v]\n                A[v] = np.argmin(cost)\n                if A[u] != A[v]:\n                    transitions[v] = A[v]\n\n    return ancestral_state, transitions\n\n\ndef sankoff_map_mutations(tree, genotypes, cost_matrix=None):\n    \"\"\"\n    Returns 
the reconstructed minimal state transitions for the specified set of\n    genotypes on the specified (optional) cost matrix.\n\n    NOTE: we don't consider complications of multiple roots and internal samples\n    here.\n\n    TODO: update this to take the alleles as input like the other methods.\n    \"\"\"\n    if cost_matrix is None:\n        num_alleles = np.max(genotypes) + 1\n        cost_matrix = np.ones((num_alleles, num_alleles))\n        np.fill_diagonal(cost_matrix, 0)\n    S = sankoff_score(tree, genotypes, cost_matrix)\n    return reconstruct_states(tree, genotypes, S, cost_matrix)\n\n\ndef felsenstein_tables():\n    \"\"\"\n    Return tables for the example tree.\n    \"\"\"\n    #\n    #     8\n    #   ┏━┻━━┓\n    #   ┃    7\n    #   ┃   ┏┻┓\n    #   6   ┃ ┃\n    # ┏━┻┓  ┃ ┃\n    # ┃  5  ┃ ┃\n    # ┃ ┏┻┓ ┃ ┃\n    # 2 3 4 0 1\n    #\n    tables = tskit.TableCollection(1)\n    for _ in range(5):\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n    for j in range(4):\n        tables.nodes.add_row(flags=0, time=j + 1)\n    tables.edges.add_row(0, 1, 7, 0)\n    tables.edges.add_row(0, 1, 7, 1)\n    tables.edges.add_row(0, 1, 6, 2)\n    tables.edges.add_row(0, 1, 5, 3)\n    tables.edges.add_row(0, 1, 5, 4)\n    tables.edges.add_row(0, 1, 6, 5)\n    tables.edges.add_row(0, 1, 8, 6)\n    tables.edges.add_row(0, 1, 8, 7)\n    tables.sort()\n    return tables\n\n\ndef felsenstein_example():\n    \"\"\"\n    Returns the tree used in Felsenstein's book, pg.15.\n    \"\"\"\n    ts = felsenstein_tables().tree_sequence()\n    return ts.first()\n\n\nclass TestSankoff:\n    \"\"\"\n    Tests for the Sankoff algorithm.\n    \"\"\"\n\n    def test_felsenstein_example_score(self):\n        tree = felsenstein_example()\n        genotypes = [1, 0, 1, 0, 2]\n        cost_matrix = np.array(\n            [[0, 2.5, 1, 2.5], [2.5, 0, 2.5, 1], [1, 2.5, 0, 2.5], [2.5, 1, 2.5, 0]]\n        )\n        S = sankoff_score(tree, genotypes, cost_matrix)\n        S2 = 
[\n            [INF, 0, INF, INF],\n            [0, INF, INF, INF],\n            [INF, 0, INF, INF],\n            [0, INF, INF, INF],\n            [INF, INF, 0, INF],\n            [1, 5, 1, 5],\n            [3.5, 3.5, 3.5, 4.5],\n            [2.5, 2.5, 3.5, 3.5],\n            [6, 6, 7, 8],\n        ]\n        assert np.array_equal(S, np.array(S2))\n\n    def test_felsenstein_example_reconstruct(self):\n        tree = felsenstein_example()\n        genotypes = [1, 0, 1, 0, 2]\n        cost_matrix = np.array(\n            [[0, 2.5, 1, 2.5], [2.5, 0, 2.5, 1], [1, 2.5, 0, 2.5], [2.5, 1, 2.5, 0]]\n        )\n        S = sankoff_score(tree, genotypes, cost_matrix)\n        ancestral_state, transitions = reconstruct_states(\n            tree, genotypes, S, cost_matrix\n        )\n        assert {2: 1, 4: 2, 0: 1} == transitions\n        assert 0 == ancestral_state\n\n    def verify_infinite_sites(self, ts):\n        assert ts.num_trees == 1\n        assert ts.num_sites > 5\n        tree = ts.first()\n        for variant in ts.variants():\n            ancestral_state, transitions = sankoff_map_mutations(tree, variant.genotypes)\n            assert len(transitions) == 1\n            assert ancestral_state == 0\n            assert transitions[variant.site.mutations[0].node] == 1\n\n    def test_infinite_sites_binary_n2(self):\n        ts = msprime.simulate(2, mutation_rate=10, random_seed=1)\n        self.verify_infinite_sites(ts)\n\n    def test_infinite_sites_binary_n50(self):\n        ts = msprime.simulate(50, mutation_rate=2, random_seed=1)\n        self.verify_infinite_sites(ts)\n\n    def test_infinite_sites_acgt_n2(self):\n        ts = msprime.simulate(2, random_seed=1)\n        ts = msprime.mutate(\n            ts, rate=3, model=msprime.InfiniteSites(msprime.NUCLEOTIDES), random_seed=1\n        )\n        self.verify_infinite_sites(ts)\n\n    def test_infinite_sites_acgt_n15(self):\n        ts = msprime.simulate(2, random_seed=1)\n        ts = msprime.mutate(\n       
     ts, rate=3, model=msprime.InfiniteSites(msprime.NUCLEOTIDES), random_seed=1\n        )\n        self.verify_infinite_sites(ts)\n\n    def verify_jukes_cantor(self, ts, cost_matrix):\n        assert ts.num_trees == 1\n        assert ts.num_mutations > ts.num_sites\n        tree = ts.first()\n        for variant in ts.variants():\n            single_score = bp_sankoff_score(tree, variant.genotypes, cost_matrix)\n            score_matrix = sankoff_score(tree, variant.genotypes, cost_matrix)\n            score = np.min(score_matrix[tree.root])\n            assert single_score == score\n\n    def test_jukes_cantor_n2_simple_matrix(self):\n        cost_matrix = np.ones((4, 4))\n        np.fill_diagonal(cost_matrix, 0)\n        ts = msprime.simulate(2, random_seed=1)\n        ts = tsutil.jukes_cantor(ts, 5, 2, seed=1)\n        self.verify_jukes_cantor(ts, cost_matrix)\n\n    def test_jukes_cantor_n20_simple_matrix(self):\n        cost_matrix = np.ones((4, 4))\n        np.fill_diagonal(cost_matrix, 0)\n        ts = msprime.simulate(20, random_seed=1)\n        ts = tsutil.jukes_cantor(ts, 5, 2, seed=1)\n        self.verify_jukes_cantor(ts, cost_matrix)\n\n    def test_jukes_cantor_n2_felsenstein_matrix(self):\n        cost_matrix = np.array(\n            [[0, 2.5, 1, 2.5], [2.5, 0, 2.5, 1], [1, 2.5, 0, 2.5], [2.5, 1, 2.5, 0]]\n        )\n        ts = msprime.simulate(2, random_seed=1)\n        ts = tsutil.jukes_cantor(ts, 5, 2, seed=1)\n        self.verify_jukes_cantor(ts, cost_matrix)\n\n    def test_jukes_cantor_n20_felsenstein_matrix(self):\n        cost_matrix = np.array(\n            [[0, 2.5, 1, 2.5], [2.5, 0, 2.5, 1], [1, 2.5, 0, 2.5], [2.5, 1, 2.5, 0]]\n        )\n        ts = msprime.simulate(20, random_seed=1)\n        ts = tsutil.jukes_cantor(ts, 5, 2, seed=1)\n        self.verify_jukes_cantor(ts, cost_matrix)\n\n\nclass TestFitchParsimonyDistance:\n    \"\"\"\n    Tests for the Fitch parsimony algorithm.\n    \"\"\"\n\n    def verify(self, ts):\n        
assert ts.num_trees == 1\n        assert ts.num_sites > 3\n        tree = ts.first()\n        for variant in ts.variants(isolated_as_missing=False):\n            score = fitch_score(tree, variant.genotypes)\n            bp_score = bp_fitch_score(tree, variant.genotypes)\n            assert bp_score == score\n            ancestral_state1, transitions1 = fitch_map_mutations(\n                tree, variant.genotypes, variant.alleles\n            )\n            ancestral_state2, transitions2 = tree.map_mutations(\n                variant.genotypes, variant.alleles\n            )\n            assert ancestral_state1 == ancestral_state2\n            assert len(transitions1) == len(transitions2)\n            # The Sankoff algorithm doesn't reconstruct the state in the same way.\n            # Just a limitation of the implementation.\n            ancestral_state3, transitions3 = sankoff_map_mutations(\n                tree, variant.genotypes\n            )\n            assert ancestral_state1 == variant.alleles[ancestral_state3]\n            # The algorithms will make slightly different choices on where to put\n            # the transitions, but they are equally parsimonious.\n            assert len(transitions1) == len(transitions3)\n\n    def test_infinite_sites_binary_n2(self):\n        ts = msprime.simulate(2, mutation_rate=10, random_seed=1)\n        self.verify(ts)\n\n    def test_infinite_sites_binary_n50(self):\n        ts = msprime.simulate(50, mutation_rate=2, random_seed=1)\n        self.verify(ts)\n\n    def test_infinite_sites_acgt_n2(self):\n        ts = msprime.simulate(2, random_seed=1)\n        ts = msprime.mutate(\n            ts, rate=3, model=msprime.InfiniteSites(msprime.NUCLEOTIDES), random_seed=1\n        )\n        self.verify(ts)\n\n    def test_jukes_cantor_n2(self):\n        ts = msprime.simulate(2, random_seed=1)\n        ts = tsutil.jukes_cantor(ts, 5, 2, seed=1)\n        self.verify(ts)\n\n    def test_jukes_cantor_n5(self):\n        ts = 
msprime.simulate(5, random_seed=2)\n        ts = tsutil.jukes_cantor(ts, 5, 1, seed=0)\n        self.verify(ts)\n\n    def test_jukes_cantor_n20(self):\n        ts = msprime.simulate(20, random_seed=2)\n        ts = tsutil.jukes_cantor(ts, 5, 2, seed=11)\n        self.verify(ts)\n\n    def test_jukes_cantor_n50(self):\n        ts = msprime.simulate(50, random_seed=2)\n        ts = tsutil.jukes_cantor(ts, 5, 2, seed=1)\n        self.verify(ts)\n\n\nclass TestParsimonyBase:\n    \"\"\"\n    Base class for tests of the map_mutations parsimony method.\n    \"\"\"\n\n    def do_map_mutations(\n        self, tree, genotypes, alleles=None, ancestral_state=None, compare_lib=True\n    ):\n        if alleles is None:\n            alleles = [str(j) for j in range(max(genotypes) + 1)]\n        ancestral_state1, transitions1 = tree.map_mutations(\n            genotypes, alleles, ancestral_state\n        )\n        if compare_lib:\n            ancestral_state2, transitions2 = hartigan_map_mutations(\n                tree, genotypes, alleles, ancestral_state\n            )\n            assert ancestral_state1 == ancestral_state2\n            assert len(transitions1) == len(transitions2)\n            sorted_t1 = sorted((m.node, m.derived_state) for m in transitions1)\n            sorted_t2 = sorted((m.node, m.derived_state) for m in transitions2)\n            assert sorted_t1 == sorted_t2\n            assert transitions1 == transitions2\n        return ancestral_state1, transitions1\n\n\nclass TestParsimonyBadAlleles(TestParsimonyBase):\n    tree = tskit.Tree.generate_comb(3)\n\n    def test_too_many_alleles(self):\n        genotypes = [0, 0, 64]\n        alleles = [str(j) for j in range(max(genotypes) + 1)]\n        with pytest.raises(ValueError, match=\"maximum of 64\"):\n            # Only a limitation in the C version of map_mutations\n            self.tree.map_mutations(genotypes, alleles)\n\n    def test_ancestral_state_too_big(self):\n        genotypes = [0, 0, 1]\n        
alleles = [str(x) for x in range(2**8)]  # exceeds HARTIGAN_MAX_ALLELES\n        with pytest.raises(ValueError, match=\"maximum of 64\"):\n            # Only a limitation in the C version of map_mutations\n            self.tree.map_mutations(\n                genotypes, alleles=alleles, ancestral_state=alleles[-1]\n            )\n\n\nclass TestParsimonyRoundTrip(TestParsimonyBase):\n    \"\"\"\n    Tests that we can reproduce the genotypes for set of tree sequences by\n    inferring the locations of mutations.\n    \"\"\"\n\n    def verify(self, ts):\n        G = ts.genotype_matrix(isolated_as_missing=False)\n        alleles = [v.alleles for v in ts.variants()]\n        for randomize_ancestral_states in [False, True]:\n            tables = ts.dump_tables()\n            tables.sites.clear()\n            tables.mutations.clear()\n            fixed_anc_state = None\n            for tree in ts.trees():\n                for site in tree.sites():\n                    if randomize_ancestral_states:\n                        num_alleles = len(alleles[site.id])\n                        if alleles[site.id][-1] is None:\n                            num_alleles -= 1\n                        fixed_anc_state = np.random.randint(num_alleles)\n                    ancestral_state, mutations = self.do_map_mutations(\n                        tree,\n                        G[site.id],\n                        alleles[site.id],\n                        ancestral_state=fixed_anc_state,\n                    )\n                    site_id = tables.sites.append(\n                        site.replace(ancestral_state=ancestral_state)\n                    )\n                    parent_offset = len(tables.mutations)\n                    for mutation in mutations:\n                        parent = mutation.parent\n                        if parent != tskit.NULL:\n                            parent += parent_offset\n                        tables.mutations.append(\n                            
mutation.replace(site=site_id, parent=parent)\n                        )\n            other_ts = tables.tree_sequence()\n            for h1, h2 in zip(\n                ts.haplotypes(isolated_as_missing=False),\n                other_ts.haplotypes(isolated_as_missing=False),\n            ):\n                assert h1 == h2\n\n            # Make sure we're computing the parent correctly.\n            tables2 = tables.copy()\n            nulled = np.zeros_like(tables.mutations.parent) - 1\n            tables2.mutations.parent = nulled\n            assert np.array_equal(tables.mutations.parent, tables.mutations.parent)\n\n    def test_infinite_sites_n3(self):\n        ts = msprime.simulate(3, mutation_rate=3, random_seed=3)\n        self.verify(ts)\n\n    def test_infinite_sites_n20(self):\n        ts = msprime.simulate(20, mutation_rate=3, random_seed=3)\n        self.verify(ts)\n\n    def test_infinite_sites_n20_recombination(self):\n        ts = msprime.simulate(20, mutation_rate=3, recombination_rate=2, random_seed=3)\n        assert ts.num_trees > 2\n        self.verify(ts)\n\n    def test_infinite_sites_n5_internal_samples(self):\n        ts = msprime.simulate(5, mutation_rate=3, random_seed=3)\n        self.verify(tsutil.jiggle_samples(ts))\n\n    def test_infinite_sites_n20_internal_samples(self):\n        ts = msprime.simulate(20, mutation_rate=3, random_seed=3)\n        self.verify(tsutil.jiggle_samples(ts))\n\n    def test_jukes_cantor_n5(self):\n        ts = msprime.simulate(5, random_seed=1)\n        ts = tsutil.jukes_cantor(ts, 5, 1, seed=1)\n        self.verify(ts)\n\n    def test_jukes_cantor_n20(self):\n        ts = msprime.simulate(20, random_seed=1)\n        ts = tsutil.jukes_cantor(ts, 5, 2, seed=1)\n        self.verify(ts)\n\n    def test_jukes_cantor_n50(self):\n        ts = msprime.simulate(50, random_seed=1)\n        ts = tsutil.jukes_cantor(ts, 5, 2, seed=2)\n        self.verify(ts)\n\n    def test_jukes_cantor_n5_internal_samples(self):\n     
   ts = msprime.simulate(5, random_seed=1)\n        ts = tsutil.jukes_cantor(ts, 1, 1, seed=1)\n        ts = tsutil.jiggle_samples(ts)\n        self.verify(ts)\n\n    def test_jukes_cantor_n20_internal_samples(self):\n        ts = msprime.simulate(20, random_seed=1)\n        ts = tsutil.jukes_cantor(ts, 5, 2, seed=1)\n        self.verify(tsutil.jiggle_samples(ts))\n\n    def test_jukes_cantor_n50_internal_samples(self):\n        ts = msprime.simulate(50, random_seed=1)\n        ts = tsutil.jukes_cantor(ts, 5, 2, seed=2)\n        self.verify(tsutil.jiggle_samples(ts))\n\n    def test_jukes_cantor_balanced_ternary_internal_samples(self):\n        tree = tskit.Tree.generate_balanced(27, arity=3)\n        ts = tsutil.jukes_cantor(tree.tree_sequence, 5, 2, seed=1)\n        assert ts.num_sites > 1\n        self.verify(tsutil.jiggle_samples(ts))\n\n    def test_infinite_sites_n20_multiroot(self):\n        ts = msprime.simulate(20, mutation_rate=3, random_seed=3)\n        self.verify(ts.decapitate(np.max(ts.tables.nodes.time) / 2))\n\n    def test_jukes_cantor_n15_multiroot(self):\n        ts = msprime.simulate(15, random_seed=1)\n        ts = ts.decapitate(np.max(ts.tables.nodes.time) / 5)\n        ts = tsutil.jukes_cantor(ts, 15, 2, seed=3)\n        self.verify(ts)\n\n    def test_jukes_cantor_balanced_ternary_multiroot(self):\n        ts = tskit.Tree.generate_balanced(50, arity=3).tree_sequence\n        ts = ts.decapitate(np.max(ts.tables.nodes.time) / 3)\n        ts = tsutil.jukes_cantor(ts, 15, 2, seed=3)\n        self.verify(ts)\n        assert ts.num_sites > 1\n        self.verify(tsutil.jiggle_samples(ts))\n\n    def test_jukes_cantor_n50_multiroot(self):\n        ts = msprime.simulate(50, random_seed=1)\n        ts = ts.decapitate(np.max(ts.tables.nodes.time) / 2)\n        ts = tsutil.jukes_cantor(ts, 5, 2, seed=2)\n        self.verify(ts)\n\n    def test_jukes_cantor_root_polytomy_n5(self):\n        tree = tskit.Tree.unrank(5, (1, 0))\n        ts = 
tsutil.jukes_cantor(tree.tree_sequence, 5, 2, seed=1)\n        assert ts.num_sites > 2\n        self.verify(ts)\n\n    def test_jukes_cantor_leaf_polytomy_n5(self):\n        tree = tskit.Tree.unrank(5, (7, 0))\n        ts = tsutil.jukes_cantor(tree.tree_sequence, 5, 2, seed=1)\n        assert ts.num_sites > 2\n        self.verify(ts)\n\n    @pytest.mark.parametrize(\n        \"tree_builder\", [tskit.Tree.generate_balanced, tskit.Tree.generate_comb]\n    )\n    @pytest.mark.parametrize(\"n\", [2, 5, 10])\n    def test_many_states_binary(self, tree_builder, n):\n        tree = tree_builder(n)\n        tables = tree.tree_sequence.dump_tables()\n        tables.sites.add_row(0.5, \"0\")\n        for j in range(1, n):\n            tables.mutations.add_row(0, derived_state=str(j), node=j)\n        ts = tables.tree_sequence()\n        assert np.array_equal(ts.genotype_matrix(), [np.arange(n, dtype=np.int8)])\n        self.verify(tables.tree_sequence())\n\n    @pytest.mark.parametrize(\"arity\", [2, 3, 4])\n    @pytest.mark.parametrize(\"n\", [2, 5, 10])\n    def test_many_states_arity(self, n, arity):\n        tree = tskit.Tree.generate_balanced(n, arity=arity)\n        tables = tree.tree_sequence.dump_tables()\n        tables.sites.add_row(0.5, \"0\")\n        for j in range(1, n):\n            tables.mutations.add_row(0, derived_state=str(j), node=j)\n        ts = tables.tree_sequence()\n        assert np.array_equal(ts.genotype_matrix(), [np.arange(n, dtype=np.int8)])\n        self.verify(tables.tree_sequence())\n\n\nclass TestParsimonyRoundTripMissingData(TestParsimonyRoundTrip):\n    \"\"\"\n    Tests that we can reproduce the genotypes for set of tree sequences by\n    inferring the locations of mutations.\n    \"\"\"\n\n    def verify(self, ts):\n        tables = ts.dump_tables()\n        tables.sites.clear()\n        tables.mutations.clear()\n        G = ts.genotype_matrix(isolated_as_missing=False)\n        # Set the first sample to missing data everywhere\n       
 G[:, 0] = -1\n        alleles = [v.alleles for v in ts.variants()]\n        for tree in ts.trees():\n            for site in tree.sites():\n                ancestral_state, mutations = self.do_map_mutations(\n                    tree, G[site.id], alleles[site.id]\n                )\n                site_id = tables.sites.append(\n                    site.replace(ancestral_state=ancestral_state)\n                )\n                parent_offset = len(tables.mutations)\n                for m in mutations:\n                    parent = m.parent\n                    if m.parent != tskit.NULL:\n                        parent = m.parent + parent_offset\n                    tables.mutations.append(m.replace(site=site_id, parent=parent))\n        other_ts = tables.tree_sequence()\n        assert ts.num_samples == other_ts.num_samples\n        H1 = list(ts.haplotypes(isolated_as_missing=False))\n        H2 = list(other_ts.haplotypes(isolated_as_missing=False))\n        # All samples except 0 should be reproduced exactly.\n        assert H1[1:] == H2[1:]\n\n\nclass TestParsimonyMissingData(TestParsimonyBase):\n    \"\"\"\n    Tests that we correctly map_mutations when we have missing data.\n    \"\"\"\n\n    @pytest.mark.parametrize(\"n\", range(2, 10))\n    def test_all_missing(self, n):\n        ts = msprime.simulate(n, random_seed=2)\n        tree = ts.first()\n        genotypes = np.zeros(n, dtype=np.int8) - 1\n        alleles = [\"0\", \"1\"]\n        with pytest.raises(ValueError):\n            fitch_map_mutations(tree, genotypes, alleles)\n        with pytest.raises(ValueError):\n            hartigan_map_mutations(tree, genotypes, alleles)\n        with pytest.raises(tskit.LibraryError):\n            tree.map_mutations(genotypes, alleles)\n\n    @pytest.mark.parametrize(\"n\", range(2, 10))\n    def test_one_non_missing(self, n):\n        ts = msprime.simulate(n, random_seed=2)\n        tree = ts.first()\n        for j in range(n):\n            genotypes = 
np.zeros(n, dtype=np.int8) - 1\n            genotypes[j] = 0\n            ancestral_state, transitions = self.do_map_mutations(\n                tree, genotypes, [\"0\", \"1\"]\n            )\n            assert ancestral_state == \"0\"\n            assert len(transitions) == 0\n\n    @pytest.mark.parametrize(\"arity\", range(2, 10))\n    def test_one_non_missing_balanced(self, arity):\n        n = 40\n        tree = tskit.Tree.generate_balanced(n, arity=arity)\n        for j in range(n):\n            genotypes = np.zeros(n, dtype=np.int8) - 1\n            genotypes[j] = 0\n            ancestral_state, transitions = self.do_map_mutations(\n                tree, genotypes, [\"0\", \"1\"]\n            )\n            assert ancestral_state == \"0\"\n            assert len(transitions) == 0\n\n    @pytest.mark.parametrize(\"n\", range(2, 10))\n    def test_many_states_half_missing(self, n):\n        ts = msprime.simulate(n, random_seed=2)\n        tree = ts.first()\n        genotypes = np.zeros(n, dtype=np.int8) - 1\n        genotypes[0 : n // 2] = np.arange(n // 2, dtype=int)\n        alleles = [str(j) for j in range(n)]\n        ancestral_state, transitions = self.do_map_mutations(tree, genotypes, alleles)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == max(0, n // 2 - 1)\n\n    @pytest.mark.parametrize(\"n\", range(2, 10))\n    def test_one_missing(self, n):\n        ts = msprime.simulate(n, random_seed=2)\n        tree = ts.first()\n        alleles = [str(j) for j in range(2)]\n        for j in range(n):\n            genotypes = np.zeros(n, dtype=np.int8) - 1\n            genotypes[j] = 0\n            ancestral_state, transitions = self.do_map_mutations(\n                tree, genotypes, alleles\n            )\n            assert ancestral_state == \"0\"\n            assert len(transitions) == 0\n\n    @pytest.mark.parametrize(\"arity\", range(2, 10))\n    def test_one_missing_balanced(self, arity):\n        n = 40\n        tree = 
tskit.Tree.generate_balanced(n, arity=arity)\n        alleles = [str(j) for j in range(2)]\n        for j in range(n):\n            genotypes = np.zeros(n, dtype=np.int8) - 1\n            genotypes[j] = 0\n            ancestral_state, transitions = self.do_map_mutations(\n                tree, genotypes, alleles\n            )\n            assert ancestral_state == \"0\"\n            assert len(transitions) == 0\n\n    def test_one_missing_derived_state(self):\n        tables = felsenstein_tables()\n        ts = tables.tree_sequence()\n        genotypes = np.zeros(5, dtype=np.int8)\n        genotypes[0] = -1\n        genotypes[1] = 1\n        alleles = [str(j) for j in range(2)]\n        ancestral_state, transitions = self.do_map_mutations(\n            ts.first(), genotypes, alleles\n        )\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 1\n        assert transitions[0].node == 7\n        assert transitions[0].parent == -1\n        assert transitions[0].derived_state == \"1\"\n\n\nclass TestParsimonyExamples(TestParsimonyBase):\n    \"\"\"\n    Some examples on a given tree.\n    \"\"\"\n\n    #\n    #          8\n    #         / \\\n    #        /   \\\n    #       /     \\\n    #      7       \\\n    #     / \\       6\n    #    /   5     / \\\n    #   /   / \\   /   \\\n    #  4   0   1 2     3\n    small_tree_ex_nodes = \"\"\"\\\n    id      is_sample   population      time\n    0       1       0               0.00000000000000\n    1       1       0               0.00000000000000\n    2       1       0               0.00000000000000\n    3       1       0               0.00000000000000\n    4       1       0               0.00000000000000\n    5       0       0               0.14567111023387\n    6       0       0               0.21385545626353\n    7       0       0               0.43508024345063\n    8       0       0               1.60156352971203\n    \"\"\"\n    small_tree_ex_edges = \"\"\"\\\n    id      left            
right           parent  child\n    0       0.00000000      1.00000000      5       0,1\n    1       0.00000000      1.00000000      6       2,3\n    2       0.00000000      1.00000000      7       4,5\n    3       0.00000000      1.00000000      8       6,7\n    \"\"\"\n    tree = tskit.load_text(\n        nodes=io.StringIO(small_tree_ex_nodes),\n        edges=io.StringIO(small_tree_ex_edges),\n        strict=False,\n    ).first()\n\n    def test_mutation_over_0(self):\n        genotypes = [1, 0, 0, 0, 0]\n        ancestral_state, transitions = self.do_map_mutations(self.tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 1\n        assert transitions[0] == tskit.Mutation(node=0, parent=-1, derived_state=\"1\")\n\n    def test_mutation_over_5(self):\n        genotypes = [1, 1, 0, 0, 0]\n        ancestral_state, transitions = self.do_map_mutations(self.tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 1\n        assert transitions[0] == tskit.Mutation(node=5, parent=-1, derived_state=\"1\")\n\n    def test_mutation_over_7(self):\n        genotypes = [1, 1, 0, 0, 1]\n        ancestral_state, transitions = self.do_map_mutations(self.tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 1\n        assert transitions[0] == tskit.Mutation(node=7, parent=-1, derived_state=\"1\")\n\n    def test_mutation_over_7_0(self):\n        genotypes = [2, 1, 0, 0, 1]\n        ancestral_state, transitions = self.do_map_mutations(self.tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 2\n        assert transitions[0] == tskit.Mutation(node=7, parent=-1, derived_state=\"1\")\n        assert transitions[1] == tskit.Mutation(node=0, parent=0, derived_state=\"2\")\n\n    def test_mutation_over_7_0_alleles(self):\n        genotypes = [2, 1, 0, 0, 1]\n        alleles = [\"ANC\", \"ONE\", \"TWO\"]\n        ancestral_state, 
transitions = self.do_map_mutations(\n            self.tree, genotypes, alleles\n        )\n        assert ancestral_state == \"ANC\"\n        assert len(transitions) == 2\n        assert transitions[0] == tskit.Mutation(node=7, parent=-1, derived_state=\"ONE\")\n        assert transitions[1] == tskit.Mutation(node=0, parent=0, derived_state=\"TWO\")\n\n    def test_mutation_over_7_missing_data_0(self):\n        genotypes = [-1, 1, 0, 0, 1]\n        ancestral_state, transitions = self.do_map_mutations(self.tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 1\n        assert transitions[0] == tskit.Mutation(node=7, parent=-1, derived_state=\"1\")\n\n    def test_mutation_over_leaf_sibling_missing(self):\n        genotypes = [0, 0, 1, -1, 0]\n        ancestral_state, transitions = self.do_map_mutations(self.tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 1\n        # We assume that the mutation is over the parent of 2 and the missing data\n        # so we impute that 3 also has allele 1. This is surprising behaviour to me:\n        # I would have thought it was more parsimonious to assume that the missing\n        # data had the ancestral state. 
However, the number of *state changes*\n        # is the same, which is what the algorithm is minimising.\n        assert transitions[0] == tskit.Mutation(node=6, parent=-1, derived_state=\"1\")\n\n        # Reverse is the same\n        genotypes = [0, 0, -1, 1, 0]\n        ancestral_state, transitions = self.do_map_mutations(self.tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 1\n        assert transitions[0] == tskit.Mutation(node=6, parent=-1, derived_state=\"1\")\n\n    def test_mutation_over_6_missing_data_0(self):\n        genotypes = [-1, 0, 1, 1, 0]\n        ancestral_state, transitions = self.do_map_mutations(self.tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 1\n        assert transitions[0] == tskit.Mutation(node=6, parent=-1, derived_state=\"1\")\n\n    def test_mutation_over_0_missing_data_4(self):\n        genotypes = [1, 0, 0, 0, -1]\n        ancestral_state, transitions = self.do_map_mutations(self.tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 1\n        assert transitions[0] == tskit.Mutation(node=0, parent=-1, derived_state=\"1\")\n\n    def test_multi_mutation_missing_data(self):\n        genotypes = [1, 2, -1, 0, 0]\n        ancestral_state, transitions = self.do_map_mutations(self.tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 2\n        assert transitions[0] == tskit.Mutation(node=5, parent=-1, derived_state=\"1\")\n        assert transitions[1] == tskit.Mutation(node=1, parent=0, derived_state=\"2\")\n\n\nclass TestParsimonyExamplesPolytomy(TestParsimonyBase):\n    \"\"\"\n    Some examples on a given non-binary tree.\n    \"\"\"\n\n    #         9\n    #       ┏━┻━━┓\n    #       7    8\n    #     ┏━┻━┓ ┏┻┓\n    #     6   ┃ ┃ ┃\n    #   ┏━╋━┓ ┃ ┃ ┃\n    #   0 2 4 5 1 3\n\n    nodes = io.StringIO(\n        \"\"\"\\\n    id      is_sample   time\n    0       
1           0\n    1       1           0\n    2       1           0\n    3       1           0\n    4       1           0\n    5       1           0\n    6       0           1\n    7       0           2\n    8       0           2\n    9       0           3\n    \"\"\"\n    )\n    edges = io.StringIO(\n        \"\"\"\\\n    left    right   parent  child\n    0       1       6       0,2,4\n    0       1       7       6,5\n    0       1       8       1,3\n    0       1       9       7,8\n    \"\"\"\n    )\n\n    tree = tskit.load_text(\n        nodes=nodes,\n        edges=edges,\n        strict=False,\n    ).first()\n\n    def test_all_zeros(self):\n        genotypes = [0, 0, 0, 0, 0, 0]\n        ancestral_state, transitions = self.do_map_mutations(self.tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 0\n\n    def test_mutation_over_8(self):\n        genotypes = [0, 1, 0, 1, 0, 0]\n        ancestral_state, transitions = self.do_map_mutations(self.tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 1\n        assert transitions[0] == tskit.Mutation(node=8, derived_state=\"1\")\n\n    def test_mutation_over_6(self):\n        genotypes = [1, 0, 1, 0, 1, 0]\n        ancestral_state, transitions = self.do_map_mutations(self.tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 1\n        assert transitions[0] == tskit.Mutation(node=6, derived_state=\"1\")\n\n    def test_mutation_over_0_5(self):\n        # Bug reported in https://github.com/tskit-dev/tskit/issues/987\n        genotypes = [1, 0, 0, 0, 0, 1]\n        ancestral_state, transitions = self.do_map_mutations(self.tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 2\n        assert transitions[0] == tskit.Mutation(node=0, derived_state=\"1\")\n        assert transitions[1] == tskit.Mutation(node=5, derived_state=\"1\")\n\n    def 
test_mutation_over_7_back_mutation_4(self):\n        genotypes = [1, 0, 1, 0, 0, 1]\n        ancestral_state, transitions = self.do_map_mutations(self.tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 2\n        assert transitions[0] == tskit.Mutation(node=7, derived_state=\"1\")\n        assert transitions[1] == tskit.Mutation(node=4, derived_state=\"0\", parent=0)\n\n\nclass TestParsimonyExamplesStar(TestParsimonyBase):\n    \"\"\"\n    Some examples on star topologies.\n    \"\"\"\n\n    @pytest.mark.parametrize(\"n\", range(3, 8))\n    def test_two_states_freq_n_minus_1(self, n):\n        tree = tskit.Tree.generate_star(n)\n        genotypes = np.zeros(n, dtype=np.int8)\n        genotypes[0] = 1\n        ancestral_state, transitions = self.do_map_mutations(tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 1\n        assert transitions[0] == tskit.Mutation(node=0, derived_state=\"1\")\n\n        genotypes[:] = 1\n        genotypes[0] = 0\n        ancestral_state, transitions = self.do_map_mutations(tree, genotypes)\n        assert ancestral_state == \"1\"\n        assert len(transitions) == 1\n        assert transitions[0] == tskit.Mutation(node=0, derived_state=\"0\")\n\n    @pytest.mark.parametrize(\"n\", range(5, 10))\n    def test_two_states_freq_n_minus_2(self, n):\n        tree = tskit.Tree.generate_star(n)\n        genotypes = np.zeros(n, dtype=np.int8)\n        genotypes[0:2] = 1\n        ancestral_state, transitions = self.do_map_mutations(tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 2\n        assert transitions[0] == tskit.Mutation(node=1, derived_state=\"1\")\n        assert transitions[1] == tskit.Mutation(node=0, derived_state=\"1\")\n\n        genotypes[:] = 1\n        genotypes[0:2] = 0\n        ancestral_state, transitions = self.do_map_mutations(tree, genotypes)\n        assert ancestral_state == \"1\"\n       
 assert len(transitions) == 2\n        assert transitions[0] == tskit.Mutation(node=1, derived_state=\"0\")\n        assert transitions[1] == tskit.Mutation(node=0, derived_state=\"0\")\n\n    @pytest.mark.parametrize(\"n\", range(5, 10))\n    def test_three_states_freq_n_minus_2(self, n):\n        tree = tskit.Tree.generate_star(n)\n        genotypes = np.zeros(n, dtype=np.int8)\n        genotypes[0] = 1\n        genotypes[1] = 2\n        ancestral_state, transitions = self.do_map_mutations(tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 2\n        assert transitions[0] == tskit.Mutation(node=1, derived_state=\"2\")\n        assert transitions[1] == tskit.Mutation(node=0, derived_state=\"1\")\n\n    @pytest.mark.parametrize(\"n\", range(2, 10))\n    def test_n_states(self, n):\n        tree = tskit.Tree.generate_star(n)\n        genotypes = np.arange(n, dtype=np.int8)\n        ancestral_state, transitions = self.do_map_mutations(tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == n - 1\n\n    @pytest.mark.parametrize(\"n\", range(3, 10))\n    def test_missing_data(self, n):\n        tree = tskit.Tree.generate_star(n)\n        genotypes = np.zeros(n, dtype=np.int8)\n        genotypes[0] = tskit.MISSING_DATA\n        genotypes[1] = 1\n        ancestral_state, transitions = self.do_map_mutations(tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 1\n        assert transitions[0] == tskit.Mutation(node=1, derived_state=\"1\")\n\n\nclass TestParsimonyExamplesBalancedTernary(TestParsimonyBase):\n    \"\"\"\n    Some examples on a given non-binary tree.\n    \"\"\"\n\n    tree = tskit.Tree.generate_balanced(27, arity=3)\n    #                                39\n    #         ┏━━━━━━━━━━━━━━━━━━━━━┳━┻━━━━━━━━━━━━━━━━━━━━━━━━┓\n    #        30                    34                         38\n    #   ┏━━━━━╋━━━━━┓      ┏━━━━━━━━╋━━━━━━━━┓        
┏━━━━━━━━╋━━━━━━━━┓\n    #  27    28    29     31       32       33       35       36       37\n    # ┏━╋━┓ ┏━╋━┓ ┏━╋━┓ ┏━━╋━━┓  ┏━━╋━━┓  ┏━━╋━━┓  ┏━━╋━━┓  ┏━━╋━━┓  ┏━━╋━━┓\n    # 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26\n\n    def test_mutation_over_27_29(self):\n        genotypes = np.zeros(27, dtype=int)\n        genotypes[0:3] = 1\n        genotypes[6:9] = 1\n        ancestral_state, transitions = self.do_map_mutations(self.tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 2\n        # the algorithm chooses a back mutation instead\n        assert transitions[0] == tskit.Mutation(node=30, derived_state=\"1\")\n        assert transitions[1] == tskit.Mutation(node=28, derived_state=\"0\", parent=0)\n\n    def test_three_clades(self):\n        genotypes = np.zeros(27, dtype=int)\n        genotypes[9:18] = 1\n        genotypes[18:27] = 2\n        ancestral_state, transitions = self.do_map_mutations(self.tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 2\n        assert transitions[0] == tskit.Mutation(node=38, derived_state=\"2\")\n        assert transitions[1] == tskit.Mutation(node=34, derived_state=\"1\")\n\n    def test_nonzero_ancestral_state(self):\n        genotypes = np.ones(27, dtype=int)\n        genotypes[0] = 0\n        genotypes[26] = 0\n        ancestral_state, transitions = self.do_map_mutations(self.tree, genotypes)\n        assert ancestral_state == \"1\"\n        assert len(transitions) == 2\n        assert transitions[0] == tskit.Mutation(node=26, derived_state=\"0\")\n        assert transitions[1] == tskit.Mutation(node=0, derived_state=\"0\")\n\n    def test_many_states(self):\n        genotypes = np.arange(27, dtype=int)\n        ancestral_state, transitions = self.do_map_mutations(self.tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 26\n\n    def test_least_parsimonious(self):\n    
    genotypes = [0, 1, 2] * 9\n        ancestral_state, transitions = self.do_map_mutations(self.tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 18\n\n\nclass TestParsimonyExamplesUnary(TestParsimonyBase):\n    \"\"\"\n    Some examples on a tree with unary nodes. The mutation should be placed\n    on the highest node along the lineage compatible with the parsimonious placement\n    \"\"\"\n\n    #        9\n    #      ┏━┻━┓\n    #      8   ┃\n    #    ┏━┻━┓ ┃\n    #    6   7 ┃\n    #    ┃   ┃ ┃\n    #    5   ┃ ┃\n    #  ┏━╋━┓ ┃ ┃\n    #  0 2 3 1 4\n\n    nodes = io.StringIO(\n        \"\"\"\\\n    id      is_sample   time\n    0       1           0\n    1       1           0\n    2       1           0\n    3       1           0\n    4       1           0\n    5       0           1\n    6       0           2\n    7       0           2\n    8       0           3\n    9       0           4\n    \"\"\"\n    )\n    edges = io.StringIO(\n        \"\"\"\\\n    left    right   parent  child\n    0       1       5       0,2,3\n    0       1       6       5\n    0       1       7       1\n    0       1       8       6\n    0       1       8       7\n    0       1       9       8\n    0       1       9       4\n    \"\"\"\n    )\n\n    tree = tskit.load_text(\n        nodes=nodes,\n        edges=edges,\n        strict=False,\n    ).first()\n\n    def test_all_zeros(self):\n        genotypes = [0, 0, 0, 0, 0]\n        ancestral_state, transitions = self.do_map_mutations(self.tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 0\n\n    def test_mutation_over_6(self):\n        genotypes = [1, 0, 1, 1, 0]\n        ancestral_state, transitions = self.do_map_mutations(self.tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 1\n        assert transitions[0] == tskit.Mutation(node=6, derived_state=\"1\")\n\n    def test_mutation_over_7(self):\n        
genotypes = [0, 1, 0, 0, 0]\n        ancestral_state, transitions = self.do_map_mutations(self.tree, genotypes)\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 1\n        assert transitions[0] == tskit.Mutation(node=7, derived_state=\"1\")\n\n    def test_reversed_mutation_over_7(self):\n        genotypes = [1, 0, 1, 1, 1]\n        ancestral_state, transitions = self.do_map_mutations(self.tree, genotypes)\n        assert ancestral_state == \"1\"\n        assert len(transitions) == 1\n        assert transitions[0] == tskit.Mutation(node=7, derived_state=\"0\")\n\n\nclass TestParsimonyExamplesAncestralState(TestParsimonyBase):\n    \"\"\"\n    Test fixing the ancestral state. Note that a mutation can occur above node 10\n    to switch the ancestral state\n    \"\"\"\n\n    #     10\n    #    ┏━┻━┓\n    #    ┃   9\n    #    ┃ ┏━┻━┓\n    #    ┃ ┃   8\n    #    ┃ ┃ ┏━┻━┓\n    #    ┃ ┃ ┃   7\n    #    ┃ ┃ ┃ ┏━┻┓\n    #    ┃ ┃ ┃ ┃  6\n    #    ┃ ┃ ┃ ┃ ┏┻┓\n    #    0 1 2 3 4 5\n    tree = tskit.Tree.generate_comb(6)\n\n    def test_mutation_over_0(self):\n        genotypes = [1, 0, 0, 0, 0, 0]\n        ancestral_state, transitions = self.do_map_mutations(\n            self.tree, genotypes, ancestral_state=0\n        )\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 1\n        assert transitions[0] == tskit.Mutation(node=0, parent=-1, derived_state=\"1\")\n\n        ancestral_state, transitions = self.do_map_mutations(\n            self.tree, genotypes, ancestral_state=1\n        )\n        assert ancestral_state == \"1\"\n        assert len(transitions) == 1\n        assert transitions[0] == tskit.Mutation(node=9, parent=-1, derived_state=\"0\")\n\n    def test_mutation_over_3(self):\n        genotypes = [0, 0, 0, 1, 0, 0]\n        ancestral_state, transitions = self.do_map_mutations(\n            self.tree, genotypes, ancestral_state=None\n        )\n        assert ancestral_state == \"0\"\n        assert 
len(transitions) == 1\n        assert transitions[0] == tskit.Mutation(node=3, parent=-1, derived_state=\"1\")\n\n        ancestral_state, transitions = self.do_map_mutations(\n            self.tree, genotypes, ancestral_state=0\n        )\n        assert ancestral_state == \"0\"\n        assert len(transitions) == 1\n        assert transitions[0] == tskit.Mutation(node=3, parent=-1, derived_state=\"1\")\n        ancestral_state, transitions = self.do_map_mutations(\n            self.tree, genotypes, ancestral_state=1\n        )\n        assert ancestral_state == \"1\"\n        assert len(transitions) == 2\n        assert transitions[0] == tskit.Mutation(node=10, parent=-1, derived_state=\"0\")\n        assert transitions[1] == tskit.Mutation(node=3, parent=0, derived_state=\"1\")\n\n    def test_novel_ancestral_state(self):\n        # should put a single mutation above the root\n        genotypes = [0, 0, 0, 0, 0, 0]\n        for alleles in ([\"0\", \"1\", \"2\", \"3\"], [\"0\", \"1\", \"2\", \"3\", None]):\n            ancestral_state, transitions = self.do_map_mutations(\n                self.tree, genotypes, alleles=alleles, ancestral_state=3\n            )\n            assert len(transitions) == 1\n            assert transitions[0] == tskit.Mutation(node=10, derived_state=\"0\")\n\n    def test_mutations_over_root(self):\n        tree = tskit.Tree.generate_star(6)\n        # Mutations on root children\n        genotypes = [0, 0, 0, 1, 1, 1]\n        ancestral_state, transitions = self.do_map_mutations(\n            tree, genotypes, ancestral_state=1\n        )\n        assert ancestral_state == \"1\"\n        assert len(transitions) == 3\n        assert all(m.derived_state == \"0\" for m in transitions)\n        assert set(range(3)) == {m.node for m in transitions}\n\n        # Should now switch to a mutation over the root\n        genotypes = [0, 0, 0, 0, 1, 1]\n        ancestral_state, transitions = self.do_map_mutations(\n            tree, genotypes, 
ancestral_state=1\n        )\n        assert ancestral_state == \"1\"\n        assert len(transitions) == 3\n        assert transitions[0] == tskit.Mutation(node=tree.root, derived_state=\"0\")\n        assert all(m.derived_state == \"1\" for m in transitions[1:])\n        assert all(m.parent == 0 for m in transitions[1:])\n        assert {4, 5} == {m.node for m in transitions[1:]}\n\n    def test_all_isolated_different_from_ancestral(self):\n        ts = tskit.Tree.generate_star(6).tree_sequence\n        ts = ts.decapitate(0)\n        tree = ts.first()\n        genotypes = [0, 0, 0, 1, 1, 1]\n        ancestral_state, transitions = self.do_map_mutations(\n            tree, genotypes, alleles=[\"A\", \"T\", \"G\", \"C\"], ancestral_state=2\n        )\n        assert len(transitions) == 6\n        assert all(m.parent == -1 for m in transitions)\n        derived_states = [m.derived_state for m in transitions]\n        assert derived_states.count(\"A\") == 3\n        assert derived_states.count(\"T\") == 3\n        assert {m.node for m in transitions if m.derived_state == \"A\"} == {0, 1, 2}\n        assert {m.node for m in transitions if m.derived_state == \"T\"} == {3, 4, 5}\n\n    def test_ancestral_as_string(self):\n        genotypes = [1, 0, 0, 0, 0, 0]\n        ancestral_state, transitions = self.do_map_mutations(\n            self.tree, genotypes, alleles=[\"A\", \"T\", \"G\", \"C\"], ancestral_state=\"A\"\n        )\n        assert ancestral_state == \"A\"\n        assert len(transitions) == 1\n        assert transitions[0] == tskit.Mutation(node=0, parent=-1, derived_state=\"T\")\n\n    def test_bad_ancestral_state(self):\n        genotypes = [0, 0, 0, 1, 0, 0]\n        alleles = [str(j) for j in range(max(genotypes) + 1)]\n        for bad, err in {\n            2: \"ancestral_state\",\n            -1: \"ancestral_state\",\n            \"A\": \"not in list\",\n        }.items():\n            with pytest.raises(ValueError, match=err):\n                
hartigan_map_mutations(\n                    self.tree, genotypes, alleles=alleles, ancestral_state=bad\n                )\n            with pytest.raises(ValueError, match=err):\n                self.tree.map_mutations(genotypes, alleles=alleles, ancestral_state=bad)\n\n\nclass TestReconstructAllTuples:\n    \"\"\"\n    Tests that the parsimony algorithm correctly round-trips all possible\n    states.\n    \"\"\"\n\n    def verify(self, ts, k):\n        tables = ts.dump_tables()\n        assert ts.num_trees == 1\n        tree = ts.first()\n        n = ts.num_samples\n        m = k**n\n        tables.sequence_length = m + 1\n        tables.edges.set_columns(\n            left=tables.edges.left,\n            right=np.zeros_like(tables.edges.right) + tables.sequence_length,\n            parent=tables.edges.parent,\n            child=tables.edges.child,\n        )\n        G1 = np.zeros((m, n), dtype=np.int8)\n        alleles = [str(j) for j in range(k)]\n        for j, genotypes in enumerate(itertools.product(range(k), repeat=n)):\n            G1[j] = genotypes\n            ancestral_state, mutations = tree.map_mutations(G1[j], alleles)\n            tables.sites.add_row(j, ancestral_state=ancestral_state)\n            parent_offset = len(tables.mutations)\n            for mutation in mutations:\n                parent = mutation.parent\n                if parent != tskit.NULL:\n                    parent += parent_offset\n                tables.mutations.append(mutation.replace(site=j, parent=parent))\n\n        ts2 = tables.tree_sequence()\n        G2 = np.zeros((m, n), dtype=np.int8)\n        for j, variant in enumerate(ts2.variants()):\n            alleles = np.array(list(map(int, variant.alleles)), dtype=np.int8)\n            G2[j] = alleles[variant.genotypes]\n        assert np.array_equal(G1, G2)\n\n    def test_simple_n3_k2(self):\n        ts = msprime.simulate(3, random_seed=4)\n        self.verify(ts, 2)\n\n    def test_simple_n3_k4(self):\n        ts = 
msprime.simulate(3, random_seed=4)\n        self.verify(ts, 4)\n\n    def test_simple_n4_k2(self):\n        ts = msprime.simulate(4, random_seed=4)\n        self.verify(ts, 2)\n\n    def test_simple_n4_k4(self):\n        ts = msprime.simulate(4, random_seed=4)\n        self.verify(ts, 4)\n\n    def test_simple_n4_k5(self):\n        ts = msprime.simulate(4, random_seed=4)\n        self.verify(ts, 5)\n\n    def test_simple_n5_k4(self):\n        ts = msprime.simulate(5, random_seed=4)\n        self.verify(ts, 4)\n\n    def test_simple_n6_k3(self):\n        ts = msprime.simulate(6, random_seed=4)\n        self.verify(ts, 3)\n\n    def test_root_polytomy_n5_k4(self):\n        tree = tskit.Tree.unrank(5, (1, 0))\n        self.verify(tree.tree_sequence, 4)\n\n    def test_leaf_polytomy_n5_k4(self):\n        tree = tskit.Tree.unrank(5, (7, 0))\n        self.verify(tree.tree_sequence, 4)\n\n    def test_leaf_polytomy_n5_k5(self):\n        tree = tskit.Tree.unrank(5, (7, 0))\n        self.verify(tree.tree_sequence, 5)\n"
  },
  {
    "path": "python/tests/test_phylo_formats.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2021 Tskit Developers\n# Copyright (c) 2016-2017 University of Oxford\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTests for phylogenetics export functions, newick, nexus, FASTA etc.\n\"\"\"\n\nimport functools\nimport io\nimport random\nimport textwrap\n\nimport dendropy\nimport msprime\nimport newick\nimport numpy as np\nimport pytest\nfrom Bio import SeqIO\n\nimport tests\nimport tskit\nfrom tests.tsutil import get_example_tree_sequences\n\n# ↑ See https://github.com/tskit-dev/tskit/issues/1804 for when\n# we can remove this. The example_ts here is intended to be the\n# basic tree sequence which should give a meaningful result for\n# most operations. 
Probably rename it to ``examples.simple_ts()``\n# or something.\n\n\n@functools.lru_cache(maxsize=100)\ndef alignment_example(sequence_length, include_reference=True):\n    ts = msprime.sim_ancestry(\n        samples=5, sequence_length=sequence_length, random_seed=123\n    )\n    ts = msprime.sim_mutations(ts, rate=0.1, random_seed=1234)\n    tables = ts.dump_tables()\n    if include_reference:\n        tables.reference_sequence.data = tskit.random_nucleotides(\n            ts.sequence_length, seed=1234\n        )\n    ts = tables.tree_sequence()\n    assert ts.num_sites > 5\n    return ts\n\n\n@tests.cached_example\ndef missing_data_example():\n    # 2.00┊   4     ┊\n    #     ┊ ┏━┻┓    ┊\n    # 1.00┊ ┃  3    ┊\n    #     ┊ ┃ ┏┻┓   ┊\n    # 0.00┊ 0 1 2 5 ┊\n    #     0        10\n    #      |      |\n    #  pos 2      9\n    #  anc A      T\n    ts = tskit.Tree.generate_balanced(3, span=10).tree_sequence\n    tables = ts.dump_tables()\n    tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n    tables.sites.add_row(2, ancestral_state=\"A\")\n    tables.sites.add_row(9, ancestral_state=\"T\")\n    tables.mutations.add_row(site=0, node=0, derived_state=\"G\")\n    tables.mutations.add_row(site=1, node=3, derived_state=\"C\")\n    return tables.tree_sequence()\n\n\ndef alignment_map(ts, **kwargs):\n    alignments = ts.alignments(**kwargs)\n    return {f\"n{u}\": alignment for u, alignment in zip(ts.samples(), alignments)}\n\n\ndef assert_fully_labelled_trees_equal(tree, root, node_labels, dpy_tree):\n    \"\"\"\n    Checks that the specified fully-labelled tree rooted at the specified\n    node is equivalent to the specified Dendropy tree.\n    \"\"\"\n    label_map = {}\n    for node in dpy_tree:\n        label_map[str(node.taxon.label)] = node\n\n    for u in tree.nodes(root, order=\"postorder\"):\n        # Consume the nodes in the dendropy node map one-by-one\n        dpy_node = label_map.pop(node_labels[u])\n        parent = tree.parent(u)\n        if 
parent == tskit.NULL:\n            assert dpy_node.edge_length is None\n            assert dpy_node.parent_node is None\n        else:\n            assert tree.branch_length(u) == pytest.approx(dpy_node.edge_length)\n            assert dpy_node.parent_node is label_map[node_labels[parent]]\n    assert len(label_map) == 0\n\n\ndef assert_sample_labelled_trees_equal(tree, dpy_tree):\n    \"\"\"\n    Checks that the specified trees are equivalent, where the dendropy tree\n    only has labels identifying the samples.\n    \"\"\"\n    for sample in tree.samples():\n        dpy_node = dpy_tree.find_node_with_taxon_label(f\"n{sample}\")\n        # Check the branch length paths to root are equal\n        p1 = []\n        u = sample\n        while tree.parent(u) != tskit.NULL:\n            p1.append(tree.branch_length(u))\n            u = tree.parent(u)\n        p2 = []\n        while dpy_node.parent_node is not None:\n            p2.append(dpy_node.edge_length)\n            dpy_node = dpy_node.parent_node\n        assert len(p1) == len(p2)\n        np.testing.assert_array_almost_equal(p1, p2)\n\n\ndef assert_dpy_tree_list_equal(ts, tree_list):\n    \"\"\"\n    Check that the nexus-encoded tree list output from tskit is\n    parsed correctly by dendropy.\n    \"\"\"\n    assert ts.num_trees == len(tree_list)\n    for tsk_tree, dpy_tree in zip(ts.trees(), tree_list):\n        # We're specifying that the tree is rooted.\n        assert dpy_tree.is_rooted\n        assert dpy_tree.label.startswith(\"t\")\n        left, right = map(float, dpy_tree.label[1:].split(\"^\"))\n        assert tsk_tree.interval.left == pytest.approx(left)\n        assert tsk_tree.interval.right == pytest.approx(right)\n        assert_sample_labelled_trees_equal(tsk_tree, dpy_tree)\n\n\nclass TestBackendsGiveIdenticalOutput:\n    # At the default precision of 17 we should get identical results between\n    # the two backends as there's no rounding done. 
In general, we can't\n    # depend on this, though, since rounding may be done differently by the\n    # Python and C library implementations.\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_default_precision(self, ts):\n        for tree in ts.trees():\n            if tree.has_single_root:\n                assert tree.as_newick() == tree.as_newick(\n                    node_labels={u: f\"n{u}\" for u in tree.samples()}\n                )\n\n\nclass TestNewickRoundTrip:\n    \"\"\"\n    Test that the newick formats can round-trip the data under various\n    assumptions.\n    \"\"\"\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_leaf_labels_newick_lib(self, ts):\n        for tree in ts.trees():\n            # Multiroot trees raise an error\n            for root in tree.roots:\n                leaf_labels = {u: f\"n{u}\" for u in tree.leaves(root)}\n                ns = tree.newick(\n                    root=root,\n                    precision=16,\n                    node_labels=leaf_labels,\n                )\n                newick_tree = newick.loads(\n                    ns, length_parser=lambda x: None if x is None else float(x)\n                )[0]\n                leaf_names = newick_tree.get_leaf_names()\n                assert sorted(leaf_names) == sorted(leaf_labels.values())\n                for u in tree.leaves(root):\n                    name = leaf_labels[u]\n                    node = newick_tree.get_node(name)\n                    while u != root:\n                        assert node.length == pytest.approx(tree.branch_length(u))\n                        node = node.ancestor\n                        u = tree.parent(u)\n                    assert node.ancestor is None\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_all_node_labels_dendropy(self, ts):\n        node_labels = {u: f\"n{u}\" for u in range(ts.num_nodes)}\n        for tree in 
ts.trees():\n            # Multiroot trees raise an error\n            for root in tree.roots:\n                ns = tree.newick(\n                    root=root,\n                    precision=16,\n                    node_labels=node_labels,\n                )\n                dpy_tree = dendropy.Tree.get(\n                    data=ns, suppress_internal_node_taxa=False, schema=\"newick\"\n                )\n                assert_fully_labelled_trees_equal(tree, root, node_labels, dpy_tree)\n\n\nclass TestNexusTreeRoundTrip:\n    \"\"\"\n    Test that the nexus format can round-trip tree data under various\n    assumptions.\n    \"\"\"\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_dendropy_defaults(self, ts):\n        if any(tree.num_roots != 1 for tree in ts.trees()):\n            with pytest.raises(ValueError, match=\"single root\"):\n                ts.as_nexus(include_alignments=False)\n        else:\n            nexus = ts.as_nexus(include_alignments=False)\n            tree_list = dendropy.TreeList()\n            tree_list.read(\n                data=nexus,\n                schema=\"nexus\",\n                suppress_internal_node_taxa=False,\n            )\n            assert_dpy_tree_list_equal(ts, tree_list)\n\n\nclass TestNexusIncludeSections:\n    \"\"\"\n    Test if we include the sections as expected.\n    \"\"\"\n\n    @tests.cached_example\n    def ts(self):\n        # 2.00┊   4   ┊\n        #     ┊ ┏━┻┓  ┊\n        # 1.00┊ ┃  3  ┊\n        #     ┊ ┃ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0      10\n        #      |    |\n        #  pos 2    9\n        #  anc A    T\n        ts = tskit.Tree.generate_balanced(3, span=10).tree_sequence\n        tables = ts.dump_tables()\n        tables.sites.add_row(2, ancestral_state=\"A\")\n        tables.sites.add_row(9, ancestral_state=\"T\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"G\")\n        tables.mutations.add_row(site=1, node=3, 
derived_state=\"C\")\n        return tables.tree_sequence()\n\n    def test_nexus_default(self):\n        ref = \"ACTGACTGAC\"\n        expected = textwrap.dedent(\n            \"\"\"\\\n            #NEXUS\n            BEGIN TAXA;\n              DIMENSIONS NTAX=3;\n              TAXLABELS n0 n1 n2;\n            END;\n            BEGIN DATA;\n              DIMENSIONS NCHAR=10;\n              FORMAT DATATYPE=DNA MISSING=?;\n              MATRIX\n                n0 ACGGACTGAT\n                n1 ACAGACTGAC\n                n2 ACAGACTGAC\n              ;\n            END;\n            BEGIN TREES;\n              TREE t0^10 = [&R] (n0:2,(n1:1,n2:1):1);\n            END;\n            \"\"\"\n        )\n        assert expected == self.ts().as_nexus(reference_sequence=ref)\n\n    def test_nexus_no_trees(self):\n        ref = \"ACTGACTGAC\"\n        expected = textwrap.dedent(\n            \"\"\"\\\n            #NEXUS\n            BEGIN TAXA;\n              DIMENSIONS NTAX=3;\n              TAXLABELS n0 n1 n2;\n            END;\n            BEGIN DATA;\n              DIMENSIONS NCHAR=10;\n              FORMAT DATATYPE=DNA MISSING=?;\n              MATRIX\n                n0 ACGGACTGAT\n                n1 ACAGACTGAC\n                n2 ACAGACTGAC\n              ;\n            END;\n            \"\"\"\n        )\n        assert expected == self.ts().as_nexus(\n            reference_sequence=ref, include_trees=False\n        )\n\n    def test_nexus_no_alignments(self):\n        expected = textwrap.dedent(\n            \"\"\"\\\n            #NEXUS\n            BEGIN TAXA;\n              DIMENSIONS NTAX=3;\n              TAXLABELS n0 n1 n2;\n            END;\n            BEGIN TREES;\n              TREE t0^10 = [&R] (n0:2,(n1:1,n2:1):1);\n            END;\n            \"\"\"\n        )\n        assert expected == self.ts().as_nexus(include_alignments=False)\n\n    def test_nexus_no_trees_or_alignments(self):\n        expected = textwrap.dedent(\n            \"\"\"\\\n            
#NEXUS\n            BEGIN TAXA;\n              DIMENSIONS NTAX=3;\n              TAXLABELS n0 n1 n2;\n            END;\n            \"\"\"\n        )\n        assert expected == self.ts().as_nexus(\n            include_trees=False, include_alignments=False\n        )\n\n\nclass TestNexusNodeLabels:\n    @tests.cached_example\n    def balanced_tree(self):\n        #   4\n        # ┏━┻┓\n        # ┃  3\n        # ┃ ┏┻┓\n        # 0 1 2\n        return tskit.Tree.generate_balanced(3)\n\n    def test_as_nexus_labels_basic(self):\n        ts = self.balanced_tree().tree_sequence\n        labels = {0: \"human\", 1: \"chimp\", 2: \"bonobo\"}\n        expected = textwrap.dedent(\n            \"\"\"\\\n            #NEXUS\n            BEGIN TAXA;\n              DIMENSIONS NTAX=3;\n              TAXLABELS human chimp bonobo;\n            END;\n            BEGIN TREES;\n              TRANSLATE n0 human, n1 chimp, n2 bonobo;\n              TREE t0^1 = [&R] (n0:2,(n1:1,n2:1):1);\n            END;\n            \"\"\"\n        )\n        assert expected == ts.as_nexus(include_alignments=False, node_labels=labels)\n\n    def test_as_nexus_labels_partial(self):\n        ts = self.balanced_tree().tree_sequence\n        labels = {0: \"human\", 2: \"bonobo\"}\n        expected = textwrap.dedent(\n            \"\"\"\\\n            #NEXUS\n            BEGIN TAXA;\n              DIMENSIONS NTAX=3;\n              TAXLABELS human n1 bonobo;\n            END;\n            BEGIN TREES;\n              TRANSLATE n0 human, n2 bonobo;\n              TREE t0^1 = [&R] (n0:2,(n1:1,n2:1):1);\n            END;\n            \"\"\"\n        )\n        assert expected == ts.as_nexus(include_alignments=False, node_labels=labels)\n\n    def test_as_nexus_labels_none(self):\n        ts = self.balanced_tree().tree_sequence\n        expected = textwrap.dedent(\n            \"\"\"\\\n            #NEXUS\n            BEGIN TAXA;\n              DIMENSIONS NTAX=3;\n              TAXLABELS n0 n1 n2;\n            
END;\n            BEGIN TREES;\n              TREE t0^1 = [&R] (n0:2,(n1:1,n2:1):1);\n            END;\n            \"\"\"\n        )\n        assert expected == ts.as_nexus(include_alignments=False, node_labels=None)\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_parseable(self, ts):\n        def all_samples_are_leaves(ts):\n            internal_nodes = np.unique(ts.edges_parent)\n            is_internal_sample = np.isin(ts.samples(), internal_nodes)\n            return not np.any(is_internal_sample)\n\n        if not all_samples_are_leaves(ts):\n            # TRANSLATE doesn't support translating internal nodes\n            return\n\n        for tree in ts.trees():\n            if not tree.has_single_root:\n                return\n\n        labels = {}\n        samples = ts.samples()\n        k = random.randint(1, len(samples))\n        for node in random.sample(list(samples), k):\n            labels[node] = f\"new_node_which_was_{node}\"\n\n        nexus = ts.as_nexus(include_alignments=False, node_labels=labels)\n        ds = dendropy.DataSet.get(data=nexus, schema=\"nexus\")\n        tree = ds.tree_lists[0][0]\n        dendropy_labels = [node.taxon.label for node in tree.nodes() if node.taxon]\n        for label in labels.values():\n            assert label.replace(\"_\", \" \") in dendropy_labels\n\n\nclass TestNewickCodePaths:\n    \"\"\"\n    Test that the different code paths we use under the hood lead to\n    identical results.\n    \"\"\"\n\n    # NOTE this probably won't work in general because the C and\n    # Python code paths use different rounding algorithms.\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_default_newick(self, ts):\n        for tree in ts.trees():\n            for root in tree.roots:\n                ns1 = tree.newick(root=root)\n                node_labels = {u: str(u + 1) for u in tree.leaves()}\n                ns2 = tree.newick(root=root, 
node_labels=node_labels)\n                assert ns1 == ns2\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_default_as_newick(self, ts):\n        for tree in ts.trees():\n            for root in tree.roots:\n                ns1 = tree.as_newick(root=root)\n                node_labels = {u: f\"n{u}\" for u in tree.tree_sequence.samples()}\n                ns2 = tree.as_newick(root=root, node_labels=node_labels)\n                assert ns1 == ns2\n\n\nclass TestBalancedBinaryExample:\n    #   4\n    # ┏━┻┓\n    # ┃  3\n    # ┃ ┏┻┓\n    # 0 1 2\n    @tests.cached_example\n    def tree(self):\n        return tskit.Tree.generate_balanced(3)\n\n    def test_newick_default(self):\n        s = (\n            \"(1:2.00000000000000,(2:1.00000000000000,3:1.00000000000000)\"\n            \":1.00000000000000);\"\n        )\n        assert self.tree().newick() == s\n\n    def test_as_newick_default(self):\n        s = \"(n0:2,(n1:1,n2:1):1);\"\n        assert self.tree().as_newick() == s\n\n    def test_newick_zero_precision(self):\n        s = \"(1:2,(2:1,3:1):1);\"\n        assert self.tree().newick(precision=0) == s\n\n    def test_as_newick_zero_precision(self):\n        s = \"(n0:2,(n1:1,n2:1):1);\"\n        assert self.tree().as_newick(precision=0) == s\n\n    def test_as_newick_precision_1(self):\n        s = \"(n0:2.0,(n1:1.0,n2:1.0):1.0);\"\n        assert self.tree().as_newick(precision=1) == s\n\n    def test_as_newick_precision_1_explicit_labels(self):\n        tree = self.tree()\n        s = \"(x0:2.0,(x1:1.0,x2:1.0):1.0);\"\n        node_labels = {u: f\"x{u}\" for u in tree.samples()}\n        assert tree.as_newick(precision=1, node_labels=node_labels) == s\n\n    def test_newick_no_branch_lengths(self):\n        s = \"(1,(2,3));\"\n        assert self.tree().newick(include_branch_lengths=False) == s\n\n    def test_as_newick_no_branch_lengths(self):\n        s = \"(n0,(n1,n2));\"\n        assert 
self.tree().as_newick(include_branch_lengths=False) == s\n\n    def test_newick_all_node_labels(self):\n        s = \"(0:2,(1:1,2:1)3:1)4;\"\n        node_labels = {u: str(u) for u in self.tree().nodes()}\n        ns = self.tree().newick(precision=0, node_labels=node_labels)\n        assert s == ns\n\n    def test_as_newick_all_node_labels(self):\n        s = \"(0:2,(1:1,2:1)3:1)4;\"\n        node_labels = {u: str(u) for u in self.tree().nodes()}\n        ns = self.tree().as_newick(node_labels=node_labels)\n        assert s == ns\n\n    def test_as_newick_variable_length_node_labels(self):\n        s = \"(:2,(1:1,22:1)333:1)4444;\"\n        node_labels = {u: str(u) * u for u in self.tree().nodes()}\n        ns = self.tree().as_newick(node_labels=node_labels)\n        assert s == ns\n\n    def test_as_newick_empty_node_labels(self):\n        s = \"(:2,(:1,:1):1);\"\n        ns = self.tree().as_newick(node_labels={})\n        assert s == ns\n\n    def test_newick_partial_node_labels(self):\n        s = \"(0:2,(1:1,2:1)3:1);\"\n        node_labels = {u: str(u) for u in self.tree().preorder()[1:]}\n        ns = self.tree().newick(precision=0, node_labels=node_labels)\n        assert s == ns\n\n    def test_newick_root(self):\n        s = \"(2:1,3:1);\"\n        assert self.tree().newick(root=3, precision=0) == s\n\n    def test_as_nexus_default(self):\n        ts = self.tree().tree_sequence\n        expected = textwrap.dedent(\n            \"\"\"\\\n            #NEXUS\n            BEGIN TAXA;\n              DIMENSIONS NTAX=3;\n              TAXLABELS n0 n1 n2;\n            END;\n            BEGIN TREES;\n              TREE t0^1 = [&R] (n0:2,(n1:1,n2:1):1);\n            END;\n        \"\"\"\n        )\n        assert ts.as_nexus() == expected\n\n    def test_as_nexus_precision_1(self):\n        ts = self.tree().tree_sequence\n        expected = textwrap.dedent(\n            \"\"\"\\\n            #NEXUS\n            BEGIN TAXA;\n              DIMENSIONS NTAX=3;\n         
     TAXLABELS n0 n1 n2;\n            END;\n            BEGIN TREES;\n              TREE t0.0^1.0 = [&R] (n0:2.0,(n1:1.0,n2:1.0):1.0);\n            END;\n        \"\"\"\n        )\n        assert ts.as_nexus(precision=1) == expected\n\n\nclass TestFractionalBranchLengths:\n    # 0.67┊   4   ┊\n    #     ┊ ┏━┻┓  ┊\n    # 0.33┊ ┃  3  ┊\n    #     ┊ ┃ ┏┻┓ ┊\n    # 0.00┊ 0 1 2 ┊\n    #     0       1\n    @tests.cached_example\n    def tree(self):\n        return tskit.Tree.generate_balanced(3, branch_length=1 / 3)\n\n    def test_newick_default(self):\n        s = (\n            \"(1:0.66666666666667,(2:0.33333333333333,3:0.33333333333333)\"\n            \":0.33333333333333);\"\n        )\n        assert self.tree().newick() == s\n\n    def test_as_newick_default(self):\n        s = (\n            \"(n0:0.66666666666666663,(n1:0.33333333333333331,\"\n            \"n2:0.33333333333333331):0.33333333333333331);\"\n        )\n        assert self.tree().as_newick() == s\n\n    def test_c_and_py_output_equal(self):\n        t = self.tree()\n        assert t.as_newick() == t.as_newick(\n            node_labels={u: f\"n{u}\" for u in t.samples()}\n        )\n\n    def test_as_newick_precision_3(self):\n        s = \"(n0:0.667,(n1:0.333,n2:0.333):0.333);\"\n        assert self.tree().as_newick(precision=3) == s\n\n    def test_newick_precision_3(self):\n        s = \"(1:0.667,(2:0.333,3:0.333):0.333);\"\n        assert self.tree().newick(precision=3) == s\n\n    def test_as_newick_precision_3_labels(self):\n        node_labels = {u: f\"n{u}\" * 3 for u in self.tree().nodes()}\n        s = \"(n0n0n0:0.667,(n1n1n1:0.333,n2n2n2:0.333)n3n3n3:0.333)n4n4n4;\"\n        assert self.tree().as_newick(precision=3, node_labels=node_labels) == s\n\n\nclass TestLargeBranchLengths:\n    # 2000000000.00┊   4   ┊\n    #              ┊ ┏━┻┓  ┊\n    # 1000000000.00┊ ┃  3  ┊\n    #              ┊ ┃ ┏┻┓ ┊\n    # 0.00         ┊ 0 1 2 ┊\n    #              0       1\n    @tests.cached_example\n    
def tree(self):\n        return tskit.Tree.generate_balanced(3, branch_length=1e9)\n\n    def test_newick_default(self):\n        s = (\n            \"(1:2000000000.00000000000000,(2:1000000000.00000000000000,\"\n            \"3:1000000000.00000000000000):1000000000.00000000000000);\"\n        )\n        assert self.tree().newick() == s\n\n    def test_as_newick_default(self):\n        s = \"(n0:2000000000,(n1:1000000000,n2:1000000000):1000000000);\"\n        assert self.tree().as_newick() == s\n\n    def test_newick_precision_3(self):\n        s = \"(1:2000000000.000,(2:1000000000.000,3:1000000000.000):1000000000.000);\"\n        assert self.tree().newick(precision=3) == s\n\n    def test_as_newick_precision_3(self):\n        s = \"(n0:2000000000.000,(n1:1000000000.000,n2:1000000000.000):1000000000.000);\"\n        assert self.tree().as_newick(precision=3) == s\n\n\nclass TestInternalSampleExample:\n    #   4\n    # ┏━┻┓\n    # ┃ *3*\n    # ┃ ┏┻┓\n    # 0 1 2\n    # Leaves are samples but 3 is also a sample.\n    @tests.cached_example\n    def tree(self):\n        tables = tskit.Tree.generate_balanced(3).tree_sequence.dump_tables()\n        flags = tables.nodes.flags\n        flags[3] = 1\n        tables.nodes.flags = flags\n        return tables.tree_sequence().first()\n\n    def test_newick_default(self):\n        # Old newick method doesn't do anything with internal sample\n        s = (\n            \"(1:2.00000000000000,(2:1.00000000000000,3:1.00000000000000)\"\n            \":1.00000000000000);\"\n        )\n        assert self.tree().newick() == s\n\n    def test_as_newick_default(self):\n        # Samples are labelled by default, not leaves.\n        s = \"(n0:2,(n1:1,n2:1)n3:1);\"\n        assert self.tree().as_newick() == s\n\n    def test_dendropy_parsing(self):\n        dpy_tree = dendropy.Tree.get(\n            data=self.tree().as_newick(),\n            schema=\"newick\",\n            suppress_internal_node_taxa=False,\n            
rooting=\"default-rooted\",\n        )\n        # Just check that we can correctly parse out the internal sample.\n        # More exhaustive testing of properties is done elsewhere.\n        n3 = dpy_tree.find_node_with_taxon_label(\"n3\")\n        n1 = dpy_tree.find_node_with_taxon_label(\"n1\")\n        assert n1.parent_node is n3\n        n2 = dpy_tree.find_node_with_taxon_label(\"n2\")\n        assert n2.parent_node is n3\n\n\nclass TestAncientSampleExample:\n    #     8\n    #  ┏━━┻━┓\n    #  5    7\n    # ┏┻┓ ┏━┻┓\n    # 0 1 ┃  6\n    #     ┃ ┏┻┓\n    #     2 3 4\n    @tests.cached_example\n    def tree(self):\n        tables = tskit.Tree.generate_balanced(5).tree_sequence.dump_tables()\n        time = tables.nodes.time\n        time[0] = 1\n        time[1] = 1\n        time[5] = 2\n        tables.nodes.time = time\n        tables.sort()\n        return tables.tree_sequence().first()\n\n    def test_as_newick(self):\n        s = \"((n0:1,n1:1):1,(n2:2,(n3:1,n4:1):1):1);\"\n        assert self.tree().as_newick() == s\n\n    def test_newick(self):\n        s = \"((1:1,2:1):1,(3:2,(4:1,5:1):1):1);\"\n        assert self.tree().newick(precision=0) == s\n\n\nclass TestNonSampleLeafExample:\n    #   4\n    # ┏━┻┓\n    # ┃  3\n    # ┃ ┏┻┓\n    # |0|1 2\n    # Leaf 0 is *not* a sample\n    @tests.cached_example\n    def tree(self):\n        tables = tskit.Tree.generate_balanced(3).tree_sequence.dump_tables()\n        flags = tables.nodes.flags\n        flags[0] = 0\n        tables.nodes.flags = flags\n        return tables.tree_sequence().first()\n\n    def test_newick(self):\n        # newick method doesn't think about samples at all.\n        s = \"(1:2,(2:1,3:1):1);\"\n        assert self.tree().newick(precision=0) == s\n\n    def test_as_newick_default(self):\n        # We don't label node 0 even though it's a leaf.\n        s = \"(:2,(n1:1,n2:1):1);\"\n        assert self.tree().as_newick() == s\n\n    def test_dendropy_parsing(self):\n        # This odd 
topology parses OK with dendropy\n        dpy_tree = dendropy.Tree.get(\n            data=self.tree().as_newick(),\n            schema=\"newick\",\n            suppress_internal_node_taxa=False,\n            rooting=\"default-rooted\",\n        )\n        n1 = dpy_tree.find_node_with_taxon_label(\"n1\")\n        assert n1 is not None\n        n2 = dpy_tree.find_node_with_taxon_label(\"n2\")\n        assert n2 is not None\n        leaves = dpy_tree.leaf_nodes()\n        assert len(leaves) == 3\n        leaves = set(leaves)\n        leaves.remove(n1)\n        leaves.remove(n2)\n        n0 = leaves.pop()\n        assert n0.taxon is None\n\n    def test_newick_lib_parsing(self):\n        newick_tree = newick.loads(self.tree().as_newick())[0]\n        leaf_names = newick_tree.get_leaf_names()\n        assert len(leaf_names) == 3\n        assert \"n1\" in leaf_names\n        assert \"n2\" in leaf_names\n        assert None in leaf_names\n\n\nclass TestNonBinaryExample:\n    # 2.00┊        12         ┊\n    #     ┊   ┏━━━━━╋━━━━━┓   ┊\n    # 1.00┊   9    10    11   ┊\n    #     ┊ ┏━╋━┓ ┏━╋━┓ ┏━╋━┓ ┊\n    # 0.00┊ 0 1 2 3 4 5 6 7 8 ┊\n    #     0                   1\n    @tests.cached_example\n    def tree(self):\n        return tskit.Tree.generate_balanced(9, arity=3)\n\n    def test_as_newick(self):\n        s = \"((n0:1,n1:1,n2:1):1,(n3:1,n4:1,n5:1):1,(n6:1,n7:1,n8:1):1);\"\n        assert self.tree().as_newick() == s\n\n    def test_newick(self):\n        s = \"((1:1,2:1,3:1):1,(4:1,5:1,6:1):1,(7:1,8:1,9:1):1);\"\n        assert self.tree().newick(precision=0) == s\n\n\nclass TestMultiRootExample:\n    #\n    # 1.00┊   9    10    11   ┊\n    #     ┊ ┏━╋━┓ ┏━╋━┓ ┏━╋━┓ ┊\n    # 0.00┊ 0 1 2 3 4 5 6 7 8 ┊\n    #     0                   1\n    @tests.cached_example\n    def tree(self):\n        tables = tskit.Tree.generate_balanced(9, arity=3).tree_sequence.dump_tables()\n        edges = tables.edges.copy()\n        tables.edges.clear()\n        for edge in edges:\n          
  if edge.parent != 12:\n                tables.edges.append(edge)\n        return tables.tree_sequence().first()\n\n    def test_as_newick_fails(self):\n        with pytest.raises(ValueError, match=\"single root\"):\n            self.tree().as_newick()\n\n    def test_newick_fails(self):\n        with pytest.raises(ValueError, match=\"single root\"):\n            self.tree().newick()\n\n    def test_as_newick_per_root(self):\n        t = self.tree()\n        assert t.as_newick(root=9) == \"(n0:1,n1:1,n2:1);\"\n        assert t.as_newick(root=10) == \"(n3:1,n4:1,n5:1);\"\n        assert t.as_newick(root=11) == \"(n6:1,n7:1,n8:1);\"\n\n    def test_newick_per_root(self):\n        t = self.tree()\n        assert t.newick(root=9, precision=0) == \"(1:1,2:1,3:1);\"\n        assert t.newick(root=10, precision=0) == \"(4:1,5:1,6:1);\"\n        assert t.newick(root=11, precision=0) == \"(7:1,8:1,9:1);\"\n\n\nclass TestLineTree:\n    # 3.00┊ 3 ┊\n    #     ┊ ┃ ┊\n    # 2.00┊ 2 ┊\n    #     ┊ ┃ ┊\n    # 1.00┊ 1 ┊\n    #     ┊ ┃ ┊\n    # 0.00┊ 0 ┊\n    #     0   1\n\n    @tests.cached_example\n    def tree(self):\n        tables = tskit.TableCollection(1.0)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        for j in range(3):\n            tables.nodes.add_row(flags=0, time=j + 1)\n            tables.edges.add_row(left=0, right=1, parent=j + 1, child=j)\n        tables.sort()\n        return tables.tree_sequence().first()\n\n    def test_newick(self):\n        s = \"(((1:1.00000000000000):1.00000000000000):1.00000000000000);\"\n        assert s == self.tree().newick()\n\n    def test_as_newick(self):\n        s = \"(((n0:1):1):1);\"\n        assert s == self.tree().as_newick()\n\n    def test_dendropy_parsing(self):\n        dpy_tree = dendropy.Tree.get(\n            data=self.tree().as_newick(),\n            schema=\"newick\",\n            rooting=\"default-rooted\",\n        )\n        n0 = dpy_tree.find_node_with_taxon_label(\"n0\")\n        assert 
n0 is not None\n        assert n0.edge_length == 1\n\n\nclass TestEmptyTree:\n    # The empty tree sequence has no nodes and so there's zero roots.\n    # This gets caught by the \"has_single_root\" error check, which is\n    # probably not right (we should just return the empty string).\n    # It's not an important corner case though, so probably not worth\n    # worrying about.\n    def tree(self):\n        tables = tskit.TableCollection(1.0)\n        return tables.tree_sequence().first()\n\n    def test_newick(self):\n        with pytest.raises(ValueError, match=\"single root\"):\n            self.tree().newick()\n\n    def test_as_newick(self):\n        with pytest.raises(ValueError, match=\"single root\"):\n            self.tree().as_newick()\n\n    def test_as_nexus(self):\n        with pytest.raises(ValueError, match=\"single root\"):\n            self.tree().tree_sequence.as_nexus()\n\n\nclass TestSingleNodeTree:\n    def tree(self):\n        tables = tskit.TableCollection(1.0)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        return tables.tree_sequence().first()\n\n    def test_newick(self):\n        assert self.tree().newick() == \"1;\"\n\n    def test_as_newick(self):\n        assert self.tree().as_newick() == \"n0;\"\n\n    def test_as_newick_labels(self):\n        assert self.tree().as_newick(node_labels={0: \"ABCDE\"}) == \"ABCDE;\"\n\n\nclass TestIntegerTreeSequence:\n    # 3.00┊   5   ┊       ┊\n    #     ┊ ┏━┻┓  ┊       ┊\n    # 2.00┊ ┃  4  ┊   4   ┊\n    #     ┊ ┃ ┏┻┓ ┊  ┏┻━┓ ┊\n    # 1.00┊ ┃ ┃ ┃ ┊  3  ┃ ┊\n    #     ┊ ┃ ┃ ┃ ┊ ┏┻┓ ┃ ┊\n    # 0.00┊ 0 1 2 ┊ 0 2 1 ┊\n    #     0       2      10\n    @tests.cached_example\n    def ts(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       1           0\n        3       0           1\n        4       0           2\n        5       0           3\n        \"\"\"\n  
      )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        2.0     10      3       0\n        2.0     10      3       2\n        0.0     10      4       1\n        0.0     2.0     4       2\n        2.0     10      4       3\n        0.0     2.0     5       0\n        0.0     2.0     5       4\n        \"\"\"\n        )\n        return tskit.load_text(nodes=nodes, edges=edges, strict=False)\n\n    def test_nexus_defaults(self):\n        ts = self.ts()\n        expected = textwrap.dedent(\n            \"\"\"\\\n            #NEXUS\n            BEGIN TAXA;\n              DIMENSIONS NTAX=3;\n              TAXLABELS n0 n1 n2;\n            END;\n            BEGIN TREES;\n              TREE t0^2 = [&R] (n0:3,(n1:2,n2:2):1);\n              TREE t2^10 = [&R] (n1:2,(n0:1,n2:1):1);\n            END;\n            \"\"\"\n        )\n        assert ts.as_nexus() == expected\n\n    def test_nexus_precision_2(self):\n        ts = self.ts()\n        expected = textwrap.dedent(\n            \"\"\"\\\n            #NEXUS\n            BEGIN TAXA;\n              DIMENSIONS NTAX=3;\n              TAXLABELS n0 n1 n2;\n            END;\n            BEGIN TREES;\n              TREE t0.00^2.00 = [&R] (n0:3.00,(n1:2.00,n2:2.00):1.00);\n              TREE t2.00^10.00 = [&R] (n1:2.00,(n0:1.00,n2:1.00):1.00);\n            END;\n            \"\"\"\n        )\n        assert ts.as_nexus(precision=2) == expected\n\n    @pytest.mark.parametrize(\"precision\", [None, 0, 1, 3])\n    def test_file_version_identical(self, precision):\n        ts = self.ts()\n        out = io.StringIO()\n        ts.write_nexus(out, precision=precision)\n        assert out.getvalue() == ts.as_nexus(precision=precision)\n\n\nclass TestFloatTimeTreeSequence:\n    # 3.25┊   5   ┊       ┊\n    #     ┊ ┏━┻┓  ┊       ┊\n    # 2.00┊ ┃  4  ┊   4   ┊\n    #     ┊ ┃ ┏┻┓ ┊  ┏┻━┓ ┊\n    # 1.00┊ ┃ ┃ ┃ ┊  3  ┃ ┊\n    #     ┊ ┃ ┃ ┃ ┊ ┏┻┓ ┃ ┊\n    # 0.00┊ 0 1 2 ┊ 0 2 1 ┊\n    #     0       
2      10\n    @tests.cached_example\n    def ts(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       1           0\n        3       0           1\n        4       0           2\n        5       0           3.25\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        2.0     10      3       0\n        2.0     10      3       2\n        0.0     10      4       1\n        0.0     2.0     4       2\n        2.0     10      4       3\n        0.0     2.0     5       0\n        0.0     2.0     5       4\n        \"\"\"\n        )\n        return tskit.load_text(nodes=nodes, edges=edges, strict=False)\n\n    def test_nexus_defaults(self):\n        ts = self.ts()\n        expected = textwrap.dedent(\n            \"\"\"\\\n            #NEXUS\n            BEGIN TAXA;\n              DIMENSIONS NTAX=3;\n              TAXLABELS n0 n1 n2;\n            END;\n            BEGIN TREES;\n              TREE t0^2 = [&R] (n0:3.25000000000000000,(n1:2.00000000000000000,n2:2.00000000000000000):1.25000000000000000);\n              TREE t2^10 = [&R] (n1:2.00000000000000000,(n0:1.00000000000000000,n2:1.00000000000000000):1.00000000000000000);\n            END;\n            \"\"\"  # noqa: E501\n        )\n        assert ts.as_nexus() == expected\n\n    def test_nexus_precision_2(self):\n        ts = self.ts()\n        expected = textwrap.dedent(\n            \"\"\"\\\n            #NEXUS\n            BEGIN TAXA;\n              DIMENSIONS NTAX=3;\n              TAXLABELS n0 n1 n2;\n            END;\n            BEGIN TREES;\n              TREE t0.00^2.00 = [&R] (n0:3.25,(n1:2.00,n2:2.00):1.25);\n              TREE t2.00^10.00 = [&R] (n1:2.00,(n0:1.00,n2:1.00):1.00);\n            END;\n            \"\"\"\n        )\n        assert ts.as_nexus(precision=2) == expected\n\n\nclass 
TestFloatPositionTreeSequence:\n    # 3.00┊   5   ┊       ┊\n    #     ┊ ┏━┻┓  ┊       ┊\n    # 2.00┊ ┃  4  ┊   4   ┊\n    #     ┊ ┃ ┏┻┓ ┊  ┏┻━┓ ┊\n    # 1.00┊ ┃ ┃ ┃ ┊  3  ┃ ┊\n    #     ┊ ┃ ┃ ┃ ┊ ┏┻┓ ┃ ┊\n    # 0.00┊ 0 1 2 ┊ 0 2 1 ┊\n    #     0      2.5      10\n    @tests.cached_example\n    def ts(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       1           0\n        3       0           1\n        4       0           2\n        5       0           3\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        2.5     10      3       0\n        2.5     10      3       2\n        0.0     10      4       1\n        0.0     2.5     4       2\n        2.5     10      4       3\n        0.0     2.5     5       0\n        0.0     2.5     5       4\n        \"\"\"\n        )\n        return tskit.load_text(nodes=nodes, edges=edges, strict=False)\n\n    def test_nexus_defaults(self):\n        ts = self.ts()\n        expected = textwrap.dedent(\n            \"\"\"\\\n            #NEXUS\n            BEGIN TAXA;\n              DIMENSIONS NTAX=3;\n              TAXLABELS n0 n1 n2;\n            END;\n            BEGIN TREES;\n              TREE t0.00000000000000000^2.50000000000000000 = [&R] (n0:3,(n1:2,n2:2):1);\n              TREE t2.50000000000000000^10.00000000000000000 = [&R] (n1:2,(n0:1,n2:1):1);\n            END;\n            \"\"\"  # noqa: E501\n        )\n        assert ts.as_nexus() == expected\n\n    def test_nexus_precision_2(self):\n        ts = self.ts()\n        expected = textwrap.dedent(\n            \"\"\"\\\n            #NEXUS\n            BEGIN TAXA;\n              DIMENSIONS NTAX=3;\n              TAXLABELS n0 n1 n2;\n            END;\n            BEGIN TREES;\n              TREE t0.00^2.50 = [&R] (n0:3.00,(n1:2.00,n2:2.00):1.00);\n              TREE t2.50^10.00 = 
[&R] (n1:2.00,(n0:1.00,n2:1.00):1.00);\n            END;\n            \"\"\"\n        )\n        assert ts.as_nexus(precision=2) == expected\n\n\ndef test_newick_buffer_too_small_bug():\n    nodes = io.StringIO(\n        \"\"\"\\\n    id  is_sample   population individual time\n    0       1       0       -1      0.00000000000000\n    1       1       0       -1      0.00000000000000\n    2       1       0       -1      0.00000000000000\n    3       1       0       -1      0.00000000000000\n    4       0       0       -1      0.21204940078588\n    5       0       0       -1      0.38445004304611\n    6       0       0       -1      0.83130278081275\n    \"\"\"\n    )\n    edges = io.StringIO(\n        \"\"\"\\\n    id      left            right           parent  child\n    0       0.00000000      1.00000000      4       0\n    1       0.00000000      1.00000000      4       2\n    2       0.00000000      1.00000000      5       1\n    3       0.00000000      1.00000000      5       3\n    4       0.00000000      1.00000000      6       4\n    5       0.00000000      1.00000000      6       5\n    \"\"\"\n    )\n    ts = tskit.load_text(nodes, edges, sequence_length=1, strict=False)\n    tree = ts.first()\n    for precision in range(18):\n        newick_c = tree.newick(precision=precision)\n        node_labels = {u: str(u + 1) for u in ts.samples()}\n        newick_py = tree.newick(precision=precision, node_labels=node_labels)\n        assert newick_c == newick_py\n\n\nclass TestWrapText:\n    def test_even_split(self):\n        example = \"ABCDEFGH\"\n        result = list(tskit.text_formats.wrap_text(example, 4))\n        assert result == [\"ABCD\", \"EFGH\"]\n\n    def test_non_even_split(self):\n        example = \"ABCDEFGH\"\n        result = list(tskit.text_formats.wrap_text(example, 3))\n        assert result == [\"ABC\", \"DEF\", \"GH\"]\n\n    def test_width_one(self):\n        example = \"ABCDEFGH\"\n        result = 
list(tskit.text_formats.wrap_text(example, 1))\n        assert result == [\"A\", \"B\", \"C\", \"D\", \"E\", \"F\", \"G\", \"H\"]\n\n    def test_width_full_length(self):\n        example = \"ABCDEFGH\"\n        result = list(tskit.text_formats.wrap_text(example, 8))\n        assert result == [\"ABCDEFGH\"]\n\n    def test_width_more_than_length(self):\n        example = \"ABCDEFGH\"\n        result = list(tskit.text_formats.wrap_text(example, 100))\n        assert result == [\"ABCDEFGH\"]\n\n    def test_width_0(self):\n        example = \"ABCDEFGH\"\n        result = list(tskit.text_formats.wrap_text(example, 0))\n        assert result == [\"ABCDEFGH\"]\n\n    @pytest.mark.parametrize(\"width\", [-1, -2, -8, -100])\n    def test_width_negative(self, width):\n        # Just documenting that the current implementation works for negative\n        # values fine.\n        example = \"ABCDEFGH\"\n        result = list(tskit.text_formats.wrap_text(example, width))\n        assert result == [\"ABCDEFGH\"]\n\n\nclass TestFastaLineLength:\n    \"\"\"\n    Tests if the fasta file produced has the correct line lengths for\n    default, custom, and no-wrapping options.\n    \"\"\"\n\n    def verify_line_length(self, length, wrap_width=60):\n        # set up data\n        ts = alignment_example(length)\n        output = io.StringIO()\n        ts.write_fasta(output, wrap_width=wrap_width)\n        output.seek(0)\n\n        # check if length perfectly divisible by wrap_width or not and thus\n        # expected line lengths\n        no_hanging_line = True\n        if wrap_width == 0:\n            lines_expect = 1\n            # for easier code in testing function, redefine wrap_width as\n            # full length, ok as called write already\n            wrap_width = length\n        elif length % wrap_width == 0:\n            lines_expect = length // wrap_width\n        else:\n            lines_expect = length // wrap_width + 1\n            extra_line_length = length % 
wrap_width\n            no_hanging_line = False\n\n        seq_line_counter = 0\n        id_lines = 0\n        for line in output:\n            # testing correct characters per sequence line\n            if line[0] != \">\":\n                seq_line_counter += 1\n                line_chars = len(line.strip(\"\\n\"))\n                # test full default width lines\n                if seq_line_counter < lines_expect:\n                    assert wrap_width == line_chars\n                elif no_hanging_line:\n                    assert wrap_width == line_chars\n                # test extra line if not perfectly divided by wrap_width\n                else:\n                    assert extra_line_length == line_chars\n            # testing correct number of lines per sequence and correct num sequences\n            else:\n                id_lines += 1\n                if seq_line_counter > 0:\n                    assert lines_expect == seq_line_counter\n                    seq_line_counter = 0\n        assert id_lines == ts.num_samples\n\n    def test_wrap_length_default_easy(self):\n        # default wrap width (60) perfectly divides sequence length\n        self.verify_line_length(length=300)\n\n    def test_wrap_length_default_harder(self):\n        # default wrap_width imperfectly divides sequence length\n        self.verify_line_length(length=280)\n\n    def test_wrap_length_custom_easy(self):\n        # custom wrap_width, perfectly divides\n        self.verify_line_length(length=100, wrap_width=20)\n\n    def test_wrap_length_custom_harder(self):\n        # custom wrap_width, imperfectly divides\n        self.verify_line_length(length=100, wrap_width=30)\n\n    def test_wrap_length_no_wrap(self):\n        # no wrapping set by wrap_width = 0\n        self.verify_line_length(length=100, wrap_width=0)\n\n    def test_negative_wrap(self):\n        ts = alignment_example(10)\n        with pytest.raises(ValueError, match=\"non-negative integer\"):\n            
ts.as_fasta(wrap_width=-1)\n\n    def test_floating_wrap(self):\n        ts = alignment_example(10)\n        with pytest.raises(ValueError):\n            ts.as_fasta(wrap_width=1.1)\n\n    def test_numpy_wrap(self):\n        ts = alignment_example(10)\n        x1 = ts.as_fasta(wrap_width=4)\n        x2 = ts.as_fasta(wrap_width=np.array([4.0])[0])\n        assert x1 == x2\n\n\nclass TestFileTextOutputEqual:\n    @tests.cached_example\n    def ts(self):\n        return alignment_example(20)\n\n    def test_fasta_defaults(self):\n        ts = self.ts()\n        buff = io.StringIO()\n        ts.write_fasta(buff)\n        assert buff.getvalue() == ts.as_fasta()\n\n    def test_fasta_wrap_width(self):\n        ts = self.ts()\n        buff = io.StringIO()\n        ts.write_fasta(buff, wrap_width=4)\n        assert buff.getvalue() == ts.as_fasta(wrap_width=4)\n\n    def test_nexus_defaults(self):\n        ts = self.ts()\n        buff = io.StringIO()\n        ts.write_nexus(buff)\n        assert buff.getvalue() == ts.as_nexus()\n\n    def test_nexus_precision(self):\n        ts = self.ts()\n        buff = io.StringIO()\n        ts.write_nexus(buff, precision=2)\n        assert buff.getvalue() == ts.as_nexus(precision=2)\n\n\nclass TestFlexibleFileArgFasta:\n    @tests.cached_example\n    def ts(self):\n        return alignment_example(20)\n\n    def test_pathlib(self, tmp_path):\n        path = tmp_path / \"file.fa\"\n        ts = self.ts()\n        ts.write_fasta(path)\n        with open(path) as f:\n            assert f.read() == ts.as_fasta()\n\n    def test_path_str(self, tmp_path):\n        path = str(tmp_path / \"file.fa\")\n        ts = self.ts()\n        ts.write_fasta(path)\n        with open(path) as f:\n            assert f.read() == ts.as_fasta()\n\n    def test_fileobj(self, tmp_path):\n        path = tmp_path / \"file.fa\"\n        ts = self.ts()\n        with open(path, \"w\") as f:\n            ts.write_fasta(f)\n        with open(path) as f:\n            
assert f.read() == ts.as_fasta()\n\n\nclass TestFlexibleFileArgNexus:\n    @tests.cached_example\n    def ts(self):\n        return alignment_example(20)\n\n    def test_pathlib(self, tmp_path):\n        path = tmp_path / \"file.nex\"\n        ts = self.ts()\n        ts.write_nexus(path)\n        with open(path) as f:\n            assert f.read() == ts.as_nexus()\n\n    def test_path_str(self, tmp_path):\n        path = str(tmp_path / \"file.nex\")\n        ts = self.ts()\n        ts.write_nexus(path)\n        with open(path) as f:\n            assert f.read() == ts.as_nexus()\n\n    def test_fileobj(self, tmp_path):\n        path = tmp_path / \"file.nex\"\n        ts = self.ts()\n        with open(path, \"w\") as f:\n            ts.write_nexus(f)\n        with open(path) as f:\n            assert f.read() == ts.as_nexus()\n\n\ndef get_alignment_map(ts, reference_sequence=None):\n    alignments = ts.alignments(reference_sequence=reference_sequence)\n    return {f\"n{u}\": alignment for u, alignment in zip(ts.samples(), alignments)}\n\n\nclass TestFastaBioPythonRoundTrip:\n    \"\"\"\n    Tests that output from our code is read in by available software packages\n    Here test for compatability with biopython processing - Bio.SeqIO\n    \"\"\"\n\n    def verify(self, ts, wrap_width=60, reference_sequence=None):\n        text = ts.as_fasta(wrap_width=wrap_width, reference_sequence=reference_sequence)\n        bio_map = {\n            k: v.seq\n            for k, v in SeqIO.to_dict(SeqIO.parse(io.StringIO(text), \"fasta\")).items()\n        }\n        assert bio_map == get_alignment_map(ts, reference_sequence)\n\n    def test_equal_lines(self):\n        # sequence length perfectly divisible by wrap_width\n        ts = alignment_example(300)\n        self.verify(ts)\n\n    def test_unequal_lines(self):\n        # sequence length not perfectly divisible by wrap_width\n        ts = alignment_example(280)\n        self.verify(ts)\n\n    def test_unwrapped(self):\n        # 
sequences not wrapped\n        ts = alignment_example(300)\n        self.verify(ts, wrap_width=0)\n\n    def test_A_reference(self):\n        ts = alignment_example(20)\n        self.verify(ts, reference_sequence=\"A\" * 20)\n\n    def test_missing_data(self):\n        self.verify(missing_data_example())\n\n\nclass TestFastaDendropyRoundTrip:\n    def parse(self, fasta):\n        d = dendropy.DnaCharacterMatrix.get(data=fasta, schema=\"fasta\")\n        return {str(k.label): str(v) for k, v in d.items()}\n\n    def test_wrapped(self):\n        ts = alignment_example(300)\n        text = ts.as_fasta()\n        alignment_map = self.parse(text)\n        assert get_alignment_map(ts) == alignment_map\n\n    def test_unwrapped(self):\n        ts = alignment_example(300)\n        text = ts.as_fasta(wrap_width=0)\n        alignment_map = self.parse(text)\n        assert get_alignment_map(ts) == alignment_map\n\n    def test_no_reference(self):\n        ts = alignment_example(100, include_reference=False)\n        text = ts.as_fasta()\n        alignment_map = self.parse(text)\n        assert get_alignment_map(ts) == alignment_map\n\n    def test_missing_data(self):\n        ts = missing_data_example()\n        text = ts.as_fasta()\n        alignment_map = self.parse(text)\n        assert get_alignment_map(ts) == alignment_map\n\n\nclass TestDendropyMissingReference:\n    # 2.00┊   4   ┊\n    #     ┊ ┏━┻┓  ┊\n    # 1.00┊ ┃  3  ┊\n    #     ┊ ┃ ┏┻┓ ┊\n    # 0.00┊ 0 1 2 ┊\n    #     0       10\n    #      |     |\n    #  pos 2     9\n    #  anc A     T\n\n    def ts(self):\n        ts = tskit.Tree.generate_balanced(3, span=10).tree_sequence\n        tables = ts.dump_tables()\n        tables.sites.add_row(2, ancestral_state=\"A\")\n        tables.sites.add_row(9, ancestral_state=\"T\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"G\")\n        tables.mutations.add_row(site=1, node=3, derived_state=\"C\")\n        return tables.tree_sequence()\n\n    def 
assert_missing_data_encoded(self, d):\n        assert d.sequence_size == 10\n        assert str(d[\"n0\"][2]) == \"G\"\n        assert str(d[\"n0\"][9]) == \"T\"\n        assert str(d[\"n1\"][2]) == \"A\"\n        assert str(d[\"n1\"][9]) == \"C\"\n        assert str(d[\"n2\"][2]) == \"A\"\n        assert str(d[\"n2\"][9]) == \"C\"\n        for a in d.values():\n            for j in range(d.sequence_size):\n                if j in [2, 9]:\n                    assert (\n                        a[j].state_denomination\n                        == dendropy.StateAlphabet.FUNDAMENTAL_STATE\n                    )\n                else:\n                    assert (\n                        a[j].state_denomination == dendropy.StateAlphabet.AMBIGUOUS_STATE\n                    )\n\n    def test_fasta(self):\n        ts = self.ts()\n        text = ts.as_fasta()\n        d = dendropy.DnaCharacterMatrix.get(data=text, schema=\"fasta\")\n        self.assert_missing_data_encoded(d)\n        assert str(d[\"n0\"][0]) == \"N\"\n\n    def test_fasta_missing_question(self):\n        ts = self.ts()\n        text = ts.as_fasta(missing_data_character=\"?\")\n        d = dendropy.DnaCharacterMatrix.get(data=text, schema=\"fasta\")\n        self.assert_missing_data_encoded(d)\n        assert str(d[\"n0\"][0]) == \"?\"\n\n    def test_nexus(self):\n        ts = self.ts()\n        text = ts.as_nexus()\n        d = dendropy.DnaCharacterMatrix.get(data=text, schema=\"nexus\")\n        self.assert_missing_data_encoded(d)\n        assert str(d[\"n0\"][0]) == \"?\"\n\n    def test_nexus_missing_N(self):\n        ts = self.ts()\n        text = ts.as_nexus(missing_data_character=\"N\")\n        d = dendropy.DnaCharacterMatrix.get(data=text, schema=\"nexus\")\n        self.assert_missing_data_encoded(d)\n        assert str(d[\"n0\"][0]) == \"N\"\n\n\nclass TestDendropyMissingData:\n    \"\"\"\n    Test that we detect missing data correctly in dendropy under\n    various combinations of options.\n   
 \"\"\"\n\n    # 2.00┊   4     ┊\n    #     ┊ ┏━┻┓    ┊\n    # 1.00┊ ┃  3    ┊\n    #     ┊ ┃ ┏┻┓   ┊\n    # 0.00┊ 0 1 2 5 ┊\n    #     0        10\n    #      |      |\n    #  pos 2      9\n    #  anc A      T\n\n    def ts(self):\n        ts = tskit.Tree.generate_balanced(3, span=10).tree_sequence\n        tables = ts.dump_tables()\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        tables.sites.add_row(2, ancestral_state=\"A\")\n        tables.sites.add_row(9, ancestral_state=\"T\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"G\")\n        tables.mutations.add_row(site=1, node=3, derived_state=\"C\")\n        return tables.tree_sequence()\n\n    def assert_missing_data_encoded_A_ref(self, d):\n        assert d.sequence_size == 10\n        assert str(d[\"n0\"]) == \"AAGAAAAAAT\"\n        assert str(d[\"n1\"]) == \"AAAAAAAAAC\"\n        assert str(d[\"n2\"]) == \"AAAAAAAAAC\"\n        for a in [d[\"n0\"], d[\"n1\"], d[\"n2\"]]:\n            assert all(\n                a[j].state_denomination == dendropy.StateAlphabet.FUNDAMENTAL_STATE\n                for j in range(10)\n            )\n        # Do we detect that we have an ambiguous state for the missing sample?\n\n        a5 = d[\"n5\"]\n        # a5 is missing along the full length of the genome, so all sites are missing.\n        assert all(\n            a5[j].state_denomination == dendropy.StateAlphabet.AMBIGUOUS_STATE\n            for j in range(10)\n        )\n\n    def test_fasta_defaults_A_ref(self):\n        ts = self.ts()\n        ref = \"A\" * int(ts.sequence_length)\n        text = ts.as_fasta(reference_sequence=ref)\n        d = dendropy.DnaCharacterMatrix.get(data=text, schema=\"fasta\")\n        self.assert_missing_data_encoded_A_ref(d)\n\n    def test_nexus_defaults_A_ref(self):\n        ts = self.ts()\n        ref = \"A\" * int(ts.sequence_length)\n        text = ts.as_nexus(reference_sequence=ref, include_trees=False)\n        d = 
dendropy.DnaCharacterMatrix.get(data=text, schema=\"nexus\")\n        self.assert_missing_data_encoded_A_ref(d)\n"
  },
  {
    "path": "python/tests/test_provenance.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2024 Tskit Developers\n# Copyright (C) 2018 University of Oxford\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTests for the provenance information attached to tree sequences.\n\"\"\"\n\nimport json\nimport os\nimport platform\nimport sys\nimport time\n\ntry:\n    import resource\nexcept ImportError:\n    resource = None  # resource absent on windows\n\n\nimport msprime\nimport pytest\n\nimport _tskit\nimport tskit\nimport tskit.provenance as provenance\n\n\ndef get_provenance(\n    software_name=\"x\",\n    software_version=\"y\",\n    schema_version=\"1\",\n    environment=None,\n    parameters=None,\n):\n    \"\"\"\n    Utility function to return a provenance document for testing.\n    \"\"\"\n    document = {\n        \"schema_version\": schema_version,\n        \"software\": {\"name\": software_name, \"version\": software_version},\n        \"environment\": {} if environment is None 
else environment,\n        \"parameters\": {} if parameters is None else parameters,\n    }\n    return document\n\n\nclass TestSchema:\n    \"\"\"\n    Tests for schema validation.\n    \"\"\"\n\n    def test_empty(self):\n        with pytest.raises(tskit.ProvenanceValidationError):\n            tskit.validate_provenance({})\n\n    def test_missing_keys(self):\n        minimal = get_provenance()\n        tskit.validate_provenance(minimal)\n        for key in minimal.keys():\n            copy = dict(minimal)\n            del copy[key]\n            with pytest.raises(tskit.ProvenanceValidationError):\n                tskit.validate_provenance(copy)\n        copy = dict(minimal)\n        del copy[\"software\"][\"name\"]\n        with pytest.raises(tskit.ProvenanceValidationError):\n            tskit.validate_provenance(copy)\n        copy = dict(minimal)\n        del copy[\"software\"][\"version\"]\n        with pytest.raises(tskit.ProvenanceValidationError):\n            tskit.validate_provenance(copy)\n\n    def test_software_types(self):\n        for bad_type in [0, [1, 2, 3], {}]:\n            doc = get_provenance(software_name=bad_type)\n            with pytest.raises(tskit.ProvenanceValidationError):\n                tskit.validate_provenance(doc)\n            doc = get_provenance(software_version=bad_type)\n            with pytest.raises(tskit.ProvenanceValidationError):\n                tskit.validate_provenance(doc)\n\n    def test_schema_version_empth(self):\n        doc = get_provenance(schema_version=\"\")\n        with pytest.raises(tskit.ProvenanceValidationError):\n            tskit.validate_provenance(doc)\n\n    def test_software_empty_strings(self):\n        doc = get_provenance(software_name=\"\")\n        with pytest.raises(tskit.ProvenanceValidationError):\n            tskit.validate_provenance(doc)\n        doc = get_provenance(software_version=\"\")\n        with pytest.raises(tskit.ProvenanceValidationError):\n            
tskit.validate_provenance(doc)\n\n    def test_minimal(self):\n        minimal = {\n            \"schema_version\": \"1\",\n            \"software\": {\"name\": \"x\", \"version\": \"y\"},\n            \"environment\": {},\n            \"parameters\": {},\n        }\n        tskit.validate_provenance(minimal)\n\n    def test_extra_stuff(self):\n        extra = {\n            \"you\": \"can\",\n            \"schema_version\": \"1\",\n            \"software\": {\"put\": \"anything\", \"name\": \"x\", \"version\": \"y\"},\n            \"environment\": {\"extra\": [\"you\", \"want\"]},\n            \"parameters\": {\"so\": [\"long\", \"its\", \"JSON\", 0]},\n        }\n        tskit.validate_provenance(extra)\n\n    def test_resources(self):\n        resources = {\n            \"schema_version\": \"1\",\n            \"software\": {\"name\": \"x\", \"version\": \"y\"},\n            \"environment\": {},\n            \"parameters\": {},\n            \"resources\": {\n                \"elapsed_time\": 1,\n                \"user_time\": 2,\n                \"sys_time\": 3,\n                \"max_memory\": 4,\n            },\n        }\n        tskit.validate_provenance(resources)\n\n    def test_resources_error(self):\n        resources = {\n            \"schema_version\": \"1\",\n            \"software\": {\"name\": \"x\", \"version\": \"y\"},\n            \"environment\": {},\n            \"parameters\": {},\n            \"resources\": {\n                \"elapsed_time\": \"1\",\n                \"user_time\": 2,\n                \"sys_time\": 3,\n                \"max_memory\": 4,\n            },\n        }\n        with pytest.raises(tskit.ProvenanceValidationError):\n            tskit.validate_provenance(resources)\n\n\nclass TestOutputProvenance:\n    \"\"\"\n    Check that the schemas we produce in tskit are valid.\n    \"\"\"\n\n    def test_simplify(self):\n        ts = msprime.simulate(5, random_seed=1)\n        ts = ts.simplify()\n        prov = 
json.loads(ts.provenance(1).record)\n        tskit.validate_provenance(prov)\n        assert prov[\"parameters\"][\"command\"] == \"simplify\"\n        assert prov[\"environment\"] == provenance.get_environment(include_tskit=False)\n        assert prov[\"software\"] == {\"name\": \"tskit\", \"version\": tskit.__version__}\n\n\nclass TestEnvironment:\n    \"\"\"\n    Tests for the environment provenance.\n    \"\"\"\n\n    def test_os(self):\n        env = provenance.get_environment()\n        os = {\n            \"system\": platform.system(),\n            \"node\": platform.node(),\n            \"release\": platform.release(),\n            \"version\": platform.version(),\n            \"machine\": platform.machine(),\n        }\n        assert env[\"os\"] == os\n\n    def test_python(self):\n        env = provenance.get_environment()\n        python = {\n            \"implementation\": platform.python_implementation(),\n            \"version\": platform.python_version(),\n        }\n        assert env[\"python\"] == python\n\n    def test_libraries(self):\n        kastore_lib = {\"version\": \".\".join(map(str, _tskit.get_kastore_version()))}\n        env = provenance.get_environment()\n        assert {\"kastore\": kastore_lib, \"tskit\": {\"version\": tskit.__version__}} == env[\n            \"libraries\"\n        ]\n\n        env = provenance.get_environment(include_tskit=False)\n        assert {\"kastore\": kastore_lib} == env[\"libraries\"]\n\n        extra_libs = {\"abc\": [], \"xyz\": {\"one\": 1}}\n        env = provenance.get_environment(include_tskit=False, extra_libs=extra_libs)\n        libs = {\"kastore\": kastore_lib}\n        libs.update(extra_libs)\n        assert libs == env[\"libraries\"]\n\n\nclass TestGetResources:\n    def test_get_resources_keys(self):\n        resources = provenance.get_resources(0)\n        assert \"elapsed_time\" in resources\n        assert \"user_time\" in resources\n        assert \"sys_time\" in resources\n        if 
resource is not None:\n            assert \"max_memory\" in resources\n\n    def test_get_resources_values(self):\n        delta = 0.1\n        t = time.time()\n        resources = provenance.get_resources(t - delta)\n        assert isinstance(resources[\"elapsed_time\"], float)\n        assert isinstance(resources[\"user_time\"], float)\n        assert isinstance(resources[\"sys_time\"], float)\n        assert resources[\"elapsed_time\"] >= delta - 0.001\n        assert resources[\"user_time\"] > 0\n        assert resources[\"sys_time\"] > 0\n        if resource is not None:\n            assert isinstance(resources[\"max_memory\"], int)\n            assert resources[\"max_memory\"] > 1024\n\n    def test_get_resources_platform(self):\n        resources = provenance.get_resources(0)\n        if sys.platform != \"darwin\" and resource is not None:\n            assert resources[\"max_memory\"] % 1024 == 0\n\n\nclass TestGetSchema:\n    \"\"\"\n    Ensure we return the correct JSON schema.\n    \"\"\"\n\n    def test_file_equal(self):\n        s1 = provenance.get_schema()\n        base = os.path.join(os.path.dirname(__file__), \"..\", \"tskit\")\n        with open(os.path.join(base, \"provenance.schema.json\")) as f:\n            s2 = json.load(f)\n        assert s1 == s2\n\n    def test_caching(self):\n        n = 10\n        schemas = [provenance.get_schema() for _ in range(n)]\n        # Ensure all the schemas are different objects.\n        assert len(set(map(id, schemas))) == n\n        # Ensure the schemas are all equal\n        for j in range(n):\n            assert schemas[0] == schemas[j]\n\n    def test_form(self):\n        s = provenance.get_schema()\n        assert s[\"schema\"] == \"http://json-schema.org/draft-07/schema#\"\n        assert s[\"version\"] == \"1.1.0\"\n\n\nclass TestTreeSeqEditMethods:\n    \"\"\"\n    Ensure that tree sequence 'edit' methods correctly record themselves\n    \"\"\"\n\n    def test_keep_delete_different(self):\n        ts = 
msprime.simulate(5, random_seed=1)\n        ts_keep = ts.keep_intervals([[0.25, 0.5]])\n        ts_del = ts.delete_intervals([[0, 0.25], [0.5, 1.0]])\n        assert ts_keep.num_provenances == ts_del.num_provenances\n        for i, (p1, p2) in enumerate(zip(ts_keep.provenances(), ts_del.provenances())):\n            if i == ts_keep.num_provenances - 1:\n                # last one should be different\n                assert p1.record != p2.record\n            else:\n                assert p1.record == p2.record\n"
  },
  {
    "path": "python/tests/test_python_c.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2025 Tskit Developers\n# Copyright (c) 2015-2018 University of Oxford\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTest cases for the low level C interface to tskit.\n\"\"\"\n\nimport collections\nimport gc\nimport inspect\nimport itertools\nimport os\nimport random\nimport tempfile\n\nimport msprime\nimport numpy as np\nimport pytest\n\nimport _tskit\nimport tskit\n\nNON_UTF8_STRING = \"\\ud861\\udd37\"\n\n\ndef get_tracked_sample_counts(ts, st, tracked_samples):\n    \"\"\"\n    Returns a list giving the number of samples in the specified list\n    that are in the subtree rooted at each node.\n    \"\"\"\n    nu = [0 for j in range(ts.get_num_nodes())]\n    for j in tracked_samples:\n        # Duplicates not permitted.\n        assert nu[j] == 0\n        u = j\n        while u != _tskit.NULL:\n            nu[u] += 1\n            u = st.get_parent(u)\n    return nu\n\n\ndef 
get_sample_counts(tree_sequence, st):\n    \"\"\"\n    Returns a list of the sample node counts for the specified tree.\n    \"\"\"\n    nu = [0 for j in range(tree_sequence.get_num_nodes())]\n    for j in range(tree_sequence.get_num_samples()):\n        u = j\n        while u != _tskit.NULL:\n            nu[u] += 1\n            u = st.get_parent(u)\n    return nu\n\n\nclass LowLevelTestCase:\n    \"\"\"\n    Superclass of tests for the low-level interface.\n    \"\"\"\n\n    def verify_tree_dict(self, n, pi):\n        \"\"\"\n        Verifies that the specified tree in dict format is a\n        consistent coalescent history for a sample of size n.\n        \"\"\"\n        assert len(pi) <= 2 * n - 1\n        # _tskit.NULL should not be a node\n        assert _tskit.NULL not in pi\n        # verify the root is equal for all samples\n        root = 0\n        while pi[root] != _tskit.NULL:\n            root = pi[root]\n        for j in range(n):\n            k = j\n            while pi[k] != _tskit.NULL:\n                k = pi[k]\n            assert k == root\n        # 0 to n - 1 inclusive should always be nodes\n        for j in range(n):\n            assert j in pi\n        num_children = collections.defaultdict(int)\n        for j in pi.keys():\n            num_children[pi[j]] += 1\n        # nodes 0 to n are samples.\n        for j in range(n):\n            assert pi[j] != 0\n            assert num_children[j] == 0\n        # All non-sample nodes should be binary\n        for j in pi.keys():\n            if j > n:\n                assert num_children[j] >= 2\n\n    def get_example_tree_sequence(\n        self, sample_size=10, length=1, mutation_rate=1, random_seed=1\n    ):\n        ts = msprime.simulate(\n            sample_size,\n            recombination_rate=0.1,\n            mutation_rate=mutation_rate,\n            random_seed=random_seed,\n            length=length,\n        )\n        return ts.ll_tree_sequence\n\n    def 
get_example_tree_sequences(self):\n        yield self.get_example_tree_sequence()\n        yield self.get_example_tree_sequence(2, 10)\n        yield self.get_example_tree_sequence(20, 10)\n        yield self.get_example_migration_tree_sequence()\n\n    def get_example_migration_tree_sequence(self):\n        pop_configs = [msprime.PopulationConfiguration(5) for _ in range(2)]\n        migration_matrix = [[0, 1], [1, 0]]\n        ts = msprime.simulate(\n            population_configurations=pop_configs,\n            migration_matrix=migration_matrix,\n            mutation_rate=1,\n            record_migrations=True,\n            random_seed=1,\n        )\n        return ts.ll_tree_sequence\n\n    def verify_iterator(self, iterator):\n        \"\"\"\n        Checks that the specified non-empty iterator implements the\n        iterator protocol correctly.\n        \"\"\"\n        list_ = list(iterator)\n        assert len(list_) > 0\n        for _ in range(10):\n            with pytest.raises(StopIteration):\n                next(iterator)\n\n\nclass MetadataTestMixin:\n    metadata_tables = [\n        \"node\",\n        \"edge\",\n        \"site\",\n        \"mutation\",\n        \"migration\",\n        \"individual\",\n        \"population\",\n    ]\n\n\nclass TestTableCollection(LowLevelTestCase):\n    \"\"\"\n    Tests for the low-level TableCollection class\n    \"\"\"\n\n    def test_skip_tables(self, tmp_path):\n        tc = _tskit.TableCollection(1)\n        self.get_example_tree_sequence().dump_tables(tc)\n        with open(tmp_path / \"tmp.trees\", \"wb\") as f:\n            tc.dump(f)\n\n        for good_bool in [1, True]:\n            with open(tmp_path / \"tmp.trees\", \"rb\") as f:\n                tc_skip = _tskit.TableCollection()\n                tc_skip.load(f, skip_tables=good_bool)\n            assert not tc.equals(tc_skip)\n            assert tc.equals(tc_skip, ignore_tables=True)\n\n        for bad_bool in [\"x\", 0.5, {}]:\n            with 
open(tmp_path / \"tmp.trees\", \"rb\") as f:\n                tc_skip = _tskit.TableCollection()\n                with pytest.raises(TypeError):\n                    tc_skip.load(f, skip_tables=bad_bool)\n\n    def test_skip_reference_sequence(self, tmp_path):\n        tc = _tskit.TableCollection(1)\n        self.get_example_tree_sequence().dump_tables(tc)\n        tc.reference_sequence.data = \"ACGT\"\n        with open(tmp_path / \"tmp.trees\", \"wb\") as f:\n            tc.dump(f)\n\n        for good_bool in [1, True]:\n            with open(tmp_path / \"tmp.trees\", \"rb\") as f:\n                tc_skip = _tskit.TableCollection()\n                tc_skip.load(f, skip_reference_sequence=good_bool)\n            assert not tc.equals(tc_skip)\n            assert tc.equals(tc_skip, ignore_reference_sequence=True)\n\n        for bad_bool in [\"x\", 0.5, {}]:\n            with open(tmp_path / \"tmp.trees\", \"rb\") as f:\n                tc_skip = _tskit.TableCollection()\n                with pytest.raises(TypeError):\n                    tc_skip.load(f, skip_reference_sequence=bad_bool)\n\n    def test_file_errors(self):\n        tc1 = _tskit.TableCollection(1)\n        self.get_example_tree_sequence().dump_tables(tc1)\n\n        def loader(*args):\n            tc = _tskit.TableCollection(1)\n            tc.load(*args)\n\n        for func in [tc1.dump, loader]:\n            with pytest.raises(TypeError):\n                func()\n            for bad_type in [None, [], {}]:\n                with pytest.raises(TypeError):\n                    func(bad_type)\n\n    def test_file_format_eof_error(self, tmp_path):\n        with open(tmp_path / \"tmp.trees\", \"wb\") as f:\n            f.write(b\"\")\n        with open(tmp_path / \"tmp.trees\", \"rb\") as f:\n            tc2 = _tskit.TableCollection()\n            with pytest.raises(EOFError):\n                tc2.load(f)\n\n    def test_file_format_kas_error(self, tmp_path):\n        tc1 = _tskit.TableCollection(1)\n     
   self.get_example_tree_sequence().dump_tables(tc1)\n        with open(tmp_path / \"tmp.trees\", \"wb\") as f:\n            tc1.dump(f)\n        with open(tmp_path / \"tmp.trees\", \"rb\") as f:\n            f.seek(1)\n            tc2 = _tskit.TableCollection()\n            with pytest.raises(_tskit.FileFormatError):\n                tc2.load(f)\n\n    def test_dump_equality(self, tmp_path):\n        for ts in self.get_example_tree_sequences():\n            tc = _tskit.TableCollection(sequence_length=ts.get_sequence_length())\n            ts.dump_tables(tc)\n            with open(tmp_path / \"tmp.trees\", \"wb\") as f:\n                tc.dump(f)\n            with open(tmp_path / \"tmp.trees\", \"rb\") as f:\n                tc2 = _tskit.TableCollection()\n                tc2.load(f)\n            assert tc.equals(tc2)\n\n    def test_reference_deletion(self):\n        ts = msprime.simulate(10, mutation_rate=1, random_seed=1)\n        tc = ts.dump_tables()._ll_tables\n        # Get references to all the tables\n        tables = [\n            tc.individuals,\n            tc.nodes,\n            tc.edges,\n            tc.migrations,\n            tc.sites,\n            tc.mutations,\n            tc.populations,\n            tc.provenances,\n        ]\n        del tc\n        for _ in range(10):\n            for table in tables:\n                assert len(str(table)) > 0\n\n    def test_set_sequence_length_errors(self):\n        tables = _tskit.TableCollection(1)\n        with pytest.raises(TypeError):\n            del tables.sequence_length\n        for bad_value in [\"sdf\", None, []]:\n            with pytest.raises(TypeError):\n                tables.sequence_length = bad_value\n\n    def test_set_sequence_length(self):\n        tables = _tskit.TableCollection(1)\n        assert tables.sequence_length == 1\n        for value in [-1, 1e6, 1e-22, 1000, 2**32, -10000]:\n            tables.sequence_length = value\n            assert tables.sequence_length == value\n\n 
   def test_set_time_units_errors(self):\n        tables = _tskit.TableCollection(1)\n        with pytest.raises(AttributeError):\n            del tables.time_units\n        for bad_value in [b\"no bytes\", 59, 43.4, None, []]:\n            with pytest.raises(TypeError):\n                tables.time_units = bad_value\n\n    def test_set_time_units(self):\n        tables = _tskit.TableCollection(1)\n        assert tables.time_units == tskit.TIME_UNITS_UNKNOWN\n        for value in [\"foo\", \"\", \"💩\", \"null char \\0 in string\"]:\n            tables.time_units = value\n            assert tables.time_units == value\n\n    def test_set_metadata_errors(self):\n        tables = _tskit.TableCollection(1)\n        with pytest.raises(AttributeError):\n            del tables.metadata\n        for bad_value in [\"no bytes\", 59, 43.4, None, []]:\n            with pytest.raises(TypeError):\n                tables.metadata = bad_value\n\n    def test_set_metadata(self):\n        tables = _tskit.TableCollection(1)\n        assert tables.metadata == b\"\"\n        for value in [b\"foo\", b\"\", \"💩\".encode(), b\"null char \\0 in string\"]:\n            tables.metadata = value\n            tables.metadata_schema = \"Test we have two separate fields\"\n            assert tables.metadata == value\n\n    def test_set_metadata_schema_errors(self):\n        tables = _tskit.TableCollection(1)\n        with pytest.raises(AttributeError):\n            del tables.metadata_schema\n        for bad_value in [59, 43.4, None, []]:\n            with pytest.raises(TypeError):\n                tables.metadata_schema = bad_value\n\n    def test_set_metadata_schema(self):\n        tables = _tskit.TableCollection(1)\n        assert tables.metadata_schema == \"\"\n        for value in [\"foo\", \"\", \"💩\", \"null char \\0 in string\"]:\n            tables.metadata_schema = value\n            tables.metadata = b\"Test we have two separate fields\"\n            assert tables.metadata_schema == 
value\n\n    def test_simplify_bad_args(self):\n        ts = msprime.simulate(10, random_seed=1)\n        tc = ts.dump_tables()._ll_tables\n        with pytest.raises(TypeError):\n            tc.simplify()\n        with pytest.raises(ValueError):\n            tc.simplify(\"asdf\")\n        with pytest.raises(TypeError):\n            tc.simplify([0, 1], keep_unary=\"sdf\")\n        with pytest.raises(TypeError):\n            tc.simplify([0, 1], keep_unary_in_individuals=\"abc\")\n        with pytest.raises(TypeError):\n            tc.simplify([0, 1], keep_input_roots=\"sdf\")\n        with pytest.raises(TypeError):\n            tc.simplify([0, 1], filter_populations=\"x\")\n        with pytest.raises(TypeError):\n            tc.simplify([0, 1], filter_nodes=\"x\")\n        with pytest.raises(TypeError):\n            tc.simplify([0, 1], update_sample_flags=\"x\")\n        with pytest.raises(_tskit.LibraryError):\n            tc.simplify([0, -1])\n\n    @pytest.mark.parametrize(\"value\", [True, False])\n    @pytest.mark.parametrize(\n        \"flag\",\n        [\n            \"filter_sites\",\n            \"filter_populations\",\n            \"filter_individuals\",\n            \"filter_nodes\",\n            \"update_sample_flags\",\n            \"reduce_to_site_topology\",\n            \"keep_unary\",\n            \"keep_unary_in_individuals\",\n            \"keep_input_roots\",\n        ],\n    )\n    def test_simplify_flags(self, flag, value):\n        tables = _tskit.TableCollection(1)\n        tables.simplify([], **{flag: value})\n\n    def test_link_ancestors_bad_args(self):\n        ts = msprime.simulate(10, random_seed=1)\n        tc = ts.dump_tables()._ll_tables\n        with pytest.raises(TypeError):\n            tc.link_ancestors()\n        with pytest.raises(TypeError):\n            tc.link_ancestors([0, 1])\n        with pytest.raises(ValueError):\n            tc.link_ancestors(samples=[0, 1], ancestors=\"sdf\")\n        with pytest.raises(ValueError):\n 
           tc.link_ancestors(samples=\"sdf\", ancestors=[0, 1])\n        with pytest.raises(_tskit.LibraryError):\n            tc.link_ancestors(samples=[0, 1], ancestors=[11, -1])\n        with pytest.raises(_tskit.LibraryError):\n            tc.link_ancestors(samples=[0, -1], ancestors=[11])\n\n    def test_link_ancestors(self):\n        ts = msprime.simulate(2, random_seed=1)\n        tc = ts.dump_tables()._ll_tables\n        edges = tc.link_ancestors([0, 1], [3])\n        assert isinstance(edges, _tskit.EdgeTable)\n        del edges\n        assert tc.edges.num_rows == 2\n\n    def test_subset_bad_args(self):\n        ts = msprime.simulate(10, random_seed=1)\n        tc = ts.dump_tables()._ll_tables\n        with pytest.raises(TypeError):\n            tc.subset(np.array([\"a\"]))\n        with pytest.raises(ValueError):\n            tc.subset(np.array([[1], [2]], dtype=\"int32\"))\n        with pytest.raises(TypeError):\n            tc.subset()\n        with pytest.raises(_tskit.LibraryError):\n            tc.subset(np.array([100, 200], dtype=\"int32\"))\n\n    def test_union_bad_args(self):\n        ts = msprime.simulate(10, random_seed=1)\n        tc = ts.dump_tables()._ll_tables\n        tc2 = tc\n        with pytest.raises(TypeError):\n            tc.union(tc2, np.array([\"a\"]))\n        with pytest.raises(ValueError):\n            tc.union(tc2, np.array([0], dtype=\"int32\"))\n        with pytest.raises(TypeError):\n            tc.union(tc2)\n        with pytest.raises(TypeError):\n            tc.union()\n        node_mapping = np.arange(ts.num_nodes, dtype=\"int32\")\n        node_mapping[0] = 1200\n        with pytest.raises(_tskit.LibraryError):\n            tc.union(tc2, node_mapping)\n        node_mapping = np.array(\n            [node_mapping.tolist(), node_mapping.tolist()], dtype=\"int32\"\n        )\n        with pytest.raises(ValueError):\n            tc.union(tc2, node_mapping)\n        with pytest.raises(ValueError):\n            tc.union(tc2, 
np.array([[1], [2]], dtype=\"int32\"))\n\n    @pytest.mark.parametrize(\"value\", [True, False])\n    @pytest.mark.parametrize(\n        \"flag\",\n        [\n            \"all_edges\",\n            \"all_mutations\",\n            \"check_shared_equality\",\n            \"add_populations\",\n        ],\n    )\n    def test_union_options(self, flag, value):\n        ts = msprime.simulate(10, random_seed=1)\n        tc = ts.dump_tables()._ll_tables\n        empty_tables = ts.dump_tables()\n        for table in empty_tables.table_name_map.keys():\n            getattr(empty_tables, table).clear()\n        tc2 = empty_tables._ll_tables\n        tc.union(tc2, np.arange(0, dtype=\"int32\"), **{flag: value})\n\n    def test_equals_bad_args(self):\n        ts = msprime.simulate(10, random_seed=1242)\n        tc = ts.dump_tables()._ll_tables\n        with pytest.raises(TypeError):\n            tc.equals()\n        with pytest.raises(TypeError):\n            tc.equals(None)\n        assert tc.equals(tc)\n        with pytest.raises(TypeError):\n            tc.equals(tc, no_such_arg=1)\n        bad_bool = \"x\"\n        with pytest.raises(TypeError):\n            tc.equals(tc, ignore_metadata=bad_bool)\n        with pytest.raises(TypeError):\n            tc.equals(tc, ignore_ts_metadata=bad_bool)\n        with pytest.raises(TypeError):\n            tc.equals(tc, ignore_provenance=bad_bool)\n        with pytest.raises(TypeError):\n            tc.equals(tc, ignore_timestamps=bad_bool)\n        with pytest.raises(TypeError):\n            tc.equals(tc, ignore_tables=bad_bool)\n        with pytest.raises(TypeError):\n            tc.equals(tc, ignore_reference_sequence=bad_bool)\n\n    def test_asdict(self):\n        for ts in self.get_example_tree_sequences():\n            tc = _tskit.TableCollection(sequence_length=ts.get_sequence_length())\n            ts.dump_tables(tc)\n            d = tc.asdict()\n            # Method is tested extensively elsewhere, just basic sanity check 
here\n            assert isinstance(d, dict)\n            assert len(d) > 0\n\n    def test_fromdict(self):\n        for ts in self.get_example_tree_sequences():\n            tc1 = _tskit.TableCollection(sequence_length=ts.get_sequence_length())\n            ts.dump_tables(tc1)\n            d = tc1.asdict()\n            tc2 = _tskit.TableCollection(sequence_length=0)\n            tc2.fromdict(d)\n            assert tc1.equals(tc2)\n\n    def test_asdict_bad_args(self):\n        ts = msprime.simulate(10, random_seed=1242)\n        tc = ts.dump_tables()._ll_tables\n        for bad_type in [None, 0.1, \"str\"]:\n            with pytest.raises(TypeError):\n                tc.asdict(force_offset_64=bad_type)\n\n    def test_fromdict_bad_args(self):\n        tc = _tskit.TableCollection(0)\n        for bad_type in [None, 0.1, \"str\"]:\n            with pytest.raises(TypeError):\n                tc.fromdict(bad_type)\n\n    def test_sort_individuals(self):\n        tc = _tskit.TableCollection(1)\n        tc.sort_individuals()\n\n    def test_delete_older_bad_args(self):\n        tc = _tskit.TableCollection(1)\n        self.get_example_tree_sequence().dump_tables(tc)\n        with pytest.raises(TypeError):\n            tc.delete_older()\n        with pytest.raises(TypeError):\n            tc.delete_older(\"1234\")\n\n\nclass TestIbd:\n    def test_uninitialised(self):\n        result = _tskit.IdentitySegments.__new__(_tskit.IdentitySegments)\n        with pytest.raises(SystemError):\n            result.get(0, 1)\n        with pytest.raises(SystemError):\n            result.print_state()\n        with pytest.raises(SystemError):\n            result.num_segments\n        with pytest.raises(SystemError):\n            result.total_span\n        with pytest.raises(SystemError):\n            result.get_keys()\n\n    def test_get_keys(self):\n        ts = msprime.simulate(10, random_seed=1)\n        tc = ts.dump_tables()._ll_tables\n        pairs = [[0, 1], [0, 2], [1, 2]]\n      
  result = tc.ibd_segments_within([0, 1, 2], store_pairs=True)\n        np.testing.assert_array_equal(result.get_keys(), pairs)\n\n    def test_store_pairs(self):\n        ts = msprime.simulate(10, random_seed=1)\n        tc = ts.dump_tables()._ll_tables\n        # By default we can't get any information about pairs.\n        result = tc.ibd_segments_within()\n        with pytest.raises(_tskit.IdentityPairsNotStoredError):\n            result.get_keys()\n        with pytest.raises(_tskit.IdentityPairsNotStoredError):\n            result.num_pairs\n        with pytest.raises(_tskit.IdentityPairsNotStoredError):\n            result.get(0, 1)\n\n        num_pairs = 45\n        result = tc.ibd_segments_within(store_pairs=True)\n        assert len(result.get_keys()) == num_pairs\n        assert result.num_pairs == num_pairs\n\n        seglist = result.get(0, 1)\n        assert seglist.num_segments == 1\n        assert seglist.total_span == 1\n        with pytest.raises(_tskit.IdentitySegmentsNotStoredError):\n            seglist.node\n        with pytest.raises(_tskit.IdentitySegmentsNotStoredError):\n            seglist.left\n        with pytest.raises(_tskit.IdentitySegmentsNotStoredError):\n            seglist.right\n\n    def test_within_all_pairs(self):\n        ts = msprime.simulate(10, random_seed=1)\n        tc = ts.dump_tables()._ll_tables\n        num_pairs = ts.num_samples * (ts.num_samples - 1) / 2\n        result = tc.ibd_segments_within(store_pairs=True)\n        assert result.num_pairs == num_pairs\n        pairs = np.array(list(itertools.combinations(range(ts.num_samples), 2)))\n        np.testing.assert_array_equal(result.get_keys(), pairs)\n\n    def test_between_all_pairs(self):\n        ts = msprime.simulate(10, random_seed=1)\n        tc = ts.dump_tables()._ll_tables\n        result = tc.ibd_segments_between([5, 5], range(10), store_pairs=True)\n        assert result.num_pairs == 25\n        pairs = np.array(list(itertools.product(range(5), range(5, 
10))))\n        np.testing.assert_array_equal(result.get_keys(), pairs)\n\n    def test_within_bad_args(self):\n        ts = msprime.simulate(10, random_seed=1)\n        tc = ts.dump_tables()._ll_tables\n        for bad_samples in [\"sdf\", {}]:\n            with pytest.raises(ValueError):\n                tc.ibd_segments_within(bad_samples)\n        # input array must be 1D\n        with pytest.raises(ValueError):\n            tc.ibd_segments_within([[[1], [1]]])\n        for bad_float in [\"sdf\", None, {}]:\n            with pytest.raises(TypeError):\n                tc.ibd_segments_within(min_span=bad_float)\n            with pytest.raises(TypeError):\n                tc.ibd_segments_within(max_time=bad_float)\n        with pytest.raises(_tskit.LibraryError):\n            tc.ibd_segments_within(max_time=-1)\n        with pytest.raises(_tskit.LibraryError):\n            tc.ibd_segments_within(min_span=-1)\n\n    def test_between_bad_args(self):\n        ts = msprime.simulate(10, random_seed=1)\n        tc = ts.dump_tables()._ll_tables\n        with pytest.raises(TypeError):\n            tc.ibd_segments_between()\n        with pytest.raises(TypeError):\n            tc.ibd_segments_between([1])\n\n        with pytest.raises(ValueError):\n            tc.ibd_segments_between(\"sdf\", [1, 2])\n        with pytest.raises(ValueError):\n            tc.ibd_segments_between([1, 2], \"sdf\")\n        # The sample_set parsing code is tested elsewhere, so just test\n        # something basic.\n        with pytest.raises(ValueError, match=\"Sum of sample_set_sizes\"):\n            tc.ibd_segments_between([1, 1], [1])\n        for bad_float in [\"sdf\", None, {}]:\n            with pytest.raises(TypeError):\n                tc.ibd_segments_between([1, 1], [0, 1], min_span=bad_float)\n            with pytest.raises(TypeError):\n                tc.ibd_segments_between([1, 1], [0, 1], max_time=bad_float)\n        with pytest.raises(_tskit.LibraryError):\n            
tc.ibd_segments_between([1, 1], [0, 1], min_span=-1)\n        with pytest.raises(_tskit.LibraryError):\n            tc.ibd_segments_between([1, 1], [0, 1], max_time=-1)\n        with pytest.raises(_tskit.LibraryError, match=\"Duplicate sample\"):\n            tc.ibd_segments_between([1, 1], [0, 0])\n\n    def test_get_output(self):\n        ts = msprime.simulate(5, random_seed=1)\n        tc = ts.dump_tables()._ll_tables\n        pairs = [(0, 1), (2, 3)]\n        result = tc.ibd_segments_within([0, 1, 2, 3], store_segments=True)\n        assert isinstance(result, _tskit.IdentitySegments)\n        for pair in pairs:\n            value = result.get(*pair)\n            assert isinstance(value, _tskit.IdentitySegmentList)\n            assert value.num_segments == 1\n            assert isinstance(value.left, np.ndarray)\n            assert isinstance(value.right, np.ndarray)\n            assert isinstance(value.node, np.ndarray)\n            assert list(value.left) == [0]\n            assert list(value.right) == [1]\n            assert len(value.node) == 1\n            assert value.num_segments == 1\n            assert value.total_span == 1\n\n    def test_get_bad_args(self):\n        ts = msprime.simulate(10, random_seed=1)\n        tc = ts.dump_tables()._ll_tables\n        result = tc.ibd_segments_within([0, 1, 2], store_segments=True)\n        with pytest.raises(TypeError):\n            result.get()\n        with pytest.raises(TypeError):\n            result.get(\"0\", 1)\n        with pytest.raises(_tskit.LibraryError, match=\"Both nodes\"):\n            result.get(0, 0)\n        with pytest.raises(_tskit.LibraryError, match=\"Node out of bounds\"):\n            result.get(-1, 0)\n        with pytest.raises(_tskit.LibraryError, match=\"Node out of bounds\"):\n            result.get(0, 100)\n        with pytest.raises(KeyError):\n            result.get(0, 3)\n\n    def test_print_state(self):\n        ts = msprime.simulate(10, random_seed=1)\n        tc = 
ts.dump_tables()._ll_tables\n        result = tc.ibd_segments_within()\n        with pytest.raises(TypeError):\n            result.print_state()\n\n        with tempfile.TemporaryFile(\"w+\") as f:\n            result.print_state(f)\n            f.seek(0)\n            output = f.read()\n        assert len(output) > 0\n        assert \"IBD\" in output\n\n    def test_direct_instantiation(self):\n        # Nobody should do this, but just in case\n        result = _tskit.IdentitySegments()\n        assert result.num_segments == 0\n        assert result.total_span == 0\n        with tempfile.TemporaryFile(\"w+\") as f:\n            result.print_state(f)\n            f.seek(0)\n            output = f.read()\n        assert len(output) > 0\n        assert \"IBD\" in output\n\n\nclass TestIdentitySegmentList:\n    def test_direct_instantiation(self):\n        # Nobody should do this, but just in case\n        seglist = _tskit.IdentitySegmentList()\n        attrs = [\"num_segments\", \"total_span\", \"left\", \"right\", \"node\"]\n        for attr in attrs:\n            with pytest.raises(SystemError, match=\"not initialised\"):\n                getattr(seglist, attr)\n\n    def test_memory_management_within(self):\n        ts = msprime.simulate(10, random_seed=1)\n        tc = ts.dump_tables()._ll_tables\n        result = tc.ibd_segments_within(store_segments=True)\n        del ts, tc\n        lst = result.get(0, 1)\n        assert lst.num_segments == 1\n        del result\n        gc.collect()\n        assert lst.num_segments == 1\n        # Do some allocs to see if we're still working properly\n        x = sum(list(range(1000)))\n        assert x > 0\n        assert lst.num_segments == 1\n\n    def test_memory_management_between(self):\n        ts = msprime.simulate(10, random_seed=1)\n        tc = ts.dump_tables()._ll_tables\n        result = tc.ibd_segments_between([2, 2], range(4), store_segments=True)\n        del ts, tc\n        lst = result.get(0, 2)\n        
assert lst.num_segments == 1\n        del result\n        gc.collect()\n        assert lst.num_segments == 1\n        # Do some allocs to see if we're still working properly\n        x = sum(list(range(1000)))\n        assert x > 0\n        assert lst.num_segments == 1\n\n\nclass TestTableMethods:\n    \"\"\"\n    Tests for the low-level table methods.\n    \"\"\"\n\n    @pytest.mark.parametrize(\"table_name\", tskit.TABLE_NAMES)\n    def test_table_extend(self, table_name, ts_fixture):\n        table = getattr(ts_fixture.dump_tables(), table_name)\n        assert len(table) >= 5\n        ll_table = table.ll_table\n        table_copy = table.copy()\n\n        ll_table.extend(table_copy.ll_table, row_indexes=[])\n        assert table == table_copy\n\n        ll_table.clear()\n        ll_table.extend(table_copy.ll_table, row_indexes=range(len(table_copy)))\n        assert table == table_copy\n\n    @pytest.mark.parametrize(\"table_name\", tskit.TABLE_NAMES)\n    @pytest.mark.parametrize(\n        [\"row_indexes\", \"expected_rows\"],\n        [\n            ([0], [0]),\n            ([4] * 1000, [4] * 1000),\n            ([4, 1, 3, 0, 0], [4, 1, 3, 0, 0]),\n            (np.array([0, 1, 4], dtype=np.uint8), [0, 1, 4]),\n            (np.array([3, 3, 3], dtype=np.uint16), [3, 3, 3]),\n            (np.array([4, 2, 1], dtype=np.int8), [4, 2, 1]),\n            (np.array([4, 2], dtype=np.int16), [4, 2]),\n            (np.array([0, 1], dtype=np.int32), [0, 1]),\n            (range(2, -1, -1), [2, 1, 0]),\n        ],\n    )\n    def test_table_extend_types(\n        self, ts_fixture, table_name, row_indexes, expected_rows\n    ):\n        table = getattr(ts_fixture.dump_tables(), table_name)\n        assert len(table) >= 5\n        ll_table = table.ll_table\n        table_copy = table.copy()\n\n        ll_table.extend(table_copy.ll_table, row_indexes=row_indexes)\n        assert len(table) == len(table_copy) + len(expected_rows)\n        for i, expected_row in 
enumerate(expected_rows):\n            assert table[len(table_copy) + i] == table_copy[expected_row]\n\n    @pytest.mark.parametrize(\"table_name\", tskit.TABLE_NAMES)\n    def test_table_keep_rows_errors(self, table_name, ts_fixture):\n        table = getattr(ts_fixture.dump_tables(), table_name)\n        n = len(table)\n        ll_table = table.ll_table\n        with pytest.raises(ValueError, match=\"must be of length\"):\n            ll_table.keep_rows(np.ones(n - 1, dtype=bool))\n        with pytest.raises(ValueError, match=\"must be of length\"):\n            ll_table.keep_rows(np.ones(n + 1, dtype=bool))\n        with pytest.raises(TypeError, match=\"Cannot cast\"):\n            ll_table.keep_rows(np.ones(n, dtype=int))\n\n    @pytest.mark.parametrize(\"table_name\", tskit.TABLE_NAMES)\n    def test_table_keep_rows_all(self, table_name, ts_fixture):\n        table = getattr(ts_fixture.dump_tables(), table_name)\n        n = len(table)\n        ll_table = table.ll_table\n        a = ll_table.keep_rows(np.ones(n, dtype=bool))\n        assert ll_table.num_rows == n\n        assert a.shape == (n,)\n        assert a.dtype == np.int32\n        assert np.all(a == np.arange(n))\n\n    @pytest.mark.parametrize(\"table_name\", tskit.TABLE_NAMES)\n    def test_table_keep_rows_none(self, table_name, ts_fixture):\n        table = getattr(ts_fixture.dump_tables(), table_name)\n        n = len(table)\n        ll_table = table.ll_table\n        a = ll_table.keep_rows(np.zeros(n, dtype=bool))\n        assert ll_table.num_rows == 0\n        assert a.shape == (n,)\n        assert a.dtype == np.int32\n        assert np.all(a == -1)\n\n    def test_mutation_table_keep_rows_ref_error(self):\n        table = _tskit.MutationTable()\n        table.add_row(site=0, node=0, derived_state=\"A\", parent=2)\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_MUTATION_OUT_OF_BOUNDS\"):\n            table.keep_rows([True])\n\n    def 
test_individual_table_keep_rows_ref_error(self):\n        table = _tskit.IndividualTable()\n        table.add_row(parents=[2])\n        with pytest.raises(\n            _tskit.LibraryError, match=\"TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS\"\n        ):\n            table.keep_rows([True])\n\n    @pytest.mark.parametrize(\n        [\"table_name\", \"column_name\"],\n        [\n            (t, c)\n            for t in tskit.TABLE_NAMES\n            for c in getattr(tskit, f\"{t[:-1].capitalize()}Table\").column_names\n            if c[-7:] != \"_offset\"\n        ],\n    )\n    def test_table_update(self, ts_fixture, table_name, column_name):\n        table = getattr(ts_fixture.dump_tables(), table_name)\n        copy = table.copy()\n        ll_table = table.ll_table\n\n        # Find the first row where this column differs to get a value to swap in\n        other_row_index = -1\n        for i, row in enumerate(table):\n            if not np.array_equal(\n                getattr(table[0], column_name), getattr(row, column_name)\n            ):\n                other_row_index = i\n        assert other_row_index != -1\n\n        # No-op update should not create a change\n        args = ll_table.get_row(0)\n        ll_table.update_row(0, *args)\n        table.assert_equals(copy)\n\n        # Modify the column under test in the first row\n        new_args = list(ll_table.get_row(0))\n        arg_index = list(inspect.signature(table.add_row).parameters.keys()).index(\n            column_name\n        )\n        new_args[arg_index] = ll_table.get_row(other_row_index)[arg_index]\n        ll_table.update_row(0, *new_args)\n        for a, b in zip(ll_table.get_row(0), new_args):\n            np.array_equal(a, b)\n\n    def test_update_defaults(self):\n        t = tskit.IndividualTable()\n        assert t.add_row(flags=1, location=[1, 2], parents=[3, 4], metadata=b\"FOO\") == 0\n        t.ll_table.update_row(0)\n        assert t.flags[0] == 0\n        assert len(t.location) == 0\n    
    assert t.location_offset[0] == 0\n        assert len(t.parents) == 0\n        assert t.parents_offset[0] == 0\n        assert len(t.metadata) == 0\n        assert t.metadata_offset[0] == 0\n\n        t = tskit.NodeTable()\n        assert (\n            t.add_row(flags=1, time=2, population=3, individual=4, metadata=b\"FOO\") == 0\n        )\n        t.ll_table.update_row(0)\n        assert t.time[0] == 0\n        assert t.flags[0] == 0\n        assert t.population[0] == tskit.NULL\n        assert t.individual[0] == tskit.NULL\n        assert len(t.metadata) == 0\n        assert t.metadata_offset[0] == 0\n\n        t = tskit.EdgeTable()\n        assert t.add_row(1, 2, 3, 4, metadata=b\"FOO\") == 0\n        t.ll_table.update_row(0, 1, 2, 3, 4)\n        assert len(t.metadata) == 0\n        assert t.metadata_offset[0] == 0\n\n        t = tskit.MigrationTable()\n        assert t.add_row(1, 2, 3, 4, 5, 6, b\"FOO\") == 0\n        t.ll_table.update_row(0, 1, 2, 3, 4, 5, 6)\n        assert len(t.metadata) == 0\n        assert t.metadata_offset[0] == 0\n\n        t = tskit.MutationTable()\n        assert t.add_row(1, 2, \"A\", 3, b\"FOO\", 4) == 0\n        t.ll_table.update_row(0, 1, 2, \"A\", 3)\n        assert len(t.metadata) == 0\n        assert t.metadata_offset[0] == 0\n        assert tskit.is_unknown_time(t.time[0])\n\n        t = tskit.PopulationTable()\n        assert t.add_row(b\"FOO\") == 0\n        t.ll_table.update_row(0)\n        assert len(t.metadata) == 0\n        assert t.metadata_offset[0] == 0\n\n    def test_update_bad_data(self):\n        t = tskit.IndividualTable()\n        t.add_row()\n        with pytest.raises(TypeError):\n            t.ll_table.update_row(0, flags=\"x\")\n        with pytest.raises(TypeError):\n            t.ll_table.update_row(0, metadata=123)\n        with pytest.raises(ValueError):\n            t.ll_table.update_row(0, location=\"1234\")\n        with pytest.raises(ValueError):\n            t.ll_table.update_row(0, 
parents=\"forty-two\")\n\n        t = tskit.NodeTable()\n        t.add_row()\n        with pytest.raises(TypeError):\n            t.ll_table.update_row(0, flags=\"x\")\n        with pytest.raises(TypeError):\n            t.ll_table.update_row(0, time=\"x\")\n        with pytest.raises(TypeError):\n            t.ll_table.update_row(0, individual=\"x\")\n        with pytest.raises(TypeError):\n            t.ll_table.update_row(0, population=\"x\")\n        with pytest.raises(TypeError):\n            t.ll_table.update_row(0, metadata=123)\n\n        t = tskit.EdgeTable()\n        t.add_row(1, 2, 3, 4)\n        with pytest.raises(TypeError):\n            t.ll_table.update_row(0, left=\"x\", right=0, parent=0, child=0)\n        with pytest.raises(TypeError):\n            t.ll_table.update_row(\n                0,\n            )\n        with pytest.raises(TypeError):\n            t.ll_table.update_row(0, 0, 0, 0, 0, metadata=123)\n\n        t = tskit.SiteTable()\n        t.add_row(0, \"A\")\n        with pytest.raises(TypeError):\n            t.ll_table.update_row(0, \"x\", \"A\")\n        with pytest.raises(TypeError):\n            t.ll_table.update_row(0, 0, 0)\n        with pytest.raises(TypeError):\n            t.ll_table.update_row(0, 0, \"A\", metadata=[0, 1, 2])\n\n        t = tskit.MutationTable()\n        t.add_row(0, 0, \"A\")\n        with pytest.raises(TypeError):\n            t.ll_table.update_row(0, \"0\", 0, \"A\")\n        with pytest.raises(TypeError):\n            t.ll_table.update_row(0, 0, \"0\", \"A\")\n        with pytest.raises(TypeError):\n            t.ll_table.update_row(0, 0, 0, \"A\", parent=None)\n        with pytest.raises(TypeError):\n            t.ll_table.update_row(0, 0, 0, \"A\", metadata=[0])\n        with pytest.raises(TypeError):\n            t.ll_table.update_row(0, 0, 0, \"A\", time=\"A\")\n\n        t = tskit.MigrationTable()\n        with pytest.raises(TypeError):\n            t.add_row(left=\"x\", right=0, node=0, source=0, 
dest=0, time=0)\n        with pytest.raises(TypeError):\n            t.ll_table.update_row(\n                0,\n            )\n        with pytest.raises(TypeError):\n            t.ll_table.update_row(0, 0, 0, 0, 0, 0, 0, metadata=123)\n\n        t = tskit.ProvenanceTable()\n        t.add_row(\"a\", \"b\")\n        with pytest.raises(TypeError):\n            t.ll_table.update_row(0, 0, \"b\")\n        with pytest.raises(TypeError):\n            t.ll_table.update_row(0, \"a\", 0)\n\n        t = tskit.PopulationTable()\n        t.add_row()\n        with pytest.raises(TypeError):\n            t.ll_table.update_row(0, metadata=[0])\n\n\nclass TestTableMethodsErrors:\n    \"\"\"\n    Tests for the error handling of errors in the low-level tables.\n    \"\"\"\n\n    def yield_tables(self, ts):\n        for table in ts.dump_tables().table_name_map.values():\n            yield table.ll_table\n\n    @pytest.mark.parametrize(\n        \"table_name\",\n        tskit.TABLE_NAMES,\n    )\n    def test_table_extend_bad_args(self, ts_fixture, table_name):\n        table = getattr(ts_fixture.dump_tables(), table_name)\n        ll_table = table.ll_table\n        ll_table_copy = table.copy().ll_table\n\n        with pytest.raises(\n            _tskit.LibraryError,\n            match=\"Tables can only be extended using rows from a different table\",\n        ):\n            ll_table.extend(ll_table, row_indexes=[])\n        with pytest.raises(TypeError):\n            ll_table.extend(None, row_indexes=[])\n        with pytest.raises(ValueError):\n            ll_table.extend(ll_table_copy, row_indexes=5)\n        with pytest.raises(TypeError):\n            ll_table.extend(ll_table_copy, row_indexes=[None])\n        with pytest.raises(ValueError, match=\"array\"):\n            ll_table.extend(ll_table_copy, row_indexes=[[0, 1], [2, 3]])\n        with pytest.raises(ValueError, match=\"array\"):\n            ll_table.extend(ll_table_copy, row_indexes=[[0, 1]])\n        with 
pytest.raises(_tskit.LibraryError, match=\"out of bounds\"):\n            ll_table.extend(ll_table_copy, row_indexes=[-1])\n        with pytest.raises(_tskit.LibraryError, match=\"out of bounds\"):\n            ll_table.extend(ll_table_copy, row_indexes=[1000])\n        with pytest.raises(_tskit.LibraryError, match=\"out of bounds\"):\n            ll_table.extend(ll_table_copy, row_indexes=range(10000000, 10000001))\n\n        # Uncastable types\n        for dtype in [np.uint32, np.int64, np.uint64, np.float32, np.float64]:\n            with pytest.raises(TypeError, match=\"Cannot cast\"):\n                ll_table.extend(ll_table_copy, row_indexes=np.array([0], dtype=dtype))\n\n    @pytest.mark.parametrize(\"table_name\", tskit.TABLE_NAMES)\n    def test_update_bad_row_index(self, ts_fixture, table_name):\n        table = getattr(ts_fixture.dump_tables(), table_name)\n        ll_table = table.ll_table\n        row_data = ll_table.get_row(0)\n        with pytest.raises(_tskit.LibraryError, match=\"out of bounds\"):\n            ll_table.update_row(-1, *row_data)\n        with pytest.raises(ValueError, match=\"tskit ids must be\"):\n            ll_table.update_row(-42, *row_data)\n        with pytest.raises(TypeError):\n            ll_table.update_row([], *row_data)\n        with pytest.raises(TypeError):\n            ll_table.update_row(\"abc\", *row_data)\n        with pytest.raises(_tskit.LibraryError, match=\"out of bounds\"):\n            ll_table.update_row(10000, *row_data)\n        with pytest.raises(OverflowError, match=\"Value too large for tskit id type\"):\n            ll_table.update_row(2**62, *row_data)\n\n    def test_equals_bad_args(self, ts_fixture):\n        for ll_table in self.yield_tables(ts_fixture):\n            assert ll_table.equals(ll_table)\n            with pytest.raises(TypeError):\n                ll_table.equals(None)\n            with pytest.raises(TypeError):\n                ll_table.equals(ll_table, no_such_arg=\"\")\n            
uninit_other = type(ll_table).__new__(type(ll_table))\n            with pytest.raises(SystemError):\n                ll_table.equals(uninit_other)\n\n    def test_get_row_bad_args(self, ts_fixture):\n        for ll_table in self.yield_tables(ts_fixture):\n            assert ll_table.get_row(0) is not None\n            with pytest.raises(TypeError):\n                ll_table.get_row(no_such_arg=\"\")\n\n    @pytest.mark.parametrize(\"table\", [\"nodes\", \"individuals\"])\n    def test_flag_underflow_overflow(self, table):\n        tables = _tskit.TableCollection(1)\n        table = getattr(tables, table)\n        table.add_row(flags=0)\n        table.add_row(flags=(1 << 32) - 1)\n        with pytest.raises(OverflowError, match=\"unsigned int32 >= than 2\\\\^32\"):\n            table.add_row(flags=1 << 32)\n        with pytest.raises(OverflowError, match=\"int too big to convert\"):\n            table.add_row(flags=1 << 64)\n        with pytest.raises(OverflowError, match=\"int too big to convert\"):\n            table.add_row(flags=1 << 256)\n        with pytest.raises(\n            ValueError, match=\"Can't convert negative value to unsigned int\"\n        ):\n            table.add_row(flags=-1)\n\n    def test_index(self):\n        tc = msprime.simulate(10, random_seed=42).dump_tables()._ll_tables\n        assert tc.indexes[\"edge_insertion_order\"].dtype == np.int32\n        assert tc.indexes[\"edge_removal_order\"].dtype == np.int32\n        assert np.array_equal(\n            tc.indexes[\"edge_insertion_order\"], np.arange(18, dtype=np.int32)\n        )\n        assert np.array_equal(\n            tc.indexes[\"edge_removal_order\"], np.arange(18, dtype=np.int32)[::-1]\n        )\n        tc.drop_index()\n        assert tc.indexes == {}\n        tc.build_index()\n        assert np.array_equal(\n            tc.indexes[\"edge_insertion_order\"], np.arange(18, dtype=np.int32)\n        )\n        assert np.array_equal(\n            
tc.indexes[\"edge_removal_order\"], np.arange(18, dtype=np.int32)[::-1]\n        )\n\n        modify_indexes = tc.indexes\n        modify_indexes[\"edge_insertion_order\"] = np.arange(42, 42 + 18, dtype=np.int32)\n        modify_indexes[\"edge_removal_order\"] = np.arange(4242, 4242 + 18, dtype=np.int32)\n        tc.indexes = modify_indexes\n        assert np.array_equal(\n            tc.indexes[\"edge_insertion_order\"], np.arange(42, 42 + 18, dtype=np.int32)\n        )\n        assert np.array_equal(\n            tc.indexes[\"edge_removal_order\"], np.arange(4242, 4242 + 18, dtype=np.int32)\n        )\n\n    def test_no_indexes(self):\n        tc = msprime.simulate(10, random_seed=42).dump_tables()._ll_tables\n        tc.drop_index()\n        assert tc.indexes == {}\n\n    def test_bad_indexes(self):\n        tc = msprime.simulate(10, random_seed=42).dump_tables()._ll_tables\n        for col in (\"insertion\", \"removal\"):\n            d = tc.indexes\n            d[f\"edge_{col}_order\"] = d[f\"edge_{col}_order\"][:-1]\n            with pytest.raises(\n                ValueError,\n                match=\"^edge_insertion_order and\"\n                \" edge_removal_order must be the same\"\n                \" length$\",\n            ):\n                tc.indexes = d\n        d = tc.indexes\n        for col in (\"insertion\", \"removal\"):\n            d[f\"edge_{col}_order\"] = d[f\"edge_{col}_order\"][:-1]\n        with pytest.raises(\n            ValueError,\n            match=\"^edge_insertion_order and edge_removal_order must be\"\n            \" the same length as the number of edges$\",\n        ):\n            tc.indexes = d\n\n        # Both columns must be provided, if one is\n        for col in (\"insertion\", \"removal\"):\n            d = tc.indexes\n            del d[f\"edge_{col}_order\"]\n            with pytest.raises(\n                TypeError,\n                match=\"^edge_insertion_order and \"\n                \"edge_removal_order must be 
specified \"\n                \"together$\",\n            ):\n                tc.indexes = d\n\n        tc = (\n            msprime.simulate(10, recombination_rate=10, random_seed=42)\n            .dump_tables()\n            ._ll_tables\n        )\n        modify_indexes = tc.indexes\n        shape = modify_indexes[\"edge_insertion_order\"].shape\n        modify_indexes[\"edge_insertion_order\"] = np.zeros(shape, dtype=np.int32)\n        modify_indexes[\"edge_removal_order\"] = np.zeros(shape, dtype=np.int32)\n        tc.indexes = modify_indexes\n        ts = _tskit.TreeSequence()\n        with pytest.raises(\n            _tskit.LibraryError,\n            match=\"TSK_ERR_TABLES_BAD_INDEXES\",\n        ):\n            ts.load_tables(tc, build_indexes=False)\n\n        modify_indexes[\"edge_insertion_order\"] = np.full(shape, 2**30, dtype=np.int32)\n        modify_indexes[\"edge_removal_order\"] = np.full(shape, 2**30, dtype=np.int32)\n        tc.indexes = modify_indexes\n        ts = _tskit.TreeSequence()\n        with pytest.raises(_tskit.LibraryError, match=\"Edge out of bounds\"):\n            ts.load_tables(tc, build_indexes=False)\n\n\nclass TestTreeSequence(LowLevelTestCase, MetadataTestMixin):\n    \"\"\"\n    Tests for the low-level interface for the TreeSequence.\n    \"\"\"\n\n    ARRAY_NAMES = [\n        \"individuals_flags\",\n        \"individuals_location\",\n        \"individuals_location_offset\",\n        \"individuals_parents\",\n        \"individuals_parents_offset\",\n        \"individuals_metadata\",\n        \"individuals_metadata_offset\",\n        \"nodes_time\",\n        \"nodes_flags\",\n        \"nodes_population\",\n        \"nodes_individual\",\n        \"nodes_metadata\",\n        \"nodes_metadata_offset\",\n        \"edges_left\",\n        \"edges_right\",\n        \"edges_parent\",\n        \"edges_child\",\n        \"edges_metadata\",\n        \"edges_metadata_offset\",\n        \"sites_position\",\n        \"sites_metadata\",\n      
  \"sites_metadata_offset\",\n        \"mutations_site\",\n        \"mutations_node\",\n        \"mutations_parent\",\n        \"mutations_time\",\n        \"mutations_metadata\",\n        \"mutations_metadata_offset\",\n        \"sites_ancestral_state\",\n        \"sites_ancestral_state_offset\",\n        \"mutations_derived_state\",\n        \"mutations_derived_state_offset\",\n        \"provenances_record\",\n        \"provenances_record_offset\",\n        \"provenances_timestamp\",\n        \"provenances_timestamp_offset\",\n        \"migrations_left\",\n        \"migrations_right\",\n        \"migrations_node\",\n        \"migrations_source\",\n        \"migrations_dest\",\n        \"migrations_time\",\n        \"migrations_metadata\",\n        \"migrations_metadata_offset\",\n        \"populations_metadata\",\n        \"populations_metadata_offset\",\n        \"indexes_edge_insertion_order\",\n        \"indexes_edge_removal_order\",\n    ]\n\n    def setUp(self):\n        fd, self.temp_file = tempfile.mkstemp(prefix=\"msp_ll_ts_\")\n        os.close(fd)\n\n    def tearDown(self):\n        os.unlink(self.temp_file)\n\n    def test_skip_tables(self, tmp_path):\n        ts = self.get_example_tree_sequence()\n        with open(tmp_path / \"tmp.trees\", \"wb\") as f:\n            ts.dump(f)\n        tc = _tskit.TableCollection(1)\n        ts.dump_tables(tc)\n\n        for good_bool in [1, True]:\n            with open(tmp_path / \"tmp.trees\", \"rb\") as f:\n                ts_skip = _tskit.TreeSequence()\n                ts_skip.load(f, skip_tables=good_bool)\n            tc_skip = _tskit.TableCollection()\n            ts_skip.dump_tables(tc_skip)\n            assert not tc.equals(tc_skip)\n            assert tc.equals(tc_skip, ignore_tables=True)\n\n        for bad_bool in [\"x\", 0.5, {}]:\n            with open(tmp_path / \"tmp.trees\", \"rb\") as f:\n                ts_skip = _tskit.TreeSequence()\n                with pytest.raises(TypeError):\n              
      ts_skip.load(f, skip_tables=bad_bool)\n\n    def test_skip_reference_sequence(self, tmp_path):\n        tc = _tskit.TableCollection(1)\n        self.get_example_tree_sequence().dump_tables(tc)\n        tc.reference_sequence.data = \"ACGT\"\n        ts = _tskit.TreeSequence()\n        ts.load_tables(tc, build_indexes=True)\n        with open(tmp_path / \"tmp.trees\", \"wb\") as f:\n            ts.dump(f)\n\n        for good_bool in [1, True]:\n            with open(tmp_path / \"tmp.trees\", \"rb\") as f:\n                ts_skip = _tskit.TreeSequence()\n                ts_skip.load(f, skip_reference_sequence=good_bool)\n            tc_skip = _tskit.TableCollection()\n            ts_skip.dump_tables(tc_skip)\n            assert not tc.equals(tc_skip)\n            assert tc.equals(tc_skip, ignore_reference_sequence=True)\n\n        for bad_bool in [\"x\", 0.5, {}]:\n            with open(tmp_path / \"tmp.trees\", \"rb\") as f:\n                ts_skip = _tskit.TreeSequence()\n                with pytest.raises(TypeError):\n                    ts_skip.load(f, skip_reference_sequence=bad_bool)\n\n    def test_file_errors(self):\n        ts1 = self.get_example_tree_sequence()\n\n        def loader(*args):\n            ts2 = _tskit.TreeSequence()\n            ts2.load(*args)\n\n        for func in [ts1.dump, loader]:\n            with pytest.raises(TypeError):\n                func()\n            for bad_type in [None, [], {}]:\n                with pytest.raises(TypeError):\n                    func(bad_type)\n\n    def test_initial_state(self):\n        # Check the initial state to make sure that it is empty.\n        ts = _tskit.TreeSequence()\n        with pytest.raises(ValueError):\n            ts.get_num_samples()\n        with pytest.raises(ValueError):\n            ts.get_sequence_length()\n        with pytest.raises(ValueError):\n            ts.get_num_trees()\n        with pytest.raises(ValueError):\n            ts.get_num_edges()\n        with 
pytest.raises(ValueError):\n            ts.get_num_mutations()\n        with pytest.raises(ValueError):\n            ts.get_num_migrations()\n        with pytest.raises(ValueError):\n            ts.get_num_migrations()\n        with pytest.raises(ValueError):\n            ts.dump()\n\n    def test_num_nodes(self):\n        for ts in self.get_example_tree_sequences():\n            max_node = 0\n            for j in range(ts.get_num_edges()):\n                _, _, parent, child, _ = ts.get_edge(j)\n                for node in [parent, child]:\n                    if node > max_node:\n                        max_node = node\n            assert max_node + 1 == ts.get_num_nodes()\n\n    def test_dump_equality(self, tmp_path):\n        for ts in self.get_example_tree_sequences():\n            tables = _tskit.TableCollection(sequence_length=ts.get_sequence_length())\n            ts.dump_tables(tables)\n            tables.compute_mutation_times()\n            ts = _tskit.TreeSequence()\n            ts.load_tables(tables)\n            with open(tmp_path / \"temp.trees\", \"wb\") as f:\n                ts.dump(f)\n            with open(tmp_path / \"temp.trees\", \"rb\") as f:\n                ts2 = _tskit.TreeSequence()\n                ts2.load(f)\n            tc = _tskit.TableCollection(ts.get_sequence_length())\n            ts.dump_tables(tc)\n            tc2 = _tskit.TableCollection(ts2.get_sequence_length())\n            ts2.dump_tables(tc2)\n            assert tc.equals(tc2)\n\n    def test_get_mutation_interface(self):\n        for ts in self.get_example_tree_sequences():\n            mutations = [ts.get_mutation(j) for j in range(ts.get_num_mutations())]\n            assert len(mutations) == ts.get_num_mutations()\n            # Check the form of the mutations\n            for packed in mutations:\n                (\n                    site,\n                    node,\n                    derived_state,\n                    parent,\n                    metadata,\n  
                  time,\n                    edge,\n                    inherited_state,\n                ) = packed\n                assert isinstance(site, int)\n                assert isinstance(node, int)\n                assert isinstance(derived_state, str)\n                assert isinstance(parent, int)\n                assert isinstance(metadata, bytes)\n                assert isinstance(time, float)\n                assert isinstance(edge, int)\n                assert isinstance(inherited_state, str)\n\n    def test_get_edge_interface(self):\n        for ts in self.get_example_tree_sequences():\n            num_edges = ts.get_num_edges()\n            # We don't accept Python negative indexes here.\n            with pytest.raises(IndexError):\n                ts.get_edge(-1)\n            for j in [0, 10, 10**6]:\n                with pytest.raises(IndexError):\n                    ts.get_edge(num_edges + j)\n            for x in [None, \"\", {}, []]:\n                with pytest.raises(TypeError):\n                    ts.get_edge(x)\n\n    def test_get_node_interface(self):\n        for ts in self.get_example_tree_sequences():\n            num_nodes = ts.get_num_nodes()\n            # We don't accept Python negative indexes here.\n            with pytest.raises(IndexError):\n                ts.get_node(-1)\n            for j in [0, 10, 10**6]:\n                with pytest.raises(IndexError):\n                    ts.get_node(num_nodes + j)\n            for x in [None, \"\", {}, []]:\n                with pytest.raises(TypeError):\n                    ts.get_node(x)\n\n    def test_get_migration_interface(self):\n        ts = self.get_example_migration_tree_sequence()\n        for bad_type in [\"\", None, {}]:\n            with pytest.raises(TypeError):\n                ts.get_migration(bad_type)\n        num_records = ts.get_num_migrations()\n        # We don't accept Python negative indexes here.\n        with pytest.raises(IndexError):\n            
ts.get_migration(-1)\n        for j in [0, 10, 10**6]:\n            with pytest.raises(IndexError):\n                ts.get_migration(num_records + j)\n\n    def test_get_samples(self):\n        for ts in self.get_example_tree_sequences():\n            # get_samples takes no arguments.\n            with pytest.raises(TypeError):\n                ts.get_samples(0)\n            assert np.array_equal(\n                np.arange(ts.get_num_samples(), dtype=np.int32), ts.get_samples()\n            )\n\n    def test_genealogical_nearest_neighbours(self):\n        for ts in self.get_example_tree_sequences():\n            with pytest.raises(TypeError):\n                ts.genealogical_nearest_neighbours()\n            with pytest.raises(TypeError):\n                ts.genealogical_nearest_neighbours(focal=None)\n            with pytest.raises(TypeError):\n                ts.genealogical_nearest_neighbours(\n                    focal=ts.get_samples(),\n                    reference_sets={},\n                )\n            with pytest.raises(ValueError):\n                ts.genealogical_nearest_neighbours(\n                    focal=ts.get_samples(),\n                    reference_sets=[],\n                )\n\n            bad_array_values = [\"\", {}, \"x\", [[[0], [1, 2]]]]\n            for bad_array_value in bad_array_values:\n                with pytest.raises(ValueError):\n                    ts.genealogical_nearest_neighbours(\n                        focal=bad_array_value,\n                        reference_sets=[[0], [1]],\n                    )\n                with pytest.raises(ValueError):\n                    ts.genealogical_nearest_neighbours(\n                        focal=ts.get_samples(),\n                        reference_sets=[[0], bad_array_value],\n                    )\n                with pytest.raises(ValueError):\n                    ts.genealogical_nearest_neighbours(\n                        focal=ts.get_samples(),\n                        
reference_sets=[bad_array_value],\n                    )\n            focal = ts.get_samples()\n            A = ts.genealogical_nearest_neighbours(focal, [focal[2:], focal[:2]])\n            assert A.shape == (len(focal), 2)\n\n    def test_mean_descendants(self):\n        for ts in self.get_example_tree_sequences():\n            with pytest.raises(TypeError):\n                ts.mean_descendants()\n            with pytest.raises(TypeError):\n                ts.mean_descendants(reference_sets={})\n            with pytest.raises(ValueError):\n                ts.mean_descendants(reference_sets=[])\n\n            bad_array_values = [\"\", {}, \"x\", [[[0], [1, 2]]]]\n            for bad_array_value in bad_array_values:\n                with pytest.raises(ValueError):\n                    ts.mean_descendants(\n                        reference_sets=[[0], bad_array_value],\n                    )\n                with pytest.raises(ValueError):\n                    ts.mean_descendants(reference_sets=[bad_array_value])\n            focal = ts.get_samples()\n            A = ts.mean_descendants([focal[2:], focal[:2]])\n            assert A.shape == (ts.get_num_nodes(), 2)\n\n    def test_link_ancestors_bad_args(self):\n        ts = self.get_example_tree_sequence()\n        with pytest.raises(TypeError):\n            ts.link_ancestors()\n        with pytest.raises(TypeError):\n            ts.link_ancestors([0, 1])\n        with pytest.raises(ValueError):\n            ts.link_ancestors(samples=[0, 1], ancestors=\"sdf\")\n        with pytest.raises(ValueError):\n            ts.link_ancestors(samples=\"sdf\", ancestors=[0, 1])\n        with pytest.raises(_tskit.LibraryError):\n            ts.link_ancestors(samples=[0, 1], ancestors=[ts.get_num_nodes(), -1])\n        with pytest.raises(_tskit.LibraryError):\n            ts.link_ancestors(samples=[0, -1], ancestors=[0])\n\n    def test_link_ancestors(self):\n        # Check that the low-level method runs and does not mutate the 
tree sequence\n        # and that it matches the TableCollection implementation.\n        high_ts = msprime.simulate(4, random_seed=1)\n        ts = high_ts.ll_tree_sequence\n        samples = list(range(ts.get_num_samples()))\n        ancestors = list(range(ts.get_num_nodes()))\n        num_edges_before = ts.get_num_edges()\n        edges = ts.link_ancestors(samples, ancestors)\n        assert isinstance(edges, _tskit.EdgeTable)\n        assert edges.num_rows >= 0\n        if edges.num_rows > 0:\n            assert np.all(edges.left >= 0)\n            assert np.all(edges.right <= ts.get_sequence_length())\n            assert np.all(edges.left < edges.right)\n            assert np.all(edges.parent >= 0)\n            assert np.all(edges.parent < ts.get_num_nodes())\n            assert np.all(edges.child >= 0)\n            assert np.all(edges.child < ts.get_num_nodes())\n        assert ts.get_num_edges() == num_edges_before\n\n        # Parity with low-level TableCollection.link_ancestors\n        tc = high_ts.dump_tables()._ll_tables\n        edges_from_tables = tc.link_ancestors(samples, ancestors)\n        assert edges.equals(edges_from_tables)\n\n    def test_metadata_schemas(self):\n        tables = _tskit.TableCollection(1.0)\n        # Set the schema\n        for table_name in self.metadata_tables:\n            table = getattr(tables, f\"{table_name}s\")\n            table.metadata_schema = f\"{table_name} test metadata schema\"\n        # Read back via ll tree sequence\n        tables.build_index()\n        ts = _tskit.TreeSequence()\n        ts.load_tables(tables)\n        schemas = ts.get_table_metadata_schemas()\n        for table_name in self.metadata_tables:\n            assert getattr(schemas, table_name) == f\"{table_name} test metadata schema\"\n        # Clear and read back again\n        for table_name in self.metadata_tables:\n            getattr(tables, f\"{table_name}s\").metadata_schema = \"\"\n        ts = _tskit.TreeSequence()\n        
ts.load_tables(tables)\n        schemas = ts.get_table_metadata_schemas()\n        for table_name in self.metadata_tables:\n            assert getattr(schemas, table_name) == \"\"\n\n    def test_metadata(self):\n        tables = _tskit.TableCollection(1)\n        tables.build_index()\n        ts = _tskit.TreeSequence()\n        ts.load_tables(tables)\n        assert ts.get_metadata() == b\"\"\n        for value in [b\"foo\", b\"\", \"💩\".encode(), b\"null char \\0 in string\"]:\n            tables.metadata = value\n            ts = _tskit.TreeSequence()\n            ts.load_tables(tables)\n            assert ts.get_metadata() == value\n\n    def test_metadata_schema(self):\n        tables = _tskit.TableCollection(1)\n        tables.build_index()\n        ts = _tskit.TreeSequence()\n        ts.load_tables(tables)\n        assert ts.get_metadata_schema() == \"\"\n        for value in [\"foo\", \"\", \"💩\", \"null char \\0 in string\"]:\n            tables.metadata_schema = value\n            ts = _tskit.TreeSequence()\n            ts.load_tables(tables)\n            assert ts.get_metadata_schema() == value\n\n    def test_time_units(self):\n        tables = _tskit.TableCollection(1)\n        tables.build_index()\n        ts = _tskit.TreeSequence()\n        ts.load_tables(tables)\n        assert ts.get_time_units() == tskit.TIME_UNITS_UNKNOWN\n        for value in [\"foo\", \"\", \"💩\", \"null char \\0 in string\"]:\n            tables.time_units = value\n            ts = _tskit.TreeSequence()\n            ts.load_tables(tables)\n            assert ts.get_time_units() == value\n\n    def test_extend_haplotypes(self):\n        ts = self.get_example_tree_sequence(6)\n        ets2 = ts.extend_haplotypes(2)\n        ets4 = ts.extend_haplotypes(4)\n        assert ets2.get_num_nodes() == ts.get_num_nodes()\n        assert ets4.get_num_nodes() == ts.get_num_nodes()\n\n    def test_extend_haplotypes_bad_args(self):\n        ts1 = self.get_example_tree_sequence(10)\n        
with pytest.raises(TypeError):\n            ts1.extend_haplotypes()\n        with pytest.raises(TypeError, match=\"an integer\"):\n            ts1.extend_haplotypes(\"sdf\")\n        with pytest.raises(_tskit.LibraryError, match=\"positive\"):\n            ts1.extend_haplotypes(0)\n        with pytest.raises(_tskit.LibraryError, match=\"positive\"):\n            ts1.extend_haplotypes(-1)\n        tsm = self.get_example_migration_tree_sequence()\n        with pytest.raises(\n            _tskit.LibraryError, match=\"TSK_ERR_MIGRATIONS_NOT_SUPPORTED\"\n        ):\n            tsm.extend_haplotypes(1)\n\n    @pytest.mark.parametrize(\n        \"stat_method_name\",\n        [\n            \"D_matrix\",\n            \"D2_matrix\",\n            \"r2_matrix\",\n            \"D_prime_matrix\",\n            \"r_matrix\",\n            \"Dz_matrix\",\n            \"pi2_matrix\",\n            \"D2_unbiased_matrix\",\n            \"Dz_unbiased_matrix\",\n            \"pi2_unbiased_matrix\",\n        ],\n    )\n    def test_ld_matrix(self, stat_method_name):\n        ts = self.get_example_tree_sequence(10)\n        stat_method = getattr(ts, stat_method_name)\n\n        ss = ts.get_samples()  # sample sets\n        ss_sizes = np.array([len(ss)], dtype=np.uint32)\n        row_sites = np.arange(ts.get_num_sites(), dtype=np.int32)\n        col_sites = row_sites\n        row_pos = ts.get_breakpoints()[:-1]\n        col_pos = row_pos\n        row_pos_list = list(map(float, ts.get_breakpoints()[:-1]))\n        col_pos_list = row_pos_list\n        row_sites_list = list(range(ts.get_num_sites()))\n        col_sites_list = row_sites_list\n\n        # happy path\n        a = stat_method(ss_sizes, ss, row_sites, col_sites, None, None, \"site\")\n        assert a.shape == (10, 10, 1)\n        a = stat_method(ss_sizes, ss, row_sites_list, col_sites_list, None, None, \"site\")\n        assert a.shape == (10, 10, 1)\n        a = stat_method(ss_sizes, ss, None, None, None, None, \"site\")\n       
 assert a.shape == (10, 10, 1)\n\n        a = stat_method(ss_sizes, ss, None, None, row_pos, col_pos, \"branch\")\n        assert a.shape == (2, 2, 1)\n        a = stat_method(ss_sizes, ss, None, None, row_pos_list, col_pos_list, \"branch\")\n        assert a.shape == (2, 2, 1)\n        a = stat_method(ss_sizes, ss, None, None, None, None, \"branch\")\n        assert a.shape == (2, 2, 1)\n\n        # CPython API errors\n        with pytest.raises(ValueError, match=\"Sum of sample_set_sizes\"):\n            bad_ss = np.array([], dtype=np.int32)\n            stat_method(ss_sizes, bad_ss, row_sites, col_sites, None, None, \"site\")\n        with pytest.raises(TypeError, match=\"cast array data\"):\n            bad_ss = np.array(ts.get_samples(), dtype=np.uint32)\n            stat_method(ss_sizes, bad_ss, row_sites, col_sites, None, None, \"site\")\n        with pytest.raises(ValueError, match=\"Unrecognised stats mode\"):\n            stat_method(ss_sizes, ss, row_sites, col_sites, None, None, \"bla\")\n        with pytest.raises(TypeError, match=\"at most\"):\n            stat_method(ss_sizes, ss, row_sites, col_sites, None, None, \"site\", \"abc\")\n        with pytest.raises(ValueError, match=\"invalid literal\"):\n            bad_sites = [\"abadsite\", 0, 3, 2]\n            stat_method(ss_sizes, ss, bad_sites, col_sites, None, None, \"site\")\n        with pytest.raises(TypeError):\n            bad_sites = [None, 0, 3, 2]\n            stat_method(ss_sizes, ss, bad_sites, col_sites, None, None, \"site\")\n        with pytest.raises(TypeError):\n            bad_sites = [{}, 0, 3, 2]\n            stat_method(ss_sizes, ss, bad_sites, col_sites, None, None, \"site\")\n        with pytest.raises(TypeError, match=\"Cannot cast array data\"):\n            bad_sites = np.array([0, 1, 2], dtype=np.uint32)\n            stat_method(ss_sizes, ss, bad_sites, col_sites, None, None, \"site\")\n        with pytest.raises(ValueError, match=\"invalid literal\"):\n            
bad_sites = [\"abadsite\", 0, 3, 2]\n            stat_method(ss_sizes, ss, row_sites, bad_sites, None, None, \"site\")\n        with pytest.raises(TypeError):\n            bad_sites = [None, 0, 3, 2]\n            stat_method(ss_sizes, ss, row_sites, bad_sites, None, None, \"site\")\n        with pytest.raises(TypeError):\n            bad_sites = [{}, 0, 3, 2]\n            stat_method(ss_sizes, ss, row_sites, bad_sites, None, None, \"site\")\n        with pytest.raises(TypeError, match=\"Cannot cast array data\"):\n            bad_sites = np.array([0, 1, 2], dtype=np.uint32)\n            stat_method(ss_sizes, ss, row_sites, bad_sites, None, None, \"site\")\n        with pytest.raises(ValueError):\n            bad_pos = [\"abadpos\", 0.1, 0.2, 2.0]\n            stat_method(ss_sizes, ss, None, None, bad_pos, col_pos, \"branch\")\n        with pytest.raises(TypeError):\n            bad_pos = [{}, 0.1, 0.2, 2.0]\n            stat_method(ss_sizes, ss, None, None, bad_pos, col_pos, \"branch\")\n        with pytest.raises(ValueError):\n            bad_pos = [\"abadpos\", 0, 3, 2]\n            stat_method(ss_sizes, ss, None, None, row_pos, bad_pos, \"branch\")\n        with pytest.raises(TypeError):\n            bad_pos = [{}, 0, 3, 2]\n            stat_method(ss_sizes, ss, None, None, row_pos, bad_pos, \"branch\")\n        with pytest.raises(ValueError, match=\"Cannot specify sites in branch mode\"):\n            stat_method(ss_sizes, ss, row_sites, col_sites, None, None, \"branch\")\n        with pytest.raises(ValueError, match=\"Cannot specify positions in site mode\"):\n            stat_method(ss_sizes, ss, None, None, row_pos, col_pos, \"site\")\n        # C API errors\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_STAT_UNSORTED_SITES\"):\n            bad_sites = np.array([1, 0, 2], dtype=np.int32)\n            stat_method(ss_sizes, ss, bad_sites, col_sites, None, None, \"site\")\n        with pytest.raises(tskit.LibraryError, 
match=\"TSK_ERR_STAT_UNSORTED_SITES\"):\n            bad_sites = np.array([1, 0, 2], dtype=np.int32)\n            stat_method(ss_sizes, ss, row_sites, bad_sites, None, None, \"site\")\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_STAT_DUPLICATE_SITES\"):\n            bad_sites = np.array([1, 1, 2], dtype=np.int32)\n            stat_method(ss_sizes, ss, bad_sites, col_sites, None, None, \"site\")\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_STAT_DUPLICATE_SITES\"):\n            bad_sites = np.array([1, 1, 2], dtype=np.int32)\n            stat_method(ss_sizes, ss, row_sites, bad_sites, None, None, \"site\")\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_SITE_OUT_OF_BOUNDS\"):\n            bad_sites = np.array([-1, 0, 2], dtype=np.int32)\n            stat_method(ss_sizes, ss, bad_sites, col_sites, None, None, \"site\")\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_SITE_OUT_OF_BOUNDS\"):\n            bad_sites = np.array([-1, 0, 2], dtype=np.int32)\n            stat_method(ss_sizes, ss, row_sites, bad_sites, None, None, \"site\")\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_STAT_UNSORTED_POSITIONS\"):\n            bad_pos = np.array([0.7, 0, 0.8], dtype=np.float64)\n            stat_method(ss_sizes, ss, None, None, bad_pos, col_pos, \"branch\")\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_STAT_UNSORTED_POSITIONS\"):\n            bad_pos = np.array([0.7, 0, 0.8], dtype=np.float64)\n            stat_method(ss_sizes, ss, None, None, row_pos, bad_pos, \"branch\")\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_STAT_DUPLICATE_POSITIONS\"):\n            bad_pos = np.array([0.7, 0.7, 0.8], dtype=np.float64)\n            stat_method(ss_sizes, ss, None, None, bad_pos, col_pos, \"branch\")\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_STAT_DUPLICATE_POSITIONS\"):\n            bad_pos = np.array([0.7, 0.7, 0.8], dtype=np.float64)\n        
    stat_method(ss_sizes, ss, None, None, row_pos, bad_pos, \"branch\")\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_POSITION_OUT_OF_BOUNDS\"):\n            bad_pos = np.array([-0.1, 0.7, 0.8], dtype=np.float64)\n            stat_method(ss_sizes, ss, None, None, bad_pos, col_pos, \"branch\")\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_POSITION_OUT_OF_BOUNDS\"):\n            bad_pos = np.array([-0.1, 0.7, 0.8], dtype=np.float64)\n            stat_method(ss_sizes, ss, None, None, row_pos, bad_pos, \"branch\")\n        with pytest.raises(\n            _tskit.LibraryError, match=\"TSK_ERR_INSUFFICIENT_SAMPLE_SETS\"\n        ):\n            bad_ss = np.array([], dtype=np.int32)\n            bad_ss_sizes = np.array([], dtype=np.uint32)\n            stat_method(bad_ss_sizes, bad_ss, row_sites, col_sites, None, None, \"site\")\n        with pytest.raises(\n            _tskit.LibraryError, match=\"TSK_ERR_INSUFFICIENT_SAMPLE_SETS\"\n        ):\n            bad_ss = np.array([], dtype=np.int32)\n            bad_ss_sizes = np.array([], dtype=np.uint32)\n            stat_method(bad_ss_sizes, bad_ss, None, None, row_pos, col_pos, \"branch\")\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_EMPTY_SAMPLE_SET\"):\n            bad_ss = np.array([], dtype=np.int32)\n            bad_ss_sizes = np.array([0], dtype=np.uint32)\n            stat_method(bad_ss_sizes, bad_ss, row_sites, col_sites, None, None, \"site\")\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_EMPTY_SAMPLE_SET\"):\n            bad_ss = np.array([], dtype=np.int32)\n            bad_ss_sizes = np.array([0], dtype=np.uint32)\n            stat_method(bad_ss_sizes, bad_ss, None, None, row_pos, col_pos, \"branch\")\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_NODE_OUT_OF_BOUNDS\"):\n            bad_ss = np.array([1000], dtype=np.int32)\n            bad_ss_sizes = np.array([1], dtype=np.uint32)\n            stat_method(bad_ss_sizes, 
bad_ss, row_sites, col_sites, None, None, \"site\")\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_NODE_OUT_OF_BOUNDS\"):\n            bad_ss = np.array([1000], dtype=np.int32)\n            bad_ss_sizes = np.array([1], dtype=np.uint32)\n            stat_method(bad_ss_sizes, bad_ss, None, None, row_pos, col_pos, \"branch\")\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_DUPLICATE_SAMPLE\"):\n            bad_ss = np.array([2, 2], dtype=np.int32)\n            bad_ss_sizes = np.array([2], dtype=np.uint32)\n            stat_method(bad_ss_sizes, bad_ss, row_sites, col_sites, None, None, \"site\")\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_DUPLICATE_SAMPLE\"):\n            bad_ss = np.array([2, 2], dtype=np.int32)\n            bad_ss_sizes = np.array([2], dtype=np.uint32)\n            stat_method(bad_ss_sizes, bad_ss, None, None, row_pos, col_pos, \"branch\")\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_UNSUPPORTED_STAT_MODE\"):\n            stat_method(ss_sizes, ss, col_sites, row_sites, None, None, \"node\")\n\n    @pytest.mark.parametrize(\n        \"stat_method_name\",\n        [\n            \"D2_ij_matrix\",\n            \"r2_ij_matrix\",\n            \"D2_ij_unbiased_matrix\",\n        ],\n    )\n    def test_ld_matrix_multipop(self, stat_method_name):\n        ts = self.get_example_tree_sequence(10)\n        stat_method = getattr(ts, stat_method_name)\n\n        num_samples = len(ts.get_samples())\n        ss = np.hstack([ts.get_samples(), ts.get_samples()])  # sample sets\n        ss_sizes = np.array([num_samples, num_samples], dtype=np.uint32)\n        indexes = [(0, 0), (0, 1)]\n        row_sites = np.arange(ts.get_num_sites(), dtype=np.int32)\n        col_sites = row_sites\n        row_pos = ts.get_breakpoints()[:-1]\n        col_pos = row_pos\n        row_pos_list = list(map(float, ts.get_breakpoints()[:-1]))\n        col_pos_list = row_pos_list\n        row_sites_list = 
list(range(ts.get_num_sites()))\n        col_sites_list = row_sites_list\n\n        # happy path\n        a = stat_method(ss_sizes, ss, indexes, row_sites, col_sites, None, None, \"site\")\n        assert a.shape == (10, 10, 2)\n        a = stat_method(\n            ss_sizes, ss, indexes, row_sites_list, col_sites_list, None, None, \"site\"\n        )\n        assert a.shape == (10, 10, 2)\n        a = stat_method(ss_sizes, ss, indexes, None, None, None, None, \"site\")\n        assert a.shape == (10, 10, 2)\n\n        a = stat_method(ss_sizes, ss, indexes, None, None, row_pos, col_pos, \"branch\")\n        assert a.shape == (2, 2, 2)\n        a = stat_method(\n            ss_sizes, ss, indexes, None, None, row_pos_list, col_pos_list, \"branch\"\n        )\n        assert a.shape == (2, 2, 2)\n        a = stat_method(ss_sizes, ss, indexes, None, None, None, None, \"branch\")\n        assert a.shape == (2, 2, 2)\n\n        # CPython API errors\n        with pytest.raises(ValueError, match=\"Sum of sample_set_sizes\"):\n            bad_ss = np.array([], dtype=np.int32)\n            stat_method(\n                ss_sizes, bad_ss, indexes, row_sites, col_sites, None, None, \"site\"\n            )\n        with pytest.raises(TypeError, match=\"cast array data\"):\n            bad_ss = np.array(ts.get_samples(), dtype=np.uint32)\n            stat_method(\n                ss_sizes, bad_ss, indexes, row_sites, col_sites, None, None, \"site\"\n            )\n        with pytest.raises(ValueError, match=\"Unrecognised stats mode\"):\n            stat_method(ss_sizes, ss, indexes, row_sites, col_sites, None, None, \"bla\")\n        with pytest.raises(TypeError, match=\"at most\"):\n            stat_method(\n                ss_sizes, ss, indexes, row_sites, col_sites, None, None, \"site\", \"abc\"\n            )\n        with pytest.raises(ValueError, match=\"invalid literal\"):\n            bad_sites = [\"abadsite\", 0, 3, 2]\n            stat_method(ss_sizes, ss, indexes, 
bad_sites, col_sites, None, None, \"site\")\n        with pytest.raises(TypeError):\n            bad_sites = [None, 0, 3, 2]\n            stat_method(ss_sizes, ss, indexes, bad_sites, col_sites, None, None, \"site\")\n        with pytest.raises(TypeError):\n            bad_sites = [{}, 0, 3, 2]\n            stat_method(ss_sizes, ss, indexes, bad_sites, col_sites, None, None, \"site\")\n        with pytest.raises(TypeError, match=\"Cannot cast array data\"):\n            bad_sites = np.array([0, 1, 2], dtype=np.uint32)\n            stat_method(ss_sizes, ss, indexes, bad_sites, col_sites, None, None, \"site\")\n        with pytest.raises(ValueError, match=\"invalid literal\"):\n            bad_sites = [\"abadsite\", 0, 3, 2]\n            stat_method(ss_sizes, ss, indexes, row_sites, bad_sites, None, None, \"site\")\n        with pytest.raises(TypeError):\n            bad_sites = [None, 0, 3, 2]\n            stat_method(ss_sizes, ss, indexes, row_sites, bad_sites, None, None, \"site\")\n        with pytest.raises(TypeError):\n            bad_sites = [{}, 0, 3, 2]\n            stat_method(ss_sizes, ss, indexes, row_sites, bad_sites, None, None, \"site\")\n        with pytest.raises(TypeError, match=\"Cannot cast array data\"):\n            bad_sites = np.array([0, 1, 2], dtype=np.uint32)\n            stat_method(ss_sizes, ss, indexes, row_sites, bad_sites, None, None, \"site\")\n        with pytest.raises(ValueError):\n            bad_pos = [\"abadpos\", 0.1, 0.2, 2.0]\n            stat_method(ss_sizes, ss, indexes, None, None, bad_pos, col_pos, \"branch\")\n        with pytest.raises(TypeError):\n            bad_pos = [{}, 0.1, 0.2, 2.0]\n            stat_method(ss_sizes, ss, indexes, None, None, bad_pos, col_pos, \"branch\")\n        with pytest.raises(ValueError):\n            bad_pos = [\"abadpos\", 0, 3, 2]\n            stat_method(ss_sizes, ss, indexes, None, None, row_pos, bad_pos, \"branch\")\n        with pytest.raises(TypeError):\n            bad_pos = [{}, 
0, 3, 2]\n            stat_method(ss_sizes, ss, indexes, None, None, row_pos, bad_pos, \"branch\")\n        with pytest.raises(ValueError, match=\"Cannot specify sites in branch mode\"):\n            stat_method(\n                ss_sizes, ss, indexes, row_sites, col_sites, None, None, \"branch\"\n            )\n        with pytest.raises(ValueError, match=\"Cannot specify positions in site mode\"):\n            stat_method(ss_sizes, ss, indexes, None, None, row_pos, col_pos, \"site\")\n        # C API errors\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_STAT_UNSORTED_SITES\"):\n            bad_sites = np.array([1, 0, 2], dtype=np.int32)\n            stat_method(ss_sizes, ss, indexes, bad_sites, col_sites, None, None, \"site\")\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_STAT_UNSORTED_SITES\"):\n            bad_sites = np.array([1, 0, 2], dtype=np.int32)\n            stat_method(ss_sizes, ss, indexes, row_sites, bad_sites, None, None, \"site\")\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_STAT_DUPLICATE_SITES\"):\n            bad_sites = np.array([1, 1, 2], dtype=np.int32)\n            stat_method(ss_sizes, ss, indexes, bad_sites, col_sites, None, None, \"site\")\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_STAT_DUPLICATE_SITES\"):\n            bad_sites = np.array([1, 1, 2], dtype=np.int32)\n            stat_method(ss_sizes, ss, indexes, row_sites, bad_sites, None, None, \"site\")\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_SITE_OUT_OF_BOUNDS\"):\n            bad_sites = np.array([-1, 0, 2], dtype=np.int32)\n            stat_method(ss_sizes, ss, indexes, bad_sites, col_sites, None, None, \"site\")\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_SITE_OUT_OF_BOUNDS\"):\n            bad_sites = np.array([-1, 0, 2], dtype=np.int32)\n            stat_method(ss_sizes, ss, indexes, row_sites, bad_sites, None, None, \"site\")\n        with 
pytest.raises(tskit.LibraryError, match=\"TSK_ERR_STAT_UNSORTED_POSITIONS\"):\n            bad_pos = np.array([0.7, 0, 0.8], dtype=np.float64)\n            stat_method(ss_sizes, ss, indexes, None, None, bad_pos, col_pos, \"branch\")\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_STAT_UNSORTED_POSITIONS\"):\n            bad_pos = np.array([0.7, 0, 0.8], dtype=np.float64)\n            stat_method(ss_sizes, ss, indexes, None, None, row_pos, bad_pos, \"branch\")\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_STAT_DUPLICATE_POSITIONS\"):\n            bad_pos = np.array([0.7, 0.7, 0.8], dtype=np.float64)\n            stat_method(ss_sizes, ss, indexes, None, None, bad_pos, col_pos, \"branch\")\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_STAT_DUPLICATE_POSITIONS\"):\n            bad_pos = np.array([0.7, 0.7, 0.8], dtype=np.float64)\n            stat_method(ss_sizes, ss, indexes, None, None, row_pos, bad_pos, \"branch\")\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_POSITION_OUT_OF_BOUNDS\"):\n            bad_pos = np.array([-0.1, 0.7, 0.8], dtype=np.float64)\n            stat_method(ss_sizes, ss, indexes, None, None, bad_pos, col_pos, \"branch\")\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_POSITION_OUT_OF_BOUNDS\"):\n            bad_pos = np.array([-0.1, 0.7, 0.8], dtype=np.float64)\n            stat_method(ss_sizes, ss, indexes, None, None, row_pos, bad_pos, \"branch\")\n        with pytest.raises(\n            _tskit.LibraryError, match=\"TSK_ERR_INSUFFICIENT_SAMPLE_SETS\"\n        ):\n            bad_ss = np.array([], dtype=np.int32)\n            bad_ss_sizes = np.array([], dtype=np.uint32)\n            stat_method(\n                bad_ss_sizes, bad_ss, indexes, row_sites, col_sites, None, None, \"site\"\n            )\n        with pytest.raises(\n            _tskit.LibraryError, match=\"TSK_ERR_INSUFFICIENT_SAMPLE_SETS\"\n        ):\n            bad_ss = np.array([], 
dtype=np.int32)\n            bad_ss_sizes = np.array([], dtype=np.uint32)\n            stat_method(\n                bad_ss_sizes, bad_ss, indexes, None, None, row_pos, col_pos, \"branch\"\n            )\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_BAD_SAMPLE_SET_INDEX\"):\n            bad_ss = np.array([], dtype=np.int32)\n            bad_ss_sizes = np.array([0], dtype=np.uint32)\n            stat_method(\n                bad_ss_sizes, bad_ss, indexes, row_sites, col_sites, None, None, \"site\"\n            )\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_BAD_SAMPLE_SET_INDEX\"):\n            bad_ss = np.array([], dtype=np.int32)\n            bad_ss_sizes = np.array([0], dtype=np.uint32)\n            stat_method(\n                bad_ss_sizes, bad_ss, indexes, None, None, row_pos, col_pos, \"branch\"\n            )\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_NODE_OUT_OF_BOUNDS\"):\n            bad_ss = np.array([1000, 1000], dtype=np.int32)\n            bad_ss_sizes = np.array([1, 1], dtype=np.uint32)\n            stat_method(\n                bad_ss_sizes, bad_ss, indexes, row_sites, col_sites, None, None, \"site\"\n            )\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_NODE_OUT_OF_BOUNDS\"):\n            bad_ss = np.array([1000, 1000], dtype=np.int32)\n            bad_ss_sizes = np.array([1, 1], dtype=np.uint32)\n            stat_method(\n                bad_ss_sizes, bad_ss, indexes, None, None, row_pos, col_pos, \"branch\"\n            )\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_DUPLICATE_SAMPLE\"):\n            bad_ss = np.array([1, 1, 2, 3], dtype=np.int32)\n            bad_ss_sizes = np.array([2, 2], dtype=np.uint32)\n            stat_method(\n                bad_ss_sizes, bad_ss, indexes, row_sites, col_sites, None, None, \"site\"\n            )\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_DUPLICATE_SAMPLE\"):\n            bad_ss = 
np.array([1, 1, 2, 3], dtype=np.int32)\n            bad_ss_sizes = np.array([2, 2], dtype=np.uint32)\n            stat_method(\n                bad_ss_sizes, bad_ss, indexes, None, None, row_pos, col_pos, \"branch\"\n            )\n        with pytest.raises(ValueError, match=\"indexes must be a\"):\n            bad_indexes = np.array([[0, 0, 1, 1], [0, 0, 1, 1]], dtype=np.int32)\n            stat_method(\n                ss_sizes, ss, bad_indexes, row_sites, col_sites, None, None, \"site\"\n            )\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_UNSUPPORTED_STAT_MODE\"):\n            stat_method(ss_sizes, ss, indexes, col_sites, row_sites, None, None, \"node\")\n\n    def test_kc_distance_errors(self):\n        ts1 = self.get_example_tree_sequence(10)\n        with pytest.raises(TypeError):\n            ts1.get_kc_distance()\n        with pytest.raises(TypeError):\n            ts1.get_kc_distance(ts1)\n        for bad_tree in [None, \"tree\", 0]:\n            with pytest.raises(TypeError):\n                ts1.get_kc_distance(bad_tree, lambda_=0)\n        for bad_value in [\"tree\", [], None]:\n            with pytest.raises(TypeError):\n                ts1.get_kc_distance(ts1, lambda_=bad_value)\n\n        # Different numbers of samples fail.\n        ts2 = self.get_example_tree_sequence(11)\n        self.verify_kc_library_error(ts1, ts2)\n\n        # Different sequence lengths fail.\n        ts2 = self.get_example_tree_sequence(10, length=11)\n        self.verify_kc_library_error(ts1, ts2)\n\n    def verify_kc_library_error(self, ts1, ts2):\n        with pytest.raises(_tskit.LibraryError):\n            ts1.get_kc_distance(ts2, 0)\n\n    def test_kc_distance(self):\n        ts1 = self.get_example_tree_sequence(10, random_seed=123456)\n        ts2 = self.get_example_tree_sequence(10, random_seed=1234)\n        for lambda_ in [-1, 0, 1, 1000, -1e300]:\n            x1 = ts1.get_kc_distance(ts2, lambda_)\n            x2 = 
ts2.get_kc_distance(ts1, lambda_)\n            assert x1 == x2\n\n    def test_divergence_matrix(self):\n        n = 10\n        ts = self.get_example_tree_sequence(n, random_seed=12)\n        windows = [0, ts.get_sequence_length()]\n        ids = np.arange(n, dtype=np.int32)\n        sizes = np.ones(n, dtype=np.uint64)\n        D = ts.divergence_matrix(windows, sizes, ids)\n        assert D.shape == (1, n, n)\n        D = ts.divergence_matrix(windows, sample_set_sizes=[1, 1], sample_sets=[0, 1])\n        assert D.shape == (1, 2, 2)\n        D = ts.divergence_matrix(\n            windows, sample_set_sizes=[1, 1], sample_sets=[0, 1], span_normalise=True\n        )\n        assert D.shape == (1, 2, 2)\n\n        for bad_node in [-1, -2, 1000]:\n            with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_NODE_OUT_OF_BOUNDS\"):\n                ts.divergence_matrix(windows, [1, 1], [0, bad_node])\n        with pytest.raises(ValueError, match=\"Sum of sample_set_sizes\"):\n            ts.divergence_matrix(windows, [1, 2], [0, 1])\n        with pytest.raises((ValueError, OverflowError), match=\"Overflow|out of bounds\"):\n            ts.divergence_matrix(windows, [-1, 2], [0])\n\n        with pytest.raises(TypeError, match=\"str\"):\n            ts.divergence_matrix(windows, sizes, ids, span_normalise=\"xdf\")\n        with pytest.raises(TypeError):\n            ts.divergence_matrix(windoze=[0, 1])\n        with pytest.raises(ValueError, match=\"at least 2\"):\n            ts.divergence_matrix(\n                [0],\n                sizes,\n                ids,\n            )\n        with pytest.raises(_tskit.LibraryError, match=\"BAD_WINDOWS\"):\n            ts.divergence_matrix([-1, 0, 1], sizes, ids)\n        with pytest.raises(ValueError, match=\"Unrecognised stats mode\"):\n            ts.divergence_matrix([0, 1], sizes, ids, mode=\"sdf\")\n        with pytest.raises(_tskit.LibraryError, match=\"UNSUPPORTED_STAT_MODE\"):\n            
ts.divergence_matrix([0, 1], sizes, ids, mode=\"node\")\n\n    def test_load_tables_build_indexes(self):\n        for ts in self.get_example_tree_sequences():\n            tables = _tskit.TableCollection(sequence_length=ts.get_sequence_length())\n            ts.dump_tables(tables)\n            tables.drop_index()\n\n            # Tables not in tc but rebuilt\n            ts2 = _tskit.TreeSequence()\n            ts2.load_tables(tables, build_indexes=True)\n            tables2 = _tskit.TableCollection(sequence_length=ts.get_sequence_length())\n            ts2.dump_tables(tables2)\n            assert tables2.has_index()\n\n            # Tables not in tc, not rebuilt so error\n            ts3 = _tskit.TreeSequence()\n            with pytest.raises(\n                _tskit.LibraryError, match=\"Table collection must be indexed\"\n            ):\n                ts3.load_tables(tables)\n\n            # Tables in tc, not rebuilt\n            tables.build_index()\n            ts4 = _tskit.TreeSequence()\n            ts4.load_tables(tables, build_indexes=False)\n            tables4 = _tskit.TableCollection(sequence_length=ts.get_sequence_length())\n            ts4.dump_tables(tables4)\n            assert tables4.has_index()\n\n    def test_clear_table(self, ts_fixture):\n        tables = _tskit.TableCollection(sequence_length=ts_fixture.get_sequence_length())\n        ts_fixture.ll_tree_sequence.dump_tables(tables)\n        tables.clear()\n        data_tables = [t for t in tskit.TABLE_NAMES if t != \"provenances\"]\n        for table in data_tables:\n            assert getattr(tables, f\"{table}\").num_rows == 0\n            assert len(getattr(tables, f\"{table}\").metadata_schema) != 0\n        assert tables.provenances.num_rows > 0\n        assert len(tables.metadata) > 0\n        assert len(tables.metadata_schema) > 0\n\n        tables.clear(clear_provenance=True)\n        assert tables.provenances.num_rows == 0\n        for table in data_tables:\n            assert 
len(getattr(tables, f\"{table}\").metadata_schema) != 0\n        assert len(tables.metadata) > 0\n        assert len(tables.metadata_schema) > 0\n\n        tables.clear(clear_metadata_schemas=True)\n        for table in data_tables:\n            assert len(getattr(tables, f\"{table}\").metadata_schema) == 0\n        assert len(tables.metadata) > 0\n        assert len(tables.metadata_schema) > 0\n\n        tables.clear(clear_ts_metadata_and_schema=True)\n        assert len(tables.metadata) == 0\n        assert len(tables.metadata_schema) == 0\n\n        # Check for attributes that are not cleared\n        assert tables.sequence_length == ts_fixture.tables.sequence_length\n        assert tables.time_units == ts_fixture.tables.time_units\n\n    def test_discrete_genome(self):\n        tables = _tskit.TableCollection(1)\n        tables.build_index()\n        ts = _tskit.TreeSequence()\n        ts.load_tables(tables)\n        assert ts.get_discrete_genome() == 1\n\n    def test_discrete_time(self):\n        tables = _tskit.TableCollection(1)\n        tables.build_index()\n        ts = _tskit.TreeSequence()\n        ts.load_tables(tables)\n        assert ts.get_discrete_time() == 1\n\n    def test_min_time(self):\n        ts = self.get_example_tree_sequence(5)\n        assert isinstance(ts.get_min_time(), float)\n\n    def test_max_time(self):\n        ts = self.get_example_tree_sequence(5)\n        assert isinstance(ts.get_max_time(), float)\n\n    def test_split_edges_return_type(self):\n        ts = self.get_example_tree_sequence()\n        split = ts.split_edges(time=0, flags=0, population=0, metadata=b\"\")\n        assert isinstance(split, _tskit.TreeSequence)\n\n    def test_split_edges_bad_types(self):\n        ts = self.get_example_tree_sequence()\n\n        def f(time=0, flags=0, population=0, metadata=b\"\"):\n            return ts.split_edges(\n                time=time,\n                flags=flags,\n                population=population,\n                
metadata=metadata,\n            )\n\n        with pytest.raises(TypeError):\n            f(time=\"0\")\n        with pytest.raises(TypeError):\n            f(flags=\"0\")\n        with pytest.raises(TypeError):\n            f(metadata=\"0\")\n\n    def test_split_edges_bad_population(self):\n        ts = self.get_example_tree_sequence()\n        with pytest.raises(_tskit.LibraryError, match=\"POPULATION_OUT_OF_BOUNDS\"):\n            ts.split_edges(\n                time=0,\n                flags=0,\n                population=ts.get_num_populations(),\n                metadata=b\"\",\n            )\n\n    @pytest.mark.parametrize(\"name\", ARRAY_NAMES)\n    def test_array_read_only(self, name, ts_fixture):\n        ts_fixture = ts_fixture.ll_tree_sequence\n        with pytest.raises(AttributeError, match=\"not writable\"):\n            setattr(ts_fixture, name, None)\n        with pytest.raises(AttributeError, match=\"not writable\"):\n            delattr(ts_fixture, name)\n\n        a = getattr(ts_fixture, name)\n        with pytest.raises(ValueError, match=\"assignment destination\"):\n            a[:] = 0\n        with pytest.raises(ValueError, match=\"assignment destination\"):\n            a[0] = 0\n        with pytest.raises(ValueError, match=\"cannot set WRITEABLE\"):\n            a.setflags(write=True)\n\n    @pytest.mark.parametrize(\"name\", ARRAY_NAMES)\n    def test_array_properties(self, name, ts_fixture):\n        ts_fixture = ts_fixture.ll_tree_sequence\n        a = getattr(ts_fixture, name)\n        assert not a.flags.writeable\n        assert a.flags.aligned\n        assert a.flags.c_contiguous\n        assert not a.flags.owndata\n        assert a.base == ts_fixture\n        b = getattr(ts_fixture, name)\n        assert a is not b\n        assert np.all(a == b)\n        # This checks that the underlying pointer to memory is the same in\n        # both arrays.\n        assert a.__array_interface__ == b.__array_interface__\n\n    
@pytest.mark.parametrize(\"name\", ARRAY_NAMES)\n    def test_array_lifetime(self, name, ts_fixture):\n        ts_fixture = ts_fixture.ll_tree_sequence\n        a1 = getattr(ts_fixture, name)\n        a2 = a1.copy()\n        assert a1 is not a2\n        del ts_fixture\n        # Do some memory operations\n        a3 = np.ones(10**6)\n        assert np.all(a1 == a2)\n        del a1\n        # Just do something to touch memory\n        a2[:] = 0\n        assert a3 is not a2\n\n    @pytest.mark.parametrize(\"name\", (\"individuals_nodes\", \"mutations_edge\"))\n    def test_generated_columns(self, ts_fixture, name):\n        name = f\"get_{name}\"\n        ts_fixture = ts_fixture.ll_tree_sequence\n\n        # Properties\n        a = getattr(ts_fixture, name)()\n        assert a.flags.aligned\n        assert a.flags.c_contiguous\n        assert a.flags.owndata\n        b = getattr(ts_fixture, name)()\n        assert a is not b\n        assert np.all(a == b)\n\n        # Lifetime\n        a1 = getattr(ts_fixture, name)()\n        a2 = a1.copy()\n        assert a1 is not a2\n        del ts_fixture\n        # Do some memory operations\n        a3 = np.ones(10**6)\n        assert np.all(a1 == a2)\n        del a1\n        # Just do something to touch memory\n        a2[:] = 0\n        assert a3 is not a2\n\n    @pytest.mark.skipif(not _tskit.HAS_NUMPY_2, reason=\"Requires NumPy 2.0+\")\n    @pytest.mark.parametrize(\n        \"string_array\",\n        [\n            \"sites_ancestral_state_string\",\n            \"mutations_derived_state_string\",\n            \"mutations_inherited_state_string\",\n            \"provenances_timestamp_string\",\n            \"provenances_record_string\",\n        ],\n    )\n    @pytest.mark.parametrize(\n        \"str_lengths\",\n        [\"none\", \"all-0\", \"all-1\", \"all-2\", \"mixed\", \"very_long\", \"unicode\"],\n    )\n    def test_string_arrays(self, ts_fixture, str_lengths, string_array):\n        if str_lengths == \"none\":\n     
       ts = tskit.TableCollection(1.0).tree_sequence()\n        else:\n            if str_lengths == \"all-1\":\n                ts = ts_fixture\n                if string_array == \"sites_ancestral_state_string\":\n                    assert ts.num_sites > 0\n                    assert {len(site.ancestral_state) for site in ts.sites()} == {1}\n                elif string_array == \"mutations_derived_state_string\":\n                    assert ts.num_mutations > 0\n                    assert {len(mut.derived_state) for mut in ts.mutations()} == {1}\n                elif string_array == \"mutations_inherited_state_string\":\n                    assert ts.num_mutations > 0\n                    assert {len(mut.inherited_state) for mut in ts.mutations()} == {1}\n                elif string_array == \"provenances_timestamp_string\":\n                    assert ts.num_provenances > 0\n                    assert len(ts.provenance(3).timestamp) == 1\n                elif string_array == \"provenances_record_string\":\n                    assert ts.num_provenances > 0\n                    assert len(ts.provenance(3).record) == 1\n            else:\n                tables = ts_fixture.dump_tables()\n\n                str_map = {\n                    \"all-0\": lambda i, item: \"\",\n                    \"all-2\": lambda i, item: chr(ord(\"A\") + (i % 26)) * 2,\n                    \"mixed\": lambda i, item: chr(ord(\"A\") + (i % 26)) * (i % 20),\n                    \"very_long\": lambda i, item: \"A\" * 100_000_000 if i == 1 else \"T\",\n                    \"unicode\": lambda i, item: \"🧬\" * (i + 1),\n                }\n\n                if string_array == \"sites_ancestral_state_string\":\n                    sites = tables.sites.copy()\n                    tables.sites.clear()\n                    get_ancestral_state = str_map[str_lengths]\n                    for i, site in enumerate(sites):\n                        tables.sites.append(\n                            
site.replace(ancestral_state=get_ancestral_state(i, site))\n                        )\n                elif string_array == \"mutations_derived_state_string\":\n                    mutations = tables.mutations.copy()\n                    tables.mutations.clear()\n                    get_derived_state = str_map[str_lengths]\n                    for i, mutation in enumerate(mutations):\n                        tables.mutations.append(\n                            mutation.replace(\n                                derived_state=get_derived_state(i, mutation)\n                            )\n                        )\n                elif string_array == \"mutations_inherited_state_string\":\n                    # For inherited state, we modify sites and mutations to create\n                    # varied lengths\n                    sites = tables.sites.copy()\n                    tables.sites.clear()\n                    get_ancestral_state = str_map[str_lengths]\n                    for i, site in enumerate(sites):\n                        tables.sites.append(\n                            site.replace(ancestral_state=get_ancestral_state(i, site))\n                        )\n                    mutations = tables.mutations.copy()\n                    tables.mutations.clear()\n                    get_derived_state = str_map[str_lengths]\n                    for i, mutation in enumerate(mutations):\n                        tables.mutations.append(\n                            mutation.replace(\n                                derived_state=get_derived_state(i, mutation)\n                            )\n                        )\n                elif string_array == \"provenances_timestamp_string\":\n                    provenances = tables.provenances.copy()\n                    tables.provenances.clear()\n                    get_timestamp = str_map[str_lengths]\n                    for i, provenance in enumerate(provenances):\n                        
tables.provenances.append(\n                            provenance.replace(timestamp=get_timestamp(i, provenance))\n                        )\n                elif string_array == \"provenances_record_string\":\n                    provenances = tables.provenances.copy()\n                    tables.provenances.clear()\n                    get_record = str_map[str_lengths]\n                    for i, provenance in enumerate(provenances):\n                        tables.provenances.append(\n                            provenance.replace(record=get_record(i, provenance))\n                        )\n\n                ts = tables.tree_sequence()\n        ll_ts = ts.ll_tree_sequence\n\n        a = getattr(ll_ts, string_array)\n\n        # Contents\n        if str_lengths == \"none\":\n            assert a.size == 0\n        else:\n            if string_array == \"sites_ancestral_state_string\":\n                for site in ts.sites():\n                    assert a[site.id] == site.ancestral_state\n            elif string_array == \"mutations_derived_state_string\":\n                for mutation in ts.mutations():\n                    assert a[mutation.id] == mutation.derived_state\n            elif string_array == \"mutations_inherited_state_string\":\n                for mutation in ts.mutations():\n                    assert a[mutation.id] == mutation.inherited_state\n            elif string_array == \"provenances_timestamp_string\":\n                for provenance in ts.provenances():\n                    assert a[provenance.id] == provenance.timestamp\n            elif string_array == \"provenances_record_string\":\n                for provenance in ts.provenances():\n                    assert a[provenance.id] == provenance.record\n\n        # Read only\n        with pytest.raises(AttributeError, match=\"not writable\"):\n            setattr(ll_ts, string_array, None)\n        with pytest.raises(AttributeError, match=\"not writable\"):\n            delattr(ll_ts, 
string_array)\n\n        with pytest.raises(ValueError, match=\"assignment destination\"):\n            a[:] = 0\n        with pytest.raises(ValueError, match=\"assignment destination\"):\n            a[0] = 0\n\n        # Properties\n        assert a.dtype == np.dtypes.StringDType()\n        assert a.flags.aligned\n        assert a.flags.c_contiguous\n        b = getattr(ll_ts, string_array)\n        assert a is not b\n        assert np.all(a == b)\n\n        # Lifetime\n        a1 = getattr(ll_ts, string_array)\n        a2 = a1.copy()\n        assert a1 is not a2\n        del ll_ts\n        # Do some memory operations\n        a3 = np.ones(10**6)\n        assert np.all(a1 == a2)\n        del a1\n        # Just do something to touch memory\n        a2[:] = 0\n        assert a3 is not a2\n\n\nclass StatsInterfaceMixin:\n    \"\"\"\n    Tests for the interface on specific stats.\n    \"\"\"\n\n    def test_mode_errors(self):\n        _, f, params = self.get_example()\n        for bad_mode in [\"\", \"not a mode\", \"SITE\", \"x\" * 8192]:\n            with pytest.raises(ValueError):\n                f(mode=bad_mode, **params)\n\n        for bad_type in [123, {}, None, [[]]]:\n            with pytest.raises(TypeError):\n                f(mode=bad_type, **params)\n\n    def test_window_errors(self):\n        ts, f, params = self.get_example()\n        del params[\"windows\"]\n        for bad_array in [\"asdf\", None, [[[[]], [[]]]], np.zeros((10, 3, 4))]:\n            with pytest.raises(ValueError):\n                f(windows=bad_array, **params)\n\n        for bad_windows in [[], [0]]:\n            with pytest.raises(ValueError):\n                f(windows=bad_windows, **params)\n        L = ts.get_sequence_length()\n        bad_windows = [\n            [L, 0],\n            [0.1, L],\n            [-1, L],\n            [0, L + 0.1],\n            [0, 0.1, 0.1, L],\n            [0, -1, L],\n            [0, 0.1, 0.05, 0.2, L],\n        ]\n        for bad_window in 
bad_windows:\n            with pytest.raises(_tskit.LibraryError):\n                f(windows=bad_window, **params)\n\n    def test_time_window_errors(self):\n        ts, f, params = self.get_example()\n        if \"time_windows\" in params:\n            del params[\"time_windows\"]\n\n            for bad_time_windows in [[], [0]]:\n                with pytest.raises(ValueError, match=\"must have at least 2\"):\n                    f(\n                        time_windows=bad_time_windows,\n                        **params,\n                    )\n            bad_time_windows = [\n                [-1, np.inf],\n                [0, 0, np.inf],\n                [0, 10, 5, np.inf],\n                [0, np.inf, np.inf],\n            ]\n            for bad_time_window in bad_time_windows:\n                with pytest.raises(\n                    _tskit.LibraryError, match=\"TSK_ERR_BAD_TIME_WINDOWS\"\n                ):\n                    f(time_windows=bad_time_window, **params)\n\n    def test_polarisation(self):\n        ts, f, params = self.get_example()\n        with pytest.raises(TypeError):\n            f(polarised=\"sdf\", **params)\n        x1 = f(polarised=False, **params)\n        x2 = f(polarised=True, **params)\n        # Basic check just to run both code paths\n        assert x1.shape == x2.shape\n\n    def test_windows_output(self):\n        ts, f, params = self.get_example()\n        del params[\"windows\"]\n        for num_windows in range(1, 10):\n            windows = np.linspace(0, ts.get_sequence_length(), num=num_windows + 1)\n            assert windows.shape[0] == num_windows + 1\n            sigma = f(windows=windows, **params)\n            assert sigma.shape[0] == num_windows\n\n\nclass WeightMixin(StatsInterfaceMixin):\n    def get_example(self):\n        ts, method = self.get_method()\n        params = {\n            \"weights\": np.ones((ts.get_num_samples(), 2)),\n            \"windows\": [0, ts.get_sequence_length()],\n        }\n        
return ts, method, params\n\n    def test_bad_weights(self):\n        ts, f, params = self.get_example()\n        del params[\"weights\"]\n        n = ts.get_num_samples()\n\n        with pytest.raises(_tskit.LibraryError):\n            f(weights=np.ones((n, 0)), **params)\n\n        for bad_weight_shape in [(n - 1, 1), (n + 1, 1), (0, 3)]:\n            with pytest.raises(ValueError):\n                f(weights=np.ones(bad_weight_shape), **params)\n\n    def test_output_dims(self):\n        ts, method, params = self.get_example()\n        weights = params[\"weights\"]\n        nw = weights.shape[1]\n        windows = [0, ts.get_sequence_length()]\n\n        for mode in [\"site\", \"branch\"]:\n            out = method(weights[:, [0]], windows, mode=mode)\n            assert out.shape == (1, 1)\n            out = method(weights, windows, mode=mode)\n            assert out.shape == (1, nw)\n            out = method(weights[:, [0, 0, 0]], windows, mode=mode)\n            assert out.shape == (1, 3)\n        mode = \"node\"\n        N = ts.get_num_nodes()\n        out = method(weights[:, [0]], windows, mode=mode)\n        assert out.shape == (1, N, 1)\n        out = method(weights, windows, mode=mode)\n        assert out.shape == (1, N, nw)\n        out = method(weights[:, [0, 0, 0]], windows, mode=mode)\n        assert out.shape == (1, N, 3)\n\n\nclass WeightCovariateMixin(StatsInterfaceMixin):\n    def get_example(self):\n        ts, method = self.get_method()\n        params = {\n            \"weights\": np.ones((ts.get_num_samples(), 2)),\n            \"covariates\": np.array(\n                [np.arange(ts.get_num_samples()), np.arange(ts.get_num_samples()) ** 2]\n            ).T,\n            \"windows\": [0, ts.get_sequence_length()],\n        }\n        return ts, method, params\n\n    def test_output_dims(self):\n        ts, method, params = self.get_example()\n        weights = params[\"weights\"]\n        nw = weights.shape[1]\n        windows = [0, 
ts.get_sequence_length()]\n        for covariates in (params[\"covariates\"], params[\"covariates\"][:, :0]):\n            for mode in [\"site\", \"branch\"]:\n                out = method(weights[:, [0]], covariates, windows, mode=mode)\n                assert out.shape == (1, 1)\n                out = method(weights, covariates, windows, mode=mode)\n                assert out.shape == (1, nw)\n                out = method(weights[:, [0, 0, 0]], covariates, windows, mode=mode)\n                assert out.shape == (1, 3)\n            mode = \"node\"\n            N = ts.get_num_nodes()\n            out = method(weights[:, [0]], covariates, windows, mode=mode)\n            assert out.shape == (1, N, 1)\n            out = method(weights, covariates, windows, mode=mode)\n            assert out.shape == (1, N, nw)\n            out = method(weights[:, [0, 0, 0]], covariates, windows, mode=mode)\n            assert out.shape == (1, N, 3)\n\n\nclass SampleSetMixin(StatsInterfaceMixin):\n    def test_bad_sample_sets(self):\n        ts, f, params = self.get_example()\n        del params[\"sample_set_sizes\"]\n        del params[\"sample_sets\"]\n\n        with pytest.raises(_tskit.LibraryError):\n            f(sample_sets=[], sample_set_sizes=[], **params)\n\n        n = ts.get_num_samples()\n        samples = ts.get_samples()\n        for bad_set_sizes in [[], [1], [n - 1], [n + 1], [n - 3, 1, 1], [1, n - 2]]:\n            with pytest.raises(ValueError):\n                f(sample_set_sizes=bad_set_sizes, sample_sets=samples, **params)\n\n        N = ts.get_num_nodes()\n        for bad_node in [-1, N, N + 1, -N]:\n            with pytest.raises(_tskit.LibraryError):\n                f(sample_set_sizes=[2], sample_sets=[0, bad_node], **params)\n\n        for bad_sample in [n, n + 1, N - 1]:\n            with pytest.raises(_tskit.LibraryError):\n                f(sample_set_sizes=[2], sample_sets=[0, bad_sample], **params)\n\n\nclass OneWaySampleStatsMixin(SampleSetMixin):\n   
 \"\"\"\n    Tests for one-way sample stats.\n    \"\"\"\n\n    def get_example(self):\n        ts, method = self.get_method()\n        params = {\n            \"sample_set_sizes\": [ts.get_num_samples()],\n            \"sample_sets\": ts.get_samples(),\n            \"windows\": [0, ts.get_sequence_length()],\n        }\n        return ts, method, params\n\n    def test_basic_example(self):\n        ts, method = self.get_method()\n        result = method(\n            [ts.get_num_samples()], ts.get_samples(), [0, ts.get_sequence_length()]\n        )\n        assert result.shape == (1, 1)\n        result = method(\n            [ts.get_num_samples()],\n            ts.get_samples(),\n            [0, ts.get_sequence_length()],\n            mode=\"node\",\n        )\n        assert result.shape == (1, ts.get_num_nodes(), 1)\n        result = method(\n            [ts.get_num_samples()], ts.get_samples(), ts.get_breakpoints(), mode=\"node\"\n        )\n        assert result.shape == (ts.get_num_trees(), ts.get_num_nodes(), 1)\n\n    def test_output_dims(self):\n        ts, method = self.get_method()\n        samples = ts.get_samples()\n        windows = [0, ts.get_sequence_length()]\n        n = len(samples)\n\n        for mode in [\"site\", \"branch\"]:\n            pi = method([n], samples, windows, mode=mode)\n            assert pi.shape == (1, 1)\n            pi = method([2, n - 2], samples, windows, mode=mode)\n            assert pi.shape == (1, 2)\n            pi = method([2, 2, n - 4], samples, windows, mode=mode)\n            assert pi.shape == (1, 3)\n            pi = method(np.ones(n).astype(np.uint32), samples, windows, mode=mode)\n            assert pi.shape == (1, n)\n        mode = \"node\"\n        N = ts.get_num_nodes()\n        pi = method([n], samples, windows, mode=mode)\n        assert pi.shape == (1, N, 1)\n        pi = method([2, n - 2], samples, windows, mode=mode)\n        assert pi.shape == (1, N, 2)\n        pi = method([2, 2, n - 4], samples, 
windows, mode=mode)\n        assert pi.shape == (1, N, 3)\n        pi = method(np.ones(n).astype(np.uint32), samples, windows, mode=mode)\n        assert pi.shape == (1, N, n)\n\n    def test_polarised(self):\n        # TODO move this to the top level.\n        ts, method, params = self.get_example()\n        out_u = method(**params, polarised=True)\n        out_p = method(**params, polarised=False)\n        assert np.all(out_u.shape == out_p.shape)\n\n\nclass TestDiversity(LowLevelTestCase, OneWaySampleStatsMixin):\n    \"\"\"\n    Tests for the diversity method.\n    \"\"\"\n\n    def get_method(self):\n        ts = self.get_example_tree_sequence()\n        return ts, ts.diversity\n\n\nclass TestTraitCovariance(LowLevelTestCase, WeightMixin):\n    \"\"\"\n    Tests for trait covariance.\n    \"\"\"\n\n    def get_method(self):\n        ts = self.get_example_tree_sequence()\n        return ts, ts.trait_covariance\n\n\nclass TestTraitCorrelation(LowLevelTestCase, WeightMixin):\n    \"\"\"\n    Tests for trait correlation.\n    \"\"\"\n\n    def get_method(self):\n        ts = self.get_example_tree_sequence()\n        return ts, ts.trait_correlation\n\n\nclass TestTraitLinearModel(LowLevelTestCase, WeightCovariateMixin):\n    \"\"\"\n    Tests for trait correlation.\n    \"\"\"\n\n    def get_method(self):\n        ts = self.get_example_tree_sequence()\n        return ts, ts.trait_linear_model\n\n\nclass TestSegregatingSites(LowLevelTestCase, OneWaySampleStatsMixin):\n    \"\"\"\n    Tests for the diversity method.\n    \"\"\"\n\n    def get_method(self):\n        ts = self.get_example_tree_sequence()\n        return ts, ts.segregating_sites\n\n\nclass TestY1(LowLevelTestCase, OneWaySampleStatsMixin):\n    \"\"\"\n    Tests for the diversity method.\n    \"\"\"\n\n    def get_method(self):\n        ts = self.get_example_tree_sequence()\n        return ts, ts.Y1\n\n\nclass TestAlleleFrequencySpectrum(LowLevelTestCase, OneWaySampleStatsMixin):\n    \"\"\"\n    Tests 
for the diversity method.\n    \"\"\"\n\n    def get_method(self):\n        ts = self.get_example_tree_sequence()\n        return ts, ts.allele_frequency_spectrum\n\n    def get_example(self):\n        # temporary duplicate from OneWaySampleStatsMixin to include time windows\n        ts, method = self.get_method()\n        params = {\n            \"sample_set_sizes\": [ts.get_num_samples()],\n            \"sample_sets\": ts.get_samples(),\n            \"windows\": [0, ts.get_sequence_length()],\n            \"time_windows\": [0, np.inf],\n        }\n        return ts, method, params\n\n    @pytest.mark.parametrize(\"mode\", [\"site\", \"branch\"])\n    def test_basic_example(self, mode):\n        ts = self.get_example_tree_sequence()\n        n = ts.get_num_samples()\n        result = ts.allele_frequency_spectrum(\n            [n],\n            ts.get_samples(),\n            [0, ts.get_sequence_length()],\n            time_windows=[0, np.inf],\n            mode=mode,\n        )\n        assert result.shape == (1, 1, n + 1)\n        result = ts.allele_frequency_spectrum(\n            [n],\n            ts.get_samples(),\n            [0, ts.get_sequence_length()],\n            time_windows=[0, np.inf],\n            mode=mode,\n            polarised=True,\n        )\n        assert result.shape == (1, 1, n + 1)\n\n    @pytest.mark.parametrize(\"mode\", [\"site\", \"branch\"])\n    def test_output_dims(self, mode):\n        ts = self.get_example_tree_sequence()\n        samples = ts.get_samples()\n        L = ts.get_sequence_length()\n        n = len(samples)\n        time_windows = [0, np.inf]\n\n        for s in [[n], [n - 2, 2], [n - 4, 2, 2], [1] * n]:\n            s = np.array(s, dtype=np.uint32)\n            windows = [0, L]\n            for windows in [[0, L], [0, L / 2, L], np.linspace(0, L, num=10)]:\n                jafs = ts.allele_frequency_spectrum(\n                    s,\n                    samples,\n                    windows,\n                    
mode=mode,\n                    time_windows=time_windows,\n                    polarised=True,\n                )\n                assert jafs.shape == tuple(\n                    [len(windows) - 1] + [len(time_windows) - 1] + list(s + 1)\n                )\n                jafs = ts.allele_frequency_spectrum(\n                    s,\n                    samples,\n                    windows,\n                    mode=mode,\n                    time_windows=time_windows,\n                    polarised=False,\n                )\n                assert jafs.shape == tuple(\n                    [len(windows) - 1] + [len(time_windows) - 1] + list(s + 1)\n                )\n\n    def test_node_mode_not_supported(self):\n        ts = self.get_example_tree_sequence()\n        with pytest.raises(_tskit.LibraryError):\n            ts.allele_frequency_spectrum(\n                [ts.get_num_samples()],\n                ts.get_samples(),\n                [0, ts.get_sequence_length()],\n                mode=\"node\",\n                time_windows=[0, np.inf],\n            )\n\n\nclass TwoWaySampleStatsMixin(SampleSetMixin):\n    \"\"\"\n    Tests for the two way sample stats.\n    \"\"\"\n\n    def get_example(self):\n        ts, method = self.get_method()\n        params = {\n            \"sample_set_sizes\": [2, ts.get_num_samples() - 2],\n            \"sample_sets\": ts.get_samples(),\n            \"indexes\": [[0, 1]],\n            \"windows\": [0, ts.get_sequence_length()],\n        }\n        return ts, method, params\n\n    def test_basic_example(self):\n        ts, method = self.get_method()\n        div = method(\n            [2, ts.get_num_samples() - 2],\n            ts.get_samples(),\n            [[0, 1]],\n            windows=[0, ts.get_sequence_length()],\n        )\n        assert div.shape == (1, 1)\n\n    def test_output_dims(self):\n        ts, method = self.get_method()\n        samples = ts.get_samples()\n        windows = [0, ts.get_sequence_length()]\n    
    n = len(samples)\n        for mode in [\"site\", \"branch\"]:\n            div = method([2, 2, n - 4], samples, [[0, 1]], windows, mode=mode)\n            assert div.shape == (1, 1)\n            div = method([2, 2, n - 4], samples, [[0, 1], [1, 2]], windows, mode=mode)\n            assert div.shape == (1, 2)\n            div = method(\n                [2, 2, n - 4], samples, [[0, 1], [1, 2], [0, 1]], windows, mode=mode\n            )\n            assert div.shape == (1, 3)\n\n        N = ts.get_num_nodes()\n        mode = \"node\"\n        div = method([2, 2, n - 4], samples, [[0, 1]], windows, mode=mode)\n        assert div.shape == (1, N, 1)\n        div = method([2, 2, n - 4], samples, [[0, 1], [1, 2]], windows, mode=mode)\n        assert div.shape == (1, N, 2)\n        div = method(\n            [2, 2, n - 4], samples, [[0, 1], [1, 2], [0, 1]], windows, mode=mode\n        )\n        assert div.shape == (1, N, 3)\n\n    def test_set_index_errors(self):\n        ts, method = self.get_method()\n        samples = ts.get_samples()\n        windows = [0, ts.get_sequence_length()]\n        n = len(samples)\n\n        def f(indexes):\n            method([2, 2, n - 4], samples, indexes, windows)\n\n        for bad_array in [\"wer\", {}, [[[], []], [[], []]]]:\n            with pytest.raises(ValueError):\n                f(bad_array)\n        for bad_dim in [[[]], [[1], [1]]]:\n            with pytest.raises(ValueError):\n                f(bad_dim)\n\n\nclass TwoWayWeightedStatsMixin(StatsInterfaceMixin):\n    \"\"\"\n    Tests for the weighted two way sample stats.\n    \"\"\"\n\n    def get_example(self):\n        ts, method = self.get_method()\n        params = {\n            \"weights\": np.zeros((ts.get_num_samples(), 2)) + 0.5,\n            \"indexes\": [[0, 1]],\n            \"windows\": [0, ts.get_sequence_length()],\n        }\n        return ts, method, params\n\n    def test_basic_example(self):\n        ts, method = self.get_method()\n        div = 
method(\n            np.zeros((ts.get_num_samples(), 1)) + 0.5,\n            [[0, 1]],\n            windows=[0, ts.get_sequence_length()],\n        )\n        assert div.shape == (1, 1)\n\n    def test_bad_weights(self):\n        ts, f, params = self.get_example()\n        del params[\"weights\"]\n        n = ts.get_num_samples()\n\n        for bad_weight_type in [None, [None, None]]:\n            with pytest.raises(ValueError, match=\"object of too small depth\"):\n                f(weights=bad_weight_type, **params)\n\n        for bad_weight_shape in [(n - 1, 1), (n + 1, 1), (0, 3)]:\n            with pytest.raises(ValueError, match=\"First dimension must be num_samples\"):\n                f(weights=np.ones(bad_weight_shape), **params)\n\n    def test_output_dims(self):\n        ts, method, params = self.get_example()\n        weights = params.pop(\"weights\")\n        params[\"windows\"] = [0, ts.get_sequence_length()]\n\n        for mode in [\"site\", \"branch\"]:\n            out = method(weights[:, [0]], mode=mode, **params)\n            assert out.shape == (1, 1)\n            out = method(weights, mode=mode, **params)\n            assert out.shape == (1, 1)\n            out = method(weights[:, [0, 0, 0]], mode=mode, **params)\n            assert out.shape == (1, 1)\n        mode = \"node\"\n        N = ts.get_num_nodes()\n        out = method(weights[:, [0]], mode=mode, **params)\n        assert out.shape == (1, N, 1)\n        out = method(weights, mode=mode, **params)\n        assert out.shape == (1, N, 1)\n        out = method(weights[:, [0, 0, 0]], mode=mode, **params)\n        assert out.shape == (1, N, 1)\n\n    def test_set_index_errors(self):\n        ts, method, params = self.get_example()\n        del params[\"indexes\"]\n\n        def f(indexes):\n            method(indexes=indexes, **params)\n\n        for bad_array in [\"wer\", {}, [[[], []], [[], []]]]:\n            with pytest.raises(ValueError):\n                f(bad_array)\n        for 
bad_dim in [[[]], [[1], [1]]]:\n            with pytest.raises(ValueError):\n                f(bad_dim)\n\n\nclass ThreeWaySampleStatsMixin(SampleSetMixin):\n    \"\"\"\n    Tests for the two way sample stats.\n    \"\"\"\n\n    def get_example(self):\n        ts, method = self.get_method()\n        params = {\n            \"sample_set_sizes\": [1, 1, ts.get_num_samples() - 2],\n            \"sample_sets\": ts.get_samples(),\n            \"indexes\": [[0, 1, 2]],\n            \"windows\": [0, ts.get_sequence_length()],\n        }\n        return ts, method, params\n\n    def test_basic_example(self):\n        ts, method = self.get_method()\n        div = method(\n            [1, 1, ts.get_num_samples() - 2],\n            ts.get_samples(),\n            [[0, 1, 2]],\n            windows=[0, ts.get_sequence_length()],\n        )\n        assert div.shape == (1, 1)\n\n    def test_output_dims(self):\n        ts, method = self.get_method()\n        samples = ts.get_samples()\n        windows = [0, ts.get_sequence_length()]\n        n = len(samples)\n        for mode in [\"site\", \"branch\"]:\n            div = method([2, 2, n - 4], samples, [[0, 1, 2]], windows, mode=mode)\n            assert div.shape == (1, 1)\n            div = method(\n                [1, 1, 2, n - 4], samples, [[0, 1, 2], [1, 2, 3]], windows, mode=mode\n            )\n            assert div.shape == (1, 2)\n            div = method(\n                [1, 1, 2, n - 4],\n                samples,\n                [[0, 1, 2], [1, 2, 3], [0, 1, 2]],\n                windows,\n                mode=mode,\n            )\n            assert div.shape == (1, 3)\n\n        N = ts.get_num_nodes()\n        mode = \"node\"\n        div = method([2, 2, n - 4], samples, [[0, 1, 2]], windows, mode=mode)\n        assert div.shape == (1, N, 1)\n        div = method(\n            [1, 1, 2, n - 4], samples, [[0, 1, 2], [1, 2, 3]], windows, mode=mode\n        )\n        assert div.shape == (1, N, 2)\n        div = 
method(\n            [1, 1, 2, n - 4],\n            samples,\n            [[0, 1, 2], [1, 2, 3], [0, 1, 2]],\n            windows,\n            mode=mode,\n        )\n        assert div.shape == (1, N, 3)\n\n    def test_set_index_errors(self):\n        ts, method = self.get_method()\n        samples = ts.get_samples()\n        windows = [0, ts.get_sequence_length()]\n        n = len(samples)\n\n        def f(indexes):\n            method([2, 2, n - 4], samples, indexes, windows)\n\n        for bad_array in [\"wer\", {}, [[[], []], [[], []]]]:\n            with pytest.raises(ValueError):\n                f(bad_array)\n        for bad_dim in [[[]], [[1], [1]], [(0, 1)], [(0, 1, 2, 3)]]:\n            with pytest.raises(ValueError):\n                f(bad_dim)\n\n\nclass FourWaySampleStatsMixin(SampleSetMixin):\n    \"\"\"\n    Tests for the four way sample stats.\n    \"\"\"\n\n    def get_example(self):\n        ts, method = self.get_method()\n        params = {\n            \"sample_set_sizes\": [1, 1, 1, ts.get_num_samples() - 3],\n            \"sample_sets\": ts.get_samples(),\n            \"indexes\": [[0, 1, 2, 3]],\n            \"windows\": [0, ts.get_sequence_length()],\n        }\n        return ts, method, params\n\n    def test_basic_example(self):\n        ts, method = self.get_method()\n        div = method(\n            [1, 1, 1, ts.get_num_samples() - 3],\n            ts.get_samples(),\n            [[0, 1, 2, 3]],\n            windows=[0, ts.get_sequence_length()],\n        )\n        assert div.shape == (1, 1)\n\n    def test_output_dims(self):\n        ts, method = self.get_method()\n        samples = ts.get_samples()\n        windows = [0, ts.get_sequence_length()]\n        n = len(samples)\n        for mode in [\"site\", \"branch\"]:\n            div = method([2, 1, 1, n - 4], samples, [[0, 1, 2, 3]], windows, mode=mode)\n            assert div.shape == (1, 1)\n            div = method(\n                [1, 1, 1, 1, n - 4],\n                
samples,\n                [[0, 1, 2, 3], [1, 2, 3, 4]],\n                windows,\n                mode=mode,\n            )\n            assert div.shape == (1, 2)\n            div = method(\n                [1, 1, 1, 1, n - 4],\n                samples,\n                [[0, 1, 2, 3], [1, 2, 3, 4], [0, 1, 2, 4]],\n                windows,\n                mode=mode,\n            )\n            assert div.shape == (1, 3)\n\n        N = ts.get_num_nodes()\n        mode = \"node\"\n        div = method([2, 1, 1, n - 4], samples, [[0, 1, 2, 3]], windows, mode=mode)\n        assert div.shape == (1, N, 1)\n        div = method(\n            [1, 1, 1, 1, n - 4],\n            samples,\n            [[0, 1, 2, 3], [1, 2, 3, 4]],\n            windows,\n            mode=mode,\n        )\n        assert div.shape == (1, N, 2)\n        div = method(\n            [1, 1, 1, 1, n - 4],\n            samples,\n            [[0, 1, 2, 3], [1, 2, 3, 4], [0, 1, 2, 4]],\n            windows,\n            mode=mode,\n        )\n        assert div.shape == (1, N, 3)\n\n    def test_set_index_errors(self):\n        ts, method = self.get_method()\n        samples = ts.get_samples()\n        windows = [0, ts.get_sequence_length()]\n        n = len(samples)\n\n        def f(indexes):\n            method([2, 1, 1, n - 4], samples, indexes, windows)\n\n        for bad_array in [\"wer\", {}, [[[], []], [[], []]]]:\n            with pytest.raises(ValueError):\n                f(bad_array)\n        for bad_dim in [[[]], [[1], [1]], [(0, 1)], [(0, 1, 2, 3, 4)]]:\n            with pytest.raises(ValueError):\n                f(bad_dim)\n\n\nclass TestDivergence(LowLevelTestCase, TwoWaySampleStatsMixin):\n    def get_method(self):\n        ts = self.get_example_tree_sequence()\n        return ts, ts.divergence\n\n\nclass TestY2(LowLevelTestCase, TwoWaySampleStatsMixin):\n    def get_method(self):\n        ts = self.get_example_tree_sequence()\n        return ts, ts.Y2\n\n\nclass 
Testf2(LowLevelTestCase, TwoWaySampleStatsMixin):\n    def get_method(self):\n        ts = self.get_example_tree_sequence()\n        return ts, ts.f2\n\n\nclass TestGeneticRelatedness(LowLevelTestCase, TwoWaySampleStatsMixin):\n    def get_method(self):\n        ts = self.get_example_tree_sequence()\n        return ts, ts.genetic_relatedness\n\n    def test_options(self):\n        ts, _, params = self.get_example()\n        x = ts.genetic_relatedness(**params)\n        new_params = params.copy()\n        new_params[\"centre\"] = False\n        y = ts.genetic_relatedness(**new_params)\n        assert x.shape == y.shape\n        new_params[\"polarised\"] = False\n        y = ts.genetic_relatedness(**new_params)\n        assert x.shape == y.shape\n        del new_params[\"centre\"]\n        y = ts.genetic_relatedness(**new_params)\n        assert x.shape == y.shape\n\n        del new_params[\"indexes\"]\n        with pytest.raises(ValueError, match=\"object of too small depth\"):\n            ts.genetic_relatedness(**new_params, indexes=\"foo\")\n\n\nclass TestY3(LowLevelTestCase, ThreeWaySampleStatsMixin):\n    def get_method(self):\n        ts = self.get_example_tree_sequence()\n        return ts, ts.Y3\n\n\nclass Testf3(LowLevelTestCase, ThreeWaySampleStatsMixin):\n    def get_method(self):\n        ts = self.get_example_tree_sequence()\n        return ts, ts.f3\n\n\nclass Testf4(LowLevelTestCase, FourWaySampleStatsMixin):\n    def get_method(self):\n        ts = self.get_example_tree_sequence()\n        return ts, ts.f4\n\n\nclass TestWeightedGeneticRelatedness(LowLevelTestCase, TwoWayWeightedStatsMixin):\n    def get_method(self):\n        ts = self.get_example_tree_sequence()\n        return ts, ts.genetic_relatedness_weighted\n\n    def test_options(self):\n        ts, _, params = self.get_example()\n        x = ts.genetic_relatedness_weighted(**params)\n\n        new_params = params.copy()\n        new_params[\"centre\"] = False\n        y = 
ts.genetic_relatedness_weighted(**new_params)\n        assert x.shape == y.shape\n        new_params[\"polarised\"] = False\n        y = ts.genetic_relatedness_weighted(**new_params)\n        assert x.shape == y.shape\n        del new_params[\"centre\"]\n        y = ts.genetic_relatedness_weighted(**new_params)\n        assert x.shape == y.shape\n\n        del new_params[\"weights\"]\n        with pytest.raises(ValueError, match=\"First dimension\"):\n            ts.genetic_relatedness_weighted(\n                **new_params, weights=np.ones((ts.get_num_samples() + 2, 1))\n            )\n\n\nclass TestGeneticRelatednessVector(LowLevelTestCase):\n    def get_example(self, num_weights=2):\n        ts = self.get_example_tree_sequence()\n        num_samples = ts.get_num_samples()\n        params = {\n            \"weights\": np.linspace(0, 1, num_weights * num_samples).reshape(\n                (num_samples, num_weights)\n            ),\n            \"windows\": [0, ts.get_sequence_length()],\n            \"nodes\": list(ts.get_samples()),\n        }\n        return ts, params\n\n    @pytest.mark.parametrize(\"mode\", [\"branch\"])\n    @pytest.mark.parametrize(\"num_weights\", [1, 3])\n    def test_basic_example(self, mode, num_weights):\n        ts, params = self.get_example(num_weights)\n        ns = ts.get_num_samples()\n        params[\"mode\"] = mode\n        for a, b in ([True, True], [True, False], [False, True]):\n            params[\"span_normalise\"] = a\n            params[\"centre\"] = b\n            result = ts.genetic_relatedness_vector(**params)\n            assert result.shape == (1, ns, num_weights)\n\n    @pytest.mark.parametrize(\"mode\", [\"branch\"])\n    def test_good_nodes(self, mode):\n        num_weights = 2\n        ts, params = self.get_example(num_weights)\n        params[\"mode\"] = mode\n        for nodes in [\n            list(ts.get_samples())[:3],\n            list(ts.get_samples())[:1],\n            [0, ts.get_num_nodes() - 1],\n      
  ]:\n            params[\"nodes\"] = nodes\n            result = ts.genetic_relatedness_vector(**params)\n            assert result.shape == (1, len(nodes), num_weights)\n\n    def test_bad_nodes(self):\n        ts, params = self.get_example()\n        params[\"mode\"] = \"branch\"\n        for nodes in [\"abc\", [[1, 2]]]:\n            params[\"nodes\"] = nodes\n            with pytest.raises(ValueError, match=\"array\"):\n                ts.genetic_relatedness_vector(**params)\n        for nodes in [[-1, 3], [3, 2 * ts.get_num_nodes()]]:\n            params[\"nodes\"] = nodes\n            with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_NODE_OUT_OF_BOUNDS\"):\n                ts.genetic_relatedness_vector(**params)\n\n    def test_bad_args(self):\n        ts, params = self.get_example()\n        for mode in (\"\", \"abc\"):\n            with pytest.raises(ValueError, match=\"stats mode\"):\n                ts.genetic_relatedness_vector(\n                    params[\"weights\"], params[\"windows\"], mode, True, True\n                )\n        for mode in (None, []):\n            with pytest.raises(TypeError):\n                ts.genetic_relatedness_vector(\n                    params[\"weights\"], params[\"windows\"], mode, True, True\n                )\n        with pytest.raises(TypeError):\n            ts.genetic_relatedness_vector(\n                params[\"weights\"], params[\"windows\"], \"branch\", \"yes\", True\n            )\n        with pytest.raises(TypeError):\n            ts.genetic_relatedness_vector(\n                params[\"weights\"], params[\"windows\"], \"branch\", True, \"no\"\n            )\n\n    @pytest.mark.parametrize(\"mode\", [\"site\", \"node\"])\n    def test_modes_not_supported(self, mode):\n        ts, params = self.get_example()\n        params[\"mode\"] = mode\n        with pytest.raises(_tskit.LibraryError):\n            ts.genetic_relatedness_vector(**params)\n\n    @pytest.mark.parametrize(\"mode\", [\"branch\"])\n    
def test_bad_weights(self, mode):\n        ts, params = self.get_example()\n        del params[\"weights\"]\n        ns = ts.get_num_samples()\n        for bad_weight_type in [None, [None, None]]:\n            with pytest.raises(ValueError, match=\"object of too small depth\"):\n                ts.genetic_relatedness_vector(\n                    weights=bad_weight_type, mode=mode, **params\n                )\n        for bad_weight_shape in [(ns - 1, 1), (ns + 1, 1), (0, 3)]:\n            with pytest.raises(ValueError, match=\"First dimension must be num_samples\"):\n                ts.genetic_relatedness_vector(\n                    weights=np.ones(bad_weight_shape), mode=mode, **params\n                )\n\n    def test_window_errors(self):\n        ts, params = self.get_example()\n        del params[\"windows\"]\n        for bad_array in [\"asdf\", None, [[[[]], [[]]]], np.zeros((10, 3, 4))]:\n            with pytest.raises(ValueError):\n                ts.genetic_relatedness_vector(windows=bad_array, mode=\"branch\", **params)\n\n        for bad_windows in [[], [0]]:\n            with pytest.raises(ValueError):\n                ts.genetic_relatedness_vector(\n                    windows=bad_windows, mode=\"branch\", **params\n                )\n        L = ts.get_sequence_length()\n        bad_windows = [\n            [L, 0],\n            [-1, L],\n            [0, L + 0.1],\n            [0, 0.1, 0.1, L],\n            [0, -1, L],\n        ]\n        for bad_window in bad_windows:\n            with pytest.raises(_tskit.LibraryError):\n                ts.genetic_relatedness_vector(\n                    windows=bad_window, mode=\"branch\", **params\n                )\n\n\nclass TestDecodeAlignmentsLowLevel(LowLevelTestCase):\n    def get_simple_example(self):\n        # Simple 3-sample balanced tree with two sites, mirroring high-level tests.\n        ts = tskit.Tree.generate_balanced(3, span=10).tree_sequence\n        tables = ts.dump_tables()\n        
tables.sites.add_row(2, ancestral_state=\"A\")\n        tables.sites.add_row(9, ancestral_state=\"T\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"G\")\n        tables.mutations.add_row(site=1, node=3, derived_state=\"C\")\n        return tables.tree_sequence().ll_tree_sequence\n\n    def test_basic_bytes_roundtrip(self):\n        ts = self.get_simple_example()\n        ref = b\"NNNNNNNNNN\"\n        nodes = np.array(ts.get_samples(), dtype=np.int32)\n        buf = ts.decode_alignments(\n            ref,\n            nodes,\n            0,\n            ts.get_sequence_length(),\n            \"N\",\n            True,\n        )\n        assert isinstance(buf, (bytes, bytearray))\n        L = int(ts.get_sequence_length())\n        rows = [buf[i * L : (i + 1) * L].decode(\"ascii\") for i in range(nodes.shape[0])]\n        assert rows == [\"NNGNNNNNNT\", \"NNANNNNNNC\", \"NNANNNNNNC\"]\n\n    def test_nodes_type_and_bounds(self):\n        ts = self.get_simple_example()\n        ref = b\"NNNNNNNNNN\"\n        # Bad nodes type\n        with pytest.raises(ValueError, match=\"array\"):\n            ts.decode_alignments(ref, [[0, 1]], 0, ts.get_sequence_length(), \"N\", True)\n        # Out of bounds\n        bad_nodes = np.array([ts.get_num_nodes()], dtype=np.int32)\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_NODE_OUT_OF_BOUNDS\"):\n            ts.decode_alignments(ref, bad_nodes, 0, ts.get_sequence_length(), \"N\", True)\n\n    def test_missing_char_validation(self):\n        ts = self.get_simple_example()\n        ref = b\"NNNNNNNNNN\"\n        nodes = np.array(ts.get_samples(), dtype=np.int32)\n        # missing_data_character must be str of length 1\n        with pytest.raises(TypeError, match=\"single character\"):\n            ts.decode_alignments(ref, nodes, 0, ts.get_sequence_length(), \"NN\", True)\n\n    def test_argument_parsing_error(self):\n        ts = self.get_simple_example()\n        ref = b\"NNNNNNNNNN\"\n        
nodes = np.array(ts.get_samples(), dtype=np.int32)\n        # left must be a float-like value\n        with pytest.raises(TypeError):\n            ts.decode_alignments(\n                ref, nodes, \"bad_left\", ts.get_sequence_length(), \"N\", True\n            )\n\n    def test_reference_sequence_type_validation(self):\n        ts = self.get_simple_example()\n        ref = \"NNNNNNNNNN\"\n        nodes = np.array(ts.get_samples(), dtype=np.int32)\n        with pytest.raises(TypeError, match=\"must be bytes\"):\n            ts.decode_alignments(ref, nodes, 0, ts.get_sequence_length(), \"N\", True)\n\n    def test_missing_char_type_validation(self):\n        ts = self.get_simple_example()\n        ref = b\"NNNNNNNNNN\"\n        nodes = np.array(ts.get_samples(), dtype=np.int32)\n        with pytest.raises(TypeError, match=\"length 1\"):\n            ts.decode_alignments(ref, nodes, 0, ts.get_sequence_length(), b\"N\", True)\n\n    def test_missing_char_unicode_error(self):\n        ts = self.get_simple_example()\n        ref = b\"NNNNNNNNNN\"\n        nodes = np.array(ts.get_samples(), dtype=np.int32)\n        with pytest.raises(UnicodeEncodeError):\n            ts.decode_alignments(\n                ref,\n                nodes,\n                0,\n                ts.get_sequence_length(),\n                NON_UTF8_STRING,\n                True,\n            )\n\n    def test_isolated_as_missing_flag_false(self):\n        ts = self.get_simple_example()\n        ref = b\"NNNNNNNNNN\"\n        nodes = np.array(ts.get_samples(), dtype=np.int32)\n        buf = ts.decode_alignments(\n            ref,\n            nodes,\n            0,\n            ts.get_sequence_length(),\n            \"N\",\n            False,\n        )\n        assert isinstance(buf, (bytes, bytearray))\n        L = int(ts.get_sequence_length())\n        rows = [buf[i * L : (i + 1) * L].decode(\"ascii\") for i in range(nodes.shape[0])]\n        assert rows == [\"NNGNNNNNNT\", \"NNANNNNNNC\", 
\"NNANNNNNNC\"]\n\n    def test_length_and_interval_validation(self):\n        ts = self.get_simple_example()\n        nodes = np.array(ts.get_samples(), dtype=np.int32)\n        # Wrong reference length\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_BAD_PARAM_VALUE\"):\n            ts.decode_alignments(\n                b\"NNNNNNNNN\", nodes, 0, ts.get_sequence_length(), \"N\", True\n            )\n        # Negative left\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_BAD_PARAM_VALUE\"):\n            ts.decode_alignments(\n                b\"NNNNNNNNNN\", nodes, -1, ts.get_sequence_length(), \"N\", True\n            )\n        # Non-integer left\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_BAD_PARAM_VALUE\"):\n            ts.decode_alignments(\n                b\"NNNNNNNNNN\", nodes, 0.5, ts.get_sequence_length(), \"N\", True\n            )\n\n\nclass TestGeneralStatsInterface(LowLevelTestCase, StatsInterfaceMixin):\n    \"\"\"\n    Tests for the general stats interface.\n    \"\"\"\n\n    def get_example(self):\n        ts = self.get_example_tree_sequence()\n        W = np.zeros((ts.get_num_samples(), 1))\n        params = {\n            \"weights\": W,\n            \"summary_func\": lambda x: np.cumsum(x),\n            \"output_dim\": 1,\n            \"windows\": ts.get_breakpoints(),\n        }\n        return ts, ts.general_stat, params\n\n    def test_basic_example(self):\n        ts = self.get_example_tree_sequence()\n        W = np.zeros((ts.get_num_samples(), 1))\n        sigma = ts.general_stat(\n            W, lambda x: np.cumsum(x), 1, ts.get_breakpoints(), mode=\"branch\"\n        )\n        assert sigma.shape == (ts.get_num_trees(), 1)\n\n    def test_non_numpy_return(self):\n        ts = self.get_example_tree_sequence()\n        W = np.ones((ts.get_num_samples(), 3))\n        sigma = ts.general_stat(\n            W, lambda x: [sum(x)], 1, ts.get_breakpoints(), mode=\"branch\"\n        )\n        
assert sigma.shape == (ts.get_num_trees(), 1)\n        sigma = ts.general_stat(\n            W, lambda x: [2, 2], 2, ts.get_breakpoints(), mode=\"branch\"\n        )\n        assert sigma.shape == (ts.get_num_trees(), 2)\n\n    def test_complicated_numpy_function(self):\n        ts = self.get_example_tree_sequence(sample_size=20, length=30, random_seed=325)\n        W = np.zeros((ts.get_num_samples(), 4))\n\n        def f(x):\n            y = np.sum(x * x), np.prod(x + np.arange(x.shape[0]))\n            return y\n\n        sigma = ts.general_stat(W, f, 2, ts.get_breakpoints(), mode=\"branch\")\n        assert sigma.shape == (ts.get_num_trees(), 2)\n\n    def test_input_dims(self):\n        ts = self.get_example_tree_sequence()\n        for k in range(1, 20):\n            W = np.zeros((ts.get_num_samples(), k))\n            sigma = ts.general_stat(\n                W, lambda x: np.cumsum(x), k, ts.get_breakpoints(), mode=\"branch\"\n            )\n            assert sigma.shape == (ts.get_num_trees(), k)\n            sigma = ts.general_stat(\n                W, lambda x: [np.sum(x)], 1, ts.get_breakpoints(), mode=\"branch\"\n            )\n            assert sigma.shape == (ts.get_num_trees(), 1)\n\n    def test_W_errors(self):\n        ts = self.get_example_tree_sequence()\n        n = ts.get_num_samples()\n        for bad_array in [[], [0, 1], [[[[]], [[]]]], np.zeros((10, 3, 4))]:\n            with pytest.raises(ValueError):\n                ts.general_stat(bad_array, lambda x: x, 1, ts.get_breakpoints())\n\n        for bad_size in [n - 1, n + 1, 0]:\n            W = np.zeros((bad_size, 1))\n            with pytest.raises(ValueError):\n                ts.general_stat(W, lambda x: x, 1, ts.get_breakpoints())\n\n    def test_summary_func_errors(self):\n        ts = self.get_example_tree_sequence()\n        W = np.zeros((ts.get_num_samples(), 1))\n        for bad_type in [\"sdf\", 1, {}]:\n            with pytest.raises(TypeError):\n                
ts.general_stat(W, bad_type, 1, ts.get_breakpoints())\n\n        # Wrong numbers of arguments to f\n        with pytest.raises(TypeError):\n            ts.general_stat(W, lambda: 0, 1, ts.get_breakpoints())\n        with pytest.raises(TypeError):\n            ts.general_stat(W, lambda x, y: None, 1, ts.get_breakpoints())\n\n        # Exceptions within f are correctly raised.\n        for exception in [ValueError, TypeError]:\n\n            def f(x):\n                raise exception(\"test\")  # noqa: B023\n\n            with pytest.raises(exception):\n                ts.general_stat(W, f, 1, ts.get_breakpoints())\n\n        # Wrong output dimensions\n        for bad_array in [[1, 1], range(10)]:\n            with pytest.raises(ValueError):\n                ts.general_stat(\n                    W,\n                    lambda x: bad_array,  # noqa:B023\n                    1,\n                    ts.get_breakpoints(),\n                )\n        with pytest.raises(ValueError):\n            ts.general_stat(W, lambda x: [1], 2, ts.get_breakpoints())\n\n        # Bad arrays returned from f\n        for bad_array in [[\"sdf\"], 0, \"w4\", None]:\n            with pytest.raises(ValueError):\n                ts.general_stat(\n                    W,\n                    lambda x: bad_array,  # noqa:B023\n                    1,\n                    ts.get_breakpoints(),\n                )\n\n\nclass TestVariant(LowLevelTestCase):\n    \"\"\"\n    Tests for the Variant class.\n    \"\"\"\n\n    def test_uninitialised_tree_sequence(self):\n        ts = _tskit.TreeSequence()\n        with pytest.raises(ValueError):\n            _tskit.Variant(ts)\n\n    def test_constructor(self):\n        with pytest.raises(TypeError):\n            _tskit.Variant()\n        with pytest.raises(TypeError):\n            _tskit.Variant(None)\n        ts = self.get_example_tree_sequence()\n        with pytest.raises(ValueError):\n            _tskit.Variant(ts, samples={})\n        with 
pytest.raises(TypeError):\n            _tskit.Variant(ts, isolated_as_missing=None)\n        with pytest.raises(_tskit.LibraryError):\n            _tskit.Variant(ts, samples=[-1, 2])\n        with pytest.raises(TypeError):\n            _tskit.Variant(ts, alleles=1234)\n\n    def test_bad_decode(self):\n        ts = self.get_example_tree_sequence()\n        variant = _tskit.Variant(ts)\n        with pytest.raises(tskit.LibraryError, match=\"Site out of bounds\"):\n            variant.decode(-1)\n        with pytest.raises(TypeError):\n            variant.decode(\"42\")\n        with pytest.raises(TypeError):\n            variant.decode({})\n        with pytest.raises(TypeError):\n            variant.decode()\n\n    def test_alleles(self):\n        ts = self.get_example_tree_sequence()\n        for bad_type in [[\"a\", \"b\"], \"sdf\", 234]:\n            with pytest.raises(TypeError):\n                _tskit.Variant(ts, samples=[1, 2], alleles=bad_type)\n        with pytest.raises(ValueError):\n            _tskit.Variant(ts, samples=[1, 2], alleles=tuple())\n\n        for bad_allele_type in [None, 0, b\"x\", []]:\n            with pytest.raises(TypeError):\n                _tskit.Variant(ts, samples=[1, 2], alleles=(bad_allele_type,))\n\n    def test_samples(self):\n        ts = self.get_example_tree_sequence()\n        v = _tskit.Variant(ts, samples=None, alleles=None)\n        assert np.array_equal(\n            v.samples, np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.int32)\n        )\n        v = _tskit.Variant(ts, samples=[4, 2], alleles=None)\n        assert np.array_equal(v.samples, np.array([4, 2], dtype=np.int32))\n        v = _tskit.Variant(ts, samples=[], alleles=None)\n        assert np.array_equal(v.samples, np.array([], dtype=np.int32))\n        with pytest.raises(AttributeError):\n            v.samples = [1]\n\n    def test_isolated_as_missing(self):\n        ts = self.get_example_tree_sequence()\n        v = _tskit.Variant(ts)\n        assert 
v.isolated_as_missing\n        v = _tskit.Variant(ts, isolated_as_missing=True)\n        assert v.isolated_as_missing\n        v = _tskit.Variant(ts, isolated_as_missing=False)\n        assert not v.isolated_as_missing\n\n    def test_undecoded(self):\n        tables = _tskit.TableCollection(1)\n        tables.build_index()\n        ts = _tskit.TreeSequence(0)\n        ts.load_tables(tables)\n        variant = _tskit.Variant(ts)\n        assert variant.site_id == tskit.NULL\n        assert np.array_equal(variant.genotypes, [])\n        assert variant.alleles == ()\n        assert np.array_equal(variant.samples, ())\n        assert variant.isolated_as_missing\n\n    def test_properties_unwritable(self):\n        ts = self.get_example_tree_sequence()\n        variant = _tskit.Variant(ts)\n        with pytest.raises(AttributeError):\n            variant.site_id = 1\n        with pytest.raises(AttributeError):\n            variant.genotypes = [1]\n        with pytest.raises(AttributeError):\n            variant.samples = [1]\n        with pytest.raises(AttributeError):\n            variant.isolated_as_missing = False\n        with pytest.raises(AttributeError):\n            variant.alleles = \"A\"\n\n    def test_missing_data(self):\n        tables = _tskit.TableCollection(1)\n        tables.nodes.add_row(flags=1, time=0)\n        tables.nodes.add_row(flags=1, time=0)\n        tables.sites.add_row(0.1, \"A\")\n        tables.build_index()\n        ts = _tskit.TreeSequence(0)\n        ts.load_tables(tables)\n        variant = _tskit.Variant(ts)\n        variant.decode(0)\n        assert variant.site_id == 0\n        assert np.array_equal(variant.genotypes, [-1, -1])\n        assert variant.alleles == (\"A\", None)\n\n    def test_variants_lifecycle(self):\n        ts = self.get_example_tree_sequence(random_seed=42)\n        variant = _tskit.Variant(ts)\n        variant.decode(0)\n        genotypes = variant.genotypes\n        expected = [1, 0, 0, 1, 0, 0, 0, 0, 1, 1]\n  
      assert np.array_equal(genotypes, expected)\n        del variant\n        assert np.array_equal(genotypes, expected)\n        variant = _tskit.Variant(ts)\n        del ts\n        variant.decode(0)\n        del variant\n        assert np.array_equal(genotypes, expected)\n\n    @pytest.mark.parametrize(\"isolated_as_missing\", [True, False])\n    @pytest.mark.parametrize(\"samples\", [None, [], (0,)])\n    @pytest.mark.parametrize(\"alleles\", [None, (\"1\", \"0\")])\n    def test_copy(self, isolated_as_missing, samples, alleles):\n        ts = self.get_example_tree_sequence(random_seed=42)\n        variant = _tskit.Variant(\n            ts,\n            isolated_as_missing=isolated_as_missing,\n            samples=samples,\n            alleles=alleles,\n        )\n\n        # Test taking a copy before decode\n        variant2 = variant.restricted_copy()\n        assert variant.site_id == variant2.site_id\n        assert variant.alleles == variant2.alleles\n        assert np.array_equal(variant.genotypes, variant2.genotypes)\n        assert np.array_equal(variant.samples, variant2.samples)\n        assert variant.isolated_as_missing == variant2.isolated_as_missing\n\n        variant.decode(0)\n        # Everything below should work even if the Python ts is free'd\n        del ts\n        variant2 = variant.restricted_copy()\n        assert variant.site_id == variant2.site_id\n        assert variant.alleles == variant2.alleles\n        assert np.array_equal(variant.genotypes, variant2.genotypes)\n        assert np.array_equal(variant.samples, variant2.samples)\n        assert variant.isolated_as_missing == variant2.isolated_as_missing\n\n        # Take a copy for comparison, then move the variant to check the copy\n        # doesn't move too\n        genotypes = variant.genotypes\n        genotypes_copy = np.array(variant.genotypes)\n        alleles = variant.alleles\n        site_id = variant.site_id\n        variant.decode(1)\n        with 
pytest.raises(tskit.LibraryError, match=\"Can't decode a copy of a variant\"):\n            variant2.decode(1)\n        assert site_id == variant2.site_id\n        assert alleles == variant2.alleles\n        # Other properties shouldn't have changed\n        assert np.array_equal(variant.samples, variant2.samples)\n        assert variant.isolated_as_missing == variant2.isolated_as_missing\n\n        # Variant should be equal to the copy we took earlier\n        assert np.array_equal(genotypes_copy, variant2.genotypes)\n        # But not equal to the un-copies genotypes anymore as they\n        # have decoded a new site as a side effect of reusing the\n        # array when decoding\n        assert len(variant.samples) == 0 or not np.array_equal(\n            genotypes, variant2.genotypes\n        )\n\n        # Check the lifecycle of copies and copies of copies\n        del variant\n        variant3 = variant2.restricted_copy()\n        del variant2\n        assert np.array_equal(genotypes_copy, variant3.genotypes)\n        genotypes3 = variant3.genotypes\n        del variant3\n        assert np.array_equal(genotypes_copy, genotypes3)\n\n\nclass TestLdCalculator(LowLevelTestCase):\n    \"\"\"\n    Tests for the LdCalculator class.\n    \"\"\"\n\n    def test_uninitialised_tree_sequence(self):\n        ts = _tskit.TreeSequence()\n        with pytest.raises(ValueError):\n            _tskit.LdCalculator(ts)\n\n    def test_constructor(self):\n        with pytest.raises(TypeError):\n            _tskit.LdCalculator()\n        with pytest.raises(TypeError):\n            _tskit.LdCalculator(None)\n\n    def test_get_r2(self):\n        ts = self.get_example_tree_sequence()\n        calc = _tskit.LdCalculator(ts)\n        n = ts.get_num_sites()\n        for bad_id in [-1, n, n + 1]:\n            with pytest.raises(_tskit.LibraryError):\n                calc.get_r2(0, bad_id)\n            with pytest.raises(_tskit.LibraryError):\n                calc.get_r2(bad_id, 0)\n\n    
def test_get_r2_array(self):\n        ts = self.get_example_tree_sequence()\n        calc = _tskit.LdCalculator(ts)\n\n        n = ts.get_num_sites()\n        assert n > 2\n\n        with pytest.raises(ValueError):\n            calc.get_r2_array(0, max_distance=-1)\n        with pytest.raises(ValueError):\n            calc.get_r2_array(0, direction=1000)\n\n        for bad_max_sites in [-2, -3]:\n            with pytest.raises(ValueError, match=\"cannot be negative\"):\n                calc.get_r2_array(0, max_sites=bad_max_sites)\n        for bad_start_pos in [-1, n, n + 1]:\n            with pytest.raises(_tskit.LibraryError):\n                calc.get_r2_array(bad_start_pos)\n\n    def test_r2_array_properties(self):\n        ts = self.get_example_tree_sequence()\n        calc = _tskit.LdCalculator(ts)\n        n = ts.get_num_sites()\n        a = calc.get_r2_array(0)\n        assert a.shape == (n - 1,)\n        assert a.dtype == np.float64\n        assert not a.flags.writeable\n        assert a.flags.aligned\n        assert a.flags.c_contiguous\n        assert a.flags.owndata\n\n    def test_r2_array_lifetime(self):\n        ts = self.get_example_tree_sequence()\n        calc = _tskit.LdCalculator(ts)\n        n = ts.get_num_sites()\n\n        a1 = calc.get_r2_array(0)\n        assert a1.shape[0] == n - 1\n        a2 = a1.copy()\n        assert a1 is not a2\n        del calc\n        # Do some memory operations\n        a3 = np.ones(10**6)\n        assert np.all(a1 == a2)\n        del ts\n        assert np.all(a1 == a2)\n        del a1\n        # Just do something to touch memory\n        a2[:] = 0\n        assert a3 is not a2\n\n\nclass TestLsHmm(LowLevelTestCase):\n    \"\"\"\n    Tests for the LsHmm class.\n    \"\"\"\n\n    def test_uninitialised_tree_sequence(self):\n        ts = _tskit.TreeSequence()\n        with pytest.raises(ValueError):\n            _tskit.LsHmm(ts, None, None)\n\n    def test_constructor(self):\n        ts = 
self.get_example_tree_sequence()\n        with pytest.raises(TypeError):\n            _tskit.LsHmm()\n        with pytest.raises(TypeError):\n            _tskit.LsHmm(None)\n        values = np.zeros(ts.get_num_sites())\n        for bad_array in [\"asdf\", [[], []], None]:\n            with pytest.raises(ValueError):\n                _tskit.LsHmm(ts, bad_array, values)\n            with pytest.raises(ValueError):\n                _tskit.LsHmm(ts, values, bad_array)\n\n    def test_bad_rate_arrays(self):\n        ts = self.get_example_tree_sequence()\n        m = ts.get_num_sites()\n        assert m > 0\n        values = np.zeros(m)\n        for bad_size in [0, m - 1, m + 1, m + 2]:\n            bad_array = np.zeros(bad_size)\n            with pytest.raises(ValueError):\n                _tskit.LsHmm(ts, bad_array, values)\n            with pytest.raises(ValueError):\n                _tskit.LsHmm(ts, values, bad_array)\n\n    def test_haplotype_input(self):\n        ts = self.get_example_tree_sequence()\n        m = ts.get_num_sites()\n        fm = _tskit.CompressedMatrix(ts)\n        vm = _tskit.ViterbiMatrix(ts)\n        norm = np.ones(m)\n        ls_hmm = _tskit.LsHmm(ts, np.zeros(m), np.zeros(m))\n        for bad_size in [0, m - 1, m + 1, m + 2]:\n            bad_array = np.zeros(bad_size, dtype=np.int8)\n            with pytest.raises(ValueError, match=\"haplotype array\"):\n                ls_hmm.forward_matrix(bad_array, fm)\n            with pytest.raises(ValueError, match=\"haplotype array\"):\n                ls_hmm.backward_matrix(bad_array, norm, fm)\n            with pytest.raises(ValueError, match=\"haplotype array\"):\n                ls_hmm.viterbi_matrix(bad_array, vm)\n        for bad_array in [[0.002], [[], []], None]:\n            with pytest.raises(ValueError):\n                ls_hmm.forward_matrix(bad_array, fm)\n            with pytest.raises(ValueError):\n                ls_hmm.viterbi_matrix(bad_array, vm)\n            with 
pytest.raises(ValueError):\n                ls_hmm.backward_matrix(bad_array, norm, fm)\n\n    def test_norm_input(self):\n        ts = self.get_example_tree_sequence()\n        m = ts.get_num_sites()\n        cm = _tskit.CompressedMatrix(ts)\n        h = np.zeros(m, dtype=np.int32)\n        ls_hmm = _tskit.LsHmm(ts, np.zeros(m), np.zeros(m))\n        for bad_size in [0, m - 1, m + 1, m + 2]:\n            bad_array = np.zeros(bad_size)\n            with pytest.raises(ValueError, match=\"forward_norm array\"):\n                ls_hmm.backward_matrix(h, bad_array, cm)\n\n        for bad_array in [[0.002], [[], []], None]:\n            with pytest.raises(ValueError):\n                ls_hmm.backward_matrix(h, bad_array, cm)\n\n    def test_output_type_errors(self):\n        ts = self.get_example_tree_sequence()\n        m = ts.get_num_sites()\n        h = np.zeros(m, dtype=np.int8)\n        norm = np.ones(m)\n        ls_hmm = _tskit.LsHmm(ts, np.zeros(m), np.zeros(m))\n        for bad_type in [ls_hmm, None, m, []]:\n            with pytest.raises(TypeError):\n                ls_hmm.forward_matrix(h, bad_type)\n            with pytest.raises(TypeError):\n                ls_hmm.viterbi_matrix(h, bad_type)\n            with pytest.raises(TypeError):\n                ls_hmm.backward_matrix(h, norm, bad_type)\n\n        other_ts = self.get_example_tree_sequence()\n        output = _tskit.CompressedMatrix(other_ts)\n        with pytest.raises(_tskit.LibraryError):\n            ls_hmm.forward_matrix(h, output)\n        with pytest.raises(_tskit.LibraryError):\n            ls_hmm.backward_matrix(h, norm, output)\n        output = _tskit.ViterbiMatrix(other_ts)\n        with pytest.raises(_tskit.LibraryError):\n            ls_hmm.viterbi_matrix(h, output)\n\n    def test_empty_forward_matrix(self):\n        for mu in [0, 1]:\n            ts = self.get_example_tree_sequence(mutation_rate=mu)\n            m = ts.get_num_sites()\n            fm = _tskit.CompressedMatrix(ts)\n     
       assert fm.num_sites == m\n            assert np.array_equal(np.zeros(m), fm.normalisation_factor)\n            assert np.array_equal(np.zeros(m, dtype=np.uint32), fm.num_transitions)\n            F = fm.decode()\n            assert np.all(F >= 0)\n            for j in range(m):\n                assert fm.get_site(j) == []\n\n    def test_empty_viterbi_matrix(self):\n        for mu in [0, 1]:\n            ts = self.get_example_tree_sequence(mutation_rate=mu)\n            m = ts.get_num_sites()\n            vm = _tskit.ViterbiMatrix(ts)\n            assert vm.num_sites == m\n            # TODO we should have the same semantics for 0 sites\n            if m == 0:\n                h = vm.traceback()\n                assert len(h) == 0\n            else:\n                with pytest.raises(_tskit.LibraryError):\n                    vm.traceback()\n\n    def verify_compressed_matrix(self, ts, output):\n        S = output.normalisation_factor\n        N = output.num_transitions\n        assert np.all(0 < S)\n        assert np.all(S < 1)\n        assert np.all(N > 0)\n        F = output.decode()\n        assert F.shape == (ts.get_num_sites(), ts.get_num_samples())\n        assert np.all(F >= 0)\n        m = ts.get_num_sites()\n        for j in range(m):\n            site_list = output.get_site(j)\n            assert len(site_list) == N[j]\n            for item in site_list:\n                assert len(item) == 2\n                node, value = item\n                assert 0 <= node < ts.get_num_nodes()\n                assert value >= 0\n        for site in [m, m + 1, 2 * m]:\n            with pytest.raises(ValueError):\n                output.get_site(site)\n\n    def test_forward_matrix(self):\n        ts = self.get_example_tree_sequence()\n        m = ts.get_num_sites()\n        output = _tskit.CompressedMatrix(ts)\n        ls_hmm = _tskit.LsHmm(ts, np.zeros(m) + 0.1, np.zeros(m) + 0.1)\n        rv = ls_hmm.forward_matrix([0 for _ in range(m)], output)\n        
assert rv is None\n        self.verify_compressed_matrix(ts, output)\n\n    def test_backward_matrix(self):\n        ts = self.get_example_tree_sequence()\n        m = ts.get_num_sites()\n        fm = _tskit.CompressedMatrix(ts)\n        bm = _tskit.CompressedMatrix(ts)\n        h = np.zeros(m, dtype=np.int32)\n        ls_hmm = _tskit.LsHmm(ts, np.zeros(m) + 0.1, np.zeros(m) + 0.1)\n        ls_hmm.forward_matrix(h, fm)\n        ls_hmm.backward_matrix(h, fm.normalisation_factor, bm)\n        self.verify_compressed_matrix(ts, bm)\n\n    def test_viterbi_matrix(self):\n        ts = self.get_example_tree_sequence()\n        m = ts.get_num_sites()\n        output = _tskit.ViterbiMatrix(ts)\n        ls_hmm = _tskit.LsHmm(ts, np.zeros(m) + 0.1, np.zeros(m) + 0.1)\n        rv = ls_hmm.viterbi_matrix([0 for _ in range(m)], output)\n        assert rv is None\n        self.verify_compressed_matrix(ts, output)\n        h = output.traceback()\n        assert isinstance(h, np.ndarray)\n\n\nclass TestTree(LowLevelTestCase):\n    \"\"\"\n    Tests on the low-level tree interface.\n    \"\"\"\n\n    ARRAY_NAMES = [\n        \"parent\",\n        \"left_child\",\n        \"right_child\",\n        \"left_sib\",\n        \"right_sib\",\n        \"num_children\",\n        \"edge\",\n    ]\n\n    def test_options(self):\n        ts = self.get_example_tree_sequence()\n        st = _tskit.Tree(ts)\n        assert st.get_options() == 0\n        all_options = [\n            0,\n            _tskit.NO_SAMPLE_COUNTS,\n            _tskit.SAMPLE_LISTS,\n            _tskit.NO_SAMPLE_COUNTS | _tskit.SAMPLE_LISTS,\n        ]\n        for options in all_options:\n            tree = _tskit.Tree(ts, options=options)\n            copy = tree.copy()\n            for st in [tree, copy]:\n                assert st.get_options() == options\n                assert st.get_num_samples(0) == 1\n                if options & _tskit.NO_SAMPLE_COUNTS:\n                    # We should still be able to count the 
samples, just inefficiently.\n                    assert st.get_num_samples(0) == 1\n                    with pytest.raises(_tskit.LibraryError):\n                        st.get_num_tracked_samples(0)\n                else:\n                    assert st.get_num_tracked_samples(0) == 0\n                if options & _tskit.SAMPLE_LISTS:\n                    assert 0 == st.get_left_sample(0)\n                    assert 0 == st.get_right_sample(0)\n                else:\n                    with pytest.raises(ValueError):\n                        st.get_left_sample(0)\n                    with pytest.raises(ValueError):\n                        st.get_right_sample(0)\n                    with pytest.raises(ValueError):\n                        st.get_next_sample(0)\n\n    def test_site_errors(self):\n        ts = self.get_example_tree_sequence()\n        for bad_index in [-1, ts.get_num_sites(), ts.get_num_sites() + 1]:\n            with pytest.raises(IndexError):\n                ts.get_site(bad_index)\n\n    def test_mutation_errors(self):\n        ts = self.get_example_tree_sequence()\n        for bad_index in [-1, ts.get_num_mutations(), ts.get_num_mutations() + 1]:\n            with pytest.raises(IndexError):\n                ts.get_mutation(bad_index)\n\n    def test_individual_errors(self):\n        ts = self.get_example_tree_sequence()\n        for bad_index in [-1, ts.get_num_individuals(), ts.get_num_individuals() + 1]:\n            with pytest.raises(IndexError):\n                ts.get_individual(bad_index)\n\n    def test_population_errors(self):\n        ts = self.get_example_tree_sequence()\n        for bad_index in [-1, ts.get_num_populations(), ts.get_num_populations() + 1]:\n            with pytest.raises(IndexError):\n                ts.get_population(bad_index)\n\n    def test_provenance_errors(self):\n        ts = self.get_example_tree_sequence()\n        for bad_index in [-1, ts.get_num_provenances(), ts.get_num_provenances() + 1]:\n            
with pytest.raises(IndexError):\n                ts.get_provenance(bad_index)\n\n    def test_sites(self):\n        for ts in self.get_example_tree_sequences():\n            st = _tskit.Tree(ts)\n            all_sites = [ts.get_site(j) for j in range(ts.get_num_sites())]\n            all_tree_sites = []\n            j = 0\n            mutation_id = 0\n            while st.next():\n                tree_sites = st.get_sites()\n                assert st.get_num_sites() == len(tree_sites)\n                all_tree_sites.extend(tree_sites)\n                for (\n                    position,\n                    ancestral_state,\n                    mutations,\n                    index,\n                    metadata,\n                ) in tree_sites:\n                    assert st.get_left() <= position < st.get_right()\n                    assert isinstance(ancestral_state, str)\n                    assert index == j\n                    assert metadata == b\"\"\n                    for mut_id in mutations:\n                        (\n                            site,\n                            node,\n                            derived_state,\n                            parent,\n                            metadata,\n                            time,\n                            edge,\n                            inherited_state,\n                        ) = ts.get_mutation(mut_id)\n                        assert site == index\n                        assert mutation_id == mut_id\n                        assert st.get_parent(node) != _tskit.NULL\n                        assert metadata == b\"\"\n                        assert edge != _tskit.NULL\n                        mutation_id += 1\n                    j += 1\n            assert all_tree_sites == all_sites\n\n    def test_root_threshold_errors(self):\n        ts = self.get_example_tree_sequence()\n        tree = _tskit.Tree(ts)\n        for bad_type in [\"\", \"x\", {}]:\n            with 
pytest.raises(TypeError):\n                tree.set_root_threshold(bad_type)\n\n        with pytest.raises(_tskit.LibraryError):\n            tree.set_root_threshold(0)\n        tree.set_root_threshold(2)\n        # Setting when not in the null state raises an error\n        tree.next()\n        with pytest.raises(_tskit.LibraryError):\n            tree.set_root_threshold(2)\n\n    def test_seek_errors(self):\n        ts = self.get_example_tree_sequence()\n        tree = _tskit.Tree(ts)\n        for bad_type in [\"\", \"x\", {}]:\n            with pytest.raises(TypeError):\n                tree.seek(bad_type)\n        for bad_pos in [-1, 1e6]:\n            with pytest.raises(_tskit.LibraryError):\n                tree.seek(bad_pos)\n\n    def seek_skip_errors(self):\n        ts = self.get_example_tree_sequence()\n        tree = _tskit.Tree(ts)\n        for bad_type in [\"\", \"x\", {}]:\n            with pytest.raises(TypeError):\n                tree.seek(0, bad_type)\n            with pytest.raises(TypeError):\n                tree.seek_index(0, bad_type)\n\n    def test_seek_index_errors(self):\n        ts = self.get_example_tree_sequence()\n        tree = _tskit.Tree(ts)\n        for bad_type in [\"\", \"x\", {}]:\n            with pytest.raises(TypeError):\n                tree.seek_index(bad_type)\n        for bad_index in [-1, 10**6]:\n            with pytest.raises(_tskit.LibraryError):\n                tree.seek_index(bad_index)\n\n    @pytest.mark.parametrize(\"skip\", [True, False])\n    def test_seek_zero(self, skip):\n        ts = self.get_example_tree_sequence()\n        tree1 = _tskit.Tree(ts)\n        tree1.seek_index(0, skip)\n        assert tree1.get_left() == 0\n        tree2 = _tskit.Tree(ts)\n        tree2.seek(0, skip)\n        assert tree2.get_left() == 0\n\n    def test_root_threshold(self):\n        for ts in self.get_example_tree_sequences():\n            tree = _tskit.Tree(ts)\n            for root_threshold in [1, 2, ts.get_num_samples() 
* 2]:\n                tree.set_root_threshold(root_threshold)\n                assert tree.get_root_threshold() == root_threshold\n                while tree.next():\n                    assert tree.get_root_threshold() == root_threshold\n                    with pytest.raises(_tskit.LibraryError):\n                        tree.set_root_threshold(2)\n                assert tree.get_root_threshold() == root_threshold\n\n    def test_constructor(self):\n        with pytest.raises(TypeError):\n            _tskit.Tree()\n        for bad_type in [\"\", {}, [], None, 0]:\n            with pytest.raises(TypeError):\n                _tskit.Tree(bad_type)\n        ts = self.get_example_tree_sequence()\n        for bad_type in [\"\", {}, True, 1, None]:\n            with pytest.raises(TypeError):\n                _tskit.Tree(ts, tracked_samples=bad_type)\n        for bad_type in [\"\", {}, None, []]:\n            with pytest.raises(TypeError):\n                _tskit.Tree(ts, options=bad_type)\n        for ts in self.get_example_tree_sequences():\n            st = _tskit.Tree(ts)\n            # An uninitialised tree should always be zero.\n            samples = ts.get_samples()\n            assert st.get_left_child(st.get_virtual_root()) == samples[0]\n            assert st.get_right_child(st.get_virtual_root()) == samples[-1]\n            assert st.get_left() == 0\n            assert st.get_right() == 0\n            for j in range(ts.get_num_samples()):\n                assert st.get_parent(j) == _tskit.NULL\n                assert st.get_children(j) == tuple()\n                assert st.get_time(j) == 0\n\n    def test_bad_tracked_samples(self):\n        ts = self.get_example_tree_sequence()\n        options = 0\n        for bad_type in [\"\", {}, [], None]:\n            with pytest.raises(TypeError):\n                _tskit.Tree(ts, options=options, tracked_samples=[bad_type])\n            with pytest.raises(TypeError):\n                _tskit.Tree(\n                    
ts,\n                    options=options,\n                    tracked_samples=[1, bad_type],\n                )\n        for bad_sample in [10**6, -1e6]:\n            with pytest.raises(ValueError):\n                _tskit.Tree(\n                    ts,\n                    options=options,\n                    tracked_samples=[bad_sample],\n                )\n            with pytest.raises(ValueError):\n                _tskit.Tree(\n                    ts,\n                    options=options,\n                    tracked_samples=[1, bad_sample],\n                )\n            with pytest.raises(ValueError):\n                _tskit.Tree(ts, tracked_samples=[1, bad_sample, 1])\n\n    def test_while_loop_semantics(self):\n        for ts in self.get_example_tree_sequences():\n            tree = _tskit.Tree(ts)\n            # Any mixture of prev and next is OK and gives a valid iteration.\n            for _ in range(2):\n                j = 0\n                while tree.next():\n                    assert tree.get_index() == j\n                    j += 1\n                assert j == ts.get_num_trees()\n            for _ in range(2):\n                j = ts.get_num_trees()\n                while tree.prev():\n                    assert tree.get_index() == j - 1\n                    j -= 1\n                assert j == 0\n            j = 0\n            while tree.next():\n                assert tree.get_index() == j\n                j += 1\n            assert j == ts.get_num_trees()\n\n    def test_count_all_samples(self):\n        for ts in self.get_example_tree_sequences():\n            st = _tskit.Tree(ts)\n            # Without initialisation we should be 0 samples for every node\n            # that is not a sample.\n            for j in range(ts.get_num_nodes()):\n                count = 1 if j < ts.get_num_samples() else 0\n                assert st.get_num_samples(j) == count\n                assert st.get_num_tracked_samples(j) == 0\n            while 
st.next():\n                nu = get_sample_counts(ts, st)\n                nu_prime = [st.get_num_samples(j) for j in range(ts.get_num_nodes())]\n                assert nu == nu_prime\n                # For tracked samples, this should be all zeros.\n                nu = [st.get_num_tracked_samples(j) for j in range(ts.get_num_nodes())]\n                assert nu == list(0 for _ in nu)\n\n    def test_count_tracked_samples(self):\n        # Ensure that there are some non-binary nodes.\n        non_binary = False\n        for ts in self.get_example_tree_sequences():\n            st = _tskit.Tree(ts)\n            while st.next():\n                for u in range(ts.get_num_nodes()):\n                    if len(st.get_children(u)) > 1:\n                        non_binary = True\n            samples = [j for j in range(ts.get_num_samples())]\n            powerset = itertools.chain.from_iterable(\n                itertools.combinations(samples, r) for r in range(len(samples) + 1)\n            )\n            max_sets = 100\n            for _, subset in zip(range(max_sets), map(list, powerset)):\n                # Ordering shouldn't make any difference.\n                random.shuffle(subset)\n                st = _tskit.Tree(ts, tracked_samples=subset)\n                while st.next():\n                    nu = get_tracked_sample_counts(ts, st, subset)\n                    nu_prime = [\n                        st.get_num_tracked_samples(j) for j in range(ts.get_num_nodes())\n                    ]\n                    assert nu == nu_prime\n            # Passing duplicated values should raise an error\n            sample = 1\n            for j in range(2, 20):\n                tracked_samples = [sample for _ in range(j)]\n                with pytest.raises(_tskit.LibraryError):\n                    _tskit.Tree(\n                        ts,\n                        tracked_samples=tracked_samples,\n                    )\n        assert non_binary\n\n    def 
test_bounds_checking(self):\n        for ts in self.get_example_tree_sequences():\n            n = ts.get_num_nodes()\n            st = _tskit.Tree(ts, options=_tskit.SAMPLE_LISTS)\n            for v in [-100, -1, n + 1, n + 100, n * 100]:\n                with pytest.raises(ValueError):\n                    st.get_parent(v)\n                with pytest.raises(ValueError):\n                    st.get_children(v)\n                with pytest.raises(ValueError):\n                    st.get_time(v)\n                with pytest.raises(ValueError):\n                    st.get_left_sample(v)\n                with pytest.raises(ValueError):\n                    st.get_right_sample(v)\n                with pytest.raises(ValueError):\n                    st.is_descendant(v, 0)\n                with pytest.raises(ValueError):\n                    st.is_descendant(0, v)\n                with pytest.raises(ValueError):\n                    st.depth(v)\n            n = ts.get_num_samples()\n            for v in [-100, -1, n + 1, n + 100, n * 100]:\n                with pytest.raises(ValueError):\n                    st.get_next_sample(v)\n\n    def test_mrca_interface(self):\n        for ts in self.get_example_tree_sequences():\n            num_nodes = ts.get_num_nodes()\n            st = _tskit.Tree(ts)\n            for v in [num_nodes + 1, 10**6, _tskit.NULL]:\n                with pytest.raises(ValueError):\n                    st.get_mrca(v, v)\n                with pytest.raises(ValueError):\n                    st.get_mrca(v, 1)\n                with pytest.raises(ValueError):\n                    st.get_mrca(1, v)\n            # All the mrcas for an uninitialised tree should be _tskit.NULL\n            for u, v in itertools.combinations(range(num_nodes), 2):\n                assert st.get_mrca(u, v) == _tskit.NULL\n\n    def test_newick_precision(self):\n        ts = self.get_example_tree_sequence()\n        st = _tskit.Tree(ts)\n        assert st.next()\n        with 
pytest.raises(ValueError):\n            st.get_newick(root=0, precision=-1)\n        with pytest.raises(ValueError):\n            st.get_newick(root=0, precision=18)\n        with pytest.raises(ValueError):\n            st.get_newick(root=0, precision=100)\n\n    def test_newick_legacy_ms(self):\n        ts = self.get_example_tree_sequence()\n        st = _tskit.Tree(ts)\n        assert st.next()\n        root = st.get_left_child(st.get_virtual_root())\n        ns = st.get_newick(root)\n        assert \"n0\" in ns\n        assert ns == st.get_newick(root, legacy_ms_labels=False)\n        assert ns != st.get_newick(root, legacy_ms_labels=True)\n\n    def test_cleared_tree(self):\n        ts = self.get_example_tree_sequence()\n        samples = ts.get_samples()\n\n        def check_tree(tree):\n            assert tree.get_index() == -1\n            assert tree.get_left_child(tree.get_virtual_root()) == samples[0]\n            assert tree.get_num_edges() == 0\n            assert tree.get_mrca(0, 1) == _tskit.NULL\n            for u in range(ts.get_num_nodes()):\n                assert tree.get_parent(u) == _tskit.NULL\n                assert tree.get_left_child(u) == _tskit.NULL\n                assert tree.get_right_child(u) == _tskit.NULL\n                assert tree.get_num_children(u) == 0\n                assert tree.get_edge(u) == _tskit.NULL\n\n        tree = _tskit.Tree(ts)\n        check_tree(tree)\n        while tree.next():\n            pass\n        check_tree(tree)\n        while tree.prev():\n            pass\n        check_tree(tree)\n\n    def test_newick_interface(self):\n        ts = self.get_example_tree_sequence()\n        st = _tskit.Tree(ts)\n        # TODO this will break when we correctly handle multiple roots.\n        assert st.get_newick(0) == \"n0;\"\n        for bad_type in [None, \"\", [], {}]:\n            with pytest.raises(TypeError):\n                st.get_newick(0, precision=bad_type)\n            with pytest.raises(TypeError):\n    
            st.get_newick(0, buffer_size=bad_type)\n            with pytest.raises(TypeError):\n                st.get_newick(0, legacy_ms_labels=bad_type)\n\n    def test_newick_buffer_size(self):\n        ts = self.get_example_tree_sequence()\n        st = _tskit.Tree(ts)\n        assert st.next\n        u = st.get_left_child(st.get_virtual_root())\n        newick = st.get_newick(u)\n        assert newick.endswith(\";\")\n        with pytest.raises(ValueError):\n            st.get_newick(u, buffer_size=-1)\n        with pytest.raises(_tskit.LibraryError):\n            st.get_newick(u, buffer_size=1)\n        newick2 = st.get_newick(u, len(newick))\n        assert newick2 == newick\n        with pytest.raises(_tskit.LibraryError):\n            st.get_newick(u, buffer_size=len(newick) - 1)\n\n    def test_index(self):\n        for ts in self.get_example_tree_sequences():\n            st = _tskit.Tree(ts)\n            index = 0\n            while st.next():\n                assert index == st.get_index()\n                index += 1\n\n    def test_bad_mutations(self):\n        ts = self.get_example_tree_sequence()\n        tables = _tskit.TableCollection()\n        ts.dump_tables(tables)\n\n        def f(mutations):\n            position = []\n            node = []\n            site = []\n            ancestral_state = []\n            ancestral_state_offset = [0]\n            derived_state = []\n            derived_state_offset = [0]\n            for j, (p, n) in enumerate(mutations):\n                site.append(j)\n                position.append(p)\n                ancestral_state.append(\"0\")\n                ancestral_state_offset.append(ancestral_state_offset[-1] + 1)\n                derived_state.append(\"1\")\n                derived_state_offset.append(derived_state_offset[-1] + 1)\n                node.append(n)\n            tables.sites.set_columns(\n                dict(\n                    position=position,\n                    
ancestral_state=ancestral_state,\n                    ancestral_state_offset=ancestral_state_offset,\n                    metadata=None,\n                    metadata_offset=None,\n                )\n            )\n            tables.mutations.set_columns(\n                dict(\n                    site=site,\n                    node=node,\n                    derived_state=derived_state,\n                    derived_state_offset=derived_state_offset,\n                    parent=None,\n                    metadata=None,\n                    metadata_offset=None,\n                )\n            )\n            ts2 = _tskit.TreeSequence()\n            ts2.load_tables(tables)\n\n        with pytest.raises(_tskit.LibraryError):\n            f([(0.1, -1)])\n        length = ts.get_sequence_length()\n        u = ts.get_num_nodes()\n        for bad_node in [u, u + 1, 2 * u]:\n            with pytest.raises(_tskit.LibraryError):\n                f([(0.1, bad_node)])\n        for bad_pos in [-1, length, length + 1]:\n            with pytest.raises(_tskit.LibraryError):\n                f([(bad_pos, 0)])\n\n    def test_sample_list(self):\n        options = _tskit.SAMPLE_LISTS\n        # Note: we're assuming that samples are 0-n here.\n        for ts in self.get_example_tree_sequences():\n            t = _tskit.Tree(ts, options=options)\n            while t.next():\n                # All sample nodes should have themselves.\n                for j in range(ts.get_num_samples()):\n                    assert t.get_left_sample(j) == j\n                    assert t.get_right_sample(j) == j\n\n                # All non-tree nodes should have 0\n                for j in range(ts.get_num_nodes()):\n                    if (\n                        t.get_parent(j) == _tskit.NULL\n                        and t.get_left_child(j) == _tskit.NULL\n                    ):\n                        assert t.get_left_sample(j) == _tskit.NULL\n                        assert 
t.get_right_sample(j) == _tskit.NULL\n                # The roots should have all samples.\n                u = t.get_left_child(t.get_virtual_root())\n                samples = []\n                while u != _tskit.NULL:\n                    sample = t.get_left_sample(u)\n                    end = t.get_right_sample(u)\n                    while True:\n                        samples.append(sample)\n                        if sample == end:\n                            break\n                        sample = t.get_next_sample(sample)\n                    u = t.get_right_sib(u)\n                assert sorted(samples) == list(range(ts.get_num_samples()))\n\n    def test_equality(self):\n        last_ts = None\n        for ts in self.get_example_tree_sequences():\n            t1 = _tskit.Tree(ts)\n            t2 = _tskit.Tree(ts)\n            assert t1.equals(t2)\n            assert t2.equals(t1)\n            while True:\n                assert t1.equals(t2)\n                assert t2.equals(t1)\n                n1 = t1.next()\n                assert not t1.equals(t2)\n                assert not t2.equals(t1)\n                n2 = t2.next()\n                assert n1 == n2\n                if not n1:\n                    break\n            if last_ts is not None:\n                t2 = _tskit.Tree(last_ts)\n                assert not t1.equals(t2)\n                assert not t2.equals(t1)\n            last_ts = ts\n\n    def test_b2_errors(self):\n        ts1 = self.get_example_tree_sequence(10)\n        t1 = _tskit.Tree(ts1)\n        t1.first()\n        with pytest.raises(TypeError):\n            t1.get_b2_index()\n        with pytest.raises(TypeError):\n            t1.get_b2_index(\"asdf\")\n\n    def test_b2(self):\n        ts1 = self.get_example_tree_sequence(10)\n        t1 = _tskit.Tree(ts1)\n        t1.first()\n        assert t1.get_b2_index(10) > 0\n\n    def test_num_lineages_errors(self):\n        ts1 = self.get_example_tree_sequence(10)\n        t1 = 
_tskit.Tree(ts1)\n        t1.first()\n        with pytest.raises(TypeError):\n            t1.get_num_lineages()\n        with pytest.raises(TypeError):\n            t1.get_num_lineages(\"asdf\")\n        with pytest.raises(_tskit.LibraryError, match=\"TIME_NONFINITE\"):\n            t1.get_num_lineages(np.inf)\n\n    def test_num_lineages(self):\n        ts1 = self.get_example_tree_sequence(10)\n        t1 = _tskit.Tree(ts1)\n        t1.first()\n        assert t1.get_num_lineages(0) == 10\n\n    def test_kc_distance_errors(self):\n        ts1 = self.get_example_tree_sequence(10)\n        t1 = _tskit.Tree(ts1, options=_tskit.SAMPLE_LISTS)\n        t1.first()\n        with pytest.raises(TypeError):\n            t1.get_kc_distance()\n        with pytest.raises(TypeError):\n            t1.get_kc_distance(t1)\n        for bad_tree in [None, \"tree\", 0]:\n            with pytest.raises(TypeError):\n                t1.get_kc_distance(bad_tree, lambda_=0)\n        for bad_value in [\"tree\", [], None]:\n            with pytest.raises(TypeError):\n                t1.get_kc_distance(t1, lambda_=bad_value)\n\n        t2 = _tskit.Tree(ts1, options=_tskit.SAMPLE_LISTS)\n        # If we don't seek to a specific tree, it has multiple roots (i.e., it's\n        # in the null state). 
This fails because we don't accept multiple roots.\n        self.verify_kc_library_error(t2, t2)\n\n        # Different numbers of samples fail.\n        ts2 = self.get_example_tree_sequence(11)\n        t2 = _tskit.Tree(ts2, options=_tskit.SAMPLE_LISTS)\n        t2.first()\n        self.verify_kc_library_error(t1, t2)\n\n        # Error when tree not initialized with sample lists\n        ts2 = self.get_example_tree_sequence(10)\n        t2 = _tskit.Tree(ts2)\n        t2.first()\n        with pytest.raises(\n            _tskit.NoSampleListsError, match=\"requires that sample lists are stored\"\n        ):\n            self.verify_kc_library_error(t1, t2)\n\n        # Unary nodes cause errors.\n        tables = _tskit.TableCollection(1.0)\n        tables.nodes.add_row(flags=1)\n        tables.nodes.add_row(flags=1, time=1)\n        tables.edges.add_row(0, 1, 1, 0)\n        tables.build_index()\n        ts = _tskit.TreeSequence()\n        ts.load_tables(tables)\n        t1 = _tskit.Tree(ts, options=_tskit.SAMPLE_LISTS)\n        t1.first()\n        self.verify_kc_library_error(t1, t1)\n\n    def verify_kc_library_error(self, t1, t2):\n        with pytest.raises(_tskit.LibraryError):\n            t1.get_kc_distance(t2, 0)\n\n    def test_kc_distance(self):\n        ts1 = self.get_example_tree_sequence(10, random_seed=123456)\n        t1 = _tskit.Tree(ts1, options=_tskit.SAMPLE_LISTS)\n        t1.first()\n        ts2 = self.get_example_tree_sequence(10, random_seed=1234)\n        t2 = _tskit.Tree(ts2, options=_tskit.SAMPLE_LISTS)\n        t2.first()\n        for lambda_ in [-1, 0, 1, 1000, -1e300]:\n            x1 = t1.get_kc_distance(t2, lambda_)\n            x2 = t2.get_kc_distance(t1, lambda_)\n            assert x1 == x2\n\n    def test_copy(self):\n        for ts in self.get_example_tree_sequences():\n            t1 = _tskit.Tree(ts)\n            t2 = t1.copy()\n            assert t1.get_index() == t2.get_index()\n            assert t1 is not t2\n            while 
t1.next():\n                t2 = t1.copy()\n                assert t1.get_index() == t2.get_index()\n\n    def test_map_mutations_null(self):\n        ts = self.get_example_tree_sequence()\n        tree = _tskit.Tree(ts)\n        n = ts.get_num_samples()\n        genotypes = np.zeros(n, dtype=np.int8)\n        ancestral_state, transitions = tree.map_mutations(genotypes)\n        assert ancestral_state == 0\n        assert len(transitions) == 0\n\n        genotypes = np.arange(n, dtype=np.int8)\n        ancestral_state, transitions = tree.map_mutations(genotypes)\n        assert ancestral_state == 0\n        assert len(transitions) == n - 1\n        for j in range(n - 1):\n            x = n - j - 1\n            assert transitions[j][0] == x\n            assert transitions[j][1] == -1\n            assert transitions[j][2] == x\n\n    def test_map_mutations(self):\n        ts = self.get_example_tree_sequence()\n        tree = _tskit.Tree(ts)\n        tree.next()\n        n = ts.get_num_samples()\n        genotypes = np.zeros(n, dtype=np.int8)\n        ancestral_state, transitions = tree.map_mutations(genotypes)\n        assert ancestral_state == 0\n        assert len(transitions) == 0\n\n    def test_map_mutations_fixed_ancestral_state(self):\n        ts = self.get_example_tree_sequence()\n        tree = _tskit.Tree(ts)\n        tree.next()\n        n = ts.get_num_samples()\n        genotypes = np.ones(n, dtype=np.int8)\n        ancestral_state, transitions = tree.map_mutations(genotypes, 0)\n        assert ancestral_state == 0\n        assert len(transitions) == 1\n\n    def test_map_mutations_errors(self):\n        ts = self.get_example_tree_sequence()\n        tree = _tskit.Tree(ts)\n        n = ts.get_num_samples()\n        with pytest.raises(TypeError):\n            tree.map_mutations()\n        for bad_size in [0, 1, n - 1, n + 1]:\n            with pytest.raises(ValueError):\n                tree.map_mutations(np.zeros(bad_size, dtype=np.int8))\n        for 
bad_type in [None, {}, set()]:\n            with pytest.raises(TypeError):\n                tree.map_mutations([bad_type] * n)\n        for bad_type in [np.uint32, np.uint64, np.float32]:\n            with pytest.raises(TypeError):\n                tree.map_mutations(np.zeros(n, dtype=bad_type))\n        genotypes = np.zeros(n, dtype=np.int8)\n        tree.map_mutations(genotypes)\n        for bad_value in [64, 65, 127, -2]:\n            genotypes[0] = bad_value\n            with pytest.raises(_tskit.LibraryError):\n                tree.map_mutations(genotypes)\n\n        genotypes = np.zeros(n, dtype=np.int8)\n        tree.map_mutations(genotypes)\n        for bad_type in [\"d\", []]:\n            with pytest.raises(TypeError):\n                tree.map_mutations(genotypes, bad_type)\n        for bad_state in [-2, -1, 127, 255]:\n            with pytest.raises(_tskit.LibraryError, match=\"Bad ancestral\"):\n                tree.map_mutations(genotypes, bad_state)\n\n    @pytest.mark.parametrize(\"array\", ARRAY_NAMES)\n    def test_array_read_only(self, array):\n        name = array + \"_array\"\n        ts1 = self.get_example_tree_sequence(10)\n        t1 = _tskit.Tree(ts1)\n        t1.first()\n        with pytest.raises(AttributeError, match=\"not writable\"):\n            setattr(t1, name, None)\n        with pytest.raises(AttributeError, match=\"not writable\"):\n            delattr(t1, name)\n\n        a = getattr(t1, name)\n        with pytest.raises(ValueError, match=\"assignment destination\"):\n            a[:] = 0\n        with pytest.raises(ValueError, match=\"assignment destination\"):\n            a[0] = 0\n        with pytest.raises(ValueError, match=\"cannot set WRITEABLE\"):\n            a.setflags(write=True)\n\n    @pytest.mark.parametrize(\"array\", ARRAY_NAMES)\n    def test_array_properties(self, array):\n        ts1 = self.get_example_tree_sequence(10)\n        t1 = _tskit.Tree(ts1)\n        a = getattr(t1, array + \"_array\")\n        
t1.first()\n        a = getattr(t1, array + \"_array\")\n        assert a.dtype == np.int32\n        assert a.shape == (ts1.get_num_nodes() + 1,)\n        assert a.base == t1\n        assert not a.flags.writeable\n        assert a.flags.aligned\n        assert a.flags.c_contiguous\n        assert not a.flags.owndata\n        b = getattr(t1, array + \"_array\")\n        assert a is not b\n        assert np.all(a == b)\n        a_copy = a.copy()\n        # This checks that the underlying pointer to memory is the same in\n        # both arrays.\n        assert a.__array_interface__ == b.__array_interface__\n        t1.next()\n        # NB! Because we are pointing to the underlying memory, the arrays\n        # will change as we iterate along the trees! This is a gotcha, but\n        # it's just something we have to document as it's a consequence of the\n        # zero copy semantics.\n        b = getattr(t1, array + \"_array\")\n        assert np.all(a == b)\n        assert np.any(a_copy != b)\n\n    @pytest.mark.parametrize(\"array\", ARRAY_NAMES)\n    def test_array_lifetime(self, array):\n        ts1 = self.get_example_tree_sequence(10)\n        t1 = _tskit.Tree(ts1)\n        t1.first()\n        a1 = getattr(t1, array + \"_array\")\n        a2 = a1.copy()\n        assert a1 is not a2\n        del t1\n        # Do some memory operations\n        a3 = np.ones(10**6)\n        assert np.all(a1 == a2)\n        del ts1\n        assert np.all(a1 == a2)\n        del a1\n        # Just do something to touch memory\n        a2[:] = 0\n        assert a3 is not a2\n\n    @pytest.mark.parametrize(\"ordering\", [\"preorder\", \"postorder\"])\n    def test_traversal_arrays(self, ordering):\n        ts = self.get_example_tree_sequence(10)\n        tree = _tskit.Tree(ts)\n        tree.first()\n        method = getattr(tree, \"get_\" + ordering)\n        for bad_type in [None, {}]:\n            with pytest.raises(TypeError):\n                method(bad_type)\n        for bad_node in 
[-2, 10**6]:\n            with pytest.raises(_tskit.LibraryError, match=\"out of bounds\"):\n                method(bad_node)\n        a = method(tree.get_virtual_root())\n        assert a.dtype == np.int32\n        assert not a.flags.writeable\n        assert a.flags.aligned\n        assert a.flags.c_contiguous\n        assert a.flags.owndata\n\n\nclass TestTableMetadataSchema(MetadataTestMixin):\n    def test_metadata_schema_attribute(self):\n        tables = _tskit.TableCollection(1.0)\n        for table in self.metadata_tables:\n            table = getattr(tables, f\"{table}s\")\n            # Check default value\n            assert table.metadata_schema == \"\"\n            # Set and read back\n            example = \"An example of metadata schema with unicode 🎄🌳🌴🌲🎋\"\n            table.metadata_schema = example\n            assert table.metadata_schema == example\n            # Can't del, or set to None\n            with pytest.raises(AttributeError):\n                del table.metadata_schema\n            with pytest.raises(TypeError):\n                table.metadata_schema = None\n            # Del or None had no effect\n            assert table.metadata_schema == example\n            # Clear and read back\n            table.metadata_schema = \"\"\n            assert table.metadata_schema == \"\"\n\n\nclass TestMetadataSchemaNamedTuple(MetadataTestMixin):\n    def test_named_tuple_init(self):\n        # Test init errors\n        with pytest.raises(TypeError):\n            metadata_schemas = _tskit.MetadataSchemas()\n        with pytest.raises(TypeError):\n            metadata_schemas = _tskit.MetadataSchemas([])\n        with pytest.raises(TypeError):\n            metadata_schemas = _tskit.MetadataSchemas([\"test_schema\"])\n        # Set and read back\n        metadata_schemas = _tskit.MetadataSchemas(\n            f\"{table}_test_schema\" for table in self.metadata_tables\n        )\n        assert metadata_schemas == tuple(\n            
f\"{table}_test_schema\" for table in self.metadata_tables\n        )\n        for i, table in enumerate(self.metadata_tables):\n            # Read back via attr, index\n            assert getattr(metadata_schemas, table) == f\"{table}_test_schema\"\n            assert metadata_schemas[i] == f\"{table}_test_schema\"\n            # Check read-only\n            with pytest.raises(AttributeError):\n                setattr(metadata_schemas, table, \"\")\n            with pytest.raises(TypeError):\n                metadata_schemas[i] = \"\"\n        # Equality\n        metadata_schemas2 = _tskit.MetadataSchemas(\n            f\"{table}_test_schema\" for table in self.metadata_tables\n        )\n        assert metadata_schemas == metadata_schemas2\n        metadata_schemas3 = _tskit.MetadataSchemas(\n            f\"{table}_test_schema_diff\" for table in self.metadata_tables\n        )\n        assert metadata_schemas != metadata_schemas3\n\n\nclass TestReferenceSequenceInputErrors:\n    @pytest.mark.parametrize(\"bad_type\", [1234, b\"bytes\", None, {}])\n    @pytest.mark.parametrize(\"attr\", [\"data\", \"url\", \"metadata_schema\"])\n    def test_string_bad_type(self, attr, bad_type):\n        refseq = _tskit.TableCollection().reference_sequence\n        with pytest.raises(TypeError, match=f\"{attr} must be a string\"):\n            setattr(refseq, attr, bad_type)\n\n    @pytest.mark.parametrize(\"bad_type\", [1234, \"unicode\", None, {}])\n    def test_metadata_bad_type(self, bad_type):\n        refseq = _tskit.TableCollection().reference_sequence\n        with pytest.raises(TypeError):\n            refseq.metadata = bad_type\n\n    @pytest.mark.parametrize(\"attr\", [\"data\", \"url\", \"metadata_schema\"])\n    def test_unicode_error(self, attr):\n        refseq = _tskit.TableCollection().reference_sequence\n        with pytest.raises(UnicodeEncodeError):\n            setattr(refseq, attr, NON_UTF8_STRING)\n\n    @pytest.mark.parametrize(\"attr\", [\"data\", 
\"url\", \"metadata\", \"metadata_schema\"])\n    def test_del_attr(self, attr):\n        refseq = _tskit.TableCollection().reference_sequence\n        with pytest.raises(AttributeError, match=f\"Cannot del {attr}\"):\n            delattr(refseq, attr)\n\n\nclass TestReferenceSequenceUpdates:\n    @pytest.mark.parametrize(\"value\", [\"abc\", \"🎄🌳🌴🌲🎋\"])\n    @pytest.mark.parametrize(\"attr\", [\"data\", \"url\", \"metadata_schema\"])\n    def test_set_string(self, attr, value):\n        refseq = _tskit.TableCollection().reference_sequence\n        assert refseq.is_null()\n        setattr(refseq, attr, value)\n        assert getattr(refseq, attr) == value\n        assert not refseq.is_null()\n\n    @pytest.mark.parametrize(\"attr\", [\"data\", \"url\", \"metadata_schema\"])\n    def test_set_string_null_none(self, attr):\n        refseq = _tskit.TableCollection().reference_sequence\n        assert refseq.is_null()\n        setattr(refseq, attr, \"a\")\n        assert not refseq.is_null()\n        setattr(refseq, attr, \"\")\n        assert refseq.is_null()\n\n    @pytest.mark.parametrize(\"value\", [b\"x\", b\"{}\", b\"abc\\0defg\"])\n    def test_set_metadata(self, value):\n        refseq = _tskit.TableCollection().reference_sequence\n        assert refseq.is_null()\n        refseq.metadata = value\n        assert not refseq.is_null()\n        refseq.metadata = b\"\"\n        assert refseq.is_null()\n\n\nclass TestReferenceSequenceTableCollection:\n    def test_references(self):\n        tables = _tskit.TableCollection()\n        refseq = tables.reference_sequence\n        assert refseq is not tables.reference_sequence\n\n    def test_state(self):\n        tables = _tskit.TableCollection()\n        refseq = tables.reference_sequence\n        assert refseq.is_null()\n        assert not tables.has_reference_sequence()\n        # Setting any non empty string changes the state to \"non-null\"\n        refseq.data = \"x\"\n        assert 
tables.has_reference_sequence()\n        assert not refseq.is_null()\n\n    @pytest.mark.parametrize(\"ref_data\", [\"abc\", \"A\" * 10, \"🎄🌳🌴🌲🎋\"])\n    def test_data(self, ref_data):\n        tables = _tskit.TableCollection()\n        refseq = tables.reference_sequence\n        assert refseq.data == \"\"\n        refseq.data = ref_data\n        assert refseq.data == ref_data\n        assert tables.reference_sequence.data == ref_data\n\n    @pytest.mark.parametrize(\"url\", [\"\", \"abc\", \"A\" * 10, \"🎄🌳🌴🌲🎋\"])\n    def test_url(self, url):\n        tables = _tskit.TableCollection()\n        refseq = tables.reference_sequence\n        assert refseq.url == \"\"\n        refseq.url = url\n        assert refseq.url == url\n        assert tables.reference_sequence.url == url\n\n    def test_metadata_default_none(self):\n        tables = _tskit.TableCollection()\n        assert tables.reference_sequence.metadata_schema == \"\"\n        assert tables.reference_sequence.metadata == b\"\"\n\n    # we don't actually check the form here, just pass in and out strings\n    @pytest.mark.parametrize(\"schema\", [\"\", \"{}\", \"abcdefg\"])\n    def test_metadata_schema(self, schema):\n        tables = _tskit.TableCollection()\n        tables.reference_sequence.metadata_schema = schema\n        assert tables.has_reference_sequence\n        assert tables.reference_sequence.metadata_schema == schema\n\n    @pytest.mark.parametrize(\"metadata\", [b\"\", b\"{}\", b\"abcdefg\"])\n    def test_metadata(self, metadata):\n        tables = _tskit.TableCollection()\n        tables.reference_sequence.metadata = metadata\n        assert tables.has_reference_sequence\n        assert tables.reference_sequence.metadata == metadata\n\n\nclass TestReferenceSequenceTreeSequence:\n    def test_references(self):\n        tc = _tskit.TableCollection()\n        tc.sequence_length = 1\n        ts = _tskit.TreeSequence()\n        ts.load_tables(tc, build_indexes=True)\n        refseq = 
ts.reference_sequence\n        assert refseq is not ts.reference_sequence\n        assert refseq is not tc\n\n    def test_state(self):\n        tc = _tskit.TableCollection()\n        tc.sequence_length = 1\n        ts = _tskit.TreeSequence()\n        ts.load_tables(tc, build_indexes=True)\n        assert not ts.has_reference_sequence()\n\n    def test_write(self):\n        tc = _tskit.TableCollection()\n        tc.sequence_length = 1\n        ts = _tskit.TreeSequence()\n        ts.load_tables(tc, build_indexes=True)\n        refseq = ts.reference_sequence\n        with pytest.raises(AttributeError, match=\"read-only\"):\n            refseq.data = \"asdf\"\n        with pytest.raises(AttributeError, match=\"read-only\"):\n            refseq.url = \"asdf\"\n        with pytest.raises(AttributeError, match=\"read-only\"):\n            refseq.metadata_schema = \"asdf\"\n        with pytest.raises(AttributeError, match=\"read-only\"):\n            refseq.metadata = \"asdf\"\n\n    @pytest.mark.parametrize(\"ref_data\", [\"\", \"ACTG\" * 10, \"🎄🌳🌴🌲🎋\"])\n    def test_data(self, ref_data):\n        tc = _tskit.TableCollection()\n        tc.sequence_length = 1\n        tc.reference_sequence.data = ref_data\n        ts = _tskit.TreeSequence()\n        ts.load_tables(tc, build_indexes=True)\n        assert ts.reference_sequence.data == ref_data\n\n    @pytest.mark.parametrize(\"url\", [\"\", \"ACTG\" * 10, \"🎄🌳🌴🌲🎋\"])\n    def test_url(self, url):\n        tc = _tskit.TableCollection()\n        tc.sequence_length = 1\n        tc.reference_sequence.url = url\n        ts = _tskit.TreeSequence()\n        ts.load_tables(tc, build_indexes=True)\n        assert ts.reference_sequence.url == url\n\n    # we don't actually check the form here, just pass in and out strings\n    @pytest.mark.parametrize(\"schema\", [\"\", \"{}\", \"abcdefg\"])\n    def test_metadata_schema(self, schema):\n        tc = _tskit.TableCollection()\n        tc.sequence_length = 1\n        
tc.reference_sequence.metadata_schema = schema\n        ts = _tskit.TreeSequence()\n        ts.load_tables(tc, build_indexes=True)\n        assert ts.has_reference_sequence\n        assert ts.reference_sequence.metadata_schema == schema\n\n    @pytest.mark.parametrize(\"metadata\", [b\"\", b\"{}\", b\"abcdefg\"])\n    def test_metadata(self, metadata):\n        tc = _tskit.TableCollection()\n        tc.sequence_length = 1\n        tc.reference_sequence.metadata = metadata\n        ts = _tskit.TreeSequence()\n        ts.load_tables(tc, build_indexes=True)\n        assert ts.has_reference_sequence\n        assert ts.reference_sequence.metadata == metadata\n\n\nclass TestModuleFunctions:\n    \"\"\"\n    Tests for the module level functions.\n    \"\"\"\n\n    def test_kastore_version(self):\n        version = _tskit.get_kastore_version()\n        assert version == (2, 1, 2)\n\n    def test_tskit_version(self):\n        version = _tskit.get_tskit_version()\n        assert version == (1, 3, 1)\n\n    def test_tskit_version_file(self):\n        maj, min_, patch = _tskit.get_tskit_version()\n        with open(f\"{tskit.__path__[0]}/../../c/VERSION.txt\") as f:\n            assert f.read() == f\"{maj}.{min_}.{patch}\"\n\n\ndef test_uninitialised():\n    # These methods work from an instance that has a NULL ref so don't check\n    skip_list = [\n        \"TableCollection_load\",\n        \"TreeSequence_load\",\n        \"TreeSequence_load_tables\",\n    ]\n    for cls_name, cls in inspect.getmembers(_tskit):\n        if (\n            isinstance(cls, type)\n            and not issubclass(cls, Exception)\n            and not issubclass(cls, tuple)\n        ):\n            methods = []\n            attributes = []\n            for name, value in inspect.getmembers(cls):\n                if not name.startswith(\"__\") and f\"{cls_name}_{name}\" not in skip_list:\n                    if inspect.isdatadescriptor(value):\n                        attributes.append(name)\n         
           else:\n                        methods.append(name)\n            uninitialised = cls.__new__(cls)\n            for attr in attributes:\n                with pytest.raises((SystemError, ValueError)):\n                    getattr(uninitialised, attr)\n                with pytest.raises((SystemError, ValueError, AttributeError)):\n                    setattr(uninitialised, attr, None)\n            for method_name in methods:\n                method = getattr(uninitialised, method_name)\n                with pytest.raises((SystemError, ValueError)):\n                    method()\n\n\ndef test_constants():\n    assert _tskit.TIME_UNITS_UNKNOWN == \"unknown\"\n    assert _tskit.TIME_UNITS_UNCALIBRATED == \"uncalibrated\"\n\n\nclass TestPairCoalescenceCountsErrors:\n    def example_ts(self, sample_size=10):\n        ts = msprime.sim_ancestry(\n            sample_size,\n            sequence_length=1e4,\n            recombination_rate=1e-8,\n            random_seed=1,\n            population_size=1e4,\n        )\n        return ts.ll_tree_sequence\n\n    @staticmethod\n    def pair_coalescence_counts(\n        ts,\n        sample_sets=None,\n        sample_set_sizes=None,\n        indexes=None,\n        windows=None,\n        node_bin_map=None,\n        span_normalise=False,\n        pair_normalise=False,\n    ):\n        n = ts.get_num_samples()\n        N = ts.get_num_nodes()\n        if sample_sets is None:\n            sample_sets = np.arange(n, dtype=np.int32)\n        if sample_set_sizes is None:\n            sample_set_sizes = [n // 2, n - n // 2]\n        if indexes is None:\n            pairs = itertools.combinations_with_replacement(\n                range(len(sample_set_sizes)), 2\n            )\n            indexes = [(i, j) for i, j in pairs]\n        if windows is None:\n            windows = np.array([0, 0.5, 1.0]) * ts.get_sequence_length()\n        if node_bin_map is None:\n            node_bin_map = np.arange(N, dtype=np.int32)\n        return 
ts.pair_coalescence_counts(\n            sample_sets=sample_sets,\n            sample_set_sizes=sample_set_sizes,\n            windows=windows,\n            indexes=indexes,\n            node_bin_map=node_bin_map,\n            span_normalise=span_normalise,\n            pair_normalise=pair_normalise,\n        )\n\n    def test_output_dims(self):\n        ts = self.example_ts()\n        coal = self.pair_coalescence_counts(ts)\n        dim = (2, 3, ts.get_num_nodes())\n        assert coal.shape == dim\n        coal = self.pair_coalescence_counts(ts, span_normalise=True)\n        assert coal.shape == dim\n        coal = self.pair_coalescence_counts(ts, pair_normalise=True)\n        assert coal.shape == dim\n\n    def test_node_shuffle(self):\n        rng = np.random.default_rng(1024)\n        ts = self.example_ts()\n        coal = self.pair_coalescence_counts(ts)\n        node_bin_map = np.arange(ts.get_num_nodes(), dtype=np.int32)\n        rng.shuffle(node_bin_map)\n        coal_shuffle = self.pair_coalescence_counts(ts, node_bin_map=node_bin_map)\n        np.testing.assert_allclose(coal_shuffle[..., node_bin_map], coal)\n\n    @pytest.mark.parametrize(\"bad_node\", [-1, -2, 1000])\n    def test_c_tsk_err_node_out_of_bounds(self, bad_node):\n        ts = self.example_ts()\n        ids = np.arange(ts.get_num_samples(), dtype=np.int32)\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_NODE_OUT_OF_BOUNDS\"):\n            self.pair_coalescence_counts(\n                ts, sample_sets=np.append(ids[:-1], bad_node).astype(np.int32)\n            )\n\n    def test_c_tsk_err_bad_windows(self):\n        ts = self.example_ts()\n        L = ts.get_sequence_length()\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_BAD_WINDOWS\"):\n            self.pair_coalescence_counts(ts, windows=[1.0, L])\n\n    def test_c_tsk_err_bad_node_bin_map(self):\n        ts = self.example_ts()\n        node_bin_map = np.arange(ts.get_num_nodes(), dtype=np.int32)\n       
 node_bin_map[0] = -10\n        with pytest.raises(_tskit.LibraryError, match=\"BAD_NODE_BIN_MAP\"):\n            self.pair_coalescence_counts(ts, node_bin_map=node_bin_map)\n\n    @pytest.mark.parametrize(\"bad_index\", [-1, 10])\n    def test_c_tsk_err_bad_sample_set_index(self, bad_index):\n        ts = self.example_ts()\n        with pytest.raises(_tskit.LibraryError, match=\"BAD_SAMPLE_SET_INDEX\"):\n            self.pair_coalescence_counts(ts, indexes=[(0, bad_index)])\n\n    @pytest.mark.parametrize(\"bad_ss_size\", [-1, 1000])\n    def test_cpy_bad_sample_sets(self, bad_ss_size):\n        ts = self.example_ts()\n        with pytest.raises(\n            (ValueError, OverflowError),\n            match=\"Sum of sample_set_sizes|Overflow|out of bounds\",\n        ):\n            self.pair_coalescence_counts(\n                ts, sample_set_sizes=[bad_ss_size, ts.get_num_samples()]\n            )\n\n    def test_cpy_bad_parse_inputs(self):\n        ts = self.example_ts()\n        with pytest.raises(TypeError, match=\"str\"):\n            self.pair_coalescence_counts(ts, span_normalise=\"foo\")\n\n    def test_cpy_bad_windows(self):\n        ts = self.example_ts()\n        with pytest.raises(ValueError, match=\"at least 2\"):\n            self.pair_coalescence_counts(ts, windows=[0.0])\n\n    @pytest.mark.parametrize(\"indexes\", [[(0, 0, 0)], np.zeros((0, 2), dtype=np.int32)])\n    def test_cpy_bad_indexes(self, indexes):\n        ts = self.example_ts()\n        with pytest.raises(ValueError, match=\"k x 2 array\"):\n            self.pair_coalescence_counts(ts, indexes=indexes)\n        with pytest.raises(ValueError, match=\"too small depth\"):\n            self.pair_coalescence_counts(ts, indexes=np.ravel(indexes))\n\n    def test_cpy_bad_node_bin_map(self):\n        ts = self.example_ts()\n        num_nodes = ts.get_num_nodes()\n        node_bin_map = np.full(num_nodes, tskit.NULL, dtype=np.int32)\n        with pytest.raises(ValueError, match=\"null values for 
all nodes\"):\n            self.pair_coalescence_counts(ts, node_bin_map=node_bin_map)\n        with pytest.raises(ValueError, match=\"a value per node\"):\n            self.pair_coalescence_counts(ts, node_bin_map=node_bin_map[:-1])\n        with pytest.raises(TypeError, match=\"cast array data\"):\n            self.pair_coalescence_counts(ts, node_bin_map=np.zeros(num_nodes))\n\n\nclass TestPairCoalescenceQuantilesErrors:\n    def example_ts(self, sample_size=10):\n        ts = msprime.sim_ancestry(\n            sample_size,\n            sequence_length=1e4,\n            recombination_rate=1e-8,\n            random_seed=1,\n            population_size=1e4,\n        )\n        return ts.ll_tree_sequence\n\n    @staticmethod\n    def pair_coalescence_quantiles(\n        ts,\n        quantiles=None,\n        sample_sets=None,\n        sample_set_sizes=None,\n        indexes=None,\n        windows=None,\n        node_bin_map=None,\n    ):\n        n = ts.get_num_samples()\n        if quantiles is None:\n            quantiles = np.linspace(0, 1, 4)\n        if sample_sets is None:\n            sample_sets = np.arange(n, dtype=np.int32)\n        if sample_set_sizes is None:\n            sample_set_sizes = [n // 2, n - n // 2]\n        if indexes is None:\n            pairs = itertools.combinations_with_replacement(\n                range(len(sample_set_sizes)), 2\n            )\n            indexes = [(i, j) for i, j in pairs]\n        if windows is None:\n            windows = np.array([0, 0.5, 1.0]) * ts.get_sequence_length()\n        if node_bin_map is None:\n            _, node_bin_map = np.unique(ts.nodes_time, return_inverse=True)\n            node_bin_map = node_bin_map.astype(np.int32)\n        return ts.pair_coalescence_quantiles(\n            sample_sets=sample_sets,\n            sample_set_sizes=sample_set_sizes,\n            windows=windows,\n            indexes=indexes,\n            node_bin_map=node_bin_map,\n            quantiles=quantiles,\n        
)\n\n    def test_output_dims(self):\n        ts = self.example_ts()\n        coal = self.pair_coalescence_quantiles(ts)\n        dim = (2, 3, 4)\n        assert coal.shape == dim\n\n    @pytest.mark.parametrize(\"quantiles\", [[1.0, 0.0], [-1.0], [2.0]])\n    def test_c_tsk_err_bad_quantiles(self, quantiles):\n        ts = self.example_ts()\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_BAD_QUANTILES\"):\n            self.pair_coalescence_quantiles(ts, quantiles=quantiles)\n\n    def test_c_tsk_err_unsorted_times(self):\n        ts = self.example_ts()\n        _, node_bin_map = np.unique(ts.nodes_time, return_inverse=True)\n        node_bin_map = node_bin_map[::-1]\n        node_bin_map = node_bin_map.astype(np.int32)\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_UNSORTED_TIMES\"):\n            self.pair_coalescence_quantiles(ts, node_bin_map=node_bin_map)\n\n    def test_c_tsk_err_bad_windows(self):\n        ts = self.example_ts()\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_BAD_WINDOWS\"):\n            self.pair_coalescence_quantiles(ts, windows=[1.0, ts.get_sequence_length()])\n\n    @pytest.mark.parametrize(\"bad_ss_size\", [-1, 1000])\n    def test_cpy_bad_sample_sets(self, bad_ss_size):\n        ts = self.example_ts()\n        with pytest.raises(\n            (ValueError, OverflowError),\n            match=\"Sum of sample_set_sizes|Overflow|out of bounds\",\n        ):\n            self.pair_coalescence_quantiles(\n                ts, sample_set_sizes=[bad_ss_size, ts.get_num_samples()]\n            )\n\n    def test_cpy_bad_windows(self):\n        ts = self.example_ts()\n        with pytest.raises(ValueError, match=\"at least 2\"):\n            self.pair_coalescence_quantiles(ts, windows=[0.0])\n\n    @pytest.mark.parametrize(\"indexes\", [[(0, 0, 0)], np.zeros((0, 2), dtype=np.int32)])\n    def test_cpy_bad_indexes(self, indexes):\n        ts = self.example_ts()\n        with 
pytest.raises(ValueError, match=\"k x 2 array\"):\n            self.pair_coalescence_quantiles(ts, indexes=indexes)\n        with pytest.raises(ValueError, match=\"too small depth\"):\n            self.pair_coalescence_quantiles(ts, indexes=np.ravel(indexes))\n\n    def test_cpy_bad_node_bin_map(self):\n        ts = self.example_ts()\n        num_nodes = ts.get_num_nodes()\n        node_bin_map = np.full(num_nodes, tskit.NULL, dtype=np.int32)\n        with pytest.raises(ValueError, match=\"null values for all nodes\"):\n            self.pair_coalescence_quantiles(ts, node_bin_map=node_bin_map)\n        with pytest.raises(ValueError, match=\"a value per node\"):\n            self.pair_coalescence_quantiles(ts, node_bin_map=node_bin_map[:-1])\n        with pytest.raises(TypeError, match=\"cast array data\"):\n            self.pair_coalescence_quantiles(ts, node_bin_map=np.zeros(num_nodes))\n\n    def test_cpy_bad_inputs(self):\n        ts = self.example_ts()\n        with pytest.raises(TypeError, match=\"at most 6 keyword\"):\n            ts.pair_coalescence_quantiles(\n                sample_sets=None,\n                sample_set_sizes=None,\n                windows=None,\n                quantiles=None,\n                indexes=None,\n                node_bin_map=None,\n                foo=\"bar\",\n            )\n\n    def test_cpy_bad_quantiles(self):\n        ts = self.example_ts()\n        quantiles = np.zeros(0)\n        with pytest.raises(ValueError, match=\"at least one quantile\"):\n            self.pair_coalescence_quantiles(ts, quantiles=quantiles)\n        quantiles = np.zeros((3, 3))\n        with pytest.raises(ValueError, match=\"object too deep\"):\n            self.pair_coalescence_quantiles(ts, quantiles=quantiles)\n\n\nclass TestPairCoalescenceRatesErrors:\n    def example_ts(self, sample_size=10):\n        ts = msprime.sim_ancestry(\n            sample_size,\n            sequence_length=1e4,\n            recombination_rate=1e-8,\n            
random_seed=1,\n            population_size=1e4,\n        )\n        return ts.ll_tree_sequence\n\n    @staticmethod\n    def pair_coalescence_rates(\n        ts,\n        time_windows=None,\n        sample_sets=None,\n        sample_set_sizes=None,\n        indexes=None,\n        windows=None,\n        node_bin_map=None,\n    ):\n        n = ts.get_num_samples()\n        if time_windows is None:\n            time_windows = np.array([0.0, np.mean(ts.nodes_time), np.inf])\n        if sample_sets is None:\n            sample_sets = np.arange(n, dtype=np.int32)\n        if sample_set_sizes is None:\n            sample_set_sizes = [n // 2, n - n // 2]\n        if indexes is None:\n            pairs = itertools.combinations_with_replacement(\n                range(len(sample_set_sizes)), 2\n            )\n            indexes = [(i, j) for i, j in pairs]\n        if windows is None:\n            windows = np.array([0, 0.5, 1.0]) * ts.get_sequence_length()\n        if node_bin_map is None:\n            node_bin_map = np.digitize(ts.nodes_time, time_windows) - 1\n            node_bin_map[node_bin_map == time_windows.size - 1] = tskit.NULL\n            node_bin_map = node_bin_map.astype(np.int32)\n        return ts.pair_coalescence_rates(\n            sample_sets=sample_sets,\n            sample_set_sizes=sample_set_sizes,\n            windows=windows,\n            indexes=indexes,\n            node_bin_map=node_bin_map,\n            time_windows=time_windows,\n        )\n\n    def test_output_dims(self):\n        ts = self.example_ts()\n        coal = self.pair_coalescence_rates(ts)\n        dim = (2, 3, 2)\n        assert coal.shape == dim\n\n    def test_c_tsk_err_bad_time_windows(self):\n        ts = self.example_ts()\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_BAD_TIME_WINDOWS\"):\n            self.pair_coalescence_rates(ts, time_windows=np.array([np.inf, 0.0]))\n        with pytest.raises(_tskit.LibraryError, 
match=\"TSK_ERR_BAD_TIME_WINDOWS_END\"):\n            self.pair_coalescence_rates(ts, time_windows=np.array([0.0, 10.0]))\n\n    def test_c_tsk_err_bad_node_time_window(self):\n        ts = self.example_ts()\n        node_bin_map = np.zeros(ts.nodes_time.size, dtype=np.int32)\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_BAD_NODE_TIME_WINDOW\"):\n            self.pair_coalescence_rates(ts, node_bin_map=node_bin_map)\n\n    def test_c_tsk_err_bad_sample_pair_times(self):\n        ts = self.example_ts()\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_BAD_SAMPLE_PAIR_TIMES\"):\n            self.pair_coalescence_rates(ts, time_windows=np.array([-1.0, np.inf]))\n\n    def test_c_tsk_err_bad_windows(self):\n        ts = self.example_ts()\n        with pytest.raises(_tskit.LibraryError, match=\"TSK_ERR_BAD_WINDOWS\"):\n            self.pair_coalescence_rates(ts, windows=[1.0, ts.get_sequence_length()])\n\n    @pytest.mark.parametrize(\"bad_ss_size\", [-1, 1000])\n    def test_cpy_bad_sample_sets(self, bad_ss_size):\n        ts = self.example_ts()\n        with pytest.raises(\n            (ValueError, OverflowError),\n            match=\"Sum of sample_set_sizes|Overflow|out of bounds\",\n        ):\n            self.pair_coalescence_rates(\n                ts, sample_set_sizes=[bad_ss_size, ts.get_num_samples()]\n            )\n\n    @pytest.mark.parametrize(\"indexes\", [[(0, 0, 0)], np.zeros((0, 2), dtype=np.int32)])\n    def test_cpy_bad_indexes(self, indexes):\n        ts = self.example_ts()\n        with pytest.raises(ValueError, match=\"k x 2 array\"):\n            self.pair_coalescence_rates(ts, indexes=indexes)\n        with pytest.raises(ValueError, match=\"too small depth\"):\n            self.pair_coalescence_rates(ts, indexes=np.ravel(indexes))\n\n    def test_cpy_bad_node_bin_map(self):\n        ts = self.example_ts()\n        num_nodes = ts.get_num_nodes()\n        node_bin_map = np.full(num_nodes, tskit.NULL, 
dtype=np.int32)\n        with pytest.raises(ValueError, match=\"null values for all nodes\"):\n            self.pair_coalescence_rates(ts, node_bin_map=node_bin_map)\n        with pytest.raises(ValueError, match=\"a value per node\"):\n            self.pair_coalescence_rates(ts, node_bin_map=node_bin_map[:-1])\n        with pytest.raises(TypeError, match=\"cast array data\"):\n            self.pair_coalescence_rates(ts, node_bin_map=np.zeros(num_nodes))\n\n    def test_cpy_bad_time_windows(self):\n        ts = self.example_ts()\n        time_windows = np.zeros(1)\n        node_bin_map = np.zeros(ts.nodes_time.size, dtype=np.int32)\n        with pytest.raises(ValueError, match=\"at least two breakpoints\"):\n            self.pair_coalescence_rates(\n                ts, time_windows=time_windows, node_bin_map=node_bin_map\n            )\n        time_windows = np.zeros((3, 3))\n        with pytest.raises(ValueError, match=\"object too deep\"):\n            self.pair_coalescence_rates(\n                ts, time_windows=time_windows, node_bin_map=node_bin_map\n            )\n\n    def test_cpy_bad_inputs(self):\n        ts = self.example_ts()\n        with pytest.raises(TypeError, match=\"at most 6 keyword\"):\n            ts.pair_coalescence_rates(\n                sample_sets=None,\n                sample_set_sizes=None,\n                windows=None,\n                quantiles=None,\n                indexes=None,\n                node_bin_map=None,\n                foo=\"bar\",\n            )\n"
  },
  {
    "path": "python/tests/test_reference_sequence.py",
    "content": "# MIT License\n#\n# Copyright (c) 2021-2022 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTests for reference sequence support.\n\"\"\"\n\nimport pytest\n\nimport tskit\n\n\nclass TestTablesProperties:\n    def test_initially_not_set(self):\n        tables = tskit.TableCollection(1)\n        assert not tables.has_reference_sequence()\n        tables.reference_sequence.data = \"ABCDEF\"\n        assert tables.reference_sequence.data == \"ABCDEF\"\n        assert tables.has_reference_sequence()\n\n    def test_does_not_have_reference_sequence_if_empty(self):\n        tables = tskit.TableCollection(1)\n        assert not tables.has_reference_sequence()\n        tables.reference_sequence.data = \"\"\n        assert not tables.has_reference_sequence()\n\n    def test_same_object(self):\n        tables = tskit.TableCollection(1)\n        refseq = tables.reference_sequence\n        tables.reference_sequence.data = 
\"asdf\"\n        assert refseq.data == \"asdf\"\n        # Not clear we want to do this, but keeping the same pattern as the\n        # tables for now.\n        assert tables.reference_sequence is not refseq\n\n    def test_clear(self, ts_fixture):\n        tables = ts_fixture.dump_tables()\n        tables.reference_sequence.clear()\n        assert not tables.has_reference_sequence()\n\n    def test_write_object_fails_bad_type(self):\n        tables = tskit.TableCollection(1)\n        with pytest.raises(AttributeError):\n            tables.reference_sequence = None\n\n    def test_write_object(self, ts_fixture):\n        tables = tskit.TableCollection(1)\n        tables.reference_sequence = ts_fixture.reference_sequence\n        tables.reference_sequence.assert_equals(ts_fixture.reference_sequence)\n\n    def test_asdict_no_reference(self):\n        tables = tskit.TableCollection(1)\n        d = tables.asdict()\n        assert \"reference_sequence\" not in d\n\n    def test_asdict_reference_no_metadata(self):\n        tables = tskit.TableCollection(1)\n        tables.reference_sequence.data = \"ABCDEF\"\n        d = tables.asdict()[\"reference_sequence\"]\n        assert d[\"data\"] == \"ABCDEF\"\n        assert d[\"url\"] == \"\"\n        assert \"metadata\" not in d\n        assert \"metadata_schema\" not in d\n\n    def test_asdict_reference_metadata(self):\n        tables = tskit.TableCollection(1)\n        tables.reference_sequence.metadata_schema = (\n            tskit.MetadataSchema.permissive_json()\n        )\n        tables.reference_sequence.metadata = {\"a\": \"ABCDEF\"}\n        d = tables.asdict()[\"reference_sequence\"]\n        assert d[\"data\"] == \"\"\n        assert d[\"url\"] == \"\"\n        assert d[\"metadata_schema\"] == '{\"codec\":\"json\"}'\n        assert d[\"metadata\"] == b'{\"a\":\"ABCDEF\"}'\n\n    def test_fromdict_reference_data(self):\n        d = tskit.TableCollection(1).asdict()\n        d[\"reference_sequence\"] = {\"data\": 
\"XYZ\"}\n        tables = tskit.TableCollection.fromdict(d)\n        assert tables.has_reference_sequence()\n        assert tables.reference_sequence.data == \"XYZ\"\n        assert tables.reference_sequence.url == \"\"\n        assert repr(tables.reference_sequence.metadata_schema) == \"\"\n        assert tables.reference_sequence.metadata == b\"\"\n\n    def test_fromdict_reference_url(self):\n        d = tskit.TableCollection(1).asdict()\n        d[\"reference_sequence\"] = {\"url\": \"file://file.fasta\"}\n        tables = tskit.TableCollection.fromdict(d)\n        assert tables.has_reference_sequence()\n        assert tables.reference_sequence.data == \"\"\n        assert tables.reference_sequence.url == \"file://file.fasta\"\n        assert repr(tables.reference_sequence.metadata_schema) == \"\"\n        assert tables.reference_sequence.metadata == b\"\"\n\n    def test_fromdict_reference_metadata(self):\n        tables = tskit.TableCollection(1)\n        tables.reference_sequence.metadata_schema = (\n            tskit.MetadataSchema.permissive_json()\n        )\n        tables.reference_sequence.metadata = {\"a\": \"ABCDEF\"}\n        tables = tskit.TableCollection.fromdict(tables.asdict())\n        assert tables.has_reference_sequence()\n        assert tables.reference_sequence.data == \"\"\n        assert (\n            tables.reference_sequence.metadata_schema\n            == tskit.MetadataSchema.permissive_json()\n        )\n        assert tables.reference_sequence.metadata == {\"a\": \"ABCDEF\"}\n\n    def test_fromdict_no_reference(self):\n        d = tskit.TableCollection(1).asdict()\n        tables = tskit.TableCollection.fromdict(d)\n        assert not tables.has_reference_sequence()\n\n    def test_fromdict_all_values_empty(self):\n        d = tskit.TableCollection(1).asdict()\n        d[\"reference_sequence\"] = dict(data=\"\", url=\"\", metadata_schema=\"\", metadata=b\"\")\n        tables = tskit.TableCollection.fromdict(d)\n        assert not 
tables.has_reference_sequence()\n\n\nclass TestSummaries:\n    def test_repr(self):\n        tables = tskit.TableCollection(1)\n        refseq = tables.reference_sequence\n        # TODO add better tests when summaries are updated\n        assert repr(refseq).startswith(\"ReferenceSequence\")\n\n\nclass TestEquals:\n    def test_equal_self(self, ts_fixture):\n        ts_fixture.reference_sequence.assert_equals(ts_fixture.reference_sequence)\n        assert ts_fixture.reference_sequence == ts_fixture.reference_sequence\n        assert not ts_fixture.reference_sequence != ts_fixture.reference_sequence\n        assert ts_fixture.reference_sequence.equals(ts_fixture.reference_sequence)\n\n    def test_equal_empty(self):\n        tables = tskit.TableCollection(1)\n        tables.reference_sequence.assert_equals(tables.reference_sequence)\n        assert tables.reference_sequence == tables.reference_sequence\n        assert tables.reference_sequence.equals(tables.reference_sequence)\n\n    @pytest.mark.parametrize(\"attr\", [\"url\", \"data\"])\n    def test_unequal_attr_missing(self, ts_fixture, attr):\n        t1 = ts_fixture.tables\n        d = t1.asdict()\n        del d[\"reference_sequence\"][attr]\n        t2 = tskit.TableCollection.fromdict(d)\n        with pytest.raises(AssertionError, match=attr):\n            t1.reference_sequence.assert_equals(t2.reference_sequence)\n        assert t1.reference_sequence != t2.reference_sequence\n        assert not t1.reference_sequence.equals(t2.reference_sequence)\n        with pytest.raises(AssertionError, match=attr):\n            t2.reference_sequence.assert_equals(t1.reference_sequence)\n        assert t2.reference_sequence != t1.reference_sequence\n        assert not t2.reference_sequence.equals(t1.reference_sequence)\n\n    @pytest.mark.parametrize(\n        (\"attr\", \"val\"),\n        [\n            (\"url\", \"foo\"),\n            (\"data\", \"bar\"),\n            (\"metadata\", {\"json\": \"runs the world\"}),\n    
        (\"metadata_schema\", tskit.MetadataSchema(None)),\n        ],\n    )\n    def test_different_not_equal(self, ts_fixture, attr, val):\n        t1 = ts_fixture.dump_tables()\n        t2 = t1.copy()\n        setattr(t1.reference_sequence, attr, val)\n\n        with pytest.raises(AssertionError):\n            t1.reference_sequence.assert_equals(t2.reference_sequence)\n        assert t1.reference_sequence != t2.reference_sequence\n        assert not t1.reference_sequence.equals(t2.reference_sequence)\n        with pytest.raises(AssertionError):\n            t2.reference_sequence.assert_equals(t1.reference_sequence)\n        assert t2.reference_sequence != t1.reference_sequence\n        assert not t2.reference_sequence.equals(t1.reference_sequence)\n\n    @pytest.mark.parametrize(\n        (\"attr\", \"val\"),\n        [\n            (\"metadata\", {\"json\": \"runs the world\"}),\n            (\"metadata_schema\", tskit.MetadataSchema(None)),\n        ],\n    )\n    def test_different_but_ignore(self, ts_fixture, attr, val):\n        t1 = ts_fixture.dump_tables()\n        t2 = t1.copy()\n        setattr(t1.reference_sequence, attr, val)\n\n        with pytest.raises(AssertionError):\n            t1.reference_sequence.assert_equals(t2.reference_sequence)\n        assert t1.reference_sequence != t2.reference_sequence\n        assert not t1.reference_sequence.equals(t2.reference_sequence)\n        with pytest.raises(AssertionError):\n            t2.reference_sequence.assert_equals(t1.reference_sequence)\n        assert t2.reference_sequence != t1.reference_sequence\n        assert not t2.reference_sequence.equals(t1.reference_sequence)\n\n        t2.reference_sequence.assert_equals(t1.reference_sequence, ignore_metadata=True)\n        assert t2.reference_sequence.equals(t1.reference_sequence, ignore_metadata=True)\n\n\nclass TestTreeSequenceProperties:\n    @pytest.mark.parametrize(\"data\", [\"abcd\", \"🎄🌳🌴\"])\n    def test_data_inherited_from_tables(self, 
data):\n        tables = tskit.TableCollection(1)\n        tables.reference_sequence.data = data\n        ts = tables.tree_sequence()\n        assert ts.reference_sequence.data == data\n        assert ts.has_reference_sequence()\n\n    @pytest.mark.parametrize(\"url\", [\"http://xyx.z\", \"file://\"])\n    def test_url_inherited_from_tables(self, url):\n        tables = tskit.TableCollection(1)\n        tables.reference_sequence.url = url\n        ts = tables.tree_sequence()\n        assert ts.reference_sequence.url == url\n        assert ts.has_reference_sequence()\n\n    def test_no_reference_sequence(self):\n        tables = tskit.TableCollection(1)\n        ts = tables.tree_sequence()\n        assert not ts.has_reference_sequence()\n        assert ts.reference_sequence is None\n\n    def test_write_data_fails(self):\n        tables = tskit.TableCollection(1)\n        tables.reference_sequence.data = \"abc\"\n        ts = tables.tree_sequence()\n        with pytest.raises(AttributeError, match=\"read-only\"):\n            ts.reference_sequence.data = \"xyz\"\n\n    def test_write_url_fails(self):\n        tables = tskit.TableCollection(1)\n        tables.reference_sequence.data = \"abc\"\n        ts = tables.tree_sequence()\n        with pytest.raises(AttributeError, match=\"read-only\"):\n            ts.reference_sequence.url = \"xyz\"\n\n    def test_write_metadata_fails(self):\n        tables = tskit.TableCollection(1)\n        tables.reference_sequence.data = \"abc\"\n        ts = tables.tree_sequence()\n        with pytest.raises(AttributeError, match=\"read-only\"):\n            # NOTE: it can be slightly confusing here because we try to encode\n            # first, and so we don't get an AttributeError for all inputs.\n            ts.reference_sequence.metadata = b\"xyz\"\n\n    def test_write_metadata_schema_fails(self):\n        tables = tskit.TableCollection(1)\n        tables.reference_sequence.data = \"abc\"\n        ts = tables.tree_sequence()\n     
   with pytest.raises(AttributeError, match=\"read-only\"):\n            ts.reference_sequence.metadata_schema = (\n                tskit.MetadataSchema.permissive_json()\n            )\n\n    def test_write_object_fails(self, ts_fixture):\n        tables = tskit.TableCollection(1)\n        ts = tables.tree_sequence()\n        with pytest.raises(AttributeError):\n            ts.reference_sequence = ts_fixture.reference_sequence\n"
  },
  {
    "path": "python/tests/test_relatedness_vector.py",
    "content": "# MIT License\n#\n# Copyright (c) 2025 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTest cases for matrix-vector product stats\n\"\"\"\n\nimport msprime\nimport numpy as np\nimport pytest\n\nimport tskit\nfrom tests import tsutil\nfrom tests.tsutil import get_example_tree_sequences\n\n# ↑ See https://github.com/tskit-dev/tskit/issues/1804 for when\n# we can remove this.\n\n\n# Implementation note: the class structure here, where we pass in all the\n# needed arrays through the constructor was determined by an older version\n# in which we used numba acceleration. 
We could just pass in a reference to\n# the tree sequence now, but it is useful to keep track of exactly what we\n# require, so leaving it as it is for now.\nclass RelatednessVector:\n    def __init__(\n        self,\n        sample_weights,\n        windows,\n        num_nodes,\n        samples,\n        focal_nodes,\n        nodes_time,\n        edges_left,\n        edges_right,\n        edges_parent,\n        edges_child,\n        sequence_length,\n        tree_pos,\n        verbosity=0,\n        internal_checks=False,\n        centre=True,\n        span_normalise=True,\n    ):\n        self.sample_weights = np.asarray(sample_weights, dtype=np.float64)\n        self.num_weights = self.sample_weights.shape[1]\n        self.windows = windows\n        N = num_nodes\n        self.parent = np.full(N, -1, dtype=np.int32)\n        # Edges and indexes\n        self.edges_left = edges_left\n        self.edges_right = edges_right\n        self.edges_parent = edges_parent\n        self.edges_child = edges_child\n        self.sequence_length = sequence_length\n        self.nodes_time = nodes_time\n        self.samples = samples\n        self.focal_nodes = focal_nodes\n        self.tree_pos = tree_pos\n        self.position = windows[0]\n        self.x = np.zeros(N, dtype=np.float64)\n        self.w = np.zeros((N, self.num_weights), dtype=np.float64)\n        self.v = np.zeros((N, self.num_weights), dtype=np.float64)\n        self.verbosity = verbosity\n        self.internal_checks = internal_checks\n        self.centre = centre\n        self.span_normalise = span_normalise\n\n        if self.centre:\n            self.sample_weights -= np.mean(self.sample_weights, axis=0)\n\n        for j, u in enumerate(samples):\n            self.w[u] = self.sample_weights[j]\n\n        if self.verbosity > 0:\n            self.print_state(\"init\")\n\n    def print_state(self, msg=\"\"):\n        num_nodes = len(self.parent)\n        print(f\"..........{msg}................\")\n        
print(\"tree_pos:\")\n        print(self.tree_pos)\n        print(f\"position = {self.position}\")\n        for j in range(num_nodes):\n            st = f\"{self.nodes_time[j]}\"\n            pt = (\n                \"NaN\"\n                if self.parent[j] == tskit.NULL\n                else f\"{self.nodes_time[self.parent[j]]}\"\n            )\n            print(\n                f\"node {j} -> {self.parent[j]}: \"\n                f\"z = ({pt} - {st})\"\n                f\" * ({self.position} - {self.x[j]:.2})\"\n                f\" * {','.join(map(str, self.w[j].round(2)))}\"\n                f\" = {','.join(map(str, self.get_z(j).round(2)))}\"\n            )\n            print(f\"         value: {','.join(map(str, self.v[j].round(2)))}\")\n        roots = []\n        fmt = \"{:<6}{:>8}\\t{}\\t{}\\t{}\"\n        s = f\"roots = {roots}\\n\"\n        s += (\n            fmt.format(\n                \"node\",\n                \"parent\",\n                \"value\",\n                \"weight\",\n                \"z\",\n            )\n            + \"\\n\"\n        )\n        for u in range(num_nodes):\n            u_str = f\"{u}\"\n            s += (\n                fmt.format(\n                    u_str,\n                    self.parent[u],\n                    \",\".join(map(str, self.v[u].round(2))),\n                    \",\".join(map(str, self.w[u].round(2))),\n                    \",\".join(map(str, self.get_z(u).round(2))),\n                )\n                + \"\\n\"\n            )\n        print(s)\n\n        print(\"Current state:\")\n        state = self.current_state()\n        for j, x in enumerate(state):\n            print(f\"   {j}: {x}\")\n        print(\"..........................\")\n\n    def remove_edge(self, p, c):\n        if self.verbosity > 0:\n            self.print_state(f\"remove {int(p), int(c)}\")\n        assert p != -1\n        self.v[c] += self.get_z(c)\n        self.x[c] = self.position\n        self.parent[c] = -1\n        
self.adjust_path_up(p, c, -1)\n\n    def insert_edge(self, p, c):\n        if self.verbosity > 0:\n            self.print_state(f\"insert {int(p), int(c)}\")\n        assert p != -1\n        assert self.parent[c] == -1, \"contradictory edges\"\n        self.adjust_path_up(p, c, +1)\n        self.x[c] = self.position\n        self.parent[c] = p\n\n    def adjust_path_up(self, p, c, sign):\n        # sign = -1 for removing edges, +1 for adding\n        while p != tskit.NULL:\n            self.v[p] += self.get_z(p)\n            self.x[p] = self.position\n            self.v[c] -= sign * self.v[p]\n            self.w[p] += sign * self.w[c]\n            p = self.parent[p]\n\n    def get_z(self, u):\n        p = self.parent[u]\n        if p == tskit.NULL:\n            return np.zeros(self.num_weights, dtype=np.float64)\n        time = self.nodes_time[p] - self.nodes_time[u]\n        span = self.position - self.x[u]\n        return time * span * self.w[u]\n\n    def mrca(self, a, b):\n        # just used for `current_state`\n        aa = [a]\n        while a != tskit.NULL:\n            a = self.parent[a]\n            aa.append(a)\n        while b not in aa:\n            b = self.parent[b]\n        return b\n\n    def write_output(self):\n        \"\"\"\n        Compute and return the current state, zero-ing out\n        all contributions (used for switching between windows).\n        \"\"\"\n        n = len(self.focal_nodes)\n        out = np.zeros((n, self.num_weights))\n        for j, c in enumerate(self.focal_nodes):\n            while c != tskit.NULL:\n                if self.x[c] != self.position:\n                    self.v[c] += self.get_z(c)\n                    self.x[c] = self.position\n                out[j] += self.v[c]\n                c = self.parent[c]\n        self.v *= 0.0\n        return out\n\n    def current_state(self):\n        \"\"\"\n        Compute the current output, for debugging.\n        \"\"\"\n        if self.verbosity > 2:\n            
print(\"---------------\")\n        n = len(self.focal_nodes)\n        out = np.zeros((n, self.num_weights))\n        for j, a in enumerate(self.focal_nodes):\n            # edges on the path up from a\n            pa = a\n            while pa != tskit.NULL:\n                if self.verbosity > 2:\n                    print(\"edge:\", pa, self.get_z(pa))\n                out[j] += self.get_z(pa) + self.v[pa]\n                pa = self.parent[pa]\n        if self.verbosity > 2:\n            print(\"---------------\")\n        return out\n\n    def run(self):\n        M = self.edges_left.shape[0]\n        edges_left = self.edges_left\n        edges_right = self.edges_right\n        edges_parent = self.edges_parent\n        edges_child = self.edges_child\n        tree_pos = self.tree_pos\n        in_order = tree_pos.in_range.order\n        out_order = tree_pos.out_range.order\n        num_windows = len(self.windows) - 1\n        out = np.zeros(\n            (num_windows, len(self.focal_nodes), self.sample_weights.shape[1])\n        )\n\n        m = 0\n        self.position = self.windows[0]\n\n        # seek to first window\n        for j in range(tree_pos.in_range.start, tree_pos.in_range.stop, 1):\n            e = in_order[j]\n            if edges_left[e] <= self.position and self.position < edges_right[e]:\n                p = edges_parent[e]\n                c = edges_child[e]\n                self.insert_edge(p, c)\n\n        valid = tree_pos.next()\n        j = tree_pos.in_range.start - 1\n        k = tree_pos.out_range.start - 1\n        while m < num_windows:\n            if valid and self.position == tree_pos.interval.left:\n                for k in range(tree_pos.out_range.start, tree_pos.out_range.stop, 1):\n                    e = out_order[k]\n                    p = edges_parent[e]\n                    c = edges_child[e]\n                    self.remove_edge(p, c)\n                for j in range(tree_pos.in_range.start, tree_pos.in_range.stop, 1):\n      
              e = in_order[j]\n                    p = edges_parent[e]\n                    c = edges_child[e]\n                    self.insert_edge(p, c)\n                    assert self.parent[p] == tskit.NULL or self.x[p] == self.position\n                valid = tree_pos.next()\n            next_position = self.windows[m + 1]\n            if j + 1 < M:\n                next_position = min(next_position, edges_left[in_order[j + 1]])\n            if k + 1 < M:\n                next_position = min(next_position, edges_right[out_order[k + 1]])\n            assert self.position < next_position\n            self.position = next_position\n            if self.position == self.windows[m + 1]:\n                out[m] = self.write_output()\n                m = m + 1\n\n        if self.verbosity > 1:\n            self.print_state()\n\n        if self.centre:\n            for m in range(num_windows):\n                out[m] -= np.mean(out[m], axis=0)\n\n        if self.span_normalise:\n            window_lengths = np.diff(self.windows)\n            for m in range(num_windows):\n                out[m] /= window_lengths[m]\n\n        return out\n\n\ndef relatedness_vector(ts, sample_weights, windows=None, nodes=None, **kwargs):\n    if len(sample_weights.shape) == 1:\n        sample_weights = sample_weights[:, np.newaxis]\n    if nodes is None:\n        nodes = np.fromiter(ts.samples(), dtype=np.int32)\n    drop_dimension = windows is None\n    if drop_dimension:\n        windows = [0, ts.sequence_length]\n\n    tree_pos = tsutil.TreeIndexes(ts)\n    breakpoints = np.fromiter(ts.breakpoints(), dtype=\"float\")\n    index = np.searchsorted(breakpoints, windows[0])\n    if breakpoints[index] > windows[0]:\n        index -= 1\n    tree_pos.seek_forward(index)\n\n    rv = RelatednessVector(\n        sample_weights,\n        windows,\n        ts.num_nodes,\n        samples=ts.samples(),\n        focal_nodes=nodes,\n        nodes_time=ts.nodes_time,\n        
edges_left=ts.edges_left,\n        edges_right=ts.edges_right,\n        edges_parent=ts.edges_parent,\n        edges_child=ts.edges_child,\n        sequence_length=ts.sequence_length,\n        tree_pos=tree_pos,\n        **kwargs,\n    )\n    out = rv.run()\n    if drop_dimension:\n        assert len(out.shape) == 3 and out.shape[0] == 1\n        out = out[0]\n    return out\n\n\ndef relatedness_matrix(ts, windows, centre, nodes=None, span_normalise=True):\n    if nodes is None:\n        keep_rows = np.arange(ts.num_samples)\n        keep_cols = np.arange(ts.num_samples)\n    else:\n        orig_samples = list(ts.samples())\n        extra_nodes = set(nodes).difference(set(orig_samples))\n        tables = ts.dump_tables()\n        tables.nodes.clear()\n        for n in ts.nodes():\n            if n.id in extra_nodes:\n                n = n.replace(flags=n.flags | tskit.NODE_IS_SAMPLE)\n            tables.nodes.append(n)\n        ts = tables.tree_sequence()\n        all_samples = list(ts.samples())\n        keep_rows = np.array([all_samples.index(i) for i in nodes])\n        keep_cols = np.array([all_samples.index(i) for i in orig_samples])\n\n    use_windows = windows\n    drop_first = windows is not None and windows[0] > 0\n    if drop_first:\n        use_windows = np.concatenate([[0], np.array(use_windows)])\n    drop_last = windows is not None and windows[-1] < ts.sequence_length\n    if drop_last:\n        use_windows = np.concatenate([np.array(use_windows), [ts.sequence_length]])\n    Sigma = ts.genetic_relatedness(\n        sample_sets=[[i] for i in ts.samples()],\n        indexes=[(i, j) for i in range(ts.num_samples) for j in range(ts.num_samples)],\n        windows=use_windows,\n        mode=\"branch\",\n        span_normalise=span_normalise,\n        proportion=False,\n        centre=centre,\n    )\n    if windows is not None:\n        if drop_first:\n            Sigma = Sigma[1:]\n        if drop_last:\n            Sigma = Sigma[:-1]\n    nwin = 1 if 
windows is None else len(windows) - 1\n    shape = (nwin, ts.num_samples, ts.num_samples)\n    Sigma = Sigma.reshape(shape)\n    out = np.array([x[np.ix_(keep_rows, keep_cols)] for x in Sigma])\n    if windows is None:\n        out = out[0]\n    return out\n\n\ndef verify_relatedness_vector(\n    ts,\n    w,\n    windows,\n    *,\n    internal_checks=False,\n    verbosity=0,\n    centre=True,\n    nodes=None,\n    span_normalise=True,\n):\n    R1 = relatedness_vector(\n        ts,\n        sample_weights=w,\n        windows=windows,\n        internal_checks=internal_checks,\n        verbosity=verbosity,\n        centre=centre,\n        nodes=nodes,\n        span_normalise=span_normalise,\n    )\n    nrows = ts.num_samples if nodes is None else len(nodes)\n    wvec = w if len(w.shape) > 1 else w[:, np.newaxis]\n    Sigma = relatedness_matrix(\n        ts, windows=windows, centre=centre, nodes=nodes, span_normalise=span_normalise\n    )\n    if windows is None:\n        R2 = Sigma.dot(wvec)\n    else:\n        R2 = np.zeros((len(windows) - 1, nrows, wvec.shape[1]))\n        for k in range(len(windows) - 1):\n            R2[k] = Sigma[k].dot(wvec)\n    R3 = ts.genetic_relatedness_vector(\n        w,\n        windows=windows,\n        mode=\"branch\",\n        centre=centre,\n        nodes=nodes,\n        span_normalise=span_normalise,\n    )\n    if verbosity > 0:\n        print(ts.draw_text())\n        print(\"weights:\", w)\n        print(\"windows:\", windows)\n        print(\"centre:\", centre)\n        print(\"here:\", R1)\n        print(\"with ts:\", R2)\n        print(\"with lib:\", R3)\n        print(\"Sigma:\", Sigma)\n    if windows is None:\n        assert R1.shape == (nrows, wvec.shape[1])\n    else:\n        assert R1.shape == (len(windows) - 1, nrows, wvec.shape[1])\n    np.testing.assert_allclose(R1, R2, atol=1e-10)\n    np.testing.assert_allclose(R1, R3, atol=1e-10)\n    return R1\n\n\ndef check_relatedness_vector(\n    ts,\n    n=2,\n    
num_windows=0,\n    *,\n    internal_checks=False,\n    verbosity=0,\n    seed=123,\n    centre=True,\n    span_normalise=True,\n    do_nodes=True,\n):\n    rng = np.random.default_rng(seed=seed)\n    if num_windows == 0:\n        windows = None\n    elif num_windows % 2 == 0:\n        windows = np.linspace(\n            0.2 * ts.sequence_length, 0.8 * ts.sequence_length, num_windows + 1\n        )\n    else:\n        windows = np.linspace(0, ts.sequence_length, num_windows + 1)\n    num_nodes_list = (0,) if (centre or not do_nodes) else (0, 3)\n    for num_nodes in num_nodes_list:\n        if num_nodes == 0:\n            nodes = None\n        else:\n            nodes = rng.choice(ts.num_nodes, num_nodes, replace=False)\n        for k in range(n):\n            if k == 0:\n                w = rng.normal(size=ts.num_samples)\n            else:\n                w = rng.normal(size=ts.num_samples * k).reshape((ts.num_samples, k))\n            w = np.round(len(w) * w)\n            R = verify_relatedness_vector(\n                ts,\n                w,\n                windows,\n                internal_checks=internal_checks,\n                verbosity=verbosity,\n                centre=centre,\n                nodes=nodes,\n                span_normalise=span_normalise,\n            )\n    return R\n\n\nclass TestRelatednessVector:\n    def test_bad_weights(self):\n        n = 5\n        ts = msprime.sim_ancestry(\n            n,\n            ploidy=2,\n            sequence_length=10,\n            random_seed=123,\n        )\n        for bad_W in (None, [1], np.ones((3 * n, 2)), np.ones((n - 1, 2))):\n            with pytest.raises(ValueError, match=\"number of samples\"):\n                ts.genetic_relatedness_vector(bad_W, mode=\"branch\")\n\n    def test_bad_windows(self):\n        n = 5\n        ts = msprime.sim_ancestry(\n            n,\n            ploidy=2,\n            sequence_length=10,\n            random_seed=123,\n        )\n        for bad_w in ([1], 
[]):\n            with pytest.raises(ValueError, match=\"Windows array\"):\n                ts.genetic_relatedness_vector(\n                    np.ones(ts.num_samples), windows=bad_w, mode=\"branch\"\n                )\n\n    def test_nodes_centred_error(self):\n        ts = msprime.sim_ancestry(\n            5,\n            ploidy=2,\n            sequence_length=10,\n            random_seed=123,\n        )\n        with pytest.raises(ValueError, match=\"must have centre\"):\n            ts.genetic_relatedness_vector(\n                np.ones(ts.num_samples), mode=\"branch\", centre=True, nodes=[0, 1]\n            )\n\n    def test_bad_nodes(self):\n        n = 5\n        ts = msprime.sim_ancestry(\n            n,\n            ploidy=2,\n            sequence_length=10,\n            random_seed=123,\n        )\n        for bad_nodes in ([[]], \"foo\"):\n            with pytest.raises(ValueError):\n                ts.genetic_relatedness_vector(\n                    np.ones(ts.num_samples),\n                    mode=\"branch\",\n                    centre=False,\n                    nodes=bad_nodes,\n                )\n        for bad_nodes in ([-1, 10], [3, 2 * ts.num_nodes]):\n            with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_NODE_OUT_OF_BOUNDS\"):\n                ts.genetic_relatedness_vector(\n                    np.ones(ts.num_samples),\n                    mode=\"branch\",\n                    centre=False,\n                    nodes=bad_nodes,\n                )\n\n    def test_good_nodes(self):\n        n = 5\n        ts = msprime.sim_ancestry(\n            n,\n            ploidy=2,\n            sequence_length=10,\n            random_seed=123,\n        )\n        V0 = ts.genetic_relatedness_vector(\n            np.ones(ts.num_samples), mode=\"branch\", centre=False\n        )\n        V = ts.genetic_relatedness_vector(\n            np.ones(ts.num_samples),\n            mode=\"branch\",\n            centre=False,\n            
nodes=list(ts.samples()),\n        )\n        np.testing.assert_allclose(V0, V, atol=1e-13)\n        V = ts.genetic_relatedness_vector(\n            np.ones(ts.num_samples),\n            mode=\"branch\",\n            centre=False,\n            nodes=np.fromiter(ts.samples(), dtype=np.int32),\n        )\n        np.testing.assert_allclose(V0, V, atol=1e-13)\n        V = ts.genetic_relatedness_vector(\n            np.ones(ts.num_samples),\n            mode=\"branch\",\n            centre=False,\n            nodes=np.fromiter(ts.samples(), dtype=np.int64),\n        )\n        np.testing.assert_allclose(V0, V, atol=1e-13)\n        V = ts.genetic_relatedness_vector(\n            np.ones(ts.num_samples),\n            mode=\"branch\",\n            centre=False,\n            nodes=list(ts.samples())[:2],\n        )\n        np.testing.assert_allclose(V0[:2], V, atol=1e-13)\n\n    @pytest.mark.parametrize(\"n\", [2, 3, 5])\n    @pytest.mark.parametrize(\"seed\", range(1, 4))\n    @pytest.mark.parametrize(\"centre\", (True, False))\n    @pytest.mark.parametrize(\"span_normalise\", (True, False))\n    @pytest.mark.parametrize(\"num_windows\", (0, 1, 2, 3))\n    def test_small_internal_checks(self, n, seed, centre, span_normalise, num_windows):\n        ts = msprime.sim_ancestry(\n            n,\n            ploidy=1,\n            sequence_length=1000,\n            recombination_rate=0.01,\n            random_seed=seed,\n        )\n        assert ts.num_trees >= 2\n        check_relatedness_vector(\n            ts,\n            num_windows=num_windows,\n            internal_checks=True,\n            centre=centre,\n            span_normalise=span_normalise,\n        )\n\n    @pytest.mark.parametrize(\"n\", [2, 3, 5, 15])\n    @pytest.mark.parametrize(\"seed\", range(1, 5))\n    @pytest.mark.parametrize(\"centre\", (True, False))\n    @pytest.mark.parametrize(\"span_normalise\", (True, False))\n    @pytest.mark.parametrize(\"num_windows\", (0, 1, 2, 3))\n    def 
test_simple_sims(self, n, seed, centre, span_normalise, num_windows):\n        ts = msprime.sim_ancestry(\n            n,\n            ploidy=1,\n            population_size=20,\n            sequence_length=100,\n            recombination_rate=0.01,\n            random_seed=seed,\n        )\n        assert ts.num_trees >= 2\n        check_relatedness_vector(\n            ts,\n            num_windows=num_windows,\n            centre=centre,\n            verbosity=0,\n            span_normalise=span_normalise,\n        )\n\n    def test_simple_sims_windows(self):\n        L = 100\n        ts = msprime.sim_ancestry(\n            5,\n            ploidy=1,\n            population_size=20,\n            sequence_length=L,\n            recombination_rate=0.01,\n            random_seed=345,\n        )\n        assert ts.num_trees >= 2\n        W = np.linspace(0, 1, 2 * ts.num_samples).reshape((ts.num_samples, 2))\n        kwargs = {\"centre\": False, \"mode\": \"branch\"}\n        total = ts.genetic_relatedness_vector(W, span_normalise=False, **kwargs)\n        for windows in [[0, L], [0, L / 3, L / 2, L]]:\n            pieces = ts.genetic_relatedness_vector(\n                W, windows=windows, span_normalise=False, **kwargs\n            )\n            np.testing.assert_allclose(total, pieces.sum(axis=0), atol=1e-13)\n            assert len(pieces) == len(windows) - 1\n            for k in range(len(pieces)):\n                piece = ts.genetic_relatedness_vector(\n                    W, windows=windows[k : k + 2], span_normalise=False, **kwargs\n                )\n                assert piece.shape[0] == 1\n                np.testing.assert_allclose(piece[0], pieces[k], atol=1e-13)\n\n    @pytest.mark.parametrize(\"mode\", (\"branch\",))\n    def test_simple_span_normalise(self, mode):\n        ts = msprime.sim_ancestry(\n            4,\n            ploidy=1,\n            population_size=20,\n            sequence_length=100,\n            recombination_rate=0.01,\n         
   random_seed=123,\n        )\n        assert ts.num_trees >= 2\n        x = np.linspace(0, 1, ts.num_samples)\n        w = np.linspace(0, ts.sequence_length, 11)[[0, 3, 4, -1]]\n        a = ts.genetic_relatedness_vector(x, windows=w, mode=mode)  # default is True\n        ax = ts.genetic_relatedness_vector(x, windows=w, mode=mode, span_normalise=True)\n        assert np.all(ax == a)\n        b = ts.genetic_relatedness_vector(x, windows=w, span_normalise=False, mode=mode)\n        assert a.shape == b.shape\n        dw = np.diff(w)\n        for aa, bb, ww in zip(a, b, dw, strict=True):\n            assert np.allclose(aa * ww, bb)\n\n    @pytest.mark.parametrize(\"n\", [2, 3, 5, 15])\n    @pytest.mark.parametrize(\"centre\", (True, False))\n    def test_single_balanced_tree(self, n, centre):\n        ts = tskit.Tree.generate_balanced(n).tree_sequence\n        check_relatedness_vector(ts, internal_checks=True, centre=centre)\n\n    @pytest.mark.parametrize(\"centre\", (True, False))\n    def test_internal_sample(self, centre):\n        tables = tskit.Tree.generate_balanced(4).tree_sequence.dump_tables()\n        flags = tables.nodes.flags\n        flags[3] = 0\n        flags[5] = tskit.NODE_IS_SAMPLE\n        tables.nodes.flags = flags\n        ts = tables.tree_sequence()\n        check_relatedness_vector(ts, centre=centre)\n\n    @pytest.mark.parametrize(\"seed\", range(1, 5))\n    @pytest.mark.parametrize(\"centre\", (True, False))\n    @pytest.mark.parametrize(\"num_windows\", (0, 1, 2, 3))\n    def test_one_internal_sample_sims(self, seed, centre, num_windows):\n        ts = msprime.sim_ancestry(\n            10,\n            ploidy=1,\n            population_size=20,\n            sequence_length=100,\n            recombination_rate=0.01,\n            random_seed=seed,\n        )\n        t = ts.dump_tables()\n        # Add a new sample directly below another sample\n        u = t.nodes.add_row(time=-1, flags=tskit.NODE_IS_SAMPLE)\n        
t.edges.add_row(parent=0, child=u, left=0, right=ts.sequence_length)\n        t.sort()\n        t.build_index()\n        ts = t.tree_sequence()\n        check_relatedness_vector(ts, num_windows=num_windows, centre=centre)\n\n    @pytest.mark.parametrize(\"centre\", (True, False))\n    @pytest.mark.parametrize(\"num_windows\", (0, 1, 2, 3))\n    def test_missing_flanks(self, centre, num_windows):\n        ts = msprime.sim_ancestry(\n            2,\n            ploidy=1,\n            population_size=10,\n            sequence_length=100,\n            recombination_rate=0.001,\n            random_seed=1234,\n        )\n        assert ts.num_trees >= 2\n        ts = ts.keep_intervals([[20, 80]])\n        assert ts.first().interval == (0, 20)\n        check_relatedness_vector(ts, num_windows=num_windows, centre=centre)\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    @pytest.mark.parametrize(\"centre\", (True, False))\n    def test_suite_examples_centre(self, ts, centre):\n        if ts.num_samples > 0:\n            check_relatedness_vector(ts, centre=centre)\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    @pytest.mark.parametrize(\"span_normalise\", (True, False))\n    def test_suite_examples_span_normalise(self, ts, span_normalise):\n        if ts.num_samples > 0:\n            check_relatedness_vector(ts, span_normalise=span_normalise)\n\n    @pytest.mark.parametrize(\"n\", [2, 3, 10])\n    def test_dangling_on_samples(self, n):\n        # Adding non sample branches below the samples does not alter\n        # the overall divergence *between* the samples\n        ts1 = tskit.Tree.generate_balanced(n).tree_sequence\n        D1 = check_relatedness_vector(ts1, do_nodes=False)\n        tables = ts1.dump_tables()\n        for u in ts1.samples():\n            v = tables.nodes.add_row(time=-1)\n            tables.edges.add_row(left=0, right=ts1.sequence_length, parent=u, child=v)\n        tables.sort()\n        
tables.build_index()\n        ts2 = tables.tree_sequence()\n        D2 = check_relatedness_vector(ts2, internal_checks=True, do_nodes=False)\n        np.testing.assert_array_almost_equal(D1, D2)\n\n    @pytest.mark.parametrize(\"n\", [2, 3, 10])\n    @pytest.mark.parametrize(\"centre\", (True, False))\n    def test_dangling_on_all(self, n, centre):\n        # Adding non sample branches below the samples does not alter\n        # the overall divergence *between* the samples\n        ts1 = tskit.Tree.generate_balanced(n).tree_sequence\n        D1 = check_relatedness_vector(ts1, centre=centre, do_nodes=False)\n        tables = ts1.dump_tables()\n        for u in range(ts1.num_nodes):\n            v = tables.nodes.add_row(time=-1)\n            tables.edges.add_row(left=0, right=ts1.sequence_length, parent=u, child=v)\n        tables.sort()\n        tables.build_index()\n        ts2 = tables.tree_sequence()\n        D2 = check_relatedness_vector(\n            ts2, internal_checks=True, centre=centre, do_nodes=False\n        )\n        np.testing.assert_array_almost_equal(D1, D2)\n\n    @pytest.mark.parametrize(\"centre\", (True, False))\n    def test_disconnected_non_sample_topology(self, centre):\n        # Adding non sample branches below the samples does not alter\n        # the overall divergence *between* the samples\n        ts1 = tskit.Tree.generate_balanced(5).tree_sequence\n        D1 = check_relatedness_vector(ts1, centre=centre, do_nodes=False)\n        tables = ts1.dump_tables()\n        # Add an extra bit of disconnected non-sample topology\n        u = tables.nodes.add_row(time=0)\n        v = tables.nodes.add_row(time=1)\n        tables.edges.add_row(left=0, right=ts1.sequence_length, parent=v, child=u)\n        tables.sort()\n        tables.build_index()\n        ts2 = tables.tree_sequence()\n        D2 = check_relatedness_vector(\n            ts2, internal_checks=True, centre=centre, do_nodes=False\n        )\n        
np.testing.assert_array_almost_equal(D1, D2)\n\n\ndef pca(ts, windows, centre, samples=None, individuals=None, time_windows=None):\n    assert samples is None or individuals is None\n    if samples is None:\n        ii = np.arange(ts.num_samples)\n    else:\n        all_samples = ts.samples()\n        ii = np.searchsorted(all_samples, samples)\n    drop_dimension = windows is None\n    if drop_dimension:\n        windows = [0, ts.sequence_length]\n    if time_windows is None:\n        Sigma = relatedness_matrix(\n            ts=ts, windows=windows, centre=False, span_normalise=False\n        )[:, ii, :][:, :, ii]\n    else:\n        assert time_windows[0] < time_windows[1]\n        ts_low, ts_high = (\n            ts.decapitate(time_windows[0]),\n            ts.decapitate(time_windows[1]),\n        )\n        Sigma_low = relatedness_matrix(\n            ts=ts_low, windows=windows, centre=False, span_normalise=False\n        )[:, ii, :][:, :, ii]\n        Sigma_high = relatedness_matrix(\n            ts=ts_high, windows=windows, centre=False, span_normalise=False\n        )[:, ii, :][:, :, ii]\n        Sigma = Sigma_high - Sigma_low\n    if individuals is not None:\n        ni = len(individuals)\n        J = np.zeros((ts.num_samples, ni))\n        for k, i in enumerate(individuals):\n            nn = ts.individual(i).nodes\n            for j in nn:\n                J[j, k] = 1 / len(nn)\n        Sigma = np.matmul(J.T, np.matmul(Sigma, J))\n    if centre:\n        n = Sigma.shape[-1]\n        P = np.eye(n) - 1 / n\n        Sigma = np.matmul(P, np.matmul(Sigma, P))\n    U, S, _ = np.linalg.svd(Sigma, hermitian=True)\n    if drop_dimension:\n        U = U[0]\n        S = S[0]\n    return U, S\n\n\ndef allclose_up_to_sign(x, y, **kwargs):\n    # check if two vectors are the same up to sign\n    x_const = np.isclose(np.std(x), 0)\n    y_const = np.isclose(np.std(y), 0)\n    if x_const or y_const:\n        if np.allclose(x, 0):\n            r = 1.0\n        else:\n        
    r = np.mean(x / y)\n    else:\n        r = np.sign(np.corrcoef(x, y)[0, 1])\n    return np.allclose(x, r * y, **kwargs)\n\n\ndef assert_pcs_equal(U, D, U_full, D_full, rtol=1e-5, atol=1e-8):\n    # check that the PCs in U, D occur in U_full, D_full\n    # accounting for sign and ordering\n    assert len(D) <= len(D_full)\n    assert U.shape[0] == U_full.shape[0]\n    assert U.shape[1] == len(D)\n    for k in range(len(D)):\n        u = U[:, k]\n        d = D[k]\n        (ii,) = np.where(np.isclose(D_full, d, rtol=rtol, atol=atol))\n        assert len(ii) > 0, f\"{k}th singular value {d} not found in {D_full}.\"\n        found_it = False\n        for i in ii:\n            if allclose_up_to_sign(u, U_full[:, i], rtol=rtol, atol=atol):\n                found_it = True\n                break\n        assert found_it, f\"{k}th singular vector {u} not found in {U_full}.\"\n\n\ndef assert_errors_bound(pca_res, D, U, w=None):\n    # Bounds on the error are from equation 1.11 in https://arxiv.org/pdf/0909.4061 -\n    # this gives a bound on reconstruction error (i.e., operator norm between the GRM\n    # and the low-diml approx). 
But since the (L2) operator norm is\n    # |X| = sup_v |Xv|/|v|,\n    # this implies bounds on singular values and vectors also:\n    # If G v = lambda v, and we've got estimated singular vectors U and values diag(L),\n    # then let v = \\sum_i b_i u_i + delta be the projection of v into U,\n    # and we have that\n    #  |lambda v - U L U* v|^2\n    #   = \\sum_i b_i^2 (lambda - L_i)^2 + lambda^2 |delta|^2\n    #   < \\epsilon^2   (where epsilon is the spectral norm bound error_bound)\n    # so\n    #  |delta| < epsilon / lambda\n    # since this is the amount by which the eigenvector v isn't hit by the columns of U.\n    # Then also for each i that if b_i is not small then\n    #  |lambda - L_i| < epsilon\n    # and there must be at least one b_i that is big (since sum_i b_i^2 = 1 - |delta|^2).\n    # More concretely, let m = min_i |lambda - L_i|^2,\n    # so that\n    #  epsilon^2 > \\sum_i (lambda - L_i)^2 b_i^2 + lambda^2 |delta|^2\n    #   >= m * \\sum_i b_i^2 + lambda^2 |delta|^2\n    #   = m * (1-|delta|^2) + lambda^2 |delta|^2.\n    # Hence,\n    # min_i |lambda-L_i|^2 = m < (epsilon^2 - lambda^2  |delta|^2) / (1- |delta|^2).\n    # In summary: roughly, epsilon should be the bound on error in eigenvalues,\n    # and epsilon / sigma[k+1] the L2 bound for eigenvectors\n    # Below, the 'roughly/should be' translates into the factor of 5.\n\n    f = pca_res.factors\n    ev = pca_res.eigenvalues\n    rs = pca_res.range_sketch\n    eps = pca_res.error_bound\n    if w is not None:\n        D, U = D[w], U[w]\n        f, ev, rs, eps = f[w], ev[w], rs[w], eps[w]\n    n = ev.shape[0]\n    Sigma = U @ np.diag(D) @ U.T\n    Q = rs[:, :n]\n    err = np.linalg.svd(Sigma - Q @ Q.T @ Sigma).S[0]\n    assert err <= 5 * eps**2, (\n        \"Reconstruction error should be smaller than the bound squared.\"\n    )\n    assert np.max(np.abs(ev - D[:n])) < 5 * eps, (\n        \"Eigenvalue error should be smaller than error bound.\"\n    )\n    for k in range(n):\n        assert 
np.sum((f[:, k] - U[:, k]) ** 2) < 5 * eps**2 / ev[-1], (\n            \"Factor error should be smaller than the bound squared.\"\n        )\n\n\nclass TestPCA:\n    def verify_error_est(\n        self,\n        ts,\n        num_windows,\n        num_components,\n        centre,\n        samples=None,\n        individuals=None,\n        time_windows=None,\n        **kwargs,\n    ):\n        assert samples is None or individuals is None\n        if num_windows == 0:\n            windows = None\n        elif num_windows % 2 == 0:\n            windows = np.linspace(\n                0.2 * ts.sequence_length, 0.8 * ts.sequence_length, num_windows + 1\n            )\n        else:\n            windows = np.linspace(0, ts.sequence_length, num_windows + 1)\n        if samples is not None:\n            num_rows = len(samples)\n        elif individuals is not None:\n            num_rows = len(individuals)\n        else:\n            num_rows = ts.num_samples\n        num_oversamples = kwargs.get(\n            \"num_oversamples\", min(num_rows - num_components, 10)\n        )\n        pca_res = ts.pca(\n            windows=windows,\n            samples=samples,\n            individuals=individuals,\n            num_components=num_components,\n            centre=centre,\n            time_windows=time_windows,\n            random_seed=1238,\n            **kwargs,\n        )\n        if windows is None:\n            assert pca_res.factors.shape == (num_rows, num_components)\n            assert pca_res.eigenvalues.shape == (num_components,)\n            assert pca_res.range_sketch.shape == (\n                num_rows,\n                num_components + num_oversamples,\n            )\n            assert pca_res.error_bound.shape == ()\n        else:\n            assert pca_res.factors.shape == (num_windows, num_rows, num_components)\n            assert pca_res.eigenvalues.shape == (num_windows, num_components)\n            assert pca_res.range_sketch.shape == (\n                
num_windows,\n                num_rows,\n                num_components + num_oversamples,\n            )\n            assert pca_res.error_bound.shape == (num_windows,)\n        U, D = pca(\n            ts=ts,\n            windows=windows,\n            centre=centre,\n            samples=samples,\n            individuals=individuals,\n            time_windows=time_windows,\n        )\n        if windows is None:\n            assert_errors_bound(pca_res, D, U)\n        else:\n            for w in range(num_windows):\n                assert_errors_bound(pca_res, D, U, w=w)\n\n    def verify_pca(\n        self,\n        ts,\n        num_windows,\n        num_components,\n        centre,\n        samples=None,\n        individuals=None,\n        time_windows=None,\n        **kwargs,\n    ):\n        assert samples is None or individuals is None\n        if num_windows == 0:\n            windows = None\n        elif num_windows % 2 == 0:\n            windows = np.linspace(\n                0.2 * ts.sequence_length, 0.8 * ts.sequence_length, num_windows + 1\n            )\n        else:\n            windows = np.linspace(0, ts.sequence_length, num_windows + 1)\n        if samples is not None:\n            num_rows = len(samples)\n        elif individuals is not None:\n            num_rows = len(individuals)\n        else:\n            num_rows = ts.num_samples\n        num_oversamples = kwargs.get(\n            \"num_oversamples\", min(num_rows - num_components, 10)\n        )\n        pca_res = ts.pca(\n            windows=windows,\n            samples=samples,\n            individuals=individuals,\n            num_components=num_components,\n            centre=centre,\n            time_windows=time_windows,\n            random_seed=1238,\n            **kwargs,\n        )\n        if windows is None:\n            assert pca_res.factors.shape == (num_rows, num_components)\n            assert pca_res.eigenvalues.shape == (num_components,)\n            assert 
pca_res.range_sketch.shape == (\n                num_rows,\n                num_components + num_oversamples,\n            )\n            assert pca_res.error_bound.shape == ()\n        else:\n            assert pca_res.factors.shape == (num_windows, num_rows, num_components)\n            assert pca_res.eigenvalues.shape == (num_windows, num_components)\n            assert pca_res.range_sketch.shape == (\n                num_windows,\n                num_rows,\n                num_components + num_oversamples,\n            )\n            assert pca_res.error_bound.shape == (num_windows,)\n        U, D = pca(\n            ts=ts,\n            windows=windows,\n            centre=centre,\n            samples=samples,\n            individuals=individuals,\n            time_windows=time_windows,\n        )\n        if windows is None:\n            np.testing.assert_allclose(\n                pca_res.eigenvalues, D[:num_components], atol=1e-8\n            )\n            assert_pcs_equal(pca_res.factors, pca_res.eigenvalues, U, D)\n        else:\n            for w in range(num_windows):\n                np.testing.assert_allclose(\n                    pca_res.eigenvalues[w], D[w, :num_components], atol=1e-8\n                )\n                assert_pcs_equal(pca_res.factors[w], pca_res.eigenvalues[w], U[w], D[w])\n\n    def test_bad_windows(self):\n        ts = msprime.sim_ancestry(\n            3,\n            sequence_length=10,\n            random_seed=123,\n        )\n        for bad_w in ([], [1]):\n            with pytest.raises(ValueError, match=\"at least one window\"):\n                ts.pca(num_components=2, windows=bad_w)\n        for bad_w in ([1, 0], [-3, 10]):\n            with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_BAD_WINDOWS\"):\n                ts.pca(num_components=2, windows=bad_w)\n\n    def test_bad_params(self):\n        ts = msprime.sim_ancestry(\n            3,\n            sequence_length=10,\n            random_seed=123,\n        
)\n        _ = ts.pca(num_components=3)\n        with pytest.raises(ValueError, match=\"Number of components\"):\n            ts.pca(num_components=ts.num_samples + 1)\n        with pytest.raises(ValueError, match=\"Number of components\"):\n            ts.pca(num_components=4, samples=[0, 1, 2])\n        with pytest.raises(ValueError, match=\"Number of components\"):\n            ts.pca(num_components=4, individuals=[0, 1])\n        with pytest.raises(ValueError, match=\"num_components \\\\+ num_oversamples\"):\n            ts.pca(num_components=2, num_oversamples=ts.num_samples)\n        with pytest.raises(ValueError, match=\"Cannot specify both num_over\"):\n            ts.pca(\n                num_components=2,\n                num_oversamples=2,\n                range_sketch=np.zeros((ts.num_samples, 4)),\n            )\n        with pytest.raises(ValueError, match=\"num_iterations should be\"):\n            ts.pca(num_components=3, num_iterations=-1)\n        with pytest.raises(ValueError, match=\"num_iterations should be\"):\n            ts.pca(num_components=3, num_iterations=0)\n        with pytest.raises(ValueError, match=\"num_iterations should be\"):\n            ts.pca(num_components=3, num_iterations=\"bac\")\n        with pytest.raises(ValueError, match=\"num_iterations should be\"):\n            ts.pca(num_components=3, num_iterations=[])\n\n    def test_bad_range_sketch(self):\n        ts = msprime.sim_ancestry(\n            3,\n            sequence_length=10,\n            random_seed=123,\n        )\n        nc = 2\n        # too few rows\n        Q = np.zeros((ts.num_samples - 1, ts.num_samples))\n        with pytest.raises(ValueError, match=\"Incorrect shape of range\"):\n            ts.pca(num_components=nc, range_sketch=Q)\n        # too many rows\n        Q = np.zeros((ts.num_samples + 1, ts.num_samples))\n        with pytest.raises(ValueError, match=\"Incorrect shape of range\"):\n            ts.pca(num_components=nc, range_sketch=Q)\n       
 # too few columns\n        Q = np.zeros((ts.num_samples, nc - 1))\n        with pytest.raises(ValueError, match=\"must have at least as many\"):\n            ts.pca(num_components=nc, range_sketch=Q)\n        # too many columns\n        Q = np.zeros((ts.num_samples, nc + ts.num_samples))\n        with pytest.raises(ValueError, match=\"must be less than\"):\n            ts.pca(num_components=nc, range_sketch=Q)\n        # not enough dimensions\n        Q = np.zeros((ts.num_samples,))\n        with pytest.raises(ValueError, match=\"Incorrect shape of range\"):\n            ts.pca(num_components=nc, range_sketch=Q)\n        # not enough dimensions, with windows\n        Q = np.zeros((ts.num_samples, nc + 2))\n        with pytest.raises(ValueError, match=\"Incorrect shape of range\"):\n            ts.pca(num_components=nc, windows=[0, 10], range_sketch=Q)\n        # not enough windows\n        Q = np.zeros((ts.num_samples, 1, nc + 2))\n        with pytest.raises(ValueError, match=\"Incorrect shape of range\"):\n            ts.pca(num_components=nc, windows=[0, 5, 10], range_sketch=Q)\n\n    def test_indivs_and_samples(self):\n        ts = msprime.sim_ancestry(\n            3,\n            ploidy=2,\n            sequence_length=10,\n            random_seed=123,\n        )\n        with pytest.raises(ValueError, match=\"Samples and individuals\"):\n            ts.pca(num_components=2, samples=[0, 1, 2, 3], individuals=[0, 1, 2])\n\n    def test_modes(self):\n        ts = msprime.sim_ancestry(\n            3,\n            sequence_length=10,\n            random_seed=123,\n        )\n        for bad_mode in (\"site\", \"node\"):\n            with pytest.raises(\n                tskit.LibraryError, match=\"TSK_ERR_UNSUPPORTED_STAT_MODE\"\n            ):\n                ts.pca(num_components=2, mode=bad_mode)\n\n    @pytest.mark.parametrize(\"n\", [2, 3, 5, 15])\n    @pytest.mark.parametrize(\"centre\", (True, False))\n    @pytest.mark.parametrize(\"num_windows\", (0, 1, 
2, 3))\n    @pytest.mark.parametrize(\"num_components\", (1, 3))\n    def test_simple_sims(self, n, centre, num_windows, num_components):\n        ploidy = 2\n        nc = min(num_components, n * ploidy)\n        ts = msprime.sim_ancestry(\n            n,\n            ploidy=ploidy,\n            population_size=20,\n            sequence_length=100,\n            recombination_rate=0.01,\n            random_seed=12345,\n        )\n        kwargs = {}\n        # with n=15 and the default of 5 iterations, the relative tolerance on\n        # the eigenvectors is only 1e-4; so, up this:\n        if n > 10:\n            kwargs[\"num_iterations\"] = 10\n        self.verify_pca(\n            ts, num_windows=num_windows, num_components=nc, centre=centre, **kwargs\n        )\n\n    def test_range_sketch(self):\n        n = 10\n        ploidy = 2\n        ts = msprime.sim_ancestry(\n            n,\n            ploidy=2,\n            sequence_length=100,\n            random_seed=123,\n        )\n        nc, no = 2, 3\n        # should work as long as columns are linearly independent\n        range_sketch = np.linspace(0, 1, n * ploidy * (nc + no)).reshape(\n            (n * ploidy, nc + no)\n        )\n        pca_res0 = ts.pca(num_components=nc)\n        pca_res1 = ts.pca(\n            num_components=nc, range_sketch=range_sketch, num_iterations=20\n        )\n        assert_pcs_equal(\n            pca_res0.factors,\n            pca_res0.eigenvalues,\n            pca_res1.factors,\n            pca_res1.eigenvalues,\n        )\n        # check we can recycle previously returned sketches\n        pca_res_1 = ts.pca(num_components=nc, range_sketch=None)\n        for _ in range(20):\n            pca_res_1 = ts.pca(num_components=nc, range_sketch=pca_res_1.range_sketch)\n        assert_pcs_equal(\n            pca_res0.factors,\n            pca_res0.eigenvalues,\n            pca_res1.factors,\n            pca_res1.eigenvalues,\n        )\n\n    def test_num_iterations(self):\n       
 n = 10\n        ploidy = 2\n        ts = msprime.sim_ancestry(\n            n,\n            ploidy=2,\n            sequence_length=100,\n            random_seed=123,\n        )\n        nc, no = 2, 3\n        range_sketch = np.linspace(0, 1, n * ploidy * (nc + no)).reshape(\n            (n * ploidy, nc + no)\n        )\n        pca_res0 = ts.pca(num_components=nc, range_sketch=range_sketch, num_iterations=5)\n        pca_res1 = ts.pca(num_components=nc, range_sketch=range_sketch, num_iterations=1)\n        for _ in range(4):\n            pca_res1 = ts.pca(\n                num_components=nc, range_sketch=pca_res1.range_sketch, num_iterations=1\n            )\n        assert_pcs_equal(\n            pca_res0.factors,\n            pca_res0.eigenvalues,\n            pca_res1.factors,\n            pca_res1.eigenvalues,\n        )\n\n    def test_seed(self):\n        ts = msprime.sim_ancestry(\n            4,\n            ploidy=2,\n            sequence_length=100,\n            random_seed=345,\n        )\n        pc1 = ts.pca(num_components=3, random_seed=123)\n        pc2 = ts.pca(num_components=3, random_seed=123)\n        assert np.all(pc1.factors == pc2.factors)\n        assert np.all(pc1.eigenvalues == pc2.eigenvalues)\n        assert np.all(pc1.range_sketch == pc2.range_sketch)\n        assert np.all(pc1.error_bound == pc2.error_bound)\n\n    @pytest.mark.parametrize(\"centre\", (True, False))\n    @pytest.mark.parametrize(\"num_windows\", (0, 2))\n    def test_samples(self, centre, num_windows):\n        ploidy = 2\n        ts = msprime.sim_ancestry(\n            20,\n            ploidy=ploidy,\n            population_size=20,\n            sequence_length=100,\n            recombination_rate=0.01,\n            random_seed=12345,\n        )\n        samples = [3, 0, 2, 5, 6, 15, 12, 17, 7, 9, 11]\n        time_low, time_high = (ts.nodes_time.max() / 4, ts.nodes_time.max() / 2)\n        self.verify_pca(\n            ts,\n            num_windows=num_windows,\n      
      num_components=5,\n            centre=centre,\n            samples=samples,\n            time_windows=[time_low, time_high],\n        )\n\n    @pytest.mark.parametrize(\"centre\", (True, False))\n    @pytest.mark.parametrize(\"num_windows\", (0, 2))\n    def test_err_samples(self, centre, num_windows):\n        ploidy = 2\n        ts = msprime.sim_ancestry(\n            20,\n            ploidy=ploidy,\n            population_size=20,\n            sequence_length=100,\n            recombination_rate=0.01,\n            random_seed=12345,\n        )\n        samples = [3, 0, 2, 5, 6, 15, 12, 17, 7, 9, 11]\n        time_low, time_high = (ts.nodes_time.max() / 4, ts.nodes_time.max() / 2)\n        self.verify_error_est(\n            ts,\n            num_windows=num_windows,\n            num_components=4,\n            centre=centre,\n            samples=samples,\n            time_windows=[time_low, time_high],\n        )\n\n    @pytest.mark.parametrize(\"centre\", (True, False))\n    def test_individuals_matches_samples(self, centre):\n        # ploidy 1 individuals should be the same as samples\n        ploidy = 1\n        ts = msprime.sim_ancestry(\n            20,\n            ploidy=ploidy,\n            population_size=20,\n            sequence_length=100,\n            recombination_rate=0.01,\n            random_seed=12345,\n        )\n        individuals = [3, 0, 2, 5, 6, 15, 12]\n        for i in individuals:\n            assert ts.individual(i).nodes == [\n                i,\n            ]\n        pci = pca(\n            ts, windows=[0, ts.sequence_length], centre=centre, samples=individuals\n        )\n        pcs = pca(\n            ts, windows=[0, ts.sequence_length], centre=centre, individuals=individuals\n        )\n        tspci = ts.pca(\n            num_components=5, centre=centre, samples=individuals, random_seed=456\n        )\n        tspcs = ts.pca(\n            num_components=5, centre=centre, individuals=individuals, random_seed=456\n        
)\n        assert np.all(pci[0] == pcs[0])\n        assert np.all(pci[1] == pcs[1])\n        assert np.all(tspci.factors == tspcs.factors)\n        assert np.all(tspci.eigenvalues == tspcs.eigenvalues)\n        pci = ts.pca(\n            num_components=5,\n            windows=[0, 50, 100],\n            centre=centre,\n            samples=individuals,\n            random_seed=456,\n        )\n        pcs = ts.pca(\n            num_components=5,\n            windows=[0, 50, 100],\n            centre=centre,\n            individuals=individuals,\n            random_seed=456,\n        )\n        assert np.all(pci.factors == pcs.factors)\n        assert np.all(pci.eigenvalues == pcs.eigenvalues)\n\n    @pytest.mark.parametrize(\"centre\", (True, False))\n    @pytest.mark.parametrize(\"num_windows\", (0, 2))\n    @pytest.mark.parametrize(\"ploidy\", (1, 2, 3))\n    def test_individuals(self, centre, num_windows, ploidy):\n        ts = msprime.sim_ancestry(\n            20,\n            ploidy=ploidy,\n            population_size=20,\n            sequence_length=100,\n            recombination_rate=0.01,\n            random_seed=12345,\n        )\n        individuals = [3, 0, 2, 5, 6, 15, 12, 11, 7, 17]\n        time_low, time_high = (ts.nodes_time.max() / 4, ts.nodes_time.max() / 2)\n        self.verify_pca(\n            ts,\n            num_windows=num_windows,\n            num_components=5,\n            centre=centre,\n            individuals=individuals,\n            time_windows=[time_low, time_high],\n        )\n\n    @pytest.mark.parametrize(\"centre\", (True, False))\n    @pytest.mark.parametrize(\"num_windows\", (0, 2))\n    @pytest.mark.parametrize(\"ploidy\", (1, 2, 3))\n    def test_err_individuals(self, centre, num_windows, ploidy):\n        # NOTE: this is a randomized test, so if things change under the\n        # hood it might start to fail for perfectly normal (ie unlucky) reasons.\n        # If so, it's probably better to replace the test with a simpler 
test,\n        # e.g., that error_bound is roughly the right order of magnitude.\n        ts = msprime.sim_ancestry(\n            30,\n            ploidy=ploidy,\n            population_size=30,\n            sequence_length=100,\n            recombination_rate=0.01,\n            random_seed=12346,\n        )\n        individuals = np.arange(30)\n        time_low, time_high = (ts.nodes_time.max() / 4, ts.nodes_time.max() / 2)\n        self.verify_error_est(\n            ts,\n            num_windows=num_windows,\n            num_components=5,\n            centre=centre,\n            individuals=individuals,\n            time_windows=[time_low, time_high],\n        )\n"
  },
  {
    "path": "python/tests/test_stats.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2021 Tskit Developers\n# Copyright (C) 2016 University of Oxford\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTest cases for stats calculations in tskit.\n\"\"\"\n\nimport contextlib\nimport io\n\nimport msprime\nimport numpy as np\nimport pytest\n\nimport _tskit\nimport tests\nimport tests.test_wright_fisher as wf\nimport tests.tsutil as tsutil\nimport tskit\n\n\n@contextlib.contextmanager\ndef suppress_division_by_zero_warning():\n    with np.errstate(invalid=\"ignore\", divide=\"ignore\"):\n        yield\n\n\ndef get_r2_matrix(ts):\n    \"\"\"\n    Simple site-based version assuming biallic sites.\n    \"\"\"\n    A = np.zeros((ts.num_sites, ts.num_sites))\n    G = ts.genotype_matrix()\n    n = ts.num_samples\n    for a in range(ts.num_sites):\n        A[a, a] = 1\n        fA = np.sum(G[a] != 0) / n\n        for b in range(a + 1, ts.num_sites):\n            fB = np.sum(G[b] != 0) / n\n 
           nAB = np.sum(np.logical_and(G[a] != 0, G[b] != 0))\n            fAB = nAB / n\n            D = fAB - fA * fB\n            denom = fA * fB * (1 - fA) * (1 - fB)\n            A[a, b] = D * D\n            with suppress_division_by_zero_warning():\n                A[a, b] /= denom\n            A[b, a] = A[a, b]\n    return A\n\n\ndef _compute_r2(tree, n, f_a, site_b):\n    assert len(site_b.mutations) == 1\n    assert site_b.ancestral_state != site_b.mutations[0].derived_state\n    f_b = tree.num_samples(site_b.mutations[0].node) / n\n    f_ab = tree.num_tracked_samples(site_b.mutations[0].node) / n\n    D2 = (f_ab - f_a * f_b) ** 2\n    denom = f_a * f_b * (1 - f_a) * (1 - f_b)\n    if denom == 0:\n        return np.nan\n    return D2 / denom\n\n\ndef ts_r2(ts, a, b):\n    \"\"\"\n    Returns the r2 value between sites a and b in the specified tree sequence.\n    \"\"\"\n    a, b = (a, b) if a < b else (b, a)\n    site_a = ts.site(a)\n    site_b = ts.site(b)\n    assert len(site_a.mutations) == 1\n    assert len(site_b.mutations) == 1\n    n = ts.num_samples\n    tree = ts.at(site_a.position)\n    a_samples = list(tree.samples(site_a.mutations[0].node))\n    f_a = len(a_samples) / n\n    tree = ts.at(site_b.position, tracked_samples=a_samples)\n    return _compute_r2(tree, n, f_a, site_b)\n\n\nclass LdArrayCalculator:\n    \"\"\"\n    Utility class to help organise the state required when tracking all\n    the different termination conditions.\n    \"\"\"\n\n    def __init__(self, ts, focal_site_id, direction, max_sites, max_distance):\n        self.ts = ts\n        self.focal_site = ts.site(focal_site_id)\n        self.direction = direction\n        self.max_sites = max_sites\n        self.max_distance = max_distance\n        self.result = []\n        self.tree = None\n\n    def _check_site(self, site):\n        assert len(site.mutations) == 1\n        assert site.ancestral_state != site.mutations[0].derived_state\n\n    def _compute_and_append(self, 
target_site):\n        self._check_site(target_site)\n\n        distance = abs(target_site.position - self.focal_site.position)\n        if distance > self.max_distance or len(self.result) >= self.max_sites:\n            return True\n        r2 = _compute_r2(\n            self.tree, self.ts.num_samples, self.focal_frequency, target_site\n        )\n        self.result.append(r2)\n        return False\n\n    def _compute_forward(self):\n        done = False\n        for site in self.tree.sites():\n            if site.id > self.focal_site.id:\n                done = self._compute_and_append(site)\n                if done:\n                    break\n        while self.tree.next() and not done:\n            for site in self.tree.sites():\n                done = self._compute_and_append(site)\n                if done:\n                    break\n\n    def _compute_backward(self):\n        done = False\n        for site in reversed(list(self.tree.sites())):\n            if site.id < self.focal_site.id:\n                done = self._compute_and_append(site)\n                if done:\n                    break\n        while self.tree.prev() and not done:\n            for site in reversed(list(self.tree.sites())):\n                done = self._compute_and_append(site)\n                if done:\n                    break\n\n    def run(self):\n        self._check_site(self.focal_site)\n        self.tree = self.ts.at(self.focal_site.position)\n        a_samples = list(self.tree.samples(self.focal_site.mutations[0].node))\n        self.focal_frequency = len(a_samples) / self.ts.num_samples\n\n        # Now set the tracked samples on the tree. 
We don't have a python\n        # API for doing this, so we just create a new tree.\n        self.tree = self.ts.at(self.focal_site.position, tracked_samples=a_samples)\n        if self.direction == 1:\n            self._compute_forward()\n        else:\n            self._compute_backward()\n        return np.array(self.result)\n\n\ndef ts_r2_array(ts, a, *, direction=1, max_sites=None, max_distance=None):\n    max_sites = ts.num_sites if max_sites is None else max_sites\n    max_distance = np.inf if max_distance is None else max_distance\n    calc = LdArrayCalculator(ts, a, direction, max_sites, max_distance)\n    return calc.run()\n\n\nclass TestLdSingleTree:\n    # 2.00┊   4   ┊\n    #     ┊ ┏━┻┓  ┊\n    # 1.00┊ ┃  3  ┊\n    #     ┊ ┃ ┏┻┓ ┊\n    # 0.00┊ 0 1 2 ┊\n    #     0      10\n    #      | |  |\n    #  pos 2 4  9\n    # node 0 1  0\n    @tests.cached_example\n    def ts(self):\n        ts = tskit.Tree.generate_balanced(3, span=10).tree_sequence\n        tables = ts.dump_tables()\n        tables.sites.add_row(2, ancestral_state=\"A\")\n        tables.sites.add_row(4, ancestral_state=\"A\")\n        tables.sites.add_row(9, ancestral_state=\"T\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"G\")\n        tables.mutations.add_row(site=1, node=3, derived_state=\"C\")\n        tables.mutations.add_row(site=2, node=0, derived_state=\"G\")\n        return tables.tree_sequence()\n\n    @pytest.mark.parametrize([\"a\", \"b\", \"expected\"], [(0, 0, 1), (0, 1, 1), (0, 2, 1)])\n    def test_r2(self, a, b, expected):\n        ts = self.ts()\n        A = get_r2_matrix(ts)\n        ldc = tskit.LdCalculator(ts)\n        assert ldc.r2(a, b) == pytest.approx(expected)\n        assert ts_r2(ts, a, b) == pytest.approx(expected)\n        assert A[a, b] == pytest.approx(expected)\n        assert ldc.r2(b, a) == pytest.approx(expected)\n        assert ts_r2(ts, b, a) == pytest.approx(expected)\n        assert A[b, a] == pytest.approx(expected)\n\n    
@pytest.mark.parametrize(\"a\", [0, 1, 2])\n    @pytest.mark.parametrize(\"direction\", [1, -1])\n    def test_r2_array(self, a, direction):\n        ts = self.ts()\n        ldc = tskit.LdCalculator(ts)\n        lib_a = ldc.r2_array(a, direction=direction)\n        py_a = ts_r2_array(ts, a, direction=direction)\n        np.testing.assert_array_almost_equal(lib_a, py_a)\n\n\nclass TestLdFixedSites:\n    # 2.00┊   4   ┊\n    #     ┊ ┏━┻┓  ┊\n    # 1.00┊ ┃  3  ┊\n    #     ┊ ┃ ┏┻┓ ┊\n    # 0.00┊ 0 1 2 ┊\n    #     0      10\n    #      | |  |\n    #  pos 2 4  9\n    # node 0 1  0\n    @tests.cached_example\n    def ts(self):\n        ts = tskit.Tree.generate_balanced(3, span=10).tree_sequence\n        tables = ts.dump_tables()\n        # First and last mutations are over the root\n        tables.sites.add_row(2, ancestral_state=\"A\")\n        tables.sites.add_row(4, ancestral_state=\"A\")\n        tables.sites.add_row(9, ancestral_state=\"T\")\n        tables.mutations.add_row(site=0, node=4, derived_state=\"G\")\n        tables.mutations.add_row(site=1, node=3, derived_state=\"C\")\n        tables.mutations.add_row(site=2, node=4, derived_state=\"G\")\n        return tables.tree_sequence()\n\n    def test_r2_fixed_fixed(self):\n        ts = self.ts()\n        A = get_r2_matrix(ts)\n        ldc = tskit.LdCalculator(ts)\n        assert np.isnan(ldc.r2(0, 2))\n        assert np.isnan(ts_r2(ts, 0, 2))\n        assert np.isnan(A[0, 2])\n\n    def test_r2_fixed_non_fixed(self):\n        ts = self.ts()\n        A = get_r2_matrix(ts)\n        ldc = tskit.LdCalculator(ts)\n        assert np.isnan(ldc.r2(0, 1))\n        assert np.isnan(ts_r2(ts, 0, 1))\n        assert np.isnan(A[0, 1])\n\n    def test_r2_non_fixed_fixed(self):\n        ts = self.ts()\n        A = get_r2_matrix(ts)\n        ldc = tskit.LdCalculator(ts)\n        assert np.isnan(ldc.r2(1, 0))\n        assert np.isnan(ts_r2(ts, 1, 0))\n        assert np.isnan(A[1, 0])\n\n\nclass BaseTestLd:\n    \"\"\"\n    
Define a set of tests for LD calculations. Subclasses should be\n    concrete examples with at least two sites which implement a\n    method ts() which returns the tree sequence and the full LD\n    matrix.\n    \"\"\"\n\n    def test_r2_all_pairs(self):\n        ts, A = self.ts()\n        ldc = tskit.LdCalculator(ts)\n        for j in range(ts.num_sites):\n            for k in range(ts.num_sites):\n                r2 = A[j, k]\n                assert ldc.r2(j, k) == pytest.approx(r2)\n                assert ts_r2(ts, j, k) == pytest.approx(r2)\n\n    def test_r2_array_first_site_forward(self):\n        ts, A = self.ts()\n        ldc = tskit.LdCalculator(ts)\n        A1 = ldc.r2_array(0, direction=1)\n        A2 = ts_r2_array(ts, 0, direction=1)\n        np.testing.assert_array_almost_equal(A2, A[0, 1:])\n        np.testing.assert_array_almost_equal(A1, A2)\n\n    def test_r2_array_mid_forward(self):\n        ts, A = self.ts()\n        ldc = tskit.LdCalculator(ts)\n        site = ts.num_sites // 2\n        A1 = ldc.r2_array(site, direction=1)\n        A2 = ts_r2_array(ts, site, direction=1)\n        np.testing.assert_array_almost_equal(A2, A[site, site + 1 :])\n        np.testing.assert_array_almost_equal(A1, A2)\n\n    def test_r2_array_first_site_forward_max_sites(self):\n        ts, A = self.ts()\n        ldc = tskit.LdCalculator(ts)\n        A1 = ldc.r2_array(0, direction=1, max_sites=2)\n        A2 = ts_r2_array(ts, 0, direction=1, max_sites=2)\n        np.testing.assert_array_almost_equal(A2, A[0, 1:3])\n        np.testing.assert_array_almost_equal(A1, A2)\n\n    def test_r2_array_first_site_forward_max_distance(self):\n        ts, _ = self.ts()\n        ldc = tskit.LdCalculator(ts)\n        A1 = ldc.r2_array(0, direction=1, max_distance=3)\n        A2 = ts_r2_array(ts, 0, direction=1, max_distance=3)\n        np.testing.assert_array_almost_equal(A1, A2)\n\n    def test_r2_array_last_site_backward(self):\n        ts, A = self.ts()\n        ldc = 
tskit.LdCalculator(ts)\n        a = ts.num_sites - 1\n        A1 = ldc.r2_array(a, direction=-1)\n        A2 = ts_r2_array(ts, a, direction=-1)\n        np.testing.assert_array_almost_equal(A2, A[-1, :-1][::-1])\n        np.testing.assert_array_almost_equal(A1, A2)\n\n    def test_r2_array_mid_backward(self):\n        ts, A = self.ts()\n        ldc = tskit.LdCalculator(ts)\n        site = ts.num_sites // 2\n        A1 = ldc.r2_array(site, direction=-1)\n        A2 = ts_r2_array(ts, site, direction=-1)\n        np.testing.assert_array_almost_equal(A2, A[site, :site][::-1])\n        np.testing.assert_array_almost_equal(A1, A2)\n\n    def test_r2_array_last_site_backward_max_sites(self):\n        ts, A = self.ts()\n        ldc = tskit.LdCalculator(ts)\n        a = ts.num_sites - 1\n        A1 = ldc.r2_array(a, direction=-1, max_sites=2)\n        A2 = ts_r2_array(ts, a, direction=-1, max_sites=2)\n        np.testing.assert_array_almost_equal(A2, A[-1, -3:-1][::-1])\n        np.testing.assert_array_almost_equal(A1, A2)\n\n    def test_r2_array_last_site_backward_max_distance(self):\n        ts, _ = self.ts()\n        ldc = tskit.LdCalculator(ts)\n        a = ts.num_sites - 1\n        A1 = ldc.r2_array(a, direction=-1, max_distance=3)\n        A2 = ts_r2_array(ts, a, direction=-1, max_distance=3)\n        np.testing.assert_array_almost_equal(A1, A2)\n\n    @pytest.mark.parametrize(\"max_sites\", [0, 1, 2])\n    def test_r2_array_forward_max_sites_zero(self, max_sites):\n        ts, A = self.ts()\n        ldc = tskit.LdCalculator(ts)\n        site = ts.num_sites // 2\n        A1 = ldc.r2_array(site, direction=1, max_sites=max_sites)\n        assert A1.shape[0] == max_sites\n        A2 = ts_r2_array(ts, site, direction=1, max_sites=max_sites)\n        assert A2.shape[0] == max_sites\n\n    @pytest.mark.parametrize(\"max_sites\", [0, 1, 2])\n    def test_r2_array_backward_max_sites_zero(self, max_sites):\n        ts, A = self.ts()\n        ldc = tskit.LdCalculator(ts)\n     
   site = ts.num_sites // 2\n        A1 = ldc.r2_array(site, direction=-1, max_sites=max_sites)\n        assert A1.shape[0] == max_sites\n        A2 = ts_r2_array(ts, site, direction=-1, max_sites=max_sites)\n        assert A2.shape[0] == max_sites\n\n\nclass TestLdOneSitePerTree(BaseTestLd):\n    @tests.cached_example\n    def ts(self):\n        ts = msprime.sim_ancestry(\n            5, sequence_length=10, recombination_rate=0.1, random_seed=1234\n        )\n        assert ts.num_trees > 3\n\n        tables = ts.dump_tables()\n        for tree in ts.trees():\n            site = tables.sites.add_row(tree.interval[0], ancestral_state=\"A\")\n            # Put the mutation somewhere deep in the tree\n            node = tree.preorder()[2]\n            tables.mutations.add_row(site=site, node=node, derived_state=\"B\")\n        ts = tables.tree_sequence()\n        # Return the full r2 matrix also\n        return ts, get_r2_matrix(ts)\n\n\nclass TestLdAllSitesOneTree(BaseTestLd):\n    @tests.cached_example\n    def ts(self):\n        ts = msprime.sim_ancestry(\n            5, sequence_length=10, recombination_rate=0.1, random_seed=1234\n        )\n        assert ts.num_trees > 3\n\n        tables = ts.dump_tables()\n        tree = ts.at(5)\n        pos = np.linspace(tree.interval[0], tree.interval[1], num=10, endpoint=False)\n        for x, node in zip(pos, tree.preorder()[1:]):\n            site = tables.sites.add_row(x, ancestral_state=\"A\")\n            tables.mutations.add_row(site=site, node=node, derived_state=\"B\")\n        ts = tables.tree_sequence()\n        return ts, get_r2_matrix(ts)\n\n\nclass TestLdSitesEveryOtherTree(BaseTestLd):\n    @tests.cached_example\n    def ts(self):\n        ts = msprime.sim_ancestry(\n            5, sequence_length=20, recombination_rate=0.1, random_seed=1234\n        )\n        assert ts.num_trees > 5\n\n        tables = ts.dump_tables()\n        for tree in ts.trees():\n            if tree.index % 2 == 0:\n                
pos = np.linspace(*tree.interval, num=2, endpoint=False)\n                for x, node in zip(pos, tree.preorder()[1:]):\n                    site = tables.sites.add_row(x, ancestral_state=\"A\")\n                    tables.mutations.add_row(site=site, node=node, derived_state=\"B\")\n        ts = tables.tree_sequence()\n        return ts, get_r2_matrix(ts)\n\n\nclass TestLdErrors:\n    def test_multi_mutations(self):\n        tables = tskit.TableCollection(2)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        tables.sites.add_row(position=0, ancestral_state=\"A\")\n        tables.sites.add_row(position=1, ancestral_state=\"A\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"C\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"T\", parent=0)\n        tables.mutations.add_row(site=1, node=0, derived_state=\"C\")\n        ts = tables.tree_sequence()\n        ldc = tskit.LdCalculator(ts)\n        with pytest.raises(tskit.LibraryError, match=\"Only infinite sites mutations\"):\n            ldc.r2(0, 1)\n        with pytest.raises(tskit.LibraryError, match=\"Only infinite sites mutations\"):\n            ldc.r2(1, 0)\n\n    @pytest.mark.parametrize(\"state\", [\"\", \"A\", \"AAAA\", \"💩\"])\n    def test_silent_mutations(self, state):\n        tables = tskit.TableCollection(2)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        tables.sites.add_row(position=0, ancestral_state=state)\n        tables.sites.add_row(position=1, ancestral_state=\"A\")\n        tables.mutations.add_row(site=0, node=0, derived_state=state)\n        tables.mutations.add_row(site=1, node=0, derived_state=\"C\")\n        ts = tables.tree_sequence()\n        ldc = tskit.LdCalculator(ts)\n        with pytest.raises(tskit.LibraryError, match=\"Silent mutations not supported\"):\n            ldc.r2(0, 1)\n        with pytest.raises(tskit.LibraryError, match=\"Silent mutations not supported\"):\n            ldc.r2(1, 
0)\n\n\nclass TestLdCalculator:\n    \"\"\"\n    Tests for the LdCalculator class.\n    \"\"\"\n\n    num_test_sites = 50\n\n    def verify_matrix(self, ts):\n        m = ts.get_num_sites()\n        ldc = tskit.LdCalculator(ts)\n        A = ldc.get_r2_matrix()\n        assert A.shape == (m, m)\n        B = get_r2_matrix(ts)\n        assert np.allclose(A, B)\n\n        # Now look at each row in turn, and verify it's the same\n        # when we use get_r2 directly.\n        for j in range(m):\n            a = ldc.get_r2_array(j, direction=tskit.FORWARD)\n            b = A[j, j + 1 :]\n            assert a.shape[0] == m - j - 1\n            assert b.shape[0] == m - j - 1\n            assert np.allclose(a, b)\n            a = ldc.get_r2_array(j, direction=tskit.REVERSE)\n            b = A[j, :j]\n            assert a.shape[0] == j\n            assert b.shape[0] == j\n            assert np.allclose(a[::-1], b)\n\n        # Now check every cell in the matrix in turn.\n        for j in range(m):\n            for k in range(m):\n                assert ldc.get_r2(j, k) == pytest.approx(A[j, k])\n\n    def verify_max_distance(self, ts):\n        \"\"\"\n        Verifies that the max_distance parameter works as expected.\n        \"\"\"\n        mutations = list(ts.mutations())\n        ldc = tskit.LdCalculator(ts)\n        A = ldc.get_r2_matrix()\n        j = len(mutations) // 2\n        for k in range(j):\n            x = (\n                ts.site(mutations[j + k].site).position\n                - ts.site(mutations[j].site).position\n            )\n            a = ldc.get_r2_array(j, max_distance=x)\n            assert a.shape[0] == k\n            assert np.allclose(A[j, j + 1 : j + 1 + k], a)\n            x = (\n                ts.site(mutations[j].site).position\n                - ts.site(mutations[j - k].site).position\n            )\n            a = ldc.get_r2_array(j, max_distance=x, direction=tskit.REVERSE)\n            assert a.shape[0] == k\n            assert 
np.allclose(A[j, j - k : j], a[::-1])\n        L = ts.get_sequence_length()\n        m = len(mutations)\n        a = ldc.get_r2_array(0, max_distance=L)\n        assert a.shape[0] == m - 1\n        assert np.allclose(A[0, 1:], a)\n        a = ldc.get_r2_array(m - 1, max_distance=L, direction=tskit.REVERSE)\n        assert a.shape[0] == m - 1\n        assert np.allclose(A[m - 1, :-1], a[::-1])\n\n    def verify_max_mutations(self, ts):\n        \"\"\"\n        Verifies that the max mutations parameter works as expected.\n        \"\"\"\n        mutations = list(ts.mutations())\n        ldc = tskit.LdCalculator(ts)\n        A = ldc.get_r2_matrix()\n        j = len(mutations) // 2\n        for k in range(j):\n            a = ldc.get_r2_array(j, max_mutations=k)\n            assert a.shape[0] == k\n            assert np.allclose(A[j, j + 1 : j + 1 + k], a)\n            a = ldc.get_r2_array(j, max_mutations=k, direction=tskit.REVERSE)\n            assert a.shape[0] == k\n            assert np.allclose(A[j, j - k : j], a[::-1])\n\n    def test_single_tree_simulated_mutations(self):\n        ts = msprime.simulate(20, mutation_rate=10, random_seed=15)\n        ts = tsutil.subsample_sites(ts, self.num_test_sites)\n        self.verify_matrix(ts)\n        self.verify_max_distance(ts)\n\n    def test_deprecated_get_aliases(self):\n        ts = msprime.simulate(20, mutation_rate=10, random_seed=15)\n        ts = tsutil.subsample_sites(ts, self.num_test_sites)\n        ldc = tskit.LdCalculator(ts)\n        A = ldc.get_r2_matrix()\n        B = ldc.r2_matrix()\n        assert np.array_equal(A, B)\n        a = ldc.get_r2_array(0)\n        b = ldc.r2_array(0)\n        assert np.array_equal(a, b)\n        assert ldc.get_r2(0, 1) == ldc.r2(0, 1)\n\n    def test_deprecated_max_mutations_alias(self):\n        ts = msprime.simulate(2, mutation_rate=0.1, random_seed=15)\n        ldc = tskit.LdCalculator(ts)\n        with pytest.raises(ValueError, match=\"deprecated synonym\"):\n           
 ldc.r2_array(0, max_sites=1, max_mutations=1)\n\n    def test_single_tree_regular_mutations(self):\n        ts = msprime.simulate(self.num_test_sites, length=self.num_test_sites)\n        ts = tsutil.insert_branch_mutations(ts)\n        # We don't support back mutations, so this should fail.\n        with pytest.raises(_tskit.LibraryError):\n            self.verify_matrix(ts)\n        with pytest.raises(_tskit.LibraryError):\n            self.verify_max_distance(ts)\n\n    def test_tree_sequence_regular_mutations(self):\n        ts = msprime.simulate(\n            self.num_test_sites, recombination_rate=1, length=self.num_test_sites\n        )\n        assert ts.get_num_trees() > 10\n        t = ts.dump_tables()\n        t.sites.reset()\n        t.mutations.reset()\n        for j in range(self.num_test_sites):\n            site_id = len(t.sites)\n            t.sites.add_row(position=j, ancestral_state=\"0\")\n            t.mutations.add_row(site=site_id, derived_state=\"1\", node=j)\n        ts = t.tree_sequence()\n        self.verify_matrix(ts)\n        self.verify_max_distance(ts)\n\n    def test_tree_sequence_simulated_mutations(self):\n        ts = msprime.simulate(20, mutation_rate=10, recombination_rate=10)\n        assert ts.get_num_trees() > 10\n        ts = tsutil.subsample_sites(ts, self.num_test_sites)\n        self.verify_matrix(ts)\n        self.verify_max_distance(ts)\n        self.verify_max_mutations(ts)\n\n\ndef set_partitions(collection):\n    \"\"\"\n    Returns an iterator over all partitions of the specified set.\n\n    From https://stackoverflow.com/questions/19368375/set-partitions-in-python\n    \"\"\"\n    if len(collection) == 1:\n        yield [collection]\n    else:\n        first = collection[0]\n        for smaller in set_partitions(collection[1:]):\n            for n, subset in enumerate(smaller):\n                yield smaller[:n] + [[first] + subset] + smaller[n + 1 :]\n            yield [[first]] + smaller\n\n\ndef 
naive_mean_descendants(ts, reference_sets):\n    \"\"\"\n    Straightforward implementation of mean sample ancestry by iterating\n    over the trees and nodes in each tree.\n    \"\"\"\n    # TODO generalise this to allow arbitrary nodes, not just samples.\n    C = np.zeros((ts.num_nodes, len(reference_sets)))\n    T = np.zeros(ts.num_nodes)\n    tree_iters = [ts.trees(tracked_samples=sample_set) for sample_set in reference_sets]\n    for _ in range(ts.num_trees):\n        trees = [next(tree_iter) for tree_iter in tree_iters]\n        span = trees[0].span\n        for node in trees[0].nodes():\n            num_samples = trees[0].num_samples(node)\n            if num_samples > 0:\n                for j, tree in enumerate(trees):\n                    C[node, j] += span * tree.num_tracked_samples(node)\n                T[node] += span\n    for node in range(ts.num_nodes):\n        if T[node] > 0:\n            C[node] /= T[node]\n    return C\n\n\nclass TestMeanDescendants:\n    \"\"\"\n    Tests the TreeSequence.mean_descendants method.\n    \"\"\"\n\n    def verify(self, ts, reference_sets):\n        C1 = naive_mean_descendants(ts, reference_sets)\n        C2 = tsutil.mean_descendants(ts, reference_sets)\n        C3 = ts.mean_descendants(reference_sets)\n        assert C1.shape == C2.shape\n        assert np.allclose(C1, C2)\n        assert np.allclose(C1, C3)\n        return C1\n\n    def test_two_populations_high_migration(self):\n        ts = msprime.simulate(\n            population_configurations=[\n                msprime.PopulationConfiguration(8),\n                msprime.PopulationConfiguration(8),\n            ],\n            migration_matrix=[[0, 1], [1, 0]],\n            recombination_rate=3,\n            random_seed=5,\n        )\n        assert ts.num_trees > 1\n        self.verify(ts, [ts.samples(0), ts.samples(1)])\n\n    def test_single_tree(self):\n        ts = msprime.simulate(6, random_seed=1)\n        S = [range(3), range(3, 6)]\n        C = 
self.verify(ts, S)\n        for j, samples in enumerate(S):\n            tree = next(ts.trees(tracked_samples=samples))\n            for u in tree.nodes():\n                assert tree.num_tracked_samples(u) == C[u, j]\n\n    def test_single_tree_partial_samples(self):\n        ts = msprime.simulate(6, random_seed=1)\n        S = [range(3), range(3, 4)]\n        C = self.verify(ts, S)\n        for j, samples in enumerate(S):\n            tree = next(ts.trees(tracked_samples=samples))\n            for u in tree.nodes():\n                assert tree.num_tracked_samples(u) == C[u, j]\n\n    def test_single_tree_all_sample_sets(self):\n        ts = msprime.simulate(6, random_seed=1)\n        for S in set_partitions(list(range(ts.num_samples))):\n            C = self.verify(ts, S)\n            for j, samples in enumerate(S):\n                tree = next(ts.trees(tracked_samples=samples))\n                for u in tree.nodes():\n                    assert tree.num_tracked_samples(u) == C[u, j]\n\n    def test_many_trees_all_sample_sets(self):\n        ts = msprime.simulate(6, recombination_rate=2, random_seed=1)\n        assert ts.num_trees > 2\n        for S in set_partitions(list(range(ts.num_samples))):\n            self.verify(ts, S)\n\n    def test_wright_fisher_unsimplified_all_sample_sets(self):\n        tables = wf.wf_sim(\n            4,\n            5,\n            seed=1,\n            deep_history=False,\n            initial_generation_samples=False,\n            num_loci=10,\n        )\n        tables.sort()\n        ts = tables.tree_sequence()\n        for S in set_partitions(list(ts.samples())):\n            self.verify(ts, S)\n\n    def test_wright_fisher_unsimplified(self):\n        tables = wf.wf_sim(\n            20,\n            15,\n            seed=1,\n            deep_history=False,\n            initial_generation_samples=False,\n            num_loci=20,\n        )\n        tables.sort()\n        ts = tables.tree_sequence()\n        samples = 
ts.samples()\n        self.verify(ts, [samples[:10], samples[10:]])\n\n    def test_wright_fisher_simplified(self):\n        tables = wf.wf_sim(\n            30,\n            10,\n            seed=1,\n            deep_history=False,\n            initial_generation_samples=False,\n            num_loci=5,\n        )\n        tables.sort()\n        ts = tables.tree_sequence()\n        samples = ts.samples()\n        self.verify(ts, [samples[:10], samples[10:]])\n\n\ndef naive_genealogical_nearest_neighbours(ts, focal, reference_sets):\n    # Make sure everything is a sample so we can use the tracked_samples option.\n    # This is a limitation of the current API.\n    tables = ts.dump_tables()\n    tables.nodes.set_columns(\n        flags=np.ones_like(tables.nodes.flags), time=tables.nodes.time\n    )\n    ts = tables.tree_sequence()\n\n    A = np.zeros((len(focal), len(reference_sets)))\n    L = np.zeros(len(focal))\n    reference_set_map = np.zeros(ts.num_nodes, dtype=int) - 1\n    for k, ref_set in enumerate(reference_sets):\n        for u in ref_set:\n            reference_set_map[u] = k\n    tree_iters = [\n        ts.trees(tracked_samples=reference_nodes) for reference_nodes in reference_sets\n    ]\n    for _ in range(ts.num_trees):\n        trees = list(map(next, tree_iters))\n        length = trees[0].interval.right - trees[0].interval.left\n        for j, u in enumerate(focal):\n            focal_node_set = reference_set_map[u]\n            # delta(u) = 1 if u exists in any of the reference sets; 0 otherwise\n            delta = int(focal_node_set != -1)\n            v = u\n            while v != tskit.NULL:\n                total = sum(tree.num_tracked_samples(v) for tree in trees)\n                if total > delta:\n                    break\n                v = trees[0].parent(v)\n            if v != tskit.NULL:\n                for k, tree in enumerate(trees):\n                    # If the focal node is in the current set, we subtract its\n                
    # contribution from the numerator\n                    n = tree.num_tracked_samples(v) - (k == focal_node_set)\n                    # If the focal node is in *any* reference set, we subtract its\n                    # contribution from the denominator.\n                    A[j, k] += length * n / (total - delta)\n                L[j] += length\n    # Normalise by the accumulated value for each focal node.\n    index = L > 0\n    L = L[index]\n    L = L.reshape((L.shape[0], 1))\n    A[index, :] /= L\n    return A\n\n\ndef parse_time_windows(ts, time_windows):\n    if time_windows is None:\n        time_windows = [0.0, ts.max_root_time]\n    return np.array(time_windows)\n\n\ndef windowed_genealogical_nearest_neighbours(\n    ts,\n    focal,\n    reference_sets,\n    windows=None,\n    time_windows=None,\n    span_normalise=True,\n    time_normalise=True,\n):\n    \"\"\"\n    genealogical_nearest_neighbours with support for span- and time-based windows\n    \"\"\"\n    reference_set_map = np.full(ts.num_nodes, tskit.NULL, dtype=int)\n    for k, reference_set in enumerate(reference_sets):\n        for u in reference_set:\n            if reference_set_map[u] != tskit.NULL:\n                raise ValueError(\"Duplicate value in reference sets\")\n            reference_set_map[u] = k\n\n    windows_used = windows is not None\n    time_windows_used = time_windows is not None\n    windows = ts.parse_windows(windows)\n    num_windows = windows.shape[0] - 1\n    time_windows = parse_time_windows(ts, time_windows)\n    num_time_windows = time_windows.shape[0] - 1\n    A = np.zeros((num_windows, num_time_windows, len(focal), len(reference_sets)))\n    K = len(reference_sets)\n    parent = np.full(ts.num_nodes, tskit.NULL, dtype=int)\n    sample_count = np.zeros((ts.num_nodes, K), dtype=int)\n    time = ts.tables.nodes.time\n    norm = np.zeros((num_windows, num_time_windows, len(focal)))\n\n    # Set the initial conditions.\n    for j in range(K):\n        
sample_count[reference_sets[j], j] = 1\n\n    window_index = 0\n    for (t_left, t_right), edges_out, edges_in in ts.edge_diffs():\n        for edge in edges_out:\n            parent[edge.child] = tskit.NULL\n            v = edge.parent\n            while v != tskit.NULL:\n                sample_count[v] -= sample_count[edge.child]\n                v = parent[v]\n        for edge in edges_in:\n            parent[edge.child] = edge.parent\n            v = edge.parent\n            while v != tskit.NULL:\n                sample_count[v] += sample_count[edge.child]\n                v = parent[v]\n\n        # Update the windows\n        assert window_index < num_windows\n        while windows[window_index] < t_right and window_index + 1 <= num_windows:\n            w_left = windows[window_index]\n            w_right = windows[window_index + 1]\n            left = max(t_left, w_left)\n            right = min(t_right, w_right)\n            span = right - left\n            # Process this tree.\n            for j, u in enumerate(focal):\n                focal_reference_set = reference_set_map[u]\n                delta = int(focal_reference_set != tskit.NULL)\n                p = u\n                while p != tskit.NULL:\n                    total = np.sum(sample_count[p])\n                    if total > delta:\n                        break\n                    p = parent[p]\n                if p != tskit.NULL:\n                    scale = span / (total - delta)\n                    time_index = np.searchsorted(time_windows, time[p]) - 1\n                    if 0 <= time_index < num_time_windows:\n                        for k in range(len(reference_sets)):\n                            n = sample_count[p, k] - int(focal_reference_set == k)\n                            A[window_index, time_index, j, k] += n * scale\n                        norm[window_index, time_index, j] += span\n            assert span > 0\n            if w_right <= t_right:\n                window_index 
+= 1\n            else:\n                # This interval crosses a tree boundary, so we update it again\n                # in the next tree\n                break\n\n    # Reshape norm depending on normalization selected\n    # Return NaN when normalisation value is 0\n    if span_normalise and time_normalise:\n        reshaped_norm = norm.reshape((num_windows, num_time_windows, len(focal), 1))\n    elif span_normalise and not time_normalise:\n        norm = np.sum(norm, axis=1)\n        reshaped_norm = norm.reshape((num_windows, 1, len(focal), 1))\n    elif time_normalise and not span_normalise:\n        norm = np.sum(norm, axis=0)\n        reshaped_norm = norm.reshape((1, num_time_windows, len(focal), 1))\n\n    with np.errstate(invalid=\"ignore\", divide=\"ignore\"):\n        A /= reshaped_norm\n    A[np.all(A == 0, axis=3)] = np.nan\n\n    # Remove dimension for windows and/or time_windows if parameter is None\n    if not windows_used and time_windows_used:\n        A = A.reshape((num_time_windows, len(focal), len(reference_sets)))\n    elif not time_windows_used and windows_used:\n        A = A.reshape((num_windows, len(focal), len(reference_sets)))\n    elif not windows_used and not time_windows_used:\n        A = A.reshape((len(focal), len(reference_sets)))\n    return A\n\n\nclass TestGenealogicalNearestNeighbours:\n    \"\"\"\n    Tests the TreeSequence.genealogical_nearest_neighbours method.\n    \"\"\"\n\n    #\n    #          8\n    #         / \\\n    #        /   \\\n    #       /     \\\n    #      7       \\\n    #     / \\       6\n    #    /   5     / \\\n    #   /   / \\   /   \\\n    #  4   0   1 2     3\n    small_tree_ex_nodes = \"\"\"\\\n    id      is_sample   population      time\n    0       1       0               0.00000000000000\n    1       1       0               0.00000000000000\n    2       1       0               0.00000000000000\n    3       1       0               0.00000000000000\n    4       1       0               
0.00000000000000\n    5       0       0               0.14567111023387\n    6       0       0               0.21385545626353\n    7       0       0               0.43508024345063\n    8       0       0               1.60156352971203\n    \"\"\"\n    small_tree_ex_edges = \"\"\"\\\n    id      left            right           parent  child\n    0       0.00000000      1.00000000      5       0,1\n    1       0.00000000      1.00000000      6       2,3\n    2       0.00000000      1.00000000      7       4,5\n    3       0.00000000      1.00000000      8       6,7\n    \"\"\"\n\n    def verify(self, ts, reference_sets, focal=None):\n        if focal is None:\n            focal = [u for refset in reference_sets for u in refset]\n        A1 = naive_genealogical_nearest_neighbours(ts, focal, reference_sets)\n        A2 = tsutil.genealogical_nearest_neighbours(ts, focal, reference_sets)\n        A3 = ts.genealogical_nearest_neighbours(focal, reference_sets)\n        A4 = ts.genealogical_nearest_neighbours(focal, reference_sets, num_threads=3)\n        A5 = windowed_genealogical_nearest_neighbours(ts, focal, reference_sets)\n        assert np.array_equal(A3, A4)\n        assert A1.shape == A2.shape\n        assert A1.shape == A3.shape\n        assert np.allclose(A1, A2)\n        assert np.allclose(A1, A3)\n        mask = ~np.isnan(A5)\n        assert np.sum(mask) > 0 or ts.num_edges == 0\n        assert np.allclose(A1[mask], A5[mask])\n        assert np.allclose(A5[mask], A2[mask])\n        assert np.allclose(A5[mask], A3[mask])\n\n        if ts.num_edges > 0 and all(ts.node(u).is_sample() for u in focal):\n            # When the focal nodes are samples, we can assert some stronger properties.\n            assert np.allclose(np.sum(A1, axis=1), 1)\n            assert np.allclose(np.sum(A5, axis=1), 1)\n        return A1\n\n    def test_simple_example_all_samples(self):\n        ts = tskit.load_text(\n            nodes=io.StringIO(self.small_tree_ex_nodes),\n            
edges=io.StringIO(self.small_tree_ex_edges),\n            strict=False,\n        )\n        A = self.verify(ts, [[0, 1], [2, 3, 4]], [0])\n        assert list(A[0]) == [1, 0]\n        A = self.verify(ts, [[0, 1], [2, 3, 4]], [4])\n        assert list(A[0]) == [1, 0]\n        A = self.verify(ts, [[0, 1], [2, 3, 4]], [2])\n        assert list(A[0]) == [0, 1]\n        A = self.verify(ts, [[0, 2], [1, 3, 4]], [0])\n        assert list(A[0]) == [0, 1]\n        A = self.verify(ts, [[0, 2], [1, 3, 4]], [4])\n        assert list(A[0]) == [0.5, 0.5]\n\n    def test_simple_example_missing_samples(self):\n        ts = tskit.load_text(\n            nodes=io.StringIO(self.small_tree_ex_nodes),\n            edges=io.StringIO(self.small_tree_ex_edges),\n            strict=False,\n        )\n        A = self.verify(ts, [[0, 1], [2, 4]], [3])\n        assert list(A[0]) == [0, 1]\n        A = self.verify(ts, [[0, 1], [2, 4]], [2])\n        assert np.allclose(A[0], [2 / 3, 1 / 3])\n\n    def test_simple_example_internal_focal_node(self):\n        ts = tskit.load_text(\n            nodes=io.StringIO(self.small_tree_ex_nodes),\n            edges=io.StringIO(self.small_tree_ex_edges),\n            strict=False,\n        )\n        focal = [7]  # An internal node\n        reference_sets = [[4, 0, 1], [2, 3]]\n        GNN = naive_genealogical_nearest_neighbours(ts, focal, reference_sets)\n        assert np.allclose(GNN[0], np.array([1.0, 0.0]))\n        GNN = tsutil.genealogical_nearest_neighbours(ts, focal, reference_sets)\n        assert np.allclose(GNN[0], np.array([1.0, 0.0]))\n        GNN = ts.genealogical_nearest_neighbours(focal, reference_sets)\n        assert np.allclose(GNN[0], np.array([1.0, 0.0]))\n        focal = [8]  # The root\n        GNN = naive_genealogical_nearest_neighbours(ts, focal, reference_sets)\n        assert np.allclose(GNN[0], np.array([0.6, 0.4]))\n        GNN = tsutil.genealogical_nearest_neighbours(ts, focal, reference_sets)\n        assert 
np.allclose(GNN[0], np.array([0.6, 0.4]))\n        GNN = ts.genealogical_nearest_neighbours(focal, reference_sets)\n        assert np.allclose(GNN[0], np.array([0.6, 0.4]))\n\n    def test_two_populations_high_migration(self):\n        ts = msprime.simulate(\n            population_configurations=[\n                msprime.PopulationConfiguration(18),\n                msprime.PopulationConfiguration(18),\n            ],\n            migration_matrix=[[0, 1], [1, 0]],\n            recombination_rate=8,\n            random_seed=5,\n        )\n        assert ts.num_trees > 1\n        self.verify(ts, [ts.samples(0), ts.samples(1)])\n\n    def test_single_tree(self):\n        ts = msprime.simulate(6, random_seed=1)\n        S = [range(3), range(3, 6)]\n        self.verify(ts, S)\n\n    def test_single_tree_internal_reference_sets(self):\n        ts = msprime.simulate(10, random_seed=1)\n        tree = ts.first()\n        S = [[u] for u in tree.children(tree.root)]\n        self.verify(ts, S, ts.samples())\n\n    def test_single_tree_all_nodes(self):\n        ts = msprime.simulate(10, random_seed=1)\n        S = [np.arange(ts.num_nodes, dtype=np.int32)]\n        self.verify(ts, S, np.arange(ts.num_nodes, dtype=np.int32))\n\n    def test_single_tree_partial_samples(self):\n        ts = msprime.simulate(6, random_seed=1)\n        S = [range(3), range(3, 4)]\n        self.verify(ts, S)\n\n    def test_single_tree_all_sample_sets(self):\n        ts = msprime.simulate(6, random_seed=1)\n        for S in set_partitions(list(range(ts.num_samples))):\n            self.verify(ts, S)\n\n    def test_many_trees_all_sample_sets(self):\n        ts = msprime.simulate(6, recombination_rate=2, random_seed=1)\n        assert ts.num_trees > 2\n        for S in set_partitions(list(range(ts.num_samples))):\n            self.verify(ts, S)\n\n    def test_many_trees_sequence_length(self):\n        for L in [0.5, 1.5, 3.3333]:\n            ts = msprime.simulate(6, length=L, 
recombination_rate=2, random_seed=1)\n            self.verify(ts, [range(3), range(3, 6)])\n\n    def test_many_trees_all_nodes(self):\n        ts = msprime.simulate(6, length=4, recombination_rate=2, random_seed=1)\n        S = [np.arange(ts.num_nodes, dtype=np.int32)]\n        self.verify(ts, S, np.arange(ts.num_nodes, dtype=np.int32))\n\n    def test_wright_fisher_unsimplified_all_sample_sets(self):\n        tables = wf.wf_sim(\n            4,\n            5,\n            seed=1,\n            deep_history=True,\n            initial_generation_samples=False,\n            num_loci=10,\n        )\n        tables.sort()\n        ts = tables.tree_sequence()\n        for S in set_partitions(list(ts.samples())):\n            self.verify(ts, S)\n\n    def test_wright_fisher_unsimplified(self):\n        tables = wf.wf_sim(\n            20,\n            15,\n            seed=1,\n            deep_history=True,\n            initial_generation_samples=False,\n            num_loci=20,\n        )\n        tables.sort()\n        ts = tables.tree_sequence()\n        samples = ts.samples()\n        self.verify(ts, [samples[:10], samples[10:]])\n\n    def test_wright_fisher_initial_generation(self):\n        tables = wf.wf_sim(\n            20,\n            15,\n            seed=1,\n            deep_history=True,\n            initial_generation_samples=True,\n            num_loci=20,\n        )\n        tables.sort()\n        tables.simplify()\n        ts = tables.tree_sequence()\n        samples = ts.samples()\n        founders = [u for u in samples if ts.node(u).time > 0]\n        samples = [u for u in samples if ts.node(u).time == 0]\n        self.verify(ts, [founders[:10], founders[10:]], samples)\n\n    def test_wright_fisher_initial_generation_no_deep_history(self):\n        tables = wf.wf_sim(\n            20,\n            15,\n            seed=2,\n            deep_history=False,\n            initial_generation_samples=True,\n            num_loci=20,\n        )\n        
tables.sort()\n        tables.simplify()\n        ts = tables.tree_sequence()\n        samples = ts.samples()\n        founders = [u for u in samples if ts.node(u).time > 0]\n        samples = [u for u in samples if ts.node(u).time == 0]\n        self.verify(ts, [founders[:10], founders[10:]], samples)\n\n    def test_wright_fisher_unsimplified_multiple_roots(self):\n        tables = wf.wf_sim(\n            20,\n            15,\n            seed=1,\n            deep_history=False,\n            initial_generation_samples=False,\n            num_loci=20,\n        )\n        tables.sort()\n        ts = tables.tree_sequence()\n        samples = ts.samples()\n        self.verify(ts, [samples[:10], samples[10:]])\n\n    def test_wright_fisher_simplified(self):\n        tables = wf.wf_sim(\n            31,\n            10,\n            seed=1,\n            deep_history=True,\n            initial_generation_samples=False,\n            num_loci=5,\n        )\n        tables.sort()\n        ts = tables.tree_sequence().simplify()\n        samples = ts.samples()\n        self.verify(ts, [samples[:10], samples[10:]])\n\n    def test_wright_fisher_simplified_multiple_roots(self):\n        tables = wf.wf_sim(\n            31,\n            10,\n            seed=1,\n            deep_history=False,\n            initial_generation_samples=False,\n            num_loci=5,\n        )\n        tables.sort()\n        ts = tables.tree_sequence()\n        samples = ts.samples()\n        self.verify(ts, [samples[:10], samples[10:]])\n\n    def test_empty_ts(self):\n        tables = tskit.TableCollection(1.0)\n        tables.nodes.add_row(1, 0)\n        tables.nodes.add_row(1, 0)\n        ts = tables.tree_sequence()\n        self.verify(ts, [[0], [1]])\n\n\nclass TestWindowedGenealogicalNearestNeighbours(TestGenealogicalNearestNeighbours):\n    \"\"\"\n    Tests the TreeSequence.genealogical_nearest_neighbours method.\n    \"\"\"\n\n    #               .    5\n    #               .   
/ \\\n    #        4      .  |   4\n    #       / \\     .  |   |\\\n    #      3   \\    .  |   | \\\n    #     / \\   \\   .  |   |  \\\n    #   [0] [1] [2] . [0] [1] [2]\n    #\n    two_tree_nodes = \"\"\"\\\n    id      is_sample   time\n    0       1           0\n    1       1           0\n    2       1           0\n    3       0           1\n    4       0           2\n    5       0           3\n    \"\"\"\n    two_tree_edges = \"\"\"\\\n    left    right   parent  child\n    0       0.2     3       0,1\n    0       1       4       2\n    0       0.2     4       3\n    0.2     1       4       1\n    0.2     1       5       0,4\n    \"\"\"\n\n    def get_two_tree_ts(self):\n        ts = tskit.load_text(\n            nodes=io.StringIO(self.two_tree_nodes),\n            edges=io.StringIO(self.two_tree_edges),\n            strict=False,\n        )\n        return ts\n\n    def verify(self, ts, reference_sets, focal=None, windows=None, time_windows=None):\n        if focal is None:\n            focal = [u for refset in reference_sets for u in refset]\n        gnn = windowed_genealogical_nearest_neighbours(\n            ts, focal, reference_sets, windows, time_windows\n        )\n        if windows is not None:\n            windows_len = len(windows) - 1\n        if time_windows is not None:\n            time_windows_len = len(time_windows) - 1\n        if windows is None and time_windows is None:\n            assert np.array_equal(gnn.shape, [len(focal), len(reference_sets)])\n        elif windows is None and time_windows is not None:\n            assert np.array_equal(\n                gnn.shape, [time_windows_len, len(focal), len(reference_sets)]\n            )\n        elif windows is not None and time_windows is None:\n            assert np.array_equal(\n                gnn.shape, [windows_len, len(focal), len(reference_sets)]\n            )\n        else:\n            assert np.array_equal(\n                gnn.shape,\n                [windows_len, 
time_windows_len, len(focal), len(reference_sets)],\n            )\n\n        return gnn\n\n    def test_one_tree_windows(self):\n        ts = tskit.load_text(\n            nodes=io.StringIO(self.small_tree_ex_nodes),\n            edges=io.StringIO(self.small_tree_ex_edges),\n            strict=False,\n        )\n        A = self.verify(ts, [[0, 1], [2, 3, 4]], [0], [0, 1])\n        assert np.allclose(A, [[[[1, 0]]]])\n        A = self.verify(ts, [[0, 1], [2, 3, 4]], [0], [0, 0.5, 1])\n        assert np.allclose(A, [[[[1.0, 0.0]]], [[[1.0, 0.0]]]])\n        A = self.verify(ts, [[0, 1], [2, 3, 4]], [0], [0, 0.5, 0.6, 1])\n        assert np.allclose(A, [[[[1.0, 0.0]]], [[[1.0, 0.0]]], [[[1.0, 0.0]]]])\n\n    def test_two_tree_windows(self):\n        ts = self.get_two_tree_ts()\n        A = self.verify(ts, [[0, 1], [2]], [0], [0, 1])\n        assert np.allclose(A, [[[0.6, 0.4]]])\n        A = self.verify(ts, [[0, 1], [2]], [0], [0, 0.2, 1])\n        assert np.allclose(A, [[[1.0, 0.0]], [[0.5, 0.5]]])\n        A = self.verify(ts, [[0, 1], [2]], [0], [0, 0.2, 0.5, 1])\n        assert np.allclose(A, [[[1.0, 0.0]], [[0.5, 0.5]], [[0.5, 0.5]]])\n\n    def test_one_tree_time_windows(self):\n        ts = tskit.load_text(\n            nodes=io.StringIO(self.small_tree_ex_nodes),\n            edges=io.StringIO(self.small_tree_ex_edges),\n            strict=False,\n        )\n        A = self.verify(ts, [[0, 1], [2, 3, 4]], [0], None, [0, ts.max_root_time])\n        assert np.allclose(A, [[[1, 0]]])\n        A = self.verify(ts, [[0, 1], [2, 3, 4]], [0], None, [1, 2])\n        assert np.allclose(A, [[[np.nan, np.nan]]], equal_nan=True)\n        A = self.verify(ts, [[0, 1], [2, 3, 4]], [0], None, [0, 0.1])\n        assert np.allclose(A, [[[np.nan, np.nan]]], equal_nan=True)\n\n    def test_two_tree_time_windows(self):\n        ts = self.get_two_tree_ts()\n        A = self.verify(ts, [[0, 1], [2]], [0], None, [0, ts.max_root_time])\n        assert np.allclose(A, [[[0.6, 0.4]]])\n  
      A = self.verify(ts, [[0, 1], [2]], [0], None, [0, 1.1, ts.max_root_time])\n        assert np.allclose(A, [[[1.0, 0.0]], [[0.5, 0.5]]])\n        A = self.verify(ts, [[0, 1], [2]], [0], None, [0, 0.5, 1])\n        assert np.allclose(A, [[[np.nan, np.nan]], [[1.0, 0.0]]], equal_nan=True)\n        A = self.verify(ts, [[0, 1], [2]], [0], None, [1, ts.max_root_time, 10])\n        assert np.allclose(A, [[[0.5, 0.5]], [[np.nan, np.nan]]], equal_nan=True)\n\n    def test_one_tree_windows_time_windows(self):\n        ts = tskit.load_text(\n            nodes=io.StringIO(self.small_tree_ex_nodes),\n            edges=io.StringIO(self.small_tree_ex_edges),\n            strict=False,\n        )\n        A = self.verify(ts, [[0, 1], [2, 3, 4]], [0], [0, 1], [0, ts.max_root_time])\n        assert np.allclose(A, [[[[1, 0]]]])\n        A = self.verify(\n            ts, [[0, 1], [2, 3, 4]], [0], [0, 0.2, 1], [0, 1.1, ts.max_root_time]\n        )\n        assert np.allclose(\n            A,\n            [\n                [[[1.0, 0.0]], [[np.nan, np.nan]]],\n                [[[1.0, 0.0]], [[np.nan, np.nan]]],\n            ],\n            equal_nan=True,\n        )\n        A = self.verify(ts, [[0, 1], [2, 3, 4]], [0], [0, 0.2], [0, 0.5, 1])\n        assert np.allclose(A, [[[[1.0, 0.0]], [[np.nan, np.nan]]]], equal_nan=True)\n        A = self.verify(\n            ts, [[0, 1], [2, 3, 4]], [0], [0, 0.2, 1, 1.5], [0, ts.max_root_time, 10]\n        )\n        assert np.allclose(\n            A,\n            [\n                [[[1.0, 0.0]], [[np.nan, np.nan]]],\n                [[[1.0, 0.0]], [[np.nan, np.nan]]],\n                [[[np.nan, np.nan]], [[np.nan, np.nan]]],\n            ],\n            equal_nan=True,\n        )\n\n    def test_two_tree_windows_time_windows(self):\n        ts = self.get_two_tree_ts()\n        A = self.verify(ts, [[0, 1], [2]], [0], [0, 1], [0, ts.max_root_time])\n        assert np.allclose(A, [[[[0.6, 0.4]]]])\n        A = self.verify(ts, [[0, 1], [2]], 
[0], [0, 0.2, 1], [0, 1.1, ts.max_root_time])\n        assert np.allclose(\n            A,\n            [\n                [[[1.0, 0.0]], [[np.nan, np.nan]]],\n                [[[np.nan, np.nan]], [[0.5, 0.5]]],\n            ],\n            equal_nan=True,\n        )\n        A = self.verify(ts, [[0, 1], [2, 3, 4]], [0], [0, 0.2, 1], [0, 0.5, 1])\n        assert np.allclose(\n            A,\n            [\n                [[[np.nan, np.nan]], [[0.5, 0.5]]],\n                [[[np.nan, np.nan]], [[np.nan, np.nan]]],\n            ],\n            equal_nan=True,\n        )\n\n    def test_span_normalise(self):\n        ts = self.get_two_tree_ts()\n        sample_sets = [[0, 1], [2]]\n        focal = [0]\n        np.random.seed(5)\n        windows = ts.sequence_length * np.array([0.2, 0.4, 0.6, 0.8, 1])\n        windows.sort()\n        windows[0] = 0.0\n        windows[-1] = ts.sequence_length\n\n        result1 = windowed_genealogical_nearest_neighbours(\n            ts, focal, sample_sets, windows\n        )\n        result2 = windowed_genealogical_nearest_neighbours(\n            ts, focal, sample_sets, windows, span_normalise=True\n        )\n        result3 = windowed_genealogical_nearest_neighbours(\n            ts, focal, sample_sets, windows, span_normalise=False\n        )\n        denom = np.diff(windows)[:, np.newaxis, np.newaxis]\n\n        # Test the dimensions are correct\n        assert np.array_equal(result1.shape, result2.shape)\n        assert np.array_equal(result2.shape, result3.shape)\n\n        # Test normalisation is correct\n        assert np.allclose(result1, result2)\n        assert np.allclose(result1, result3 / denom)\n\n        # If span_normalised, then sum over all reference sets should be 1\n        assert np.allclose(np.sum(result1, axis=2), 1)\n        assert np.allclose(np.sum(result2, axis=2), 1)\n        # If not span_normalised, then sum over all value is 1\n        assert np.allclose(result3.sum(), 1)\n\n    def 
test_time_normalise(self):\n        \"\"\"\n        Testing time_normalise is trickier than span_normalise, as the norm\n        depends on the span of the nearest neighbours found in each time grid.\n        In this small example, we check which grid nodes 3 and 5 fall in, and use their\n        spans to check the time_normalisation.\n        \"\"\"\n        ts = self.get_two_tree_ts()\n        sample_sets = [[0, 1], [2]]\n        focal = [0]\n        oldest_node = ts.max_root_time\n        time_windows = oldest_node * np.array([0.2, 0.4, 0.6, 0.8, 1])\n        time_windows.sort()\n        time_windows[0] = 0.0\n        time_windows[-1] = oldest_node\n\n        # Determine output_dim of the function\n        result1 = windowed_genealogical_nearest_neighbours(\n            ts, focal, sample_sets, windows=None, time_windows=time_windows\n        )\n        result2 = windowed_genealogical_nearest_neighbours(\n            ts,\n            focal,\n            sample_sets,\n            windows=None,\n            time_windows=time_windows,\n            time_normalise=True,\n        )\n        result3 = windowed_genealogical_nearest_neighbours(\n            ts,\n            focal,\n            sample_sets,\n            windows=None,\n            time_windows=time_windows,\n            time_normalise=False,\n        )\n        denom = np.zeros(len(time_windows) - 1)\n        time_index_3 = np.searchsorted(time_windows, ts.tables.nodes.time[3]) - 1\n        time_index_5 = np.searchsorted(time_windows, ts.tables.nodes.time[5]) - 1\n        denom[time_index_3] += 0.2\n        denom[time_index_5] += 0.8\n\n        # Avoid division by zero\n        denom[denom == 0] = 1\n        denom = denom[:, np.newaxis, np.newaxis]\n\n        # Test the dimensions are correct\n        assert np.array_equal(result1.shape, result2.shape)\n        assert np.array_equal(result2.shape, result3.shape)\n\n        # Test normalisation is correct\n        assert np.allclose(result1, result2, 
equal_nan=True)\n        assert np.allclose(result1, result3 / denom, equal_nan=True)\n\n        # If time_normalised, then sum over all reference sets should be 1\n        # Mask out time intervals that sum to 0\n        result1_dim_sum = np.sum(result1, axis=2)\n        mask = ~(np.isnan(result1_dim_sum))\n        assert np.allclose(\n            result1_dim_sum[mask],\n            np.ones((len(result1_dim_sum), len(focal)))[mask],\n            equal_nan=True,\n        )\n        result2_dim_sum = np.sum(result2, axis=2)\n        mask = ~(np.isnan(result2_dim_sum))\n        assert np.allclose(\n            result2_dim_sum[mask],\n            np.ones((len(result2_dim_sum), len(focal)))[mask],\n            equal_nan=True,\n        )\n        # If not time_normalised, then sum over all values is 1\n        assert np.allclose(np.nansum(result3), 1)\n\n\ndef exact_genealogical_nearest_neighbours(ts, focal, reference_sets):\n    # Same as above, except we return the per-tree value for a single node.\n\n    # Make sure everything is a sample so we can use the tracked_samples option.\n    # This is a limitation of the current API.\n    tables = ts.dump_tables()\n    tables.nodes.set_columns(\n        flags=np.ones_like(tables.nodes.flags), time=tables.nodes.time\n    )\n    ts = tables.tree_sequence()\n\n    A = np.zeros((len(reference_sets), ts.num_trees))\n    L = np.zeros(ts.num_trees)\n    reference_set_map = np.zeros(ts.num_nodes, dtype=int) - 1\n    for k, ref_set in enumerate(reference_sets):\n        for u in ref_set:\n            reference_set_map[u] = k\n    tree_iters = [\n        ts.trees(tracked_samples=reference_nodes) for reference_nodes in reference_sets\n    ]\n    u = focal\n    focal_node_set = reference_set_map[u]\n    # delta(u) = 1 if u exists in any of the reference sets; 0 otherwise\n    delta = int(focal_node_set != -1)\n    for _ in range(ts.num_trees):\n        trees = list(map(next, tree_iters))\n        v = trees[0].parent(u)\n        while v 
!= tskit.NULL:\n            total = sum(tree.num_tracked_samples(v) for tree in trees)\n            if total > delta:\n                break\n            v = trees[0].parent(v)\n        if v != tskit.NULL:\n            # The length is only reported where the statistic is defined.\n            L[trees[0].index] = trees[0].interval.right - trees[0].interval.left\n            for k, tree in enumerate(trees):\n                # If the focal node is in the current set, we subtract its\n                # contribution from the numerator\n                n = tree.num_tracked_samples(v) - (k == focal_node_set)\n                # If the focal node is in *any* reference set, we subtract its\n                # contribution from the denominator.\n                A[k, tree.index] = n / (total - delta)\n    return A, L\n\n\ndef local_gnn(ts, focal, reference_sets):\n    # Temporary implementation of the treewise GNN.\n    reference_set_map = np.zeros(ts.num_nodes, dtype=int) - 1\n    for k, reference_set in enumerate(reference_sets):\n        for u in reference_set:\n            if reference_set_map[u] != -1:\n                raise ValueError(\"Duplicate value in reference sets\")\n            reference_set_map[u] = k\n\n    K = len(reference_sets)\n    A = np.zeros((len(focal), ts.num_trees, K))\n    lefts = np.zeros(ts.num_trees, dtype=float)\n    rights = np.zeros(ts.num_trees, dtype=float)\n    parent = np.zeros(ts.num_nodes, dtype=int) - 1\n    sample_count = np.zeros((ts.num_nodes, K), dtype=int)\n\n    # Set the initial conditions.\n    for j in range(K):\n        sample_count[reference_sets[j], j] = 1\n\n    for t, ((left, right), edges_out, edges_in) in enumerate(ts.edge_diffs()):\n        for edge in edges_out:\n            parent[edge.child] = -1\n            v = edge.parent\n            while v != -1:\n                sample_count[v] -= sample_count[edge.child]\n                v = parent[v]\n        for edge in edges_in:\n            parent[edge.child] = 
edge.parent\n            v = edge.parent\n            while v != -1:\n                sample_count[v] += sample_count[edge.child]\n                v = parent[v]\n\n        # Process this tree.\n        for j, u in enumerate(focal):\n            focal_reference_set = reference_set_map[u]\n            delta = int(focal_reference_set != -1)\n            p = parent[u]\n            lefts[t] = left\n            rights[t] = right\n            while p != tskit.NULL:\n                total = np.sum(sample_count[p])\n                if total > delta:\n                    break\n                p = parent[p]\n            if p != tskit.NULL:\n                scale = 1 / (total - delta)\n                for k, _reference_set in enumerate(reference_sets):\n                    n = sample_count[p, k] - int(focal_reference_set == k)\n                    A[j, t, k] = n * scale\n    return (A, lefts, rights)\n\n\nclass TestExactGenealogicalNearestNeighbours(TestGenealogicalNearestNeighbours):\n    # This is a work in progress - these tests will be adapted to use the\n    # treewise GNN when it's implemented.\n\n    def verify(self, ts, reference_sets, focal=None):\n        if focal is None:\n            focal = [u for refset in reference_sets for u in refset]\n        A = ts.genealogical_nearest_neighbours(focal, reference_sets)\n\n        G, lefts, rights = local_gnn(ts, focal, reference_sets)\n        for tree in ts.trees():\n            assert lefts[tree.index] == tree.interval.left\n            assert rights[tree.index] == tree.interval.right\n\n        for j, u in enumerate(focal):\n            T, L = exact_genealogical_nearest_neighbours(ts, u, reference_sets)\n            assert np.allclose(G[j], T.T)\n            # Ignore the cases where the node has no GNNs\n            if np.sum(L) > 0:\n                mean = np.sum(T * L, axis=1) / np.sum(L)\n                assert np.allclose(mean, A[j])\n        return A\n"
  },
  {
    "path": "python/tests/test_table_transforms.py",
    "content": "# MIT License\n#\n# Copyright (c) 2022-2024 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTest cases for table transformation operations like trim(), decapitate, etc.\n\"\"\"\n\nimport decimal\nimport fractions\nimport io\nimport math\n\nimport numpy as np\nimport pytest\n\nimport tests\nimport tskit\nimport tskit.util as util\nfrom tests.tsutil import get_example_tree_sequences\n\n# ↑ See https://github.com/tskit-dev/tskit/issues/1804 for when\n# we can remove this.\n\n\ndef delete_older_definition(tables, time):\n    node_time = tables.nodes.time\n    edges = tables.edges.copy()\n    tables.edges.clear()\n    for edge in edges:\n        if node_time[edge.parent] <= time:\n            tables.edges.append(edge)\n\n    mutations = tables.mutations.copy()\n    # Map of old ID -> new ID\n    mutation_map = np.full(len(mutations), tskit.NULL, dtype=int)\n    tables.mutations.clear()\n    keep = []\n    for j, 
mutation in enumerate(mutations):\n        mutation_time = (\n            node_time[mutation.node]\n            if util.is_unknown_time(mutation.time)\n            else mutation.time\n        )\n        if mutation_time < time:\n            mutation_map[j] = len(keep)\n            keep.append(mutation)\n    # Not making assumptions about ordering, so do it in two passes.\n    for mutation in keep:\n        if mutation.parent != tskit.NULL:\n            mutation = mutation.replace(parent=mutation_map[mutation.parent])\n        tables.mutations.append(mutation)\n\n    migrations = tables.migrations.copy()\n    tables.migrations.clear()\n    for migration in migrations:\n        if migration.time < time:\n            tables.migrations.append(migration)\n\n\nclass TestDeleteOlderExamples:\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_definition(self, ts):\n        time = 0 if ts.num_nodes == 0 else np.median(ts.tables.nodes.time)\n        tables1 = ts.dump_tables()\n        delete_older_definition(tables1, time)\n        tables2 = ts.dump_tables()\n        tables2.delete_older(time)\n        tables1.assert_equals(tables2, ignore_provenance=True)\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_mutation_parents(self, ts):\n        time = 0 if ts.num_nodes == 0 else np.median(ts.tables.nodes.time)\n        tables1 = ts.dump_tables()\n        tables1.delete_older(time)\n        tables2 = tables1.copy()\n        tables2.build_index()\n        tables2.compute_mutation_parents()\n        tables1.assert_equals(tables2, ignore_provenance=True)\n\n\nclass TestDeleteOlderSimpleTree:\n    # 2.00┊   4   ┊\n    #     ┊ ┏━┻┓  ┊\n    # 1.00┊ ┃  3  ┊\n    #     ┊ ┃ ┏┻┓ ┊\n    # 0.00┊ 0 1 2 ┊\n    #     0       1\n    def tables(self):\n        # Don't cache this because we modify the result!\n        tree = tskit.Tree.generate_balanced(3, branch_length=1)\n        return tree.tree_sequence.dump_tables()\n\n    
@pytest.mark.parametrize(\"time\", [0, -0.5, -100, 0.01, 0.999])\n    def test_before_first_internal_node(self, time):\n        tables = self.tables()\n        before = tables.copy()\n        tables.delete_older(time)\n        ts = tables.tree_sequence()\n        assert ts.num_trees == 1\n        tree = ts.first()\n        assert tree.num_roots == 3\n        assert list(sorted(tree.roots)) == [0, 1, 2]\n        assert before.nodes.equals(tables.nodes[: len(before.nodes)])\n        assert len(tables.edges) == 0\n\n    @pytest.mark.parametrize(\"time\", [1, 1.01, 1.5, 1.999])\n    def test_t1_to_2(self, time):\n        #\n        # 2.00┊       ┊\n        #     ┊       ┊\n        # 1.00┊    3  ┊\n        #     ┊   ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        tables = self.tables()\n        before = tables.copy()\n        tables.delete_older(time)\n        ts = tables.tree_sequence()\n        assert ts.num_trees == 1\n        tree = ts.first()\n        assert tree.num_roots == 2\n        assert list(sorted(tree.roots)) == [0, 3]\n        assert len(tables.nodes) == 5\n        assert before.nodes.equals(tables.nodes)\n\n    @pytest.mark.parametrize(\"time\", [2, 2.5, 1e9])\n    def test_t2(self, time):\n        tables = self.tables()\n        before = tables.copy()\n        tables.delete_older(time)\n        tables.assert_equals(before, ignore_provenance=True)\n\n\nclass TestDeleteOlderSimpleTreeMutationExamples:\n    def test_single_mutation_no_time(self):\n        # 2.00┊   4   ┊\n        #     ┊ ┏━┻┓  ┊\n        # 1.00┊ ┃  3  ┊\n        #     ┊ x ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        tree = tskit.Tree.generate_balanced(3, branch_length=1)\n        tables = tree.tree_sequence.dump_tables()\n        tables.sites.add_row(0, \"A\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"T\", metadata=b\"1234\")\n\n        tables.delete_older(1)\n        # 2.00┊       ┊\n        #     ┊       ┊\n        # 1.00┊    3 
 ┊\n        #     ┊ x ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        assert len(tables.nodes) == 5\n        mut = tables.mutations[0]\n        assert mut.node == 0\n        assert mut.derived_state == \"T\"\n        assert mut.metadata == b\"1234\"\n        assert tskit.is_unknown_time(mut.time)\n\n    def test_single_mutation_before_time(self):\n        # 2.00┊   4   ┊\n        #     ┊ x━┻┓  ┊\n        # 1.00┊ ┃  3  ┊\n        #     ┊ ┃ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        tree = tskit.Tree.generate_balanced(3, branch_length=1)\n        tables = tree.tree_sequence.dump_tables()\n        tables.sites.add_row(0, \"A\")\n        tables.mutations.add_row(\n            site=0, node=0, time=1.5, derived_state=\"T\", metadata=b\"1234\"\n        )\n        tables.delete_older(1)\n        # 2.00┊       ┊\n        #     ┊       ┊\n        # 1.00┊    3  ┊\n        #     ┊   ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        assert len(tables.nodes) == 5\n        assert len(tables.mutations) == 0\n\n    def test_single_mutation_at_time(self):\n        # 2.00┊   4   ┊\n        #     ┊ ┏━┻┓  ┊\n        # 1.00┊ x  3  ┊\n        #     ┊ ┃ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        tree = tskit.Tree.generate_balanced(3, branch_length=1)\n        tables = tree.tree_sequence.dump_tables()\n        tables.sites.add_row(0, \"A\")\n        tables.mutations.add_row(\n            site=0, node=0, time=1, derived_state=\"T\", metadata=b\"1234\"\n        )\n\n        tables.delete_older(1)\n        # 2.00┊       ┊\n        #     ┊       ┊\n        # 1.00┊    3  ┊\n        #     ┊   ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        assert len(tables.nodes) == 5\n        assert len(tables.mutations) == 0\n\n    def test_multi_mutation_no_time(self):\n        # 2.00┊   4   ┊\n        #     ┊ ┏━┻┓  ┊\n        # 1.00┊ x  3  ┊\n        #     ┊ x ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       
1\n        tree = tskit.Tree.generate_balanced(3, branch_length=1)\n        tables = tree.tree_sequence.dump_tables()\n        tables.sites.add_row(0, \"A\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"T\")\n        tables.mutations.add_row(site=0, node=0, parent=0, derived_state=\"G\")\n        before = tables.copy()\n\n        tables.delete_older(1)\n        # 2.00┊   4   ┊\n        #     ┊       ┊\n        #     ┊    3  ┊\n        #     ┊ x  ┃  ┊\n        #     ┊ x ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        tables.mutations.assert_equals(before.mutations)\n\n    def test_multi_mutation_out_of_order(self):\n        # 2.00┊   4   ┊\n        #     ┊ ┏━┻┓  ┊\n        # 1.00┊ x  3  ┊\n        #     ┊ x ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        tree = tskit.Tree.generate_balanced(3, branch_length=1)\n        tables = tree.tree_sequence.dump_tables()\n        tables.sites.add_row(0, \"A\")\n        tables.mutations.add_row(site=0, node=0, parent=1, derived_state=\"G\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"T\")\n        before = tables.copy()\n        with pytest.raises(tskit.LibraryError, match=\"PARENT_AFTER_CHILD\"):\n            tables.tree_sequence()\n\n        tables.delete_older(1)\n        # 2.00┊   4   ┊\n        #     ┊       ┊\n        #     ┊    3  ┊\n        #     ┊ x  ┃  ┊\n        #     ┊ x ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        tables.mutations.assert_equals(before.mutations)\n\n    def test_mutation_not_on_branch(self):\n        tables = tskit.TableCollection(1)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        tables.sites.add_row(0, \"A\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"T\")\n        before = tables.copy()\n        tables.delete_older(0.01)\n        tables.assert_equals(before, ignore_provenance=True)\n\n\nclass TestDeleteOlderSimpleTreeMigrationExamples:\n    
@tests.cached_example\n    def ts(self):\n        # 2.00┊   4   ┊\n        #     ┊ o━┻┓  ┊\n        # 1.00┊ o  3  ┊\n        #     ┊ o ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        tree = tskit.Tree.generate_balanced(3, branch_length=1)\n        tables = tree.tree_sequence.dump_tables()\n        tables.populations.add_row()\n        tables.populations.add_row()\n        tables.migrations.add_row(source=0, dest=1, node=0, time=0.5, left=0, right=1)\n        tables.migrations.add_row(source=1, dest=0, node=0, time=1.0, left=0, right=1)\n        tables.migrations.add_row(source=0, dest=1, node=0, time=1.5, left=0, right=1)\n        tables.compute_mutation_parents()\n        ts = tables.tree_sequence()\n        return ts\n\n    def test_t099(self):\n        tables = self.ts().dump_tables()\n        tables.delete_older(0.99)\n        assert len(tables.migrations) == 1\n        assert tables.migrations[0].time == 0.5\n\n    def test_t1(self):\n        tables = self.ts().dump_tables()\n        tables.delete_older(1)\n        assert len(tables.migrations) == 1\n        assert tables.migrations[0].time == 0.5\n\n    @pytest.mark.parametrize(\"time\", [1.51, 2.0, 2.5])\n    def test_older(self, time):\n        tables = self.ts().dump_tables()\n        before = tables.copy()\n        tables.delete_older(time)\n        tables.migrations.assert_equals(before.migrations)\n\n\ndef split_edges_definition(ts, time, *, flags=0, population=None, metadata=None):\n    population = -1 if population is None else population\n    tables = ts.dump_tables()\n    if ts.num_migrations > 0:\n        raise ValueError(\"Migrations not supported\")\n\n    node_time = tables.nodes.time\n    tables.edges.clear()\n    split_edge = np.full(ts.num_edges, tskit.NULL, dtype=int)\n    for edge in ts.edges():\n        if node_time[edge.child] < time < node_time[edge.parent]:\n            u = tables.nodes.add_row(\n                flags=flags, time=time, population=population, 
metadata=metadata\n            )\n            tables.edges.append(edge.replace(parent=u))\n            tables.edges.append(edge.replace(child=u))\n            split_edge[edge.id] = u\n        else:\n            tables.edges.append(edge)\n\n    tables.mutations.clear()\n    for mutation in ts.mutations():\n        mapped_node = tskit.NULL\n        if mutation.edge != tskit.NULL:\n            mapped_node = split_edge[mutation.edge]\n        if mapped_node != tskit.NULL and mutation.time >= time:\n            mutation = mutation.replace(node=mapped_node)\n        tables.mutations.append(mutation)\n\n    tables.sort()\n    return tables.tree_sequence()\n\n\nclass TestSplitEdgesSimpleTree:\n    # 2.00┊   4   ┊\n    #     ┊ ┏━┻┓  ┊\n    # 1.00┊ ┃  3  ┊\n    #     ┊ ┃ ┏┻┓ ┊\n    # 0.00┊ 0 1 2 ┊\n    #     0       1\n    @tests.cached_example\n    def ts(self):\n        return tskit.Tree.generate_balanced(3, branch_length=1).tree_sequence\n\n    @pytest.mark.parametrize(\"time\", [0.1, 0.5, 0.9])\n    def test_lowest_branches(self, time):\n        # 2.00┊   4   ┊    2.00┊   4   ┊\n        #     ┊ ┏━┻┓  ┊        ┊ ┏━┻┓  ┊\n        # 1.00┊ ┃  3  ┊    1.00┊ ┃  3  ┊\n        #     ┊ ┃ ┏┻┓ ┊        ┊ ┃ ┏┻┓ ┊\n        #     ┊ ┃ ┃ ┃ ┊      t ┊ 7 5 6 ┊\n        #     ┊ ┃ ┃ ┃ ┊ ->     ┊ ┃ ┃ ┃ ┊\n        # 0.00┊ 0 1 2 ┊    0.00┊ 0 1 2 ┊\n        #     0       1        0       1\n        before_ts = self.ts()\n        ts = before_ts.split_edges(time)\n        assert ts.num_nodes == 8\n        assert all(ts.node(u).time == time for u in [5, 6, 7])\n        assert ts.num_trees == 1\n        assert ts.first().parent_dict == {0: 7, 1: 5, 2: 6, 5: 3, 6: 3, 7: 4, 3: 4}\n        ts = ts.simplify()\n        ts.tables.assert_equals(before_ts.tables, ignore_provenance=True)\n\n    def test_same_time_as_node(self):\n        # 2.00┊   4   ┊    2.00┊   4   ┊\n        #     ┊ ┏━┻┓  ┊        ┊ ┏━┻┓  ┊\n        # 1.00┊ ┃  3  ┊    1.00┊ 5  3  ┊\n        #     ┊ ┃ ┏┻┓ ┊        ┊ ┃ ┏┻┓ ┊\n        # 
0.00┊ 0 1 2 ┊    0.00┊ 0 1 2 ┊\n        #     0       1        0       1\n        before_ts = self.ts()\n        ts = before_ts.split_edges(1)\n        assert ts.num_nodes == 6\n        assert ts.node(5).time == 1\n        assert ts.num_trees == 1\n        assert ts.first().parent_dict == {0: 5, 1: 3, 2: 3, 5: 4, 3: 4}\n        ts = ts.simplify()\n        ts.tables.assert_equals(before_ts.tables, ignore_provenance=True)\n\n    @pytest.mark.parametrize(\"time\", [1.1, 1.5, 1.9])\n    def test_top_branches(self, time):\n        # 2.00┊   4   ┊    2.00┊   4   ┊\n        #     ┊ ┏━┻┓  ┊        ┊ ┏━┻┓  ┊\n        #     ┊ ┃  ┃  ┊      t ┊ 5  6  ┊\n        #     ┊ ┃  ┃  ┊ ->     ┊ ┃  ┃  ┊\n        # 1.00┊ ┃  3  ┊    1.00┊ ┃  3  ┊\n        #     ┊ ┃ ┏┻┓ ┊        ┊ ┃ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊    0.00┊ 0 1 2 ┊\n        #     0       1        0       1\n\n        before_ts = self.ts()\n        ts = before_ts.split_edges(time)\n        assert ts.num_nodes == 7\n        assert all(ts.node(u).time == time for u in [5, 6])\n        assert ts.num_trees == 1\n        assert ts.first().parent_dict == {0: 5, 1: 3, 2: 3, 3: 6, 6: 4, 5: 4}\n        ts = ts.simplify()\n        ts.tables.assert_equals(before_ts.tables, ignore_provenance=True)\n\n    @pytest.mark.parametrize(\"time\", [0, 2])\n    def test_at_leaf_or_root_time(self, time):\n        split = self.ts().split_edges(time)\n        split.tables.assert_equals(self.ts().tables, ignore_provenance=True)\n\n    @pytest.mark.parametrize(\"time\", [-1, 2.1])\n    def test_outside_time_scales(self, time):\n        split = self.ts().split_edges(time)\n        split.tables.assert_equals(self.ts().tables, ignore_provenance=True)\n\n\nclass TestSplitEdgesSimpleTreeMutationExamples:\n    def test_single_mutation_no_time(self):\n        # 2.00┊   4   ┊\n        #     ┊ ┏━┻┓  ┊\n        # 1.00┊ ┃  3  ┊\n        #     ┊ x ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        tree = tskit.Tree.generate_balanced(3, 
branch_length=1)\n        tables = tree.tree_sequence.dump_tables()\n        tables.sites.add_row(0, \"A\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"T\", metadata=b\"1234\")\n        ts = tables.tree_sequence()\n\n        ts_split = ts.split_edges(1)\n        # 2.00┊   4   ┊\n        #     ┊ ┏━┻┓  ┊\n        # 1.00┊ 5  3  ┊\n        #     ┊ x ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        assert ts_split.num_nodes == 6\n        mut = ts_split.mutation(0)\n        assert mut.node == 0\n        assert mut.derived_state == \"T\"\n        assert mut.metadata == b\"1234\"\n        assert tskit.is_unknown_time(mut.time)\n\n    def test_single_mutation_split_before_time(self):\n        # 2.00┊   4   ┊\n        #     ┊ x━┻┓  ┊\n        # 1.00┊ ┃  3  ┊\n        #     ┊ ┃ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        tree = tskit.Tree.generate_balanced(3, branch_length=1)\n        tables = tree.tree_sequence.dump_tables()\n        tables.sites.add_row(0, \"A\")\n        tables.mutations.add_row(\n            site=0, node=0, time=1.5, derived_state=\"T\", metadata=b\"1234\"\n        )\n        ts = tables.tree_sequence()\n\n        ts_split = ts.split_edges(1)\n        # 2.00┊   4   ┊\n        #     ┊ x━┻┓  ┊\n        # 1.00┊ 5  3  ┊\n        #     ┊ ┃ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        assert ts_split.num_nodes == 6\n        mut = ts_split.mutation(0)\n        assert mut.node == 5\n        assert mut.derived_state == \"T\"\n        assert mut.metadata == b\"1234\"\n        assert mut.time == 1.5\n\n    def test_single_mutation_split_at_time(self):\n        # 2.00┊   4   ┊\n        #     ┊ ┏━┻┓  ┊\n        # 1.00┊ x  3  ┊\n        #     ┊ ┃ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        tree = tskit.Tree.generate_balanced(3, branch_length=1)\n        tables = tree.tree_sequence.dump_tables()\n        tables.sites.add_row(0, \"A\")\n        tables.mutations.add_row(\n  
          site=0, node=0, time=1, derived_state=\"T\", metadata=b\"1234\"\n        )\n        ts = tables.tree_sequence()\n\n        ts_split = ts.split_edges(1)\n        # 2.00┊   4   ┊\n        #     ┊ ┏━┻┓  ┊\n        # 1.00┊ 5x 3  ┊\n        #     ┊ ┃ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        mut = ts_split.mutation(0)\n        assert mut.node == 5\n        assert mut.derived_state == \"T\"\n        assert mut.metadata == b\"1234\"\n        assert mut.time == 1.0\n\n    def test_multi_mutation_no_time(self):\n        # 2.00┊   4   ┊\n        #     ┊ ┏━┻┓  ┊\n        # 1.00┊ x  3  ┊\n        #     ┊ x ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        tree = tskit.Tree.generate_balanced(3, branch_length=1)\n        tables = tree.tree_sequence.dump_tables()\n        tables.sites.add_row(0, \"A\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"T\")\n        tables.mutations.add_row(site=0, node=0, parent=0, derived_state=\"G\")\n        ts = tables.tree_sequence()\n\n        ts_split = ts.split_edges(1)\n        # 2.00┊   4   ┊\n        #     ┊ ┏━┻┓  ┊\n        #     ┊ 5  3  ┊\n        #     ┊ x  ┃  ┊\n        #     ┊ x ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        ts_split.tables.mutations.assert_equals(tables.mutations)\n\n    def test_multi_mutation_over_sample_time(self):\n        # 2.00┊   4   ┊\n        #     ┊ x━┻┓  ┊\n        # 1.00┊ ┃  3  ┊\n        #     ┊ x ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        tree = tskit.Tree.generate_balanced(3, branch_length=1)\n        tables = tree.tree_sequence.dump_tables()\n        tables.sites.add_row(0, \"A\")\n        tables.mutations.add_row(site=0, node=0, time=1.01, derived_state=\"T\")\n        tables.mutations.add_row(site=0, node=0, time=0.99, parent=0, derived_state=\"G\")\n        ts = tables.tree_sequence()\n\n        ts_split = ts.split_edges(1)\n        # 2.00┊   4   ┊\n        #     ┊ x━┻┓  ┊\n        # 1.00┊ 5  
3  ┊\n        #     ┊ x ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        assert ts_split.num_mutations == 2\n\n        mut = ts_split.mutation(0)\n        assert mut.site == 0\n        assert mut.node == 5\n        assert mut.time == 1.01\n        mut = ts_split.mutation(1)\n        assert mut.site == 0\n        assert mut.node == 0\n        assert mut.time == 0.99\n\n    def test_mutation_not_on_branch(self):\n        tables = tskit.TableCollection(1)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        tables.sites.add_row(0, \"A\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"T\")\n        ts = tables.tree_sequence()\n        tables.assert_equals(ts.split_edges(0).tables, ignore_provenance=True)\n\n\nclass TestSplitEdgesExamples:\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_genotypes_round_trip(self, ts):\n        time = 0 if ts.num_nodes == 0 else np.median(ts.tables.nodes.time)\n        if ts.num_migrations == 0:\n            split_ts = ts.split_edges(time)\n            assert np.array_equal(split_ts.genotype_matrix(), ts.genotype_matrix())\n        else:\n            with pytest.raises(tskit.LibraryError):\n                ts.split_edges(time)\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    @pytest.mark.parametrize(\"population\", [-1, None])\n    def test_definition(self, ts, population):\n        # The python implementation of split_edges performs a sort,\n        # which changes the order relative to the original tree sequence\n        tables = ts.dump_tables()\n        tables.sort()\n        ts = tables.tree_sequence()\n        time = 0 if ts.num_nodes == 0 else np.median(ts.tables.nodes.time)\n        if ts.num_migrations == 0:\n            ts1 = split_edges_definition(ts, time, population=population)\n            ts2 = ts.split_edges(time, population=population)\n            ts1.tables.assert_equals(ts2.tables, 
ignore_provenance=True)\n\n\nclass TestSplitEdgesInterface:\n    def test_migrations_fail(self, ts_fixture):\n        assert ts_fixture.num_migrations > 0\n        with pytest.raises(tskit.LibraryError, match=\"MIGRATIONS_NOT_SUPPORTED\"):\n            ts_fixture.split_edges(0)\n\n    def test_population_out_of_bounds(self):\n        tables = tskit.TableCollection(1)\n        ts = tables.tree_sequence()\n        with pytest.raises(tskit.LibraryError, match=\"POPULATION_OUT_OF_BOUNDS\"):\n            ts.split_edges(0, population=0)\n\n    def test_bad_flags(self):\n        ts = tskit.TableCollection(1).tree_sequence()\n        with pytest.raises(TypeError):\n            ts.split_edges(0, flags=\"asdf\")\n\n    def test_bad_metadata_no_schema(self):\n        ts = tskit.TableCollection(1).tree_sequence()\n        with pytest.raises(TypeError):\n            ts.split_edges(0, metadata=\"asdf\")\n\n    def test_bad_metadata_json_schema(self):\n        tables = tskit.TableCollection(1)\n        tables.nodes.metadata_schema = tskit.MetadataSchema.permissive_json()\n        ts = tables.tree_sequence()\n        with pytest.raises(tskit.MetadataEncodingError):\n            ts.split_edges(0, metadata=b\"bytes\")\n\n    @pytest.mark.parametrize(\"time\", [math.inf, np.inf, tskit.UNKNOWN_TIME, np.nan])\n    def test_nonfinite_time(self, time):\n        tables = tskit.TableCollection(1)\n        ts = tables.tree_sequence()\n        with pytest.raises(tskit.LibraryError, match=\"TIME_NONFINITE\"):\n            ts.split_edges(time)\n\n\nclass TestSplitEdgesNodeValues:\n    @tests.cached_example\n    def ts(self):\n        tables = tskit.TableCollection(1)\n        for _ in range(5):\n            tables.populations.add_row()\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, population=0, time=0)\n        tables.nodes.add_row(time=1)\n        tables.edges.add_row(0, 1, 1, 0)\n        return tables.tree_sequence()\n\n    @tests.cached_example\n    def ts_with_schema(self):\n   
     tables = tskit.TableCollection(1)\n        for _ in range(5):\n            tables.populations.add_row()\n        tables.nodes.metadata_schema = tskit.MetadataSchema.permissive_json()\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, population=0, time=0)\n        tables.nodes.add_row(time=1)\n        tables.edges.add_row(0, 1, 1, 0)\n        return tables.tree_sequence()\n\n    def test_default_population(self):\n        ts = self.ts().split_edges(0.5)\n        assert ts.node(2).population == -1\n\n    @pytest.mark.parametrize(\"population\", range(-1, 5))\n    def test_specify_population(self, population):\n        ts = self.ts().split_edges(0.5, population=population)\n        assert ts.node(2).population == population\n\n    def test_default_flags(self):\n        ts = self.ts().split_edges(0.5)\n        assert ts.node(2).flags == 0\n\n    @pytest.mark.parametrize(\"flags\", range(0, 5))\n    def test_specify_flags(self, flags):\n        ts = self.ts().split_edges(0.5, flags=flags)\n        assert ts.node(2).flags == flags\n\n    def test_default_metadata_no_schema(self):\n        ts = self.ts().split_edges(0.5)\n        assert ts.node(2).metadata == b\"\"\n\n    @pytest.mark.parametrize(\"metadata\", [b\"\", b\"some bytes\"])\n    def test_specify_metadata_no_schema(self, metadata):\n        ts = self.ts().split_edges(0.5, metadata=metadata)\n        assert ts.node(2).metadata == metadata\n\n    def test_default_metadata_with_schema(self):\n        ts = self.ts_with_schema().split_edges(0.5)\n        assert ts.node(2).metadata == {}\n\n    @pytest.mark.parametrize(\"metadata\", [{}, {\"some\": \"json\"}])\n    def test_specify_metadata_with_schema(self, metadata):\n        ts = self.ts_with_schema().split_edges(0.5, metadata=metadata)\n        assert ts.node(2).metadata == metadata\n\n\ndef decapitate_definition(ts, time, *, flags=0, population=None, metadata=None):\n    \"\"\"\n    Simple loop implementation of the decapitate operation\n    
\"\"\"\n    population = -1 if population is None else population\n    tables = ts.dump_tables()\n    node_time = tables.nodes.time\n    tables.edges.clear()\n    for edge in ts.edges():\n        if node_time[edge.parent] <= time:\n            tables.edges.append(edge)\n        elif node_time[edge.child] < time:\n            new_parent = tables.nodes.add_row(\n                time=time, population=population, flags=flags, metadata=metadata\n            )\n            tables.edges.append(edge.replace(parent=new_parent))\n\n    tables.mutations.clear()\n    for mutation in ts.mutations():\n        mutation_time = (\n            node_time[mutation.node]\n            if util.is_unknown_time(mutation.time)\n            else mutation.time\n        )\n        if mutation_time < time:\n            tables.mutations.append(mutation.replace(parent=tskit.NULL))\n\n    tables.migrations.clear()\n    for migration in ts.migrations():\n        if migration.time <= time:\n            tables.migrations.append(migration)\n\n    tables.build_index()\n    tables.compute_mutation_parents()\n    return tables.tree_sequence()\n\n\nclass TestDecapitateExamples:\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_defaults(self, ts):\n        time = 0 if ts.num_nodes == 0 else np.median(ts.tables.nodes.time)\n        if ts.num_migrations == 0:\n            decap1 = decapitate_definition(ts, time)\n            decap2 = ts.decapitate(time)\n            decap1.tables.assert_equals(decap2.tables, ignore_provenance=True)\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_no_population(self, ts):\n        time = 0 if ts.num_nodes == 0 else np.median(ts.tables.nodes.time)\n        if ts.num_migrations == 0:\n            decap1 = decapitate_definition(ts, time, population=-1)\n            decap2 = ts.decapitate(time, population=-1)\n            decap1.tables.assert_equals(decap2.tables, ignore_provenance=True)\n\n\nclass 
TestDecapitateSimpleTree:\n    # 2.00┊   4   ┊\n    #     ┊ ┏━┻┓  ┊\n    # 1.00┊ ┃  3  ┊\n    #     ┊ ┃ ┏┻┓ ┊\n    # 0.00┊ 0 1 2 ┊\n    #     0       1\n    @tests.cached_example\n    def ts(self):\n        tree = tskit.Tree.generate_balanced(3, branch_length=1)\n        return tree.tree_sequence\n\n    @pytest.mark.parametrize(\"time\", [0, -0.5, -100])\n    def test_t0_or_before(self, time):\n        before = self.ts()\n        ts = before.decapitate(time)\n        assert ts.num_trees == 1\n        tree = ts.first()\n        assert tree.num_roots == 3\n        assert list(sorted(tree.roots)) == [0, 1, 2]\n        assert before.tables.nodes.equals(ts.tables.nodes[: before.num_nodes])\n        assert ts.num_edges == 0\n\n    @pytest.mark.parametrize(\"time\", [0.01, 0.5, 0.999])\n    def test_t0_to_1(self, time):\n        #\n        # 2.00┊       ┊\n        #     ┊       ┊\n        # 0.99┊ 7 5 6 ┊\n        #     ┊ ┃ ┃ ┃ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        before = self.ts()\n        ts = before.decapitate(time)\n        assert ts.num_trees == 1\n        tree = ts.first()\n        assert tree.num_roots == 3\n        assert list(sorted(tree.roots)) == [5, 6, 7]\n        assert ts.num_nodes == 8\n        assert ts.tables.nodes[5].time == time\n        assert ts.tables.nodes[6].time == time\n        assert ts.tables.nodes[7].time == time\n\n    def test_t1(self):\n        #\n        # 2.00┊       ┊\n        #     ┊       ┊\n        # 1.00┊ 5  3  ┊\n        #     ┊ ┃ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        before = self.ts()\n        ts = before.decapitate(1)\n        assert ts.num_trees == 1\n        tree = ts.first()\n        assert tree.num_roots == 2\n        assert list(sorted(tree.roots)) == [3, 5]\n        assert ts.num_nodes == 6\n        assert ts.tables.nodes[5].time == 1\n\n    @pytest.mark.parametrize(\"time\", [1.01, 1.5, 1.999])\n    def test_t1_to_2(self, time):\n        # 2.00┊       ┊\n        #     ┊    
   ┊\n        # 1.01┊ 5  6  ┊\n        #     ┊ ┃  ┃  ┊\n        # 1.00┊ ┃  3  ┊\n        #     ┊ ┃ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #    0       1\n        before = self.ts()\n        ts = before.decapitate(time)\n        assert ts.num_trees == 1\n        tree = ts.first()\n        assert tree.num_roots == 2\n        assert list(sorted(tree.roots)) == [5, 6]\n        assert ts.num_nodes == 7\n        assert ts.tables.nodes[5].time == time\n        assert ts.tables.nodes[6].time == time\n\n    @pytest.mark.parametrize(\"time\", [2, 2.5, 1e9])\n    def test_t2(self, time):\n        before = self.ts()\n        ts = before.decapitate(time)\n        ts.tables.assert_equals(before.tables, ignore_provenance=True)\n\n\nclass TestDecapitateSimpleTreeMutationExamples:\n    def test_single_mutation_over_sample(self):\n        # 2.00┊   4   ┊\n        #     ┊ ┏━┻┓  ┊\n        # 1.00┊ ┃  3  ┊\n        #     ┊ x ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        tree = tskit.Tree.generate_balanced(3, branch_length=1)\n        tables = tree.tree_sequence.dump_tables()\n        tables.sites.add_row(0, \"A\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"T\")\n        before = tables.tree_sequence()\n\n        ts = before.decapitate(1)\n        # 2.00┊       ┊\n        #     ┊       ┊\n        # 1.00┊ 5  3  ┊\n        #     ┊ x ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        before.tables.mutations.assert_equals(ts.tables.mutations)\n        assert list(before.alignments()) == list(ts.alignments())\n\n    def test_single_mutation_at_decap_time(self):\n        # 2.00┊   4   ┊\n        #     ┊ ┏━┻┓  ┊\n        # 1.00┊ x  3  ┊\n        #     ┊ ┃ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        tree = tskit.Tree.generate_balanced(3, branch_length=1)\n        tables = tree.tree_sequence.dump_tables()\n        tables.sites.add_row(0, \"A\")\n        tables.mutations.add_row(site=0, node=0, time=1, 
derived_state=\"T\")\n        before = tables.tree_sequence()\n\n        # Because the mutation is at exactly the decapitation time, we must\n        # remove it, or it would violate the requirement that a mutation must\n        # have a time less than that of the parent of the edge that its on.\n        ts = before.decapitate(1)\n        # 2.00┊       ┊\n        #     ┊       ┊\n        # 1.00┊ 5  3  ┊\n        #     ┊ ┃ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        assert ts.num_mutations == 0\n        assert list(ts.alignments()) == [\"A\", \"A\", \"A\"]\n\n    def test_multi_mutation_over_sample(self):\n        # 2.00┊   4   ┊\n        #     ┊ ┏━┻┓  ┊\n        # 1.00┊ x  3  ┊\n        #     ┊ x ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        tree = tskit.Tree.generate_balanced(3, branch_length=1)\n        tables = tree.tree_sequence.dump_tables()\n        tables.sites.add_row(0, \"A\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"T\")\n        tables.mutations.add_row(site=0, node=0, parent=0, derived_state=\"G\")\n        before = tables.tree_sequence()\n\n        ts = before.decapitate(1)\n        # 2.00┊       ┊\n        #     ┊ 5  3  ┊\n        #     ┊ x  ┃  ┊\n        #     ┊ x ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        before.tables.mutations.assert_equals(ts.tables.mutations)\n        assert list(before.alignments()) == list(ts.alignments())\n\n    def test_multi_mutation_over_sample_time(self):\n        # 2.00┊   4   ┊\n        #     ┊ x━┻┓  ┊\n        # 1.00┊ ┃  3  ┊\n        #     ┊ x ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        tree = tskit.Tree.generate_balanced(3, branch_length=1)\n        tables = tree.tree_sequence.dump_tables()\n        tables.sites.add_row(0, \"A\")\n        tables.mutations.add_row(site=0, node=0, time=1.01, derived_state=\"T\")\n        tables.mutations.add_row(site=0, node=0, time=0.99, parent=0, derived_state=\"G\")\n        
before = tables.tree_sequence()\n\n        ts = before.decapitate(1)\n        # 2.00┊       ┊\n        #     ┊ 5  3  ┊\n        #     ┊ ┃  ┃  ┊\n        #     ┊ x ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        assert ts.num_mutations == 1\n        # Alignments are equal because the ancestral mutation was silent anyway.\n        assert list(before.alignments()) == list(ts.alignments())\n\n    def test_multi_mutation_over_root(self):\n        #         x\n        # 2.00┊   4   ┊\n        #     ┊ ┏━┻┓  ┊\n        # 1.00┊ ┃  3  ┊\n        #     ┊ x ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        tree = tskit.Tree.generate_balanced(3, branch_length=1)\n        tables = tree.tree_sequence.dump_tables()\n        tables.sites.add_row(0, \"A\")\n        tables.mutations.add_row(site=0, node=4, derived_state=\"G\")\n        tables.mutations.add_row(site=0, node=0, parent=0, derived_state=\"T\")\n        before = tables.tree_sequence()\n\n        ts = before.decapitate(1)\n        # 2.00┊       ┊\n        #     ┊ 5  3  ┊\n        #     ┊ ┃  ┃  ┊\n        #     ┊ x ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        assert ts.num_mutations == 1\n        assert list(before.alignments()) == [\"T\", \"G\", \"G\"]\n        # The states inherited by samples changes because we drop the old mutation\n        assert list(ts.alignments()) == [\"T\", \"A\", \"A\"]\n\n\nclass TestDecapitateSimpleTsExample:\n    # 9.08┊    9    ┊         ┊         ┊         ┊         ┊\n    #     ┊  ┏━┻━┓  ┊         ┊         ┊         ┊         ┊\n    # 6.57┊  ┃   ┃  ┊         ┊         ┊         ┊    8    ┊\n    #     ┊  ┃   ┃  ┊         ┊         ┊         ┊  ┏━┻━┓  ┊\n    # 5.31┊  ┃   ┃  ┊    7    ┊         ┊    7    ┊  ┃   ┃  ┊\n    #     ┊  ┃   ┃  ┊  ┏━┻━┓  ┊         ┊  ┏━┻━┓  ┊  ┃   ┃  ┊\n    # 1.75┊  ┃   ┃  ┊  ┃   ┃  ┊    6    ┊  ┃   ┃  ┊  ┃   ┃  ┊\n    #     ┊  ┃   ┃  ┊  ┃   ┃  ┊  ┏━┻━┓  ┊  ┃   ┃  ┊  ┃   ┃  ┊\n    # 1.11┊  ┃   5  ┊  ┃   5  ┊  ┃   5  
┊  ┃   5  ┊  ┃   5  ┊\n    #     ┊  ┃  ┏┻┓ ┊  ┃  ┏┻┓ ┊  ┃  ┏┻┓ ┊  ┃  ┏┻┓ ┊  ┃  ┏┻┓ ┊\n    # 0.11┊  4  ┃ ┃ ┊  4  ┃ ┃ ┊  4  ┃ ┃ ┊  4  ┃ ┃ ┊  4  ┃ ┃ ┊\n    #     ┊ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┊\n    # 0.00┊ 0 1 2 3 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊\n    #   0.00      0.06      0.79      0.91      0.91      1.00\n\n    @tests.cached_example\n    def ts(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   population      individual      time    metadata\n        0       1       0       -1      0\n        1       1       0       -1      0\n        2       1       0       -1      0\n        3       1       0       -1      0\n        4       0       0       -1      0.114\n        5       0       0       -1      1.110\n        6       0       0       -1      1.750\n        7       0       0       -1      5.310\n        8       0       0       -1      6.573\n        9       0       0       -1      9.083\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        id      left            right           parent  child\n        0       0.00000000      1.00000000      4       0\n        1       0.00000000      1.00000000      4       1\n        2       0.00000000      1.00000000      5       2\n        3       0.00000000      1.00000000      5       3\n        4       0.79258618      0.90634460      6       4\n        5       0.79258618      0.90634460      6       5\n        6       0.05975243      0.79258618      7       4\n        7       0.90634460      0.91029435      7       4\n        8       0.05975243      0.79258618      7       5\n        9       0.90634460      0.91029435      7       5\n        10      0.91029435      1.00000000      8       4\n        11      0.91029435      1.00000000      8       5\n        12      0.00000000      0.05975243      9       4\n        13      0.00000000      0.05975243      9       5\n        \"\"\"\n        )\n        sites = io.StringIO(\n    
        \"\"\"\\\n        position      ancestral_state\n        0.05          A\n        0.06          0\n        0.3           C\n        0.5           AAA\n        0.91          T\n        \"\"\"\n        )\n        muts = io.StringIO(\n            \"\"\"\\\n        site   node    derived_state    parent    time\n        0      9       T                -1        15\n        0      9       GGG              0         9.1\n        0      5       1                1         9\n        1      4       C                -1        1.6\n        1      4       G                3         1.5\n        2      7       G                -1        10\n        2      3       C                5         1\n        4      3       G                -1        1\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes, edges, sites=sites, mutations=muts, strict=False)\n        return ts\n\n    def test_at_time_of_5(self):\n        # NOTE: we don't remember that the edge 4-7 was shared in trees 1 and 3.\n        # 1.11┊  14  5  ┊ 11   5  ┊ 10   5  ┊  12  5  ┊  13  5  ┊\n        #     ┊  ┃  ┏┻┓ ┊  ┃  ┏┻┓ ┊  ┃  ┏┻┓ ┊  ┃  ┏┻┓ ┊  ┃  ┏┻┓ ┊\n        # 0.11┊  4  ┃ ┃ ┊  4  ┃ ┃ ┊  4  ┃ ┃ ┊  4  ┃ ┃ ┊  4  ┃ ┃ ┊\n        #     ┊ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┊\n        # 0.00┊ 0 1 2 3 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊\n        #   0.00      0.06      0.79      0.91      0.91      1.00\n        ts = self.ts().decapitate(1.110)\n        assert ts.num_nodes == 15\n        assert ts.num_trees == 5\n        # Most mutations are older than this.\n        assert ts.num_mutations == 2\n        for u in range(10, 15):\n            node = ts.node(u)\n            assert node.time == 1.110\n            assert node.flags == 0\n        assert [set(tree.roots) for tree in ts.trees()] == [\n            {5, 14},\n            {11, 5},\n            {10, 5},\n            {12, 5},\n            {13, 5},\n        ]\n\n    def test_at_time6(self):\n        # 6   ┊ 12   13 ┊         ┊         ┊ 
        ┊ 10   11 ┊\n        # 5.31┊  ┃   ┃  ┊    7    ┊         ┊    7    ┊  ┃   ┃  ┊\n        #     ┊  ┃   ┃  ┊  ┏━┻━┓  ┊         ┊  ┏━┻━┓  ┊  ┃   ┃  ┊\n        # 1.75┊  ┃   ┃  ┊  ┃   ┃  ┊    6    ┊  ┃   ┃  ┊  ┃   ┃  ┊\n        #     ┊  ┃   ┃  ┊  ┃   ┃  ┊  ┏━┻━┓  ┊  ┃   ┃  ┊  ┃   ┃  ┊\n        # 1.11┊  ┃   5  ┊  ┃   5  ┊  ┃   5  ┊  ┃   5  ┊  ┃   5  ┊\n        #     ┊  ┃  ┏┻┓ ┊  ┃  ┏┻┓ ┊  ┃  ┏┻┓ ┊  ┃  ┏┻┓ ┊  ┃  ┏┻┓ ┊\n        # 0.11┊  4  ┃ ┃ ┊  4  ┃ ┃ ┊  4  ┃ ┃ ┊  4  ┃ ┃ ┊  4  ┃ ┃ ┊\n        #     ┊ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┊\n        # 0.00┊ 0 1 2 3 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊\n        #   0.00      0.06      0.79      0.91      0.91      1.00\n        ts = self.ts().decapitate(6)\n        assert ts.num_nodes == 14\n        assert ts.num_trees == 5\n        assert ts.num_mutations == 4\n        for u in range(10, 14):\n            node = ts.node(u)\n            assert node.time == 6\n            assert node.flags == 0\n        assert [set(tree.roots) for tree in ts.trees()] == [\n            {12, 13},\n            {7},\n            {6},\n            {7},\n            {10, 11},\n        ]\n\n\nclass TestDecapitateNodeValues:\n    @tests.cached_example\n    def ts(self):\n        tables = tskit.TableCollection(1)\n        for _ in range(5):\n            tables.populations.add_row()\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, population=0, time=0)\n        tables.nodes.add_row(time=1)\n        tables.edges.add_row(0, 1, 1, 0)\n        return tables.tree_sequence()\n\n    @tests.cached_example\n    def ts_with_schema(self):\n        tables = tskit.TableCollection(1)\n        for _ in range(5):\n            tables.populations.add_row()\n        tables.nodes.metadata_schema = tskit.MetadataSchema.permissive_json()\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, population=0, time=0)\n        tables.nodes.add_row(time=1)\n        tables.edges.add_row(0, 1, 1, 0)\n        return tables.tree_sequence()\n\n    
def test_default_population(self):\n        ts = self.ts().decapitate(0.5)\n        assert ts.node(2).population == tskit.NULL\n\n    @pytest.mark.parametrize(\"population\", range(-1, 5))\n    def test_specify_population(self, population):\n        ts = self.ts().decapitate(0.5, population=population)\n        assert ts.node(2).population == population\n\n    def test_default_flags(self):\n        ts = self.ts().decapitate(0.5)\n        assert ts.node(2).flags == 0\n\n    @pytest.mark.parametrize(\"flags\", range(0, 5))\n    def test_specify_flags(self, flags):\n        ts = self.ts().decapitate(0.5, flags=flags)\n        assert ts.node(2).flags == flags\n\n    def test_default_metadata_no_schema(self):\n        ts = self.ts().decapitate(0.5)\n        assert ts.node(2).metadata == b\"\"\n\n    @pytest.mark.parametrize(\"metadata\", [b\"\", b\"some bytes\"])\n    def test_specify_metadata_no_schema(self, metadata):\n        ts = self.ts().decapitate(0.5, metadata=metadata)\n        assert ts.node(2).metadata == metadata\n\n    def test_default_metadata_with_schema(self):\n        ts = self.ts_with_schema().decapitate(0.5)\n        assert ts.node(2).metadata == {}\n\n    @pytest.mark.parametrize(\"metadata\", [{}, {\"some\": \"json\"}])\n    def test_specify_metadata_with_schema(self, metadata):\n        ts = self.ts_with_schema().decapitate(0.5, metadata=metadata)\n        assert ts.node(2).metadata == metadata\n\n\nclass TestDecapitateInterface:\n    @tests.cached_example\n    def ts(self):\n        tree = tskit.Tree.generate_balanced(3, branch_length=1)\n        return tree.tree_sequence\n\n    @pytest.mark.parametrize(\"bad_type\", [\"x\", \"0.1\", [], [0.1]])\n    def test_bad_types(self, bad_type):\n        with pytest.raises(TypeError, match=\"number\"):\n            self.ts().decapitate(bad_type)\n\n    @pytest.mark.parametrize(\n        \"time\", [1, 1.0, np.array([1])[0], fractions.Fraction(1, 1), decimal.Decimal(1)]\n    )\n    def test_number_types(self, 
time):\n        expected = self.ts().decapitate(1)\n        got = self.ts().decapitate(time)\n        expected.tables.assert_equals(got.tables, ignore_timestamps=True)\n\n    def test_migrations_not_supported(self, ts_fixture):\n        with pytest.raises(tskit.LibraryError, match=\"MIGRATIONS_NOT_SUPPORTED\"):\n            ts_fixture.decapitate(0)\n\n    def test_population_out_of_bounds(self):\n        tables = tskit.TableCollection(1)\n        ts = tables.tree_sequence()\n        with pytest.raises(tskit.LibraryError, match=\"POPULATION_OUT_OF_BOUNDS\"):\n            ts.decapitate(0, population=0)\n\n    def test_bad_flags(self):\n        ts = tskit.TableCollection(1).tree_sequence()\n        with pytest.raises(TypeError):\n            ts.decapitate(0, flags=\"asdf\")\n\n    def test_bad_metadata_no_schema(self):\n        ts = tskit.TableCollection(1).tree_sequence()\n        with pytest.raises(TypeError):\n            ts.decapitate(0, metadata=\"asdf\")\n\n    def test_bad_metadata_json_schema(self):\n        tables = tskit.TableCollection(1)\n        tables.nodes.metadata_schema = tskit.MetadataSchema.permissive_json()\n        ts = tables.tree_sequence()\n        with pytest.raises(tskit.MetadataEncodingError):\n            ts.decapitate(0, metadata=b\"bytes\")\n\n    @pytest.mark.parametrize(\"time\", [math.inf, np.inf, tskit.UNKNOWN_TIME, np.nan])\n    def test_nonfinite_time(self, time):\n        tables = tskit.TableCollection(1)\n        ts = tables.tree_sequence()\n        with pytest.raises(tskit.LibraryError, match=\"TIME_NONFINITE\"):\n            ts.decapitate(time)\n"
  },
  {
    "path": "python/tests/test_tables.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2024 Tskit Developers\n# Copyright (c) 2017 University of Oxford\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTest cases for the low-level tables used to transfer information\nbetween simulations and the tree sequence.\n\"\"\"\n\nimport dataclasses\nimport io\nimport json\nimport math\nimport pathlib\nimport pickle\nimport platform\nimport random\nimport re\nimport struct\nimport time\nimport unittest\nimport warnings\n\nimport kastore\nimport msprime\nimport numpy as np\nimport pytest\n\nimport _tskit\nimport tests.test_wright_fisher as wf\nimport tests.tsutil as tsutil\nimport tskit\nimport tskit.exceptions as exceptions\nimport tskit.metadata as metadata\nfrom tskit.tables import _ragged_selection_indices\n\n\nclass Column:\n    def __init__(self, name):\n        self.name = name\n\n\nclass Int32Column(Column):\n    def get_input(self, n):\n        return 1 + np.arange(n, 
dtype=np.int32)\n\n\nclass UInt8Column(Column):\n    def get_input(self, n):\n        return 2 + np.arange(n, dtype=np.uint8)\n\n\nclass UInt32Column(Column):\n    def get_input(self, n):\n        return 3 + np.arange(n, dtype=np.uint32)\n\n\nclass CharColumn(Column):\n    def get_input(self, n):\n        rng = np.random.RandomState(42)\n        return rng.randint(low=65, high=122, size=n, dtype=np.int8)\n\n\nclass DoubleColumn(Column):\n    def get_input(self, n):\n        return 4 + np.arange(n, dtype=np.float64)\n\n\nclass CommonTestsMixin:\n    \"\"\"\n    Abstract base class for common table tests. Because of the design of unittest,\n    we have to make this a mixin.\n    \"\"\"\n\n    def make_input_data(self, num_rows):\n        rng = np.random.RandomState(42)\n        input_data = {col.name: col.get_input(num_rows) for col in self.columns}\n        for list_col, offset_col in self.ragged_list_columns:\n            lengths = rng.randint(low=0, high=10, size=num_rows)\n            input_data[list_col.name] = list_col.get_input(sum(lengths))\n            input_data[offset_col.name] = np.zeros(num_rows + 1, dtype=np.uint64)\n            input_data[offset_col.name][1:] = np.cumsum(lengths, dtype=np.uint64)\n        return input_data\n\n    def make_transposed_input_data(self, num_rows):\n        cols = self.make_input_data(num_rows)\n        return [\n            {\n                col: (\n                    data[j]\n                    if len(data) == num_rows\n                    else (\n                        bytes(\n                            data[cols[f\"{col}_offset\"][j] : cols[f\"{col}_offset\"][j + 1]]\n                        )\n                        if \"metadata\" in col\n                        else data[\n                            cols[f\"{col}_offset\"][j] : cols[f\"{col}_offset\"][j + 1]\n                        ]\n                    )\n                )\n                for col, data in cols.items()\n                if \"offset\" not in 
col\n            }\n            for j in range(num_rows)\n        ]\n\n    @pytest.fixture\n    def test_rows(self, scope=\"session\"):\n        test_rows = self.make_transposed_input_data(10)\n        # Annoyingly we have to tweak some types as once added to a row and then put in\n        # an error message things come out differently\n        for n in range(10):\n            for col in test_rows[n].keys():\n                if col in [\"timestamp\", \"record\", \"ancestral_state\", \"derived_state\"]:\n                    test_rows[n][col] = bytes(test_rows[n][col]).decode(\"ascii\")\n        return test_rows\n\n    @pytest.fixture\n    def table(self, test_rows):\n        table = self.table_class()\n        for row in test_rows:\n            table.add_row(**row)\n        return table\n\n    @pytest.fixture\n    def table_5row(self, test_rows):\n        table_5row = self.table_class()\n        for row in test_rows[:5]:\n            table_5row.add_row(**row)\n        return table_5row\n\n    def test_asdict(self, table, test_rows):\n        for table_row, test_row in zip(table, test_rows):\n            for k, v in table_row.asdict().items():\n                if isinstance(v, np.ndarray):\n                    assert np.array_equal(v, test_row[k])\n                else:\n                    assert v == test_row[k]\n\n    def test_max_rows_increment(self):\n        for bad_value in [-1, -(2**10)]:\n            with pytest.raises(ValueError):\n                self.table_class(max_rows_increment=bad_value)\n        for v in [1, 100, 256]:\n            table = self.table_class(max_rows_increment=v)\n            assert table.max_rows_increment == v\n        # Setting zero implies doubling\n        table = self.table_class()\n        assert table.max_rows_increment == 0\n        table = self.table_class(max_rows_increment=1024)\n        assert table.max_rows_increment == 1024\n        table = self.table_class(max_rows_increment=0)\n        assert table.max_rows_increment 
== 0\n\n    def test_low_level_get_row(self):\n        # Tests the low-level get_row interface to ensure we're getting coverage.\n        t = self.table_class()\n        with pytest.raises(TypeError):\n            t.ll_table.get_row()\n        with pytest.raises(TypeError):\n            t.ll_table.get_row(\"row\")\n        with pytest.raises(_tskit.LibraryError):\n            t.ll_table.get_row(1)\n\n    def test_low_level_equals(self):\n        # Tests the low-level equals interface to ensure we're getting coverage.\n        t = self.table_class()\n        with pytest.raises(TypeError):\n            t.ll_table.equals()\n        with pytest.raises(TypeError):\n            t.ll_table.equals(None)\n\n    def test_low_level_set_columns(self):\n        t = self.table_class()\n        with pytest.raises(TypeError):\n            t.ll_table.set_columns(None)\n        with pytest.raises(TypeError):\n            t.ll_table.append_columns(None)\n\n    def test_input_parameters_errors(self):\n        assert len(self.input_parameters) > 0\n        for param, _ in self.input_parameters:\n            for bad_value in [-1, -(2**10)]:\n                with pytest.raises(ValueError):\n                    self.table_class(**{param: bad_value})\n            for bad_type in [None, ValueError, \"ser\"]:\n                with pytest.raises(TypeError):\n                    self.table_class(**{param: bad_type})\n\n    def test_input_parameter_values(self):\n        assert len(self.input_parameters) > 0\n        for param, _ in self.input_parameters:\n            for v in [1, 100, 256]:\n                table = self.table_class(**{param: v})\n                assert getattr(table, param) == v\n\n    def test_set_columns_string_errors(self):\n        inputs = {c.name: c.get_input(1) for c in self.columns}\n        for list_col, offset_col in self.ragged_list_columns:\n            value = list_col.get_input(1)\n            inputs[list_col.name] = value\n            inputs[offset_col.name] = 
[0, 1]\n        # Make sure this works.\n        table = self.table_class()\n        table.set_columns(**inputs)\n        for list_col, offset_col in self.ragged_list_columns:\n            kwargs = dict(inputs)\n            del kwargs[list_col.name]\n            with pytest.raises(TypeError):\n                table.set_columns(**kwargs)\n            kwargs = dict(inputs)\n            del kwargs[offset_col.name]\n            with pytest.raises(TypeError):\n                table.set_columns(**kwargs)\n\n    def test_set_columns_interface(self):\n        kwargs = self.make_input_data(1)\n        # Make sure this works.\n        table = self.table_class()\n        table.set_columns(**kwargs)\n        table.append_columns(**kwargs)\n        for focal_col in self.columns:\n            table = self.table_class()\n            for bad_type in [Exception, tskit]:\n                error_kwargs = dict(kwargs)\n                error_kwargs[focal_col.name] = bad_type\n                with pytest.raises(ValueError):\n                    table.set_columns(**error_kwargs)\n                with pytest.raises(ValueError):\n                    table.append_columns(**error_kwargs)\n            for bad_value in [\"qwer\", [0, \"sd\"]]:\n                error_kwargs = dict(kwargs)\n                error_kwargs[focal_col.name] = bad_value\n                with pytest.raises(ValueError):\n                    table.set_columns(**error_kwargs)\n                with pytest.raises(ValueError):\n                    table.append_columns(**error_kwargs)\n\n    def test_set_columns_from_dict(self):\n        kwargs = self.make_input_data(1)\n        # Make sure this works.\n        t1 = self.table_class()\n        t1.set_columns(**kwargs)\n        t2 = self.table_class()\n        t2.set_columns(**t1.asdict())\n        t1.assert_equals(t2)\n\n    def test_set_columns_dimension(self):\n        kwargs = self.make_input_data(1)\n        table = self.table_class()\n        table.set_columns(**kwargs)\n  
      table.append_columns(**kwargs)\n        for focal_col in self.columns:\n            table = self.table_class()\n            for bad_dims in [5, [[1], [1]], np.zeros((2, 2))]:\n                error_kwargs = dict(kwargs)\n                error_kwargs[focal_col.name] = bad_dims\n                with pytest.raises(ValueError):\n                    table.set_columns(**error_kwargs)\n                with pytest.raises(ValueError):\n                    table.append_columns(**error_kwargs)\n        for _, offset_col in self.ragged_list_columns:\n            error_kwargs = dict(kwargs)\n            for bad_dims in [5, [[1], [1]], np.zeros((2, 2))]:\n                error_kwargs[offset_col.name] = bad_dims\n                with pytest.raises(ValueError):\n                    table.set_columns(**error_kwargs)\n                with pytest.raises(ValueError):\n                    table.append_columns(**error_kwargs)\n            # Empty offset columns are caught also\n            error_kwargs[offset_col.name] = []\n            with pytest.raises(ValueError):\n                table.set_columns(**error_kwargs)\n\n    def test_set_columns_input_sizes(self):\n        input_data = self.make_input_data(100)\n        col_map = {col.name: col for col in self.columns}\n        for list_col, offset_col in self.ragged_list_columns:\n            col_map[list_col.name] = list_col\n            col_map[offset_col.name] = offset_col\n        table = self.table_class()\n        table.set_columns(**input_data)\n        table.append_columns(**input_data)\n        for equal_len_col_set in self.equal_len_columns:\n            if len(equal_len_col_set) > 1:\n                for col in equal_len_col_set:\n                    kwargs = dict(input_data)\n                    kwargs[col] = col_map[col].get_input(1)\n                    with pytest.raises(ValueError):\n                        table.set_columns(**kwargs)\n                    with pytest.raises(ValueError):\n                        
table.append_columns(**kwargs)\n\n    def test_set_read_only_attributes(self):\n        table = self.table_class()\n        with pytest.raises(AttributeError):\n            table.num_rows = 10\n        with pytest.raises(AttributeError):\n            table.max_rows = 10\n        for param, _default in self.input_parameters:\n            with pytest.raises(AttributeError):\n                setattr(table, param, 2)\n        assert table.num_rows == 0\n        assert len(table) == 0\n\n    def test_set_column_attributes_empty(self):\n        table = self.table_class()\n        input_data = {col.name: col.get_input(0) for col in self.columns}\n        for col, data in input_data.items():\n            setattr(table, col, data)\n            assert len(getattr(table, col)) == 0\n\n    def test_set_column_attributes_data(self):\n        table = self.table_class()\n        for num_rows in [1, 10, 100]:\n            input_data = self.make_input_data(num_rows)\n            table.set_columns(**input_data)\n\n            for list_col, offset_col in self.ragged_list_columns:\n                list_data = input_data[list_col.name]\n                assert np.array_equal(getattr(table, list_col.name), list_data)\n                list_data += 1\n                assert not np.array_equal(getattr(table, list_col.name), list_data)\n                setattr(table, list_col.name, list_data)\n                assert np.array_equal(getattr(table, list_col.name), list_data)\n                list_value = getattr(table[0], list_col.name)\n                assert len(list_value) == input_data[offset_col.name][1]\n\n                # Reset the offsets so that all the full array is associated with the\n                # first element.\n                offset_data = np.zeros(num_rows + 1, dtype=np.uint32) + len(\n                    input_data[list_col.name]\n                )\n                offset_data[0] = 0\n                setattr(table, offset_col.name, offset_data)\n                list_value 
= getattr(table[0], list_col.name)\n                assert len(list_value) == len(input_data[list_col.name])\n\n                del input_data[list_col.name]\n                del input_data[offset_col.name]\n\n            for col, data in input_data.items():\n                assert np.array_equal(getattr(table, col), data)\n                data += 1\n                assert not np.array_equal(getattr(table, col), data)\n                setattr(table, col, data)\n                assert np.array_equal(getattr(table, col), data)\n\n    def test_set_column_attributes_errors(self):\n        table = self.table_class()\n        num_rows = 10\n        input_data = self.make_input_data(num_rows)\n        table.set_columns(**input_data)\n\n        for list_col, offset_col in self.ragged_list_columns:\n            for bad_list_col in [[], input_data[list_col.name][:-1]]:\n                with pytest.raises(ValueError):\n                    setattr(table, list_col.name, bad_list_col)\n            for bad_offset_col in [[], np.arange(num_rows + 2, dtype=np.uint32)]:\n                with pytest.raises(ValueError):\n                    setattr(table, offset_col.name, bad_offset_col)\n\n            del input_data[list_col.name]\n            del input_data[offset_col.name]\n\n        for col, data in input_data.items():\n            for bad_data in [[], data[:-1]]:\n                with pytest.raises(ValueError):\n                    setattr(table, col, bad_data)\n\n        # Try to read a column that isn't there. 
(We can always write to new attributes\n        # in Python, so there's nothing to test in that case.)\n        with pytest.raises(AttributeError):\n            _ = table.no_such_column\n\n    def test_defaults(self):\n        table = self.table_class()\n        assert table.num_rows == 0\n        assert len(table) == 0\n        for param, default in self.input_parameters:\n            assert getattr(table, param) == default\n        for col in self.columns:\n            array = getattr(table, col.name)\n            assert array.shape == (0,)\n\n    def test_add_row_data(self):\n        for num_rows in [0, 10, 100]:\n            table = self.table_class()\n            for j, row in enumerate(self.make_transposed_input_data(num_rows)):\n                k = table.add_row(**row)\n                assert k == j\n            for colname, input_array in self.make_input_data(num_rows).items():\n                output_array = getattr(table, colname)\n                assert input_array.shape == output_array.shape\n                assert np.all(input_array == output_array)\n            table.clear()\n            assert table.num_rows == 0\n            assert len(table) == 0\n\n    def test_add_row_round_trip(self):\n        for num_rows in [0, 10, 100]:\n            input_data = self.make_input_data(num_rows)\n            t1 = self.table_class()\n            t1.set_columns(**input_data)\n            for colname, input_array in input_data.items():\n                output_array = getattr(t1, colname)\n                assert input_array.shape == output_array.shape\n                assert np.all(input_array == output_array)\n            t2 = self.table_class()\n            for row in list(t1):\n                t2.add_row(**dataclasses.asdict(row))\n            t1.assert_equals(t2)\n\n    def test_append_row(self):\n        for num_rows in [0, 10, 100]:\n            table = self.table_class()\n            for j, row in enumerate(self.make_transposed_input_data(num_rows)):\n        
        k = table.append(table.row_class(**row))\n                assert k == j\n            for colname, input_array in self.make_input_data(num_rows).items():\n                output_array = getattr(table, colname)\n                assert input_array.shape == output_array.shape\n                assert np.all(input_array == output_array)\n            table.clear()\n            assert table.num_rows == 0\n            assert len(table) == 0\n\n    def test_append_duck_type(self):\n        class Duck:\n            pass\n\n        table = self.table_class()\n        for j, row in enumerate(self.make_transposed_input_data(20)):\n            duck = Duck()\n            for k, v in row.items():\n                setattr(duck, k, v)\n            k = table.append(duck)\n            assert k == j\n        for colname, input_array in self.make_input_data(20).items():\n            output_array = getattr(table, colname)\n            assert np.array_equal(input_array, output_array)\n\n    def test_append_error(self):\n        class NotADuck:\n            pass\n\n        with pytest.raises(AttributeError, match=\"'NotADuck' object has no attribute\"):\n            self.table_class().append(NotADuck())\n\n    def test_setitem(self):\n        table = self.table_class()\n        for row in self.make_transposed_input_data(10):\n            table.append(table.row_class(**row))\n        table2 = self.table_class()\n        for row in self.make_transposed_input_data(20)[10:]:\n            table2.append(table.row_class(**row))\n        assert table != table2\n\n        copy = table.copy()\n        for j in range(10):\n            table[j] = table[j]\n        table.assert_equals(copy)\n\n        for j in range(10):\n            table[j] = table2[j]\n        table.assert_equals(table2)\n\n    def test_setitem_duck_type(self):\n        class Duck:\n            pass\n\n        table = self.table_class()\n        for row in self.make_transposed_input_data(10):\n            
table.append(table.row_class(**row))\n        table2 = self.table_class()\n        for row in self.make_transposed_input_data(20)[10:]:\n            table2.append(table.row_class(**row))\n        assert table != table2\n\n        for j in range(10):\n            duck = Duck()\n            for k, v in dataclasses.asdict(table2[j]).items():\n                setattr(duck, k, v)\n            table[j] = duck\n        table.assert_equals(table2)\n\n    def test_setitem_error(self):\n        class NotADuck:\n            pass\n\n        table = self.table_class()\n        table.append(table.row_class(**self.make_transposed_input_data(1)[0]))\n        with pytest.raises(AttributeError, match=\"'NotADuck' object has no attribute\"):\n            table[0] = NotADuck()\n\n        with pytest.raises(IndexError, match=\"Index out of bounds\"):\n            self.table_class()[0] = table[0]\n        with pytest.raises(IndexError, match=\"Index out of bounds\"):\n            self.table_class()[-1] = table[0]\n\n        with pytest.raises(TypeError, match=\"Index must be integer\"):\n            self.table_class()[0.5] = table[0]\n        with pytest.raises(TypeError, match=\"Index must be integer\"):\n            self.table_class()[None] = table[0]\n        with pytest.raises(TypeError, match=\"Index must be integer\"):\n            self.table_class()[[1]] = table[0]\n\n    def test_set_columns_data(self):\n        for num_rows in [0, 10, 100, 1000]:\n            input_data = {col.name: col.get_input(num_rows) for col in self.columns}\n            offset_cols = set()\n            for list_col, offset_col in self.ragged_list_columns:\n                value = list_col.get_input(num_rows)\n                input_data[list_col.name] = value\n                input_data[offset_col.name] = np.arange(num_rows + 1, dtype=np.uint32)\n                offset_cols.add(offset_col.name)\n            table = self.table_class()\n            for _ in range(5):\n                
table.set_columns(**input_data)\n                for colname, input_array in input_data.items():\n                    output_array = getattr(table, colname)\n                    assert input_array.shape == output_array.shape\n                    assert np.all(input_array == output_array)\n                table.clear()\n                assert table.num_rows == 0\n                assert len(table) == 0\n                for colname in input_data.keys():\n                    if colname in offset_cols:\n                        assert list(getattr(table, colname)) == [0]\n                    else:\n                        assert list(getattr(table, colname)) == []\n\n    def test_truncate(self):\n        num_rows = 100\n        input_data = {col.name: col.get_input(num_rows) for col in self.columns}\n        for list_col, offset_col in self.ragged_list_columns:\n            value = list_col.get_input(2 * num_rows)\n            input_data[list_col.name] = value\n            input_data[offset_col.name] = 2 * np.arange(num_rows + 1, dtype=np.uint32)\n        table = self.table_class()\n        table.set_columns(**input_data)\n\n        copy = table.copy()\n        table.truncate(num_rows)\n        assert copy == table\n\n        for num_rows in [100, 10, 1]:\n            table.truncate(num_rows)\n            assert table.num_rows == num_rows\n            assert len(table) == num_rows\n            used = set()\n            for list_col, offset_col in self.ragged_list_columns:\n                offset = getattr(table, offset_col.name)\n                assert offset.shape == (num_rows + 1,)\n                assert np.array_equal(\n                    input_data[offset_col.name][: num_rows + 1], offset\n                )\n                list_data = getattr(table, list_col.name)\n                assert np.array_equal(list_data, input_data[list_col.name][: offset[-1]])\n                used.add(offset_col.name)\n                used.add(list_col.name)\n            for name, data 
in input_data.items():\n                if name not in used:\n                    assert np.array_equal(data[:num_rows], getattr(table, name))\n\n    def test_truncate_errors(self):\n        num_rows = 10\n        input_data = {col.name: col.get_input(num_rows) for col in self.columns}\n        for list_col, offset_col in self.ragged_list_columns:\n            value = list_col.get_input(2 * num_rows)\n            input_data[list_col.name] = value\n            input_data[offset_col.name] = 2 * np.arange(num_rows + 1, dtype=np.uint32)\n        table = self.table_class()\n        table.set_columns(**input_data)\n        for bad_type in [None, 0.001, {}]:\n            with pytest.raises(TypeError):\n                table.truncate(bad_type)\n        for bad_num_rows in [-1, num_rows + 1, 10**6]:\n            with pytest.raises(ValueError):\n                table.truncate(bad_num_rows)\n\n    def test_append_columns_data(self):\n        for num_rows in [0, 10, 100, 1000]:\n            input_data = self.make_input_data(num_rows)\n            offset_cols = set()\n            for _, offset_col in self.ragged_list_columns:\n                offset_cols.add(offset_col.name)\n            table = self.table_class()\n            for j in range(1, 10):\n                table.append_columns(**input_data)\n                for colname, values in input_data.items():\n                    output_array = getattr(table, colname)\n                    if colname in offset_cols:\n                        input_array = np.zeros(j * num_rows + 1, dtype=np.uint32)\n                        for k in range(j):\n                            input_array[k * num_rows : (k + 1) * num_rows + 1] = (\n                                k * values[-1]\n                            ) + values\n                        assert input_array.shape == output_array.shape\n                    else:\n                        input_array = np.hstack([values for _ in range(j)])\n                        assert 
input_array.shape == output_array.shape\n                    assert np.array_equal(input_array, output_array)\n                assert table.num_rows == j * num_rows\n                assert len(table) == j * num_rows\n\n    def test_append_columns_max_rows(self):\n        for num_rows in [0, 10, 100, 1000]:\n            input_data = self.make_input_data(num_rows)\n            for max_rows in [1, 8192]:\n                table = self.table_class(max_rows_increment=max_rows)\n                for j in range(1, 10):\n                    table.append_columns(**input_data)\n                    assert table.num_rows == j * num_rows\n                    assert len(table) == j * num_rows\n                    if table.num_rows == 0:\n                        assert table.max_rows == 1\n                    elif table.num_rows > max_rows + 1:\n                        assert table.max_rows == max((max_rows * 2) + 1, table.num_rows)\n                    else:\n                        assert table.max_rows == max(max_rows + 1, table.num_rows)\n\n    def test_keep_rows_data(self):\n        input_data = self.make_input_data(100)\n        t1 = self.table_class()\n        t1.append_columns(**input_data)\n        t2 = t1.copy()\n        keep = np.ones(len(t1), dtype=bool)\n        # Only keep even\n        keep[::2] = 0\n        t1.keep_rows(keep)\n        keep_rows_definition(t2, keep)\n        assert t1.equals(t2)\n\n    def test_str(self):\n        for num_rows in [0, 10]:\n            input_data = self.make_input_data(num_rows)\n            table = self.table_class()\n            table.set_columns(**input_data)\n            s = str(table)\n            assert len(s.splitlines()) == num_rows + 4\n        input_data = self.make_input_data(41)\n        table = self.table_class()\n        table.set_columns(**input_data)\n        blank_meta_row = 39\n        if \"metadata\" in input_data:\n            table[blank_meta_row] = table[blank_meta_row].replace(metadata=b\"\")\n        assert \"1 
rows skipped\" in str(table)\n        tskit.set_print_options(max_lines=None)\n        assert \"1 rows skipped\" not in str(table)\n        assert \"b''\" not in str(table)\n        tskit.set_print_options(max_lines=40)\n        tskit.MAX_LINES = 40\n\n    def test_str_pos_time_integer(self):\n        num_rows = 2\n        identifiable_integers = [12345, 54321]\n        identifiable_floats = [1.2345, 5.4321]\n        table = self.table_class()\n        for test_cols in [\n            [\"left\", \"right\"],\n            [\"position\"],\n            [\"time\"],\n        ]:  # only cols that get discretised\n            input_data = self.make_input_data(num_rows)\n            if all(col in input_data for col in test_cols):\n                for i, col in enumerate(test_cols):\n                    input_data[col] = [identifiable_floats[i]] * num_rows\n                table.set_columns(**input_data)\n                _, rows = table._text_header_and_rows()\n                for row in rows:\n                    assert f\"{identifiable_floats[0]:.8f}\" in row\n                    assert f\"{identifiable_integers[0]}\" not in row\n                for i, col in enumerate(test_cols):\n                    input_data[col] = [identifiable_integers[i]] * num_rows\n                table.set_columns(**input_data)\n                _, rows = table._text_header_and_rows()\n                for row in rows:\n                    assert f\"{identifiable_integers[0]:,}\" in row\n                    assert f\"{identifiable_floats[0]:.8f}\" not in row\n\n    def test_repr_html(self):\n        for num_rows in [0, 10, 40, 50]:\n            input_data = {col.name: col.get_input(num_rows) for col in self.columns}\n            for list_col, offset_col in self.ragged_list_columns:\n                value = list_col.get_input(num_rows)\n                input_data[list_col.name] = value\n                input_data[offset_col.name] = np.arange(num_rows + 1, dtype=np.uint32)\n            table = 
self.table_class()\n            table.set_columns(**input_data)\n            html = table._repr_html_()\n            if num_rows == 50:\n                assert len(html.splitlines()) == num_rows + 11\n                assert (\n                    \"<em>10 rows skipped (tskit.set_print_options)</em>\"\n                    in html.split(\"</tr>\")[21]\n                )\n            else:\n                assert len(html.splitlines()) == num_rows + 20\n\n    def test_copy(self):\n        for num_rows in [0, 10]:\n            input_data = self.make_input_data(num_rows)\n            table = self.table_class()\n            table.set_columns(**input_data)\n            for _ in range(10):\n                copy = table.copy()\n                assert id(copy) != id(table)\n                assert isinstance(copy, self.table_class)\n                copy.assert_equals(table)\n                table = copy\n\n    def test_pickle(self):\n        for num_rows in [0, 10, 100]:\n            input_data = self.make_input_data(num_rows)\n            table = self.table_class()\n            table.set_columns(**input_data)\n            pkl = pickle.dumps(table)\n            new_table = pickle.loads(pkl)\n            table.assert_equals(new_table)\n            for protocol in range(pickle.HIGHEST_PROTOCOL + 1):\n                pkl = pickle.dumps(table, protocol=protocol)\n                new_table = pickle.loads(pkl)\n                table.assert_equals(new_table)\n\n    def test_equality(self):\n        for num_rows in [1, 10, 100]:\n            input_data = self.make_input_data(num_rows)\n            t1 = self.table_class()\n            t2 = self.table_class()\n            assert t1 == t1\n            assert t1 == t2\n            assert t1 == t2\n            assert not (t1 != t2)\n            t1.set_columns(**input_data)\n            assert t1 == t1\n            assert t1 != t2\n            assert t2 != t1\n            t2.set_columns(**input_data)\n            assert t1 == t2\n          
  assert t2 == t2\n            t2.clear()\n            assert t1 != t2\n            assert t2 != t1\n            # Check each column in turn to see if we are correctly checking values.\n            for col in self.columns:\n                col_copy = np.copy(input_data[col.name])\n                input_data_copy = dict(input_data)\n                input_data_copy[col.name] = col_copy\n                t2.set_columns(**input_data_copy)\n                assert t1 == t2\n                assert not (t1 != t2)\n                assert t1[0] == t2[0]\n                col_copy += 1\n                t2.set_columns(**input_data_copy)\n                assert t1 != t2\n                assert t2 != t1\n                assert t1[0] != t2[0]\n                assert t1[0] != t2[0]\n                assert t1[0] != []\n            for list_col, offset_col in self.ragged_list_columns:\n                value = list_col.get_input(num_rows)\n                input_data_copy = dict(input_data)\n                input_data_copy[list_col.name] = value + 1\n                input_data_copy[offset_col.name] = np.arange(\n                    num_rows + 1, dtype=np.uint32\n                )\n                t2.set_columns(**input_data_copy)\n                assert t1 != t2\n                assert t1[0] != t2[0]\n                value = list_col.get_input(num_rows + 1)\n                input_data_copy = dict(input_data)\n                input_data_copy[list_col.name] = value\n                input_data_copy[offset_col.name] = np.arange(\n                    num_rows + 1, dtype=np.uint32\n                )\n                input_data_copy[offset_col.name][-1] = num_rows + 1\n                t2.set_columns(**input_data_copy)\n                assert t1 != t2\n                assert t2 != t1\n                assert t1[-1] != t2[-1]\n            # Different types should always be unequal.\n            assert t1 is not None\n            assert t1 != []\n\n    def test_nbytes(self):\n        for num_rows 
in [0, 10, 100]:\n            input_data = self.make_input_data(num_rows)\n            table = self.table_class()\n            table.set_columns(**input_data)\n            # We don't have any metadata_schema here, so we can sum over the\n            # columns directly.\n            assert sum(col.nbytes for col in input_data.values()) == table.nbytes\n\n    def test_bad_offsets(self):\n        for num_rows in [10, 100]:\n            input_data = self.make_input_data(num_rows)\n            t = self.table_class()\n            t.set_columns(**input_data)\n\n            for _list_col, offset_col in self.ragged_list_columns:\n                original_offset = np.copy(input_data[offset_col.name])\n                # As numpy no longer allows conversion of out-of-bounds values, we\n                # explicitly cast first.\n                input_data[offset_col.name][0] = np.array(-1).astype(\n                    input_data[offset_col.name].dtype\n                )\n                with pytest.raises(ValueError):\n                    t.set_columns(**input_data)\n                input_data[offset_col.name] = np.copy(original_offset)\n                t.set_columns(**input_data)\n                input_data[offset_col.name][-1] = 0\n                with pytest.raises(ValueError):\n                    t.set_columns(**input_data)\n                input_data[offset_col.name] = np.copy(original_offset)\n                t.set_columns(**input_data)\n                input_data[offset_col.name][num_rows // 2] = 2**31\n                with pytest.raises(ValueError):\n                    t.set_columns(**input_data)\n                input_data[offset_col.name] = np.copy(original_offset)\n\n                input_data[offset_col.name][0] = np.array(-1).astype(\n                    input_data[offset_col.name].dtype\n                )\n                with pytest.raises(ValueError):\n                    t.append_columns(**input_data)\n                input_data[offset_col.name] = 
np.copy(original_offset)\n                t.append_columns(**input_data)\n                input_data[offset_col.name][-1] = 0\n                with pytest.raises(ValueError):\n                    t.append_columns(**input_data)\n                input_data[offset_col.name] = np.copy(original_offset)\n                t.append_columns(**input_data)\n                input_data[offset_col.name][num_rows // 2] = 2**31\n                with pytest.raises(ValueError):\n                    t.append_columns(**input_data)\n                input_data[offset_col.name] = np.copy(original_offset)\n\n    def test_replace_with_wrong_class(self):\n        t = self.table_class()\n        with pytest.raises(TypeError, match=\"is required\"):\n            t.replace_with(tskit.MutableBaseTable(None, None))\n\n\nclass MetadataTestsMixin:\n    \"\"\"\n    Tests for column that have metadata columns.\n    \"\"\"\n\n    metadata_schema = metadata.MetadataSchema(\n        {\n            \"codec\": \"json\",\n            \"title\": \"Example Metadata\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"one\": {\"type\": \"string\"},\n                \"two\": {\"type\": \"number\"},\n                \"three\": {\"type\": \"array\"},\n                \"four\": {\"type\": \"boolean\"},\n            },\n            \"required\": [\"one\", \"two\", \"three\", \"four\"],\n            \"additionalProperties\": False,\n        },\n    )\n\n    def metadata_example_data(self):\n        try:\n            self.val += 1\n        except AttributeError:\n            self.val = 0\n        return {\n            \"one\": \"val one\",\n            \"two\": self.val,\n            \"three\": list(range(self.val, self.val + 10)),\n            \"four\": True,\n        }\n\n    def input_data_for_add_row(self):\n        input_data = {col.name: col.get_input(1) for col in self.columns}\n        kwargs = {col: data[0] for col, data in input_data.items()}\n        for col in 
self.string_colnames:\n            kwargs[col] = \"x\"\n        for col in self.binary_colnames:\n            kwargs[col] = b\"x\"\n        return kwargs\n\n    def test_random_metadata(self):\n        for num_rows in [0, 10, 100]:\n            input_data = self.make_input_data(num_rows)\n            table = self.table_class()\n            metadatas = [tsutil.random_bytes(10) for _ in range(num_rows)]\n            metadata, metadata_offset = tskit.pack_bytes(metadatas)\n            input_data[\"metadata\"] = metadata\n            input_data[\"metadata_offset\"] = metadata_offset\n            table.set_columns(**input_data)\n            unpacked_metadatas = tskit.unpack_bytes(\n                table.metadata, table.metadata_offset\n            )\n            assert metadatas == unpacked_metadatas\n\n    def test_drop_metadata(self):\n        for num_rows in [1, 10, 100]:\n            input_data = self.make_input_data(num_rows)\n            table_no_meta = self.table_class()\n            table_with_meta = self.table_class()\n            table_with_meta.set_columns(**input_data)\n            if not getattr(self, \"metadata_mandatory\", False):\n                del input_data[\"metadata\"]\n                del input_data[\"metadata_offset\"]\n            else:\n                # Have to do this slightly circular way for the population\n                # table because it requires metadata.\n                input_data[\"metadata\"] = []\n                input_data[\"metadata_offset\"][:] = 0\n            table_no_meta.set_columns(**input_data)\n            assert not table_no_meta.equals(table_with_meta)\n            table_with_meta.drop_metadata()\n            table_no_meta.assert_equals(table_with_meta)\n\n    def test_optional_metadata(self):\n        if not getattr(self, \"metadata_mandatory\", False):\n            for num_rows in [0, 10, 100]:\n                input_data = self.make_input_data(num_rows)\n                table = self.table_class()\n                
del input_data[\"metadata\"]\n                del input_data[\"metadata_offset\"]\n                table.set_columns(**input_data)\n                assert len(list(table.metadata)) == 0\n                assert list(table.metadata_offset) == [0 for _ in range(num_rows + 1)]\n                # Supplying None is the same as not providing the column.\n                input_data[\"metadata\"] = None\n                input_data[\"metadata_offset\"] = None\n                table.set_columns(**input_data)\n                assert len(list(table.metadata)) == 0\n                assert list(table.metadata_offset) == [0 for _ in range(num_rows + 1)]\n\n    def test_packset_metadata(self):\n        for num_rows in [0, 10, 100]:\n            input_data = self.make_input_data(num_rows)\n            table = self.table_class()\n            table.set_columns(**input_data)\n            metadatas = [tsutil.random_bytes(10) for _ in range(num_rows)]\n            metadata, metadata_offset = tskit.pack_bytes(metadatas)\n            table.packset_metadata(metadatas)\n            assert np.array_equal(table.metadata, metadata)\n            assert np.array_equal(table.metadata_offset, metadata_offset)\n\n    def test_set_metadata_schema(self):\n        metadata_schema2 = metadata.MetadataSchema({\"codec\": \"json\"})\n        table = self.table_class()\n        # Default is no-op metadata codec\n        assert repr(table.metadata_schema) == repr(metadata.MetadataSchema(None))\n        # Set\n        table.metadata_schema = self.metadata_schema\n        assert repr(table.metadata_schema) == repr(self.metadata_schema)\n        # Overwrite\n        table.metadata_schema = metadata_schema2\n        assert repr(table.metadata_schema) == repr(metadata_schema2)\n        # Remove\n        table.metadata_schema = metadata.MetadataSchema(None)\n        assert repr(table.metadata_schema) == repr(metadata.MetadataSchema(None))\n        # Set after remove\n        table.metadata_schema = 
self.metadata_schema\n        assert repr(table.metadata_schema) == repr(self.metadata_schema)\n        # Del should fail\n        with pytest.raises(AttributeError):\n            del table.metadata_schema\n        # None should fail\n        with pytest.raises(\n            TypeError,\n            match=\"Only instances of tskit.MetadataSchema can be assigned to \"\n            \"metadata_schema, not <class 'NoneType'>\",\n        ):\n            table.metadata_schema = None\n        # And dict\n        with pytest.raises(\n            TypeError,\n            match=\"Only instances of tskit.MetadataSchema can be assigned to \"\n            \"metadata_schema, not <class 'dict'>\",\n        ):\n            table.metadata_schema = {}\n\n    def test_drop_metadata_with_schema(self):\n        table = self.table_class()\n        table.metadata_schema = metadata.MetadataSchema.permissive_json()\n        data = self.input_data_for_add_row()\n        data[\"metadata\"] = {\"a\": \"dict\"}\n        table.add_row(**data)\n        assert table[0].metadata == {\"a\": \"dict\"}\n        table.drop_metadata()\n        assert table.metadata_schema == metadata.MetadataSchema.null()\n        assert table[0].metadata == b\"\"\n\n    def test_drop_metadata_keep_schema(self):\n        table = self.table_class()\n        table.metadata_schema = metadata.MetadataSchema.permissive_json()\n        data = self.input_data_for_add_row()\n        data[\"metadata\"] = {\"a\": \"dict\"}\n        table.add_row(**data)\n        assert table[0].metadata == {\"a\": \"dict\"}\n        table.drop_metadata(keep_schema=True)\n        assert table.metadata_schema == metadata.MetadataSchema.permissive_json()\n        assert table[0].metadata == {}\n\n    def test_default_metadata_schema(self):\n        # Default should allow bytes as in pre-existing code\n        table = self.table_class()\n        table.add_row(\n            **{**self.input_data_for_add_row(), \"metadata\": b\"acceptable bytes\"}\n     
   )\n        # Adding non-bytes metadata should error\n        with pytest.raises(TypeError):\n            table.add_row(\n                **{\n                    **self.input_data_for_add_row(),\n                    \"metadata\": self.metadata_example_data(),\n                }\n            )\n\n    def test_default_metadata_add_row(self):\n        row_data = self.input_data_for_add_row()\n        del row_data[\"metadata\"]\n\n        table = self.table_class()\n        table.add_row(**row_data)\n        assert table[0].metadata == b\"\"\n        assert table[0].metadata == table.metadata_schema.empty_value\n\n        table = self.table_class()\n        table.metadata_schema = tskit.MetadataSchema({\"codec\": \"json\"})\n        table.add_row(**row_data)\n        assert table[0].metadata == {}\n        assert table[0].metadata == table.metadata_schema.empty_value\n\n    def test_row_round_trip_metadata_schema(self):\n        data = self.metadata_example_data()\n        table = self.table_class()\n        table.metadata_schema = self.metadata_schema\n        table.add_row(**{**self.input_data_for_add_row(), \"metadata\": data})\n        assert table[0].metadata == data\n\n    def test_bad_row_metadata_schema(self):\n        metadata = self.metadata_example_data()\n        metadata[\"I really shouldn't be here\"] = 6\n        table = self.table_class()\n        table.metadata_schema = self.metadata_schema\n        with pytest.raises(exceptions.MetadataValidationError):\n            table.add_row(**{**self.input_data_for_add_row(), \"metadata\": metadata})\n        assert len(table) == 0\n\n    def test_absent_metadata_with_required_schema(self):\n        table = self.table_class()\n        table.metadata_schema = self.metadata_schema\n        input_data = self.input_data_for_add_row()\n        del input_data[\"metadata\"]\n        with pytest.raises(exceptions.MetadataValidationError):\n            table.add_row(**{**input_data})\n\n    def 
test_unsupported_type(self):\n        table = self.table_class()\n        table.metadata_schema = metadata.MetadataSchema(\n            {\n                \"codec\": \"json\",\n                \"type\": \"object\",\n                \"properties\": {\"an_array\": {\"type\": \"array\"}},\n            }\n        )\n        input_data = self.input_data_for_add_row()\n        # Numpy is not a JSONSchema array\n        input_data[\"metadata\"] = {\"an_array\": np.arange(10)}\n        with pytest.raises(exceptions.MetadataValidationError):\n            table.add_row(**{**input_data})\n\n    def test_round_trip_set_columns(self):\n        for num_rows in [0, 10, 100]:\n            table = self.table_class()\n            table.metadata_schema = self.metadata_schema\n            input_data = self.make_input_data(num_rows)\n            del input_data[\"metadata\"]\n            del input_data[\"metadata_offset\"]\n            metadata_column = [self.metadata_example_data() for _ in range(num_rows)]\n            encoded_metadata_column = [\n                table.metadata_schema.validate_and_encode_row(r) for r in metadata_column\n            ]\n            packed_metadata, metadata_offset = tskit.util.pack_bytes(\n                encoded_metadata_column\n            )\n            table.set_columns(\n                metadata=packed_metadata, metadata_offset=metadata_offset, **input_data\n            )\n            table.append_columns(\n                metadata=packed_metadata, metadata_offset=metadata_offset, **input_data\n            )\n            for j in range(num_rows):\n                assert table[j].metadata == metadata_column[j]\n                assert table[j + num_rows].metadata == metadata_column[j]\n\n    @pytest.mark.parametrize(\n        \"codec\",\n        [\"struct\", \"json\"],\n    )\n    def test_set_null_metadata(self, codec):\n        table = self.table_class()\n        table.metadata_schema = metadata.MetadataSchema(\n            {\n                
\"codec\": f\"{codec}\",\n                \"title\": \"Example Metadata\",\n                \"type\": [\"object\", \"null\"],\n                \"properties\": {\n                    \"a\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n                },\n                \"required\": [\"a\"],\n                \"additionalProperties\": False,\n            },\n        )\n        examples = [{\"a\": 4}, None]\n        for md in examples:\n            table.add_row(\n                **{\n                    **self.input_data_for_add_row(),\n                    \"metadata\": md,\n                }\n            )\n        assert table.num_rows == len(examples)\n        for md, row in zip(examples, table):\n            assert md == row.metadata\n\n    # only json allows leaving out of optional entries\n    def test_set_empty_metadata_json(self):\n        table = self.table_class()\n        table.metadata_schema = metadata.MetadataSchema(\n            {\n                \"codec\": \"json\",\n                \"title\": \"Example Metadata\",\n                \"type\": [\"object\", \"null\"],\n                \"properties\": {\n                    \"a\": {\"type\": \"number\", \"binaryFormat\": \"i\"},\n                },\n                \"required\": [],\n                \"additionalProperties\": False,\n            },\n        )\n        examples = [{\"a\": 4}, {}]\n        for md in examples:\n            table.add_row(\n                **{\n                    **self.input_data_for_add_row(),\n                    \"metadata\": md,\n                }\n            )\n        assert table.num_rows == len(examples)\n        for md, row in zip(examples, table):\n            assert md == row.metadata\n\n    @pytest.mark.parametrize(\n        \"codec\",\n        [\"struct\", \"json\"],\n    )\n    def test_set_with_optional_properties(self, codec):\n        table = self.table_class()\n        table.metadata_schema = metadata.MetadataSchema(\n            {\n                
\"codec\": f\"{codec}\",\n                \"title\": \"Example Metadata\",\n                \"type\": [\"object\", \"null\"],\n                \"properties\": {\n                    \"a\": {\"type\": \"number\", \"binaryFormat\": \"i\", \"default\": 0},\n                },\n                \"additionalProperties\": False,\n            },\n        )\n        metadata_list = [{\"a\": 4}, None, {\"a\": 5}, {}]\n        for md in metadata_list:\n            table.add_row(\n                **{\n                    **self.input_data_for_add_row(),\n                    \"metadata\": md,\n                }\n            )\n        assert table.num_rows == len(metadata_list)\n        for md, row in zip(metadata_list, table):\n            # If None is allowed by the schema it gets used even in the presence of\n            # default values.\n            if isinstance(md, dict):\n                defaults = {\"a\": 0}\n                defaults.update(md)\n                assert defaults == row.metadata\n            else:\n                assert md == row.metadata\n\n    def test_copy_metadata_schema(self):\n        table = self.table_class()\n        assert table.metadata_schema == tskit.MetadataSchema(None)\n        table.metadata_schema = tskit.MetadataSchema({\"codec\": \"json\"})\n        copy = table.copy()\n        table.assert_equals(copy)\n        # Check this independently to check the schema cache was invalidated\n        assert table.metadata_schema == copy.metadata_schema\n\n        copy.metadata_schema = tskit.MetadataSchema(None)\n        assert table.metadata_schema != copy.metadata_schema\n\n    def test_set_columns_metadata_schema(self):\n        table = self.table_class()\n        table2 = self.table_class()\n        ms = tskit.MetadataSchema({\"codec\": \"json\"})\n        table2.metadata_schema = ms\n        table.set_columns(**table2.asdict())\n        assert table.metadata_schema == ms\n\n    def verify_metadata_vector(self, table, key, dtype, 
default_value=9999):\n        # this is just a hack for testing; the actual method\n        # does this more elegantly\n        has_default = default_value != 9999\n        if has_default:\n            md_vec = table.metadata_vector(key, default_value=default_value, dtype=dtype)\n        else:\n            md_vec = table.metadata_vector(key, dtype=dtype)\n        assert isinstance(md_vec, np.ndarray)\n        if dtype is not None:\n            assert md_vec.dtype == np.dtype(dtype)\n        assert len(md_vec) == table.num_rows\n        if not isinstance(key, list):\n            key = [key]\n        for x, row in zip(md_vec, table):\n            md = row.metadata\n            for k in key:\n                if k in md or not has_default:\n                    md = md[k]\n                else:\n                    md = default_value\n                    break\n            assert np.all(np.asarray(md, dtype=dtype) == x)\n\n    def test_metadata_vector_errors(self):\n        table = self.table_class()\n        ms = tskit.MetadataSchema({\"codec\": \"json\"})\n        table.metadata_schema = ms\n        table.add_row(\n            **{\n                **self.input_data_for_add_row(),\n                \"metadata\": None,\n            }\n        )\n        with pytest.raises(KeyError):\n            _ = table.metadata_vector(\"x\")\n        metadata_list = [\n            {\"a\": 4, \"u\": [1, 2]},\n            {},\n        ]\n        for md in metadata_list:\n            table.add_row(\n                **{\n                    **self.input_data_for_add_row(),\n                    \"metadata\": md,\n                }\n            )\n        with pytest.raises(KeyError):\n            _ = table.metadata_vector(\"x\")\n\n        table.clear()\n        metadata_list = [\n            {\"a\": {\"c\": 5}, \"u\": [1, 2]},\n            {\"a\": {\"b\": 6}},\n        ]\n        for md in metadata_list:\n            table.add_row(\n                **{\n                    
**self.input_data_for_add_row(),\n                    \"metadata\": md,\n                }\n            )\n        with pytest.raises(KeyError):\n            _ = table.metadata_vector([\"a\", \"x\"])\n\n    def test_metadata_vector_nodefault(self):\n        table = self.table_class()\n        ms = tskit.MetadataSchema({\"codec\": \"json\"})\n        table.metadata_schema = ms\n        metadata_list = [\n            {\"abc\": 4, \"u\": [1, 2]},\n            {\"abc\": 10, \"u\": [3, 4]},\n            {\"abc\": -3, \"b\": {\"c\": 1}, \"u\": [5, 6]},\n            {\"abc\": 1},\n        ]\n        for md in metadata_list:\n            table.add_row(\n                **{\n                    **self.input_data_for_add_row(),\n                    \"metadata\": md,\n                }\n            )\n        # first the totally obvious test\n        md_vec = table.metadata_vector(\"abc\")\n        assert np.all(np.equal(md_vec, [d[\"abc\"] for d in metadata_list]))\n        # now automated ones\n        for dtype in [None, \"int\", \"float\", \"object\"]:\n            self.verify_metadata_vector(\n                table, key=\"abc\", dtype=dtype, default_value=9999\n            )\n            self.verify_metadata_vector(\n                table, key=[\"abc\"], dtype=dtype, default_value=9999\n            )\n\n    def test_metadata_vector(self):\n        table = self.table_class()\n        ms = tskit.MetadataSchema({\"codec\": \"json\"})\n        table.metadata_schema = ms\n        metadata_list = [\n            {\"abc\": 4, \"u\": [1, 2]},\n            {\"abc\": 10, \"u\": [3, 4]},\n            {\"abc\": -3, \"b\": {\"c\": 1}, \"u\": [5, 6]},\n            {\"b\": {\"c\": 3.2}, \"u\": [7, 8]},\n            {\"b\": {\"x\": 8.2}},\n            {},\n            None,\n        ]\n        for md in metadata_list:\n            table.add_row(\n                **{\n                    **self.input_data_for_add_row(),\n                    \"metadata\": md,\n                }\n           
 )\n        # first the totally obvious test\n        md_vec = table.metadata_vector(\"abc\", default_value=0)\n        assert np.all(\n            np.equal(\n                md_vec,\n                [\n                    d[\"abc\"] if (d is not None and \"abc\" in d) else 0\n                    for d in metadata_list\n                ],\n            )\n        )\n\n        # now some automated ones\n        for dtype in [None, \"int\", \"float\", \"object\"]:\n            self.verify_metadata_vector(table, key=\"abc\", dtype=dtype, default_value=-1)\n            self.verify_metadata_vector(\n                table, key=[\"abc\"], dtype=dtype, default_value=-1\n            )\n            self.verify_metadata_vector(table, key=[\"x\"], dtype=dtype, default_value=-1)\n            self.verify_metadata_vector(\n                table, key=[\"b\", \"c\"], dtype=dtype, default_value=-1\n            )\n        self.verify_metadata_vector(table, key=[\"b\"], dtype=\"object\", default_value=-1)\n        self.verify_metadata_vector(table, key=[\"u\"], dtype=\"int\", default_value=[0, 0])\n        # and finally we should get rectangular arrays when it makes sense\n        md_vec = table.metadata_vector(\"u\", default_value=[0, 0])\n        assert md_vec.shape == (table.num_rows, 2)\n\n\nclass AssertEqualsMixin:\n    def test_equal(self, table_5row, test_rows):\n        table2 = self.table_class()\n        for row in test_rows[:5]:\n            table2.add_row(**row)\n        table_5row.assert_equals(table2)\n\n    def test_type(self, table_5row):\n        with pytest.raises(\n            AssertionError,\n            match=f\"Types differ: self={type(table_5row)} other=<class 'int'>\",\n        ):\n            table_5row.assert_equals(42)\n\n    def test_metadata_schema(self, table_5row):\n        if hasattr(table_5row, \"metadata_schema\"):\n            assert table_5row.metadata_schema == tskit.MetadataSchema(None)\n            table2 = table_5row.copy()\n            
table2.metadata_schema = tskit.MetadataSchema({\"codec\": \"json\"})\n            with pytest.raises(\n                AssertionError,\n                match=f\"{type(table_5row).__name__} metadata schemas differ: \",\n            ):\n                table_5row.assert_equals(table2)\n            table_5row.assert_equals(table2, ignore_metadata=True)\n\n    def test_degenerate_metadata_schema(self, table_5row):\n        if hasattr(table_5row, \"metadata_schema\"):\n            table_5row.clear()\n            table1 = table_5row.copy()\n            table2 = table_5row.copy()\n            table1.ll_table.metadata_schema = (\n                '{\"codec\": \"json\", \"properties\": '\n                '{\"A\": {\"type\": \"integer\"}, \"B\": {\"type\": \"number\"}}}'\n            )\n            table2.ll_table.metadata_schema = (\n                '{\"codec\": \"json\", \"properties\": '\n                '{\"B\": {\"type\": \"number\"}, \"A\": {\"type\": \"integer\"}}}'\n            )\n            table1.assert_equals(table2)\n\n    def test_row_changes(self, table_5row, test_rows):\n        for column_name in test_rows[0].keys():\n            table2 = self.table_class()\n            for row in test_rows[:4]:\n                table2.add_row(**row)\n            modified_row = {\n                **test_rows[4],\n                **{column_name: test_rows[5][column_name]},\n            }\n            table2.add_row(**modified_row)\n            with pytest.raises(\n                AssertionError,\n                match=re.escape(\n                    f\"{type(table_5row).__name__} row 4 differs:\\n\"\n                    f\"self.{column_name}={test_rows[4][column_name]} \"\n                    f\"other.{column_name}={test_rows[5][column_name]}\"\n                ),\n            ):\n                table_5row.assert_equals(table2)\n            if column_name == \"metadata\":\n                table_5row.assert_equals(table2, ignore_metadata=True)\n            if column_name == 
\"timestamp\":\n                table_5row.assert_equals(table2, ignore_timestamps=True)\n\n        # Two columns differ, as we don't know the order in the error message\n        # test for both independently\n        for column_name, column_name2 in zip(\n            list(test_rows[0].keys())[:-1], list(test_rows[0].keys())[1:]\n        ):\n            table2 = self.table_class()\n            for row in test_rows[:4]:\n                table2.add_row(**row)\n            modified_row = {\n                **test_rows[4],\n                **{\n                    column_name: test_rows[5][column_name],\n                    column_name2: test_rows[5][column_name2],\n                },\n            }\n            table2.add_row(**modified_row)\n            with pytest.raises(\n                AssertionError,\n                match=re.escape(\n                    f\"self.{column_name}={test_rows[4][column_name]} \"\n                    f\"other.{column_name}={test_rows[5][column_name]}\"\n                ),\n            ):\n                table_5row.assert_equals(table2)\n            with pytest.raises(\n                AssertionError,\n                match=re.escape(\n                    f\"self.{column_name2}={test_rows[4][column_name2]} \"\n                    f\"other.{column_name2}={test_rows[5][column_name2]}\"\n                ),\n            ):\n                table_5row.assert_equals(table2)\n\n    def test_num_rows(self, table_5row, test_rows):\n        table2 = self.table_class()\n        for row in test_rows[:4]:\n            table2.add_row(**row)\n        with pytest.raises(\n            AssertionError,\n            match=f\"{type(table_5row).__name__} number of rows differ: self=5 other=4\",\n        ):\n            table_5row.assert_equals(table2)\n\n    def test_metadata(self, table_5row, test_rows):\n        if \"metadata\" in test_rows[0].keys():\n            table2 = self.table_class()\n            for row in test_rows[:4]:\n                
table2.add_row(**row)\n            modified_row = {\n                **test_rows[4],\n                **{\"metadata\": test_rows[5][\"metadata\"]},\n            }\n            table2.add_row(**modified_row)\n            with pytest.raises(\n                AssertionError,\n                match=re.escape(\n                    f\"{type(table_5row).__name__} row 4 differs:\\n\"\n                    f\"self.metadata={test_rows[4]['metadata']} \"\n                    f\"other.metadata={test_rows[5]['metadata']}\"\n                ),\n            ):\n                table_5row.assert_equals(table2)\n            table_5row.assert_equals(table2, ignore_metadata=True)\n\n    def test_timestamp(self, table_5row, test_rows):\n        if \"timestamp\" in test_rows[0].keys():\n            table2 = self.table_class()\n            for row in test_rows[:4]:\n                table2.add_row(**row)\n            modified_row = {\n                **test_rows[4],\n                **{\"timestamp\": test_rows[5][\"timestamp\"]},\n            }\n            table2.add_row(**modified_row)\n            with pytest.raises(\n                AssertionError,\n                match=re.escape(\n                    f\"{type(table_5row).__name__} row 4 differs:\\n\"\n                    f\"self.timestamp={test_rows[4]['timestamp']} \"\n                    f\"other.timestamp={test_rows[5]['timestamp']}\"\n                ),\n            ):\n                table_5row.assert_equals(table2)\n            table_5row.assert_equals(table2, ignore_timestamps=True)\n\n\nclass FancyIndexingMixin:\n    @pytest.mark.parametrize(\n        \"slic\",\n        [\n            slice(None, None),\n            slice(None, 3),\n            slice(2, None),\n            slice(1, 4),\n            slice(4, 1),\n            slice(1, 4, 2),\n            slice(4, 1, 2),\n            slice(4, 1, -1),\n            slice(1, 4, -1),\n            slice(3, None, -1),\n            slice(None, 3, -1),\n            slice(None, None, 
-2),\n        ],\n    )\n    def test_slice(self, table, test_rows, slic):\n        assert table.num_rows >= 5\n        expected = table.copy()\n        expected.truncate(0)\n        for row in test_rows[slic]:\n            expected.add_row(**row)\n        table[slic].assert_equals(expected)\n\n    @pytest.mark.parametrize(\n        \"mask\",\n        [\n            [False] * 5,\n            [True] * 5,\n            [True] + [False] * 4,\n            [False, True, False, True, True],\n        ],\n    )\n    def test_boolean_array(self, table_5row, test_rows, mask):\n        assert table_5row.num_rows >= 5\n        expected = table_5row.copy()\n        expected.truncate(0)\n        for flag, row in zip(mask, test_rows[:5]):\n            if flag:\n                expected.add_row(**row)\n        table_5row[mask].assert_equals(expected)\n\n    @pytest.mark.parametrize(\n        \"index_array\",\n        [\n            [],\n            [0],\n            [4],\n            random.choices(range(5), k=100),\n            np.array([0, 0, 0, 2], dtype=np.uint64),\n            np.array([2, 4, 4, 0], dtype=np.int64),\n            np.array([0, 0, 0, 2], dtype=np.uint32),\n            np.array([2, 4, 4, 0], dtype=np.int32),\n            np.array([4, 3, 4, 1], dtype=np.uint8),\n            np.array([4, 3, 4, 1], dtype=np.int8),\n        ],\n    )\n    def test_index_array(self, table_5row, index_array):\n        assert table_5row.num_rows >= 5\n        expected = table_5row.copy()\n        expected.truncate(0)\n        for index in index_array:\n            expected.append(table_5row[index])\n        table_5row[index_array].assert_equals(expected)\n        table_5row[tuple(index_array)].assert_equals(expected)\n\n    def test_index_range(self, table_5row):\n        expected = table_5row.copy()\n        expected.truncate(0)\n        for index in range(2, 4):\n            expected.append(table_5row[index])\n        table_5row[range(2, 4)].assert_equals(expected)\n\n    
@pytest.mark.parametrize(\n        \"dtype\",\n        [\n            np.float32,\n            np.float64,\n            object,\n            str,\n        ],\n    )\n    def test_bad_dtypes(self, table, dtype):\n        # Index with the parametrized dtype so every listed dtype is exercised.\n        with pytest.raises(TypeError):\n            table[np.zeros((10,), dtype=dtype)]\n\n    @pytest.mark.parametrize(\n        \"dtype\",\n        [\n            np.uint32,\n            np.int64,\n            np.uint64,\n        ],\n    )\n    def test_bad_casts(self, table, dtype):\n        with pytest.raises(OverflowError, match=\"Cannot convert safely\"):\n            table[np.asarray([np.iinfo(np.int32).max + 1], dtype=dtype)]\n\n    def test_extrema(self, table):\n        max_ = np.iinfo(np.int32).max\n        with pytest.raises(OverflowError, match=\"Cannot convert safely\"):\n            table[[max_ + 1]]\n\n        # Slice gets clipped to valid range\n        copy = table.copy()\n        copy.clear()\n        table[max_ + 1 : max_ + 2].assert_equals(copy)\n\n        with pytest.raises(OverflowError, match=\"Cannot convert safely\"):\n            table[range(max_ + 1, max_ + 2)]\n\n    @pytest.mark.parametrize(\n        \"bad_shape\",\n        [\n            [[0]],\n            [[1, 2], [3, 4]],\n        ],\n    )\n    def test_bad_shapes(self, table, bad_shape):\n        with pytest.raises(ValueError, match=\"object too deep\"):\n            table[bad_shape]\n\n    def test_bad_bool_length(self, table):\n        with pytest.raises(\n            IndexError, match=\"Boolean index must be same length as table\"\n        ):\n            table[[False] * (len(table) + 1)]\n        with pytest.raises(\n            IndexError, match=\"Boolean index must be same length as table\"\n        ):\n            table[[False]]\n\n    def test_bad_indexes(self, table):\n        with pytest.raises(_tskit.LibraryError, match=\"out of bounds\"):\n            table[[-1]]\n        with pytest.raises(_tskit.LibraryError, match=\"out of bounds\"):\n            
table[range(-5, 0)]\n        with pytest.raises(_tskit.LibraryError, match=\"out of bounds\"):\n            table[[len(table)]]\n        with pytest.raises(TypeError, match=\"Cannot cast\"):\n            table[[5.5]]\n        with pytest.raises(TypeError, match=\"Cannot convert\"):\n            table[[None]]\n        with pytest.raises(TypeError, match=\"not supported|did not contain\"):\n            table[[\"foobar\"]]\n        with pytest.raises(TypeError, match=\"Index must be integer, slice or iterable\"):\n            table[5.5]\n        with pytest.raises(TypeError, match=\"Cannot convert to a rectangular array\"):\n            table[None]\n        with pytest.raises(TypeError, match=\"not supported|did not contain\"):\n            table[\"foobar\"]\n\n\ncommon_tests = [\n    CommonTestsMixin,\n    MetadataTestsMixin,\n    AssertEqualsMixin,\n    FancyIndexingMixin,\n]\n\n\nclass TestIndividualTable(*common_tests):\n    columns = [UInt32Column(\"flags\")]\n    ragged_list_columns = [\n        (DoubleColumn(\"location\"), UInt32Column(\"location_offset\")),\n        (Int32Column(\"parents\"), UInt32Column(\"parents_offset\")),\n        (CharColumn(\"metadata\"), UInt32Column(\"metadata_offset\")),\n    ]\n    string_colnames = []\n    binary_colnames = [\"metadata\"]\n    input_parameters = [(\"max_rows_increment\", 0)]\n    equal_len_columns = [[\"flags\"]]\n    table_class = tskit.IndividualTable\n\n    def test_simple_example(self):\n        t = tskit.IndividualTable()\n        t.add_row(flags=0, location=[], parents=[], metadata=b\"123\")\n        t.add_row(flags=1, location=(0, 1, 2, 3), parents=(4, 5, 6, 7), metadata=b\"\\xf0\")\n        s = str(t)\n        assert len(s) > 0\n        assert len(t) == 2\n        assert t[0].flags == 0\n        assert list(t[0].location) == []\n        assert list(t[0].parents) == []\n        assert t[0].metadata == b\"123\"\n        assert t[1].flags == 1\n        assert list(t[1].location) == [0, 1, 2, 3]\n        assert 
list(t[1].parents) == [4, 5, 6, 7]\n        assert t[1].metadata == b\"\\xf0\"\n        with pytest.raises(IndexError):\n            t.__getitem__(-4)\n\n    def test_add_row_defaults(self):\n        t = tskit.IndividualTable()\n        assert t.add_row() == 0\n        assert t.flags[0] == 0\n        assert len(t.location) == 0\n        assert t.location_offset[0] == 0\n        assert len(t.parents) == 0\n        assert t.parents_offset[0] == 0\n        assert len(t.metadata) == 0\n        assert t.metadata_offset[0] == 0\n\n    def test_add_row_bad_data(self):\n        t = tskit.IndividualTable()\n        with pytest.raises(TypeError):\n            t.add_row(flags=\"x\")\n        with pytest.raises(TypeError):\n            t.add_row(metadata=123)\n        with pytest.raises(ValueError):\n            t.add_row(location=\"1234\")\n        with pytest.raises(ValueError):\n            t.add_row(parents=\"forty-two\")\n\n    def test_packset_location(self):\n        t = tskit.IndividualTable()\n        t.add_row(flags=0)\n        t.packset_location([[0.125, 2]])\n        assert list(t[0].location) == [0.125, 2]\n        t.add_row(flags=1)\n        assert list(t[1].location) == []\n        t.packset_location([[0], [1, 2, 3]])\n        assert list(t[0].location) == [0]\n        assert list(t[1].location) == [1, 2, 3]\n\n    def test_packset_parents(self):\n        t = tskit.IndividualTable()\n        t.add_row(flags=0)\n        t.packset_parents([[0, 2]])\n        assert list(t[0].parents) == [0, 2]\n        t.add_row(flags=1)\n        assert list(t[1].parents) == []\n        t.packset_parents([[0], [1, 2, 3]])\n        assert list(t[0].parents) == [0]\n        assert list(t[1].parents) == [1, 2, 3]\n\n    def test_missing_time_equal_to_self(self):\n        t = tskit.TableCollection(sequence_length=10)\n        t.sites.add_row(position=1, ancestral_state=\"0\")\n        t.mutations.add_row(site=0, node=0, derived_state=\"1\", time=tskit.UNKNOWN_TIME)\n        assert 
t.mutations[0] == t.mutations[0]\n\n    def test_various_not_equals(self):\n        args = {\n            \"site\": 0,\n            \"node\": 0,\n            \"derived_state\": \"a\",\n            \"parent\": 0,\n            \"metadata\": b\"abc\",\n            \"time\": 0,\n        }\n        a = tskit.MutationTableRow(**args)\n        assert a != []\n        assert a != 12\n        assert a is not None\n        b = tskit.MutationTableRow(**args)\n        assert a == b\n        args[\"site\"] = 2\n        b = tskit.MutationTableRow(**args)\n        assert a != b\n        args[\"site\"] = 0\n        args[\"node\"] = 2\n        b = tskit.MutationTableRow(**args)\n        assert a != b\n        args[\"node\"] = 0\n        args[\"derived_state\"] = \"b\"\n        b = tskit.MutationTableRow(**args)\n        assert a != b\n        args[\"derived_state\"] = \"a\"\n        args[\"parent\"] = 2\n        b = tskit.MutationTableRow(**args)\n        assert a != b\n        args[\"parent\"] = 0\n        args[\"metadata\"] = b\"\"\n        b = tskit.MutationTableRow(**args)\n        assert a != b\n        args[\"metadata\"] = b\"abc\"\n        args[\"time\"] = 1\n        b = tskit.MutationTableRow(**args)\n        assert a != b\n        args[\"time\"] = 0\n        args[\"time\"] = tskit.UNKNOWN_TIME\n        b = tskit.MutationTableRow(**args)\n        assert a != b\n        a = tskit.MutationTableRow(**args)\n        assert a == b\n\n    def test_keep_rows_data(self):\n        input_data = self.make_input_data(100)\n        t1 = self.table_class()\n        # Set the parent column to -1s for this simple test as\n        # we need to reason about reference integrity\n        t1.append_columns(**input_data)\n        t1.parents = np.full_like(t1.parents, -1)\n        t2 = t1.copy()\n        keep = np.ones(len(t1), dtype=bool)\n        # Mask out the even-indexed rows so only odd-indexed rows are kept\n        keep[::2] = 0\n        t1.keep_rows(keep)\n        keep_rows_definition(t2, keep)\n        assert t1.equals(t2)\n\n\nclass 
TestNodeTable(*common_tests):\n    columns = [\n        UInt32Column(\"flags\"),\n        DoubleColumn(\"time\"),\n        Int32Column(\"individual\"),\n        Int32Column(\"population\"),\n    ]\n    ragged_list_columns = [(CharColumn(\"metadata\"), UInt32Column(\"metadata_offset\"))]\n    string_colnames = []\n    binary_colnames = [\"metadata\"]\n    input_parameters = [(\"max_rows_increment\", 0)]\n    equal_len_columns = [[\"time\", \"flags\", \"population\"]]\n    table_class = tskit.NodeTable\n\n    def test_simple_example(self):\n        t = tskit.NodeTable()\n        t.add_row(flags=0, time=1, population=2, individual=0, metadata=b\"123\")\n        t.add_row(flags=1, time=2, population=3, individual=1, metadata=b\"\\xf0\")\n        s = str(t)\n        assert len(s) > 0\n        assert len(t) == 2\n        assert dataclasses.astuple(t[0]) == (0, 1, 2, 0, b\"123\")\n        assert dataclasses.astuple(t[1]) == (1, 2, 3, 1, b\"\\xf0\")\n        assert t[0].flags == 0\n        assert t[0].time == 1\n        assert t[0].population == 2\n        assert t[0].individual == 0\n        assert t[0].metadata == b\"123\"\n        assert t[0] == t[-2]\n        assert t[1] == t[-1]\n        with pytest.raises(IndexError):\n            t.__getitem__(-3)\n\n    def test_add_row_defaults(self):\n        t = tskit.NodeTable()\n        assert t.add_row() == 0\n        assert t.time[0] == 0\n        assert t.flags[0] == 0\n        assert t.population[0] == tskit.NULL\n        assert t.individual[0] == tskit.NULL\n        assert len(t.metadata) == 0\n        assert t.metadata_offset[0] == 0\n\n    def test_optional_population(self):\n        for num_rows in [0, 10, 100]:\n            metadatas = [str(j) for j in range(num_rows)]\n            metadata, metadata_offset = tskit.pack_strings(metadatas)\n            flags = list(range(num_rows))\n            time = list(range(num_rows))\n            table = tskit.NodeTable()\n            table.set_columns(\n                
metadata=metadata,\n                metadata_offset=metadata_offset,\n                flags=flags,\n                time=time,\n            )\n            assert list(table.population) == [-1 for _ in range(num_rows)]\n            assert list(table.flags) == flags\n            assert list(table.time) == time\n            assert list(table.metadata) == list(metadata)\n            assert list(table.metadata_offset) == list(metadata_offset)\n            table.set_columns(flags=flags, time=time, population=None)\n            assert list(table.population) == [-1 for _ in range(num_rows)]\n            assert list(table.flags) == flags\n            assert list(table.time) == time\n\n    def test_add_row_bad_data(self):\n        t = tskit.NodeTable()\n        with pytest.raises(TypeError):\n            t.add_row(flags=\"x\")\n        with pytest.raises(TypeError):\n            t.add_row(time=\"x\")\n        with pytest.raises(TypeError):\n            t.add_row(individual=\"x\")\n        with pytest.raises(TypeError):\n            t.add_row(population=\"x\")\n        with pytest.raises(TypeError):\n            t.add_row(metadata=123)\n\n\nclass TestEdgeTable(*common_tests):\n    columns = [\n        DoubleColumn(\"left\"),\n        DoubleColumn(\"right\"),\n        Int32Column(\"parent\"),\n        Int32Column(\"child\"),\n    ]\n    equal_len_columns = [[\"left\", \"right\", \"parent\", \"child\"]]\n    string_colnames = []\n    binary_colnames = [\"metadata\"]\n    ragged_list_columns = [(CharColumn(\"metadata\"), UInt32Column(\"metadata_offset\"))]\n    input_parameters = [(\"max_rows_increment\", 0)]\n    table_class = tskit.EdgeTable\n\n    def test_simple_example(self):\n        t = tskit.EdgeTable()\n        t.add_row(left=0, right=1, parent=2, child=3, metadata=b\"123\")\n        t.add_row(1, 2, 3, 4, b\"\\xf0\")\n        assert len(t) == 2\n        assert dataclasses.astuple(t[0]) == (0, 1, 2, 3, b\"123\")\n        assert dataclasses.astuple(t[1]) == (1, 2, 3, 4, 
b\"\\xf0\")\n        assert t[0].left == 0\n        assert t[0].right == 1\n        assert t[0].parent == 2\n        assert t[0].child == 3\n        assert t[0].metadata == b\"123\"\n        assert t[0] == t[-2]\n        assert t[1] == t[-1]\n        with pytest.raises(IndexError):\n            t.__getitem__(-3)\n\n    def test_add_row_defaults(self):\n        t = tskit.EdgeTable()\n        assert t.add_row(0, 0, 0, 0) == 0\n        assert len(t.metadata) == 0\n        assert t.metadata_offset[0] == 0\n\n    def test_add_row_bad_data(self):\n        t = tskit.EdgeTable()\n        with pytest.raises(TypeError):\n            t.add_row(left=\"x\", right=0, parent=0, child=0)\n        with pytest.raises(TypeError):\n            t.add_row()\n        with pytest.raises(TypeError):\n            t.add_row(0, 0, 0, 0, metadata=123)\n\n\nclass TestSiteTable(*common_tests):\n    columns = [DoubleColumn(\"position\")]\n    ragged_list_columns = [\n        (CharColumn(\"ancestral_state\"), UInt32Column(\"ancestral_state_offset\")),\n        (CharColumn(\"metadata\"), UInt32Column(\"metadata_offset\")),\n    ]\n    equal_len_columns = [[\"position\"]]\n    string_colnames = [\"ancestral_state\"]\n    binary_colnames = [\"metadata\"]\n    input_parameters = [(\"max_rows_increment\", 0)]\n    table_class = tskit.SiteTable\n\n    def test_simple_example(self):\n        t = tskit.SiteTable()\n        t.add_row(position=0, ancestral_state=\"1\", metadata=b\"2\")\n        t.add_row(1, \"2\", b\"\\xf0\")\n        s = str(t)\n        assert len(s) > 0\n        assert len(t) == 2\n        assert dataclasses.astuple(t[0]) == (0, \"1\", b\"2\")\n        assert dataclasses.astuple(t[1]) == (1, \"2\", b\"\\xf0\")\n        assert t[0].position == 0\n        assert t[0].ancestral_state == \"1\"\n        assert t[0].metadata == b\"2\"\n        assert t[0] == t[-2]\n        assert t[1] == t[-1]\n        with pytest.raises(IndexError):\n            t.__getitem__(2)\n        with 
pytest.raises(IndexError):\n            t.__getitem__(-3)\n\n    def test_add_row_bad_data(self):\n        t = tskit.SiteTable()\n        t.add_row(0, \"A\")\n        with pytest.raises(TypeError):\n            t.add_row(\"x\", \"A\")\n        with pytest.raises(TypeError):\n            t.add_row(0, 0)\n        with pytest.raises(TypeError):\n            t.add_row(0, \"A\", metadata=[0, 1, 2])\n\n    def test_packset_ancestral_state(self):\n        for num_rows in [0, 10, 100]:\n            input_data = self.make_input_data(num_rows)\n            table = self.table_class()\n            table.set_columns(**input_data)\n            ancestral_states = [tsutil.random_strings(10) for _ in range(num_rows)]\n            ancestral_state, ancestral_state_offset = tskit.pack_strings(\n                ancestral_states\n            )\n            table.packset_ancestral_state(ancestral_states)\n            assert np.array_equal(table.ancestral_state, ancestral_state)\n            assert np.array_equal(table.ancestral_state_offset, ancestral_state_offset)\n\n\nclass TestMutationTable(*common_tests):\n    columns = [\n        Int32Column(\"site\"),\n        Int32Column(\"node\"),\n        DoubleColumn(\"time\"),\n        Int32Column(\"parent\"),\n    ]\n    ragged_list_columns = [\n        (CharColumn(\"derived_state\"), UInt32Column(\"derived_state_offset\")),\n        (CharColumn(\"metadata\"), UInt32Column(\"metadata_offset\")),\n    ]\n    equal_len_columns = [[\"site\", \"node\", \"time\"]]\n    string_colnames = [\"derived_state\"]\n    binary_colnames = [\"metadata\"]\n    input_parameters = [(\"max_rows_increment\", 0)]\n    table_class = tskit.MutationTable\n\n    def test_simple_example(self):\n        t = tskit.MutationTable()\n        t.add_row(site=0, node=1, derived_state=\"2\", parent=3, metadata=b\"4\", time=5)\n        t.add_row(1, 2, \"3\", 4, b\"\\xf0\", 6)\n        t.add_row(\n            site=0,\n            node=1,\n            derived_state=\"2\",\n        
    parent=3,\n            metadata=b\"4\",\n            time=tskit.UNKNOWN_TIME,\n        )\n        s = str(t)\n        assert len(s) > 0\n        assert len(t) == 3\n        assert dataclasses.astuple(t[0]) == (0, 1, \"2\", 3, b\"4\", 5)\n        assert dataclasses.astuple(t[1]) == (1, 2, \"3\", 4, b\"\\xf0\", 6)\n        assert t[0].site == 0\n        assert t[0].node == 1\n        assert t[0].derived_state == \"2\"\n        assert t[0].parent == 3\n        assert t[0].metadata == b\"4\"\n        assert t[0].time == 5\n        assert t[0] == t[-3]\n        assert t[1] == t[-2]\n        assert t[2] == t[-1]\n        with pytest.raises(IndexError):\n            t.__getitem__(-4)\n\n    def test_add_row_defaults(self):\n        t = tskit.MutationTable()\n        assert t.add_row(0, 0, \"A\", 0) == 0\n        assert len(t.metadata) == 0\n        assert t.metadata_offset[0] == 0\n        assert tskit.is_unknown_time(t.time[0])\n\n    def test_add_row_bad_data(self):\n        t = tskit.MutationTable()\n        t.add_row(0, 0, \"A\")\n        with pytest.raises(TypeError):\n            t.add_row(\"0\", 0, \"A\")\n        with pytest.raises(TypeError):\n            t.add_row(0, \"0\", \"A\")\n        with pytest.raises(TypeError):\n            t.add_row(0, 0, \"A\", parent=None)\n        with pytest.raises(TypeError):\n            t.add_row(0, 0, \"A\", metadata=[0])\n        with pytest.raises(TypeError):\n            t.add_row(0, 0, \"A\", time=\"A\")\n\n    def test_packset_derived_state(self):\n        for num_rows in [0, 10, 100]:\n            input_data = self.make_input_data(num_rows)\n            table = self.table_class()\n            table.set_columns(**input_data)\n            derived_states = [tsutil.random_strings(10) for _ in range(num_rows)]\n            derived_state, derived_state_offset = tskit.pack_strings(derived_states)\n            table.packset_derived_state(derived_states)\n            assert np.array_equal(table.derived_state, derived_state)\n  
          assert np.array_equal(table.derived_state_offset, derived_state_offset)\n\n    def test_keep_rows_data(self):\n        input_data = self.make_input_data(100)\n        t1 = self.table_class()\n        # Set the parent column to -1s for this simple test as\n        # we need to reason about reference integrity\n        t1.append_columns(**input_data)\n        t1.parent = np.full_like(t1.parent, -1)\n        t2 = t1.copy()\n        keep = np.ones(len(t1), dtype=bool)\n        # Mask out the even-indexed rows so only odd-indexed rows are kept\n        keep[::2] = 0\n        t1.keep_rows(keep)\n        keep_rows_definition(t2, keep)\n        assert t1.equals(t2)\n\n\nclass TestMigrationTable(*common_tests):\n    columns = [\n        DoubleColumn(\"left\"),\n        DoubleColumn(\"right\"),\n        Int32Column(\"node\"),\n        Int32Column(\"source\"),\n        Int32Column(\"dest\"),\n        DoubleColumn(\"time\"),\n    ]\n    ragged_list_columns = [(CharColumn(\"metadata\"), UInt32Column(\"metadata_offset\"))]\n    string_colnames = []\n    binary_colnames = [\"metadata\"]\n    input_parameters = [(\"max_rows_increment\", 0)]\n    equal_len_columns = [[\"left\", \"right\", \"node\", \"source\", \"dest\", \"time\"]]\n    table_class = tskit.MigrationTable\n\n    def test_simple_example(self):\n        t = tskit.MigrationTable()\n        t.add_row(left=0, right=1, node=2, source=3, dest=4, time=5, metadata=b\"123\")\n        t.add_row(1, 2, 3, 4, 5, 6, b\"\\xf0\")\n        assert len(t) == 2\n        assert dataclasses.astuple(t[0]) == (0, 1, 2, 3, 4, 5, b\"123\")\n        assert dataclasses.astuple(t[1]) == (1, 2, 3, 4, 5, 6, b\"\\xf0\")\n        assert t[0].left == 0\n        assert t[0].right == 1\n        assert t[0].node == 2\n        assert t[0].source == 3\n        assert t[0].dest == 4\n        assert t[0].time == 5\n        assert t[0].metadata == b\"123\"\n        assert t[0] == t[-2]\n        assert t[1] == t[-1]\n        with pytest.raises(IndexError):\n            t.__getitem__(-3)\n\n    def 
test_add_row_defaults(self):\n        t = tskit.MigrationTable()\n        assert t.add_row(0, 0, 0, 0, 0, 0) == 0\n        assert len(t.metadata) == 0\n        assert t.metadata_offset[0] == 0\n\n    def test_add_row_bad_data(self):\n        t = tskit.MigrationTable()\n        with pytest.raises(TypeError):\n            t.add_row(left=\"x\", right=0, node=0, source=0, dest=0, time=0)\n        with pytest.raises(TypeError):\n            t.add_row()\n        with pytest.raises(TypeError):\n            t.add_row(0, 0, 0, 0, 0, 0, metadata=123)\n\n\nclass TestProvenanceTable(CommonTestsMixin, AssertEqualsMixin):\n    columns = []\n    ragged_list_columns = [\n        (CharColumn(\"timestamp\"), UInt32Column(\"timestamp_offset\")),\n        (CharColumn(\"record\"), UInt32Column(\"record_offset\")),\n    ]\n    equal_len_columns = [[]]\n    string_colnames = [\"record\", \"timestamp\"]\n    binary_colnames = []\n    input_parameters = [(\"max_rows_increment\", 0)]\n    table_class = tskit.ProvenanceTable\n\n    def test_simple_example(self):\n        t = tskit.ProvenanceTable()\n        t.add_row(timestamp=\"0\", record=\"1\")\n        t.add_row(\"2\", \"1\")  # The orders are reversed for default timestamp.\n        assert len(t) == 2\n        assert dataclasses.astuple(t[0]) == (\"0\", \"1\")\n        assert dataclasses.astuple(t[1]) == (\"1\", \"2\")\n        assert t[0].timestamp == \"0\"\n        assert t[0].record == \"1\"\n        assert t[0] == t[-2]\n        assert t[1] == t[-1]\n        with pytest.raises(IndexError):\n            t.__getitem__(-3)\n\n    def test_add_row_bad_data(self):\n        t = tskit.ProvenanceTable()\n        t.add_row(\"a\", \"b\")\n        with pytest.raises(TypeError):\n            t.add_row(0, \"b\")\n        with pytest.raises(TypeError):\n            t.add_row(\"a\", 0)\n\n    def test_packset_timestamp(self):\n        t = tskit.ProvenanceTable()\n        t.add_row(timestamp=\"0\", record=\"1\")\n        t.add_row(timestamp=\"1\", 
record=\"2\")\n        t.packset_timestamp([\"AAAA\", \"BBBB\"])\n        assert t[0].timestamp == \"AAAA\"\n        assert t[1].timestamp == \"BBBB\"\n\n    def test_packset_record(self):\n        t = tskit.ProvenanceTable()\n        t.add_row(timestamp=\"0\", record=\"1\")\n        t.add_row(timestamp=\"1\", record=\"2\")\n        t.packset_record([\"AAAA\", \"BBBB\"])\n        assert t[0].record == \"AAAA\"\n        assert t[1].record == \"BBBB\"\n\n    def test_assert_equals_ignore_timestamps_with_other_difference(self):\n        t1 = tskit.ProvenanceTable()\n        t1.add_row(record=\"{}\", timestamp=\"2024-01-01T00:00:00Z\")\n        t2 = tskit.ProvenanceTable()\n        t2.add_row(record=\"different\", timestamp=\"2024-02-01T00:00:00Z\")\n        with pytest.raises(\n            AssertionError,\n            match=re.escape(\n                \"ProvenanceTable row 0 differs:\\nself.record={} other.record=different\"\n            ),\n        ):\n            t1.assert_equals(t2, ignore_timestamps=True)\n        with pytest.raises(\n            AssertionError,\n            match=re.escape(\n                \"ProvenanceTable row 0 differs:\\nself.record=different other.record={}\"\n            ),\n        ):\n            t2.assert_equals(t1, ignore_timestamps=True)\n\n\nclass TestPopulationTable(*common_tests):\n    metadata_mandatory = True\n    columns = []\n    ragged_list_columns = [(CharColumn(\"metadata\"), UInt32Column(\"metadata_offset\"))]\n    equal_len_columns = [[]]\n    string_colnames = []\n    binary_colnames = [\"metadata\"]\n    input_parameters = [(\"max_rows_increment\", 0)]\n    table_class = tskit.PopulationTable\n\n    def test_simple_example(self):\n        t = tskit.PopulationTable()\n        t.add_row(metadata=b\"\\xf0\")\n        t.add_row(b\"1\")\n        s = str(t)\n        assert len(s) > 0\n        assert len(t) == 2\n        assert dataclasses.astuple(t[0]) == (b\"\\xf0\",)\n        assert t[0].metadata == b\"\\xf0\"\n        assert 
dataclasses.astuple(t[1]) == (b\"1\",)\n        with pytest.raises(IndexError):\n            t.__getitem__(-3)\n\n    def test_add_row_defaults(self):\n        t = tskit.PopulationTable()\n        assert t.add_row() == 0\n        assert len(t.metadata) == 0\n        assert t.metadata_offset[0] == 0\n\n    def test_add_row_bad_data(self):\n        t = tskit.PopulationTable()\n        t.add_row()\n        with pytest.raises(TypeError):\n            t.add_row(metadata=[0])\n\n\nclass TestTableCollectionIndexes:\n    def test_index(self):\n        i = np.arange(20)\n        r = np.arange(20)[::-1]\n        index = tskit.TableCollectionIndexes(\n            edge_insertion_order=i, edge_removal_order=r\n        )\n        assert np.array_equal(index.edge_insertion_order, i)\n        assert np.array_equal(index.edge_removal_order, r)\n        d = index.asdict()\n        assert np.array_equal(d[\"edge_insertion_order\"], i)\n        assert np.array_equal(d[\"edge_removal_order\"], r)\n\n        index = tskit.TableCollectionIndexes()\n        assert index.edge_insertion_order is None\n        assert index.edge_removal_order is None\n        assert index.asdict() == {}\n\n\nclass TestSortTables:\n    \"\"\"\n    Tests for the TableCollection.sort() and TableCollection.canonicalise() methods.\n    \"\"\"\n\n    random_seed = 12345\n\n    def verify_sort_equality(self, tables, seed):\n        tables1 = tables.copy()\n        tsutil.shuffle_tables(\n            tables1,\n            seed,\n            shuffle_populations=False,\n        )\n        tables1.individuals.packset_metadata(\n            [bytes(str(i), \"utf-8\") for i in range(tables1.individuals.num_rows)]\n        )\n        tables2 = tables1.copy()\n        tables1.sort()\n        tsutil.py_sort(tables2)\n        # Check that both are valid tree sequences\n        tables1.tree_sequence()\n        tables2.tree_sequence()\n        tables1.assert_equals(tables2)\n\n    def verify_canonical_equality(self, tables, 
seed):\n        # Migrations not supported\n        tables.migrations.clear()\n\n        for ru in [True, False]:\n            tsk_tables = tables.copy()\n            tsutil.shuffle_tables(\n                tsk_tables,\n                seed,\n            )\n            py_tables = tsk_tables.copy()\n            tsk_tables.canonicalise(remove_unreferenced=ru)\n            tsutil.py_canonicalise(py_tables, remove_unreferenced=ru)\n            tsk_tables.assert_equals(py_tables)\n\n    def verify_sort_mutation_consistency(self, orig_tables, seed):\n        tables = orig_tables.copy()\n        mut_map = {s.position: [] for s in tables.sites}\n        for mut in tables.mutations:\n            mut_map[tables.sites[mut.site].position].append(\n                (mut.node, mut.derived_state, mut.metadata)\n            )\n        tsutil.shuffle_tables(tables, seed, shuffle_populations=False)\n        for mut in tables.mutations:\n            site = tables.sites[mut.site]\n            assert (mut.node, mut.derived_state, mut.metadata) in mut_map[site.position]\n        tables.sort()\n        for mut in tables.mutations:\n            site = tables.sites[mut.site]\n            assert (mut.node, mut.derived_state, mut.metadata) in mut_map[site.position]\n\n    def verify_randomise_tables(self, orig_tables, seed):\n        # Check we can shuffle everything and then put it back in canonical form\n        tables = orig_tables.copy()\n        tables.sort()\n        sorted_tables = tables.copy()\n\n        # First randomize only edges: this should work without canonical sorting.\n        tsutil.shuffle_tables(\n            tables,\n            seed=seed,\n            shuffle_edges=True,\n            shuffle_populations=False,\n            shuffle_individuals=False,\n            shuffle_sites=False,\n            shuffle_mutations=False,\n        )\n        tables.sort()\n        tables.assert_equals(sorted_tables)\n\n        # Now also randomize sites, mutations and individuals\n       
 tables.canonicalise(remove_unreferenced=False)\n        sorted_tables = tables.copy()\n        tsutil.shuffle_tables(\n            tables,\n            seed=1234,\n            shuffle_populations=False,\n        )\n        tables.canonicalise(remove_unreferenced=False)\n        tables.assert_equals(sorted_tables)\n\n        # Finally, randomize everything else\n        tsutil.shuffle_tables(tables, seed=1234)\n        tables.canonicalise(remove_unreferenced=False)\n        tables.assert_equals(sorted_tables)\n\n        # Check the canonicalised form meets the tree sequence requirements\n        tables.tree_sequence()\n\n    def verify_sort(self, tables, seed):\n        self.verify_sort_equality(tables, seed)\n        self.verify_canonical_equality(tables, seed)\n        self.verify_sort_mutation_consistency(tables, seed)\n        self.verify_randomise_tables(tables, seed)\n\n    def verify_sort_offset(self, ts):\n        \"\"\"\n        Verifies the behaviour of the edge_start offset value.\n        \"\"\"\n        tables = ts.dump_tables()\n        edges = tables.edges.copy()\n        starts = [0]\n        if len(edges) > 2:\n            starts = [0, 1, len(edges) // 2, len(edges) - 2]\n        for start in starts:\n            # Unsort the edges starting from index start\n            all_edges = list(ts.edges())\n            keep = all_edges[:start]\n            reversed_edges = all_edges[start:][::-1]\n            all_edges = keep + reversed_edges\n            tables.edges.clear()\n            for e in all_edges:\n                tables.edges.append(e)\n            # Verify that import fails for reversed edges\n            with pytest.raises(_tskit.LibraryError):\n                tables.tree_sequence()\n            # If we sort after the start value we should still fail.\n            tables.sort(edge_start=start + 1)\n            with pytest.raises(_tskit.LibraryError):\n                tables.tree_sequence()\n            # Sorting from the correct index should 
give us back the original table.\n            tables.edges.clear()\n            for e in all_edges:\n                tables.edges.append(e)\n            tables.sort(edge_start=start)\n            # Verify the new and old edges are equal.\n            assert edges == tables.edges\n\n        tables.tree_sequence()\n        if len(tables.mutations) > 2:\n            mutations = tables.mutations.copy()\n            tables.mutations.clear()\n            for m in mutations[::-1]:\n                tables.mutations.append(m)\n            with pytest.raises(_tskit.LibraryError):\n                tables.tree_sequence()\n            tables.sort(\n                0, site_start=len(tables.sites), mutation_start=len(tables.mutations)\n            )\n            with pytest.raises(_tskit.LibraryError):\n                tables.tree_sequence()\n            tables.sort(0)\n            tables.tree_sequence()\n\n    def get_wf_example(self, seed):\n        tables = wf.wf_sim(\n            6,\n            3,\n            num_pops=2,\n            seed=seed,\n            num_loci=3,\n            record_migrations=True,\n        )\n        tables.sort()\n        ts = tables.tree_sequence()\n        return ts\n\n    def test_wf_example(self):\n        tables = wf.wf_sim(\n            N=6,\n            ngens=3,\n            num_pops=2,\n            mig_rate=1.0,\n            deep_history=False,\n            seed=42,\n            record_migrations=True,\n        )\n        self.verify_sort(tables, 42)\n\n    def test_single_tree_no_mutations(self):\n        ts = msprime.simulate(10, random_seed=self.random_seed)\n        self.verify_sort_offset(ts)\n        self.verify_sort(ts.dump_tables(), 432)\n\n    def test_single_tree_no_mutations_metadata(self):\n        ts = msprime.simulate(10, random_seed=self.random_seed)\n        ts = tsutil.add_random_metadata(ts, self.random_seed)\n        self.verify_sort(ts.dump_tables(), 12)\n\n    def test_many_trees_no_mutations(self):\n        ts = 
msprime.simulate(10, recombination_rate=2, random_seed=self.random_seed)\n        assert ts.num_trees > 2\n        self.verify_sort_offset(ts)\n        self.verify_sort(ts.dump_tables(), 31)\n\n    def test_single_tree_mutations(self):\n        ts = msprime.simulate(10, mutation_rate=2, random_seed=self.random_seed)\n        assert ts.num_sites > 2\n        self.verify_sort_offset(ts)\n        self.verify_sort(ts.dump_tables(), 83)\n\n    def test_single_tree_mutations_metadata(self):\n        ts = msprime.simulate(10, mutation_rate=2, random_seed=self.random_seed)\n        assert ts.num_sites > 2\n        ts = tsutil.add_random_metadata(ts, self.random_seed)\n        self.verify_sort(ts.dump_tables(), 384)\n\n    def test_single_tree_multichar_mutations(self):\n        ts = msprime.simulate(10, random_seed=self.random_seed)\n        ts = tsutil.insert_multichar_mutations(ts, self.random_seed)\n        self.verify_sort(ts.dump_tables(), 185)\n\n    def test_single_tree_multichar_mutations_metadata(self):\n        ts = msprime.simulate(10, random_seed=self.random_seed)\n        ts = tsutil.insert_multichar_mutations(ts, self.random_seed)\n        ts = tsutil.add_random_metadata(ts, self.random_seed)\n        self.verify_sort(ts.dump_tables(), 2175)\n\n    def test_many_trees_mutations(self):\n        ts = msprime.simulate(\n            10, recombination_rate=2, mutation_rate=2, random_seed=self.random_seed\n        )\n        assert ts.num_trees > 2\n        assert ts.num_sites > 2\n        self.verify_sort_offset(ts)\n        self.verify_sort(ts.dump_tables(), 173)\n\n    def test_many_trees_multichar_mutations(self):\n        ts = msprime.simulate(10, recombination_rate=2, random_seed=self.random_seed)\n        assert ts.num_trees > 2\n        ts = tsutil.insert_multichar_mutations(ts, self.random_seed)\n        self.verify_sort(ts.dump_tables(), 16)\n\n    def test_many_trees_multichar_mutations_metadata(self):\n        ts = msprime.simulate(10, 
recombination_rate=2, random_seed=self.random_seed)\n        assert ts.num_trees > 2\n        ts = tsutil.insert_multichar_mutations(ts, self.random_seed)\n        ts = tsutil.add_random_metadata(ts, self.random_seed)\n        self.verify_sort(ts.dump_tables(), 91)\n\n    def get_nonbinary_example(self, mutation_rate):\n        ts = msprime.simulate(\n            sample_size=20,\n            recombination_rate=10,\n            random_seed=self.random_seed,\n            mutation_rate=mutation_rate,\n            demographic_events=[\n                msprime.SimpleBottleneck(time=0.5, population=0, proportion=1)\n            ],\n        )\n        # Make sure this really has some non-binary nodes\n        found = False\n        for e in ts.edgesets():\n            if len(e.children) > 2:\n                found = True\n                break\n        assert found\n        assert ts.num_trees > 2\n        return ts\n\n    def test_nonbinary_trees(self):\n        ts = self.get_nonbinary_example(mutation_rate=0)\n        self.verify_sort_offset(ts)\n        self.verify_sort(ts.dump_tables(), 9182)\n\n    def test_nonbinary_trees_mutations(self):\n        ts = self.get_nonbinary_example(mutation_rate=2)\n        assert ts.num_trees > 2\n        assert ts.num_sites > 2\n        self.verify_sort_offset(ts)\n        self.verify_sort(ts.dump_tables(), 44)\n\n    def test_unknown_times(self):\n        ts = self.get_wf_example(seed=486)\n        ts = tsutil.insert_branch_mutations(ts, mutations_per_branch=2)\n        ts = tsutil.remove_mutation_times(ts)\n        self.verify_sort(ts.dump_tables(), 9182)\n\n    def test_stable_individual_order(self):\n        # canonical should retain individual order lacking any other information\n        tables = tskit.TableCollection(sequence_length=100)\n        for a in \"arbol\":\n            tables.individuals.add_row(metadata=a.encode())\n        tables2 = tables.copy()\n        tables2.canonicalise(remove_unreferenced=False)\n        
tables.assert_equals(tables2)\n\n    def test_discrete_times(self):\n        ts = self.get_wf_example(seed=623)\n        ts = tsutil.insert_discrete_time_mutations(ts)\n        self.verify_sort(ts.dump_tables(), 9183)\n\n    def test_incompatible_edges(self):\n        ts1 = msprime.simulate(10, random_seed=self.random_seed)\n        ts2 = msprime.simulate(20, random_seed=self.random_seed)\n        tables1 = ts1.dump_tables()\n        tables2 = ts2.dump_tables()\n        tables2.edges.set_columns(**tables1.edges.asdict())\n        # The edges in tables2 will refer to nodes that don't exist.\n        with pytest.raises(_tskit.LibraryError):\n            tables2.sort()\n        with pytest.raises(_tskit.LibraryError):\n            tables2.canonicalise()\n\n    def test_incompatible_sites(self):\n        ts1 = msprime.simulate(10, random_seed=self.random_seed)\n        ts2 = msprime.simulate(10, mutation_rate=2, random_seed=self.random_seed)\n        assert ts2.num_sites > 1\n        tables1 = ts1.dump_tables()\n        tables2 = ts2.dump_tables()\n        # The mutations in tables2 will refer to sites that don't exist.\n        tables1.mutations.set_columns(**tables2.mutations.asdict())\n        with pytest.raises(_tskit.LibraryError):\n            tables1.sort()\n        with pytest.raises(_tskit.LibraryError):\n            tables1.canonicalise()\n\n    def test_incompatible_mutation_nodes(self):\n        ts1 = msprime.simulate(2, random_seed=self.random_seed)\n        ts2 = msprime.simulate(10, mutation_rate=2, random_seed=self.random_seed)\n        assert ts2.num_sites > 1\n        tables1 = ts1.dump_tables()\n        tables2 = ts2.dump_tables()\n        # The mutations in tables2 will refer to nodes that don't exist.\n        tables1.sites.set_columns(**tables2.sites.asdict())\n        tables1.mutations.set_columns(**tables2.mutations.asdict())\n        with pytest.raises(_tskit.LibraryError):\n            tables1.sort()\n        with 
pytest.raises(_tskit.LibraryError):\n            tables1.canonicalise()\n\n    def test_empty_tables(self):\n        tables = tskit.TableCollection(1)\n        tables.sort()\n        assert tables.nodes.num_rows == 0\n        assert tables.edges.num_rows == 0\n        assert tables.sites.num_rows == 0\n        assert tables.mutations.num_rows == 0\n        assert tables.migrations.num_rows == 0\n\n\nclass TestSortMigrations:\n    \"\"\"\n    Tests that migrations are correctly ordered when sorting tables.\n    \"\"\"\n\n    def test_msprime_output_unmodified(self):\n        pop_configs = [msprime.PopulationConfiguration(5) for _ in range(3)]\n        migration_matrix = [[0, 1, 1], [1, 0, 1], [1, 1, 0]]\n        ts = msprime.simulate(\n            recombination_rate=0.1,\n            population_configurations=pop_configs,\n            migration_matrix=migration_matrix,\n            record_migrations=True,\n            random_seed=1,\n        )\n        assert ts.num_migrations > 100\n        tables = ts.tables.copy()\n        tables.sort()\n        tables.assert_equals(ts.tables, ignore_provenance=True)\n\n    def test_full_sort_order(self):\n        tables = tskit.TableCollection(1)\n        for _ in range(3):\n            tables.nodes.add_row()\n            tables.populations.add_row()\n        for left in [0, 0.5]:\n            for a_time in range(3):\n                for node in range(2):\n                    tables.migrations.add_row(\n                        left=left, right=1, node=node, source=0, dest=1, time=a_time\n                    )\n                    tables.migrations.add_row(\n                        left=left, right=1, node=node, source=1, dest=2, time=a_time\n                    )\n\n        sorted_list = sorted(\n            tables.migrations, key=lambda m: (m.time, m.source, m.dest, m.left, m.node)\n        )\n        assert sorted_list != list(tables.migrations)\n        tables.sort()\n        assert sorted_list == 
list(tables.migrations)\n\n\nclass TestSortMutations:\n    \"\"\"\n    Tests that mutations are correctly ordered when sorting tables.\n    \"\"\"\n\n    def test_sort_mutations_stability(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        \"\"\"\n        )\n        sites = io.StringIO(\n            \"\"\"\\\n        position    ancestral_state\n        0.1     0\n        0.2     0\n        \"\"\"\n        )\n        mutations = io.StringIO(\n            \"\"\"\\\n        site    node    derived_state   parent\n        1       0       1               -1\n        1       1       1               -1\n        0       1       1               -1\n        0       1       0               2\n        \"\"\"\n        )\n        ts = tskit.load_text(\n            nodes=nodes,\n            edges=edges,\n            sites=sites,\n            mutations=mutations,\n            sequence_length=1,\n            strict=False,\n        )\n        # Load text automatically calls tables.sort(), so we can test the\n        # output directly.\n        sites = ts.tables.sites\n        mutations = ts.tables.mutations\n        assert len(sites) == 2\n        assert len(mutations) == 4\n        assert list(mutations.site) == [0, 0, 1, 1]\n        assert list(mutations.node) == [1, 1, 0, 1]\n        assert list(map(chr, mutations.derived_state)) == [\"1\", \"0\", \"1\", \"1\"]\n\n    def test_sort_mutations_remap_parent_id(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        \"\"\"\n        )\n        sites = io.StringIO(\n            \"\"\"\\\n        position    
ancestral_state\n        0.1     0\n        0.2     0\n        \"\"\"\n        )\n        mutations = io.StringIO(\n            \"\"\"\\\n        site    node    time    derived_state   parent\n        1       0       0.5     1               -1\n        1       0       0.25    0               0\n        1       0       0       1               1\n        0       0       0.5     1               -1\n        0       0       0.125   0               3\n        0       0       0       1               4\n        \"\"\"\n        )\n        ts = tskit.load_text(\n            nodes=nodes,\n            edges=edges,\n            sites=sites,\n            mutations=mutations,\n            sequence_length=1,\n            strict=False,\n        )\n        # Load text automatically calls sort tables, so we can test the\n        # output directly.\n        sites = ts.tables.sites\n        mutations = ts.tables.mutations\n        assert len(sites) == 2\n        assert len(mutations) == 6\n        assert list(mutations.site) == [0, 0, 0, 1, 1, 1]\n        assert list(mutations.node) == [0, 0, 0, 0, 0, 0]\n        assert list(mutations.time) == [0.5, 0.125, 0.0, 0.5, 0.25, 0.0]\n        assert list(mutations.parent) == [-1, 0, 1, -1, 3, 4]\n\n    def test_sort_mutations_bad_parent_id(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        \"\"\"\n        )\n        sites = io.StringIO(\n            \"\"\"\\\n        position    ancestral_state\n        0.1     0\n        \"\"\"\n        )\n        mutations = io.StringIO(\n            \"\"\"\\\n        site    node    derived_state   parent\n        1       0       1               -2\n        \"\"\"\n        )\n        with pytest.raises(ValueError):\n            tskit.load_text(\n                nodes=nodes,\n                edges=edges,\n     
           sites=sites,\n                mutations=mutations,\n                sequence_length=1,\n                strict=False,\n            )\n\n    def test_sort_mutations_time(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           -6\n        1       1           -6\n        2       1           -6\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        \"\"\"\n        )\n        sites = io.StringIO(\n            \"\"\"\\\n        position    ancestral_state\n        0.1     0\n        0.2     0\n        0.3     0\n        \"\"\"\n        )\n        mutations = io.StringIO(\n            \"\"\"\\\n        site    node    time    derived_state   parent\n        2       0       4       a              -1\n        2       1       -5      b              -1\n        2       2       6       c              -1\n        1       0       0.5     d              -1\n        1       1       0.5     e              -1\n        1       2       0.5     f              -1\n        0       0       1       g              -1\n        0       1       2       h              -1\n        0       2       3       i              -1\n        \"\"\"\n        )\n        ts = tskit.load_text(\n            nodes=nodes,\n            edges=edges,\n            sites=sites,\n            mutations=mutations,\n            sequence_length=1,\n            strict=False,\n        )\n        # Load text automatically calls sort tables, so we can test the\n        # output directly.\n        sites = ts.tables.sites\n        mutations = ts.tables.mutations\n        assert len(sites) == 3\n        assert len(mutations) == 9\n        assert list(mutations.site) == [0, 0, 0, 1, 1, 1, 2, 2, 2]\n        assert list(mutations.node) == [2, 1, 0, 0, 1, 2, 2, 0, 1]\n        # Nans are not equal so swap in -1\n        times = mutations.time.copy()\n        times[np.isnan(times)] = -1\n  
      assert list(times) == [3.0, 2.0, 1.0, 0.5, 0.5, 0.5, 6.0, 4.0, -5.0]\n        assert list(mutations.derived_state) == list(\n            map(ord, [\"i\", \"h\", \"g\", \"d\", \"e\", \"f\", \"c\", \"a\", \"b\"])\n        )\n        assert list(mutations.parent) == [-1, -1, -1, -1, -1, -1, -1, -1, -1]\n\n    def test_add_mutations_to_nodes(self):\n        # Test that adding mutations to random nodes, without parent IDs\n        # works - this requires the mutations to be sorted by node times\n\n        ts = msprime.sim_mutations(\n            msprime.sim_ancestry(\n                10, sequence_length=100, random_seed=1, recombination_rate=0.01\n            ),\n            rate=1,\n            random_seed=1,\n        )\n\n        # Add some random mutations, and delete some others\n        tables = ts.dump_tables()\n        tables.mutations.time = np.full_like(tables.mutations.time, tskit.UNKNOWN_TIME)\n        np.random.seed(10)\n        for s in ts.sites():\n            tables.mutations.add_row(\n                site=s.id, node=np.random.randint(ts.num_nodes), derived_state=\"A\"\n            )\n        keep = np.ones(tables.mutations.num_rows, dtype=bool)\n        keep[0:100] = False\n        tables.mutations.replace_with(tables.mutations[keep])\n        # Remove all the parent IDs\n        tables.mutations.parent = np.full_like(tables.mutations.parent, tskit.NULL)\n        assert np.all(tables.mutations.parent == tskit.NULL)\n        tables.sort()\n        tables.build_index()\n        tables.compute_mutation_parents()\n        tables.tree_sequence()\n\n\nclass TestTablesToTreeSequence:\n    \"\"\"\n    Tests for the .tree_sequence() method of a TableCollection.\n    \"\"\"\n\n    def test_round_trip(self):\n        a = msprime.simulate(5, mutation_rate=1, random_seed=42)\n        tables = a.dump_tables()\n        b = tables.tree_sequence()\n        assert a.tables == b.tables\n\n    @pytest.mark.parametrize(\"sequence_length\", [np.inf, np.nan])\n    def 
test_nonfinite_sequence_length(self, sequence_length):\n        tables = tskit.TableCollection(sequence_length=sequence_length)\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_BAD_SEQUENCE_LENGTH\"):\n            tables.tree_sequence()\n\n\nclass TestMutationTimeErrors:\n    def test_younger_than_node_below(self):\n        ts = msprime.simulate(5, mutation_rate=1, random_seed=42)\n        tables = ts.dump_tables()\n        tables.mutations.time = np.zeros(len(tables.mutations.time), dtype=np.float64)\n        with pytest.raises(\n            _tskit.LibraryError,\n            match=\"A mutation's time must be >= the node time, or be marked as\"\n            \" 'unknown'\",\n        ):\n            tables.tree_sequence()\n\n    def test_older_than_node_above(self):\n        ts = msprime.simulate(5, mutation_rate=1, random_seed=42)\n        tables = ts.dump_tables()\n        tables.mutations.time = (\n            np.ones(len(tables.mutations.time), dtype=np.float64) * 42\n        )\n        with pytest.raises(\n            _tskit.LibraryError,\n            match=\"A mutation's time must be < the parent node of the edge on which it\"\n            \" occurs, or be marked as 'unknown'\",\n        ):\n            tables.tree_sequence()\n\n    def test_older_than_parent_node(self):\n        ts = msprime.simulate(\n            10, random_seed=42, mutation_rate=0.0, recombination_rate=1.0\n        )\n        ts = tsutil.jukes_cantor(\n            ts, num_sites=10, mu=1, multiple_per_node=False, seed=42\n        )\n        tables = ts.dump_tables()\n        assert sum(tables.mutations.parent != -1) != 0\n        # Make all times the node time\n        times = tables.nodes.time[tables.mutations.node]\n        # Then make mutations without a parent really old\n        times[tables.mutations.parent == -1] = 64.0\n        tables.mutations.time = times\n        tables.sort()\n        with pytest.raises(\n            _tskit.LibraryError,\n            match=\"A 
mutation's time must be < the parent node of the edge on which it\"\n            \" occurs, or be marked as 'unknown'\",\n        ):\n            tables.tree_sequence()\n\n    def test_older_than_parent_mutation(self):\n        ts = msprime.simulate(\n            10, random_seed=42, mutation_rate=0.0, recombination_rate=1.0\n        )\n        ts = tsutil.jukes_cantor(\n            ts, num_sites=10, mu=1, multiple_per_node=False, seed=42\n        )\n        tables = ts.dump_tables()\n        tables.compute_mutation_times()\n        assert sum(tables.mutations.parent != -1) != 0\n        times = tables.mutations.time\n        # Then make mutations that have a parent really old\n        times[tables.mutations.parent != -1] = 64.0\n        tables.mutations.time = times\n        with pytest.raises(_tskit.LibraryError):\n            tables.tree_sequence()\n\n    def test_unsorted_times(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       1           0\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        \"\"\"\n        )\n        sites = io.StringIO(\n            \"\"\"\\\n        position    ancestral_state\n        0.1     0\n        \"\"\"\n        )\n        mutations = io.StringIO(\n            \"\"\"\\\n        site    node    time    derived_state   parent\n        0       0       1       0              -1\n        0       1       2       0              -1\n        0       2       3       0              -1\n        \"\"\"\n        )\n        ts = tskit.load_text(\n            nodes=nodes,\n            edges=edges,\n            sites=sites,\n            mutations=mutations,\n            sequence_length=1,\n            strict=False,\n        )\n        tables = ts.dump_tables()\n        tables.mutations.time = tables.mutations.time[::-1]\n        with pytest.raises(\n           
 _tskit.LibraryError,\n            match=\"Mutations must be provided in non-decreasing site order and\"\n            \" non-increasing\"\n            \" time order within each site\",\n        ):\n            tables.tree_sequence()\n\n    def test_mixed_known_and_unknown(self):\n        ts = msprime.simulate(\n            10, random_seed=42, mutation_rate=0.0, recombination_rate=1.0\n        )\n        ts = tsutil.jukes_cantor(\n            ts, num_sites=10, mu=1, multiple_per_node=False, seed=42\n        )\n        tables = ts.dump_tables()\n        tables.compute_mutation_times()\n        tables.sort()\n        times = tables.mutations.time\n        # Unknown times on diff sites pass\n        times[(tables.mutations.site % 2) == 0] = tskit.UNKNOWN_TIME\n        tables.mutations.time = times\n        tables.tree_sequence()\n        # Mixed known/unknown times on sites fail\n        times[::2] = tskit.UNKNOWN_TIME\n        tables.mutations.time = times\n        with pytest.raises(\n            _tskit.LibraryError,\n            match=\"Mutation times must either be all marked 'unknown', or all be known \"\n            \"values for any single site.\",\n        ):\n            tables.tree_sequence()\n\n\nclass TestNanDoubleValues:\n    \"\"\"\n    In some tables we need to guard against NaN/infinite values in the input.\n    \"\"\"\n\n    def test_edge_coords(self):\n        ts = msprime.simulate(5, mutation_rate=1, random_seed=42)\n\n        tables = ts.dump_tables()\n        bad_coords = tables.edges.left + float(\"inf\")\n        tables.edges.left = bad_coords\n        with pytest.raises(_tskit.LibraryError):\n            tables.tree_sequence()\n\n        tables = ts.dump_tables()\n        bad_coords = tables.edges.right + float(\"nan\")\n        tables.edges.right = bad_coords\n        with pytest.raises(_tskit.LibraryError):\n            tables.tree_sequence()\n\n    def test_migrations(self):\n        ts = msprime.simulate(5, mutation_rate=1, 
random_seed=42)\n\n        tables = ts.dump_tables()\n        tables.populations.add_row()\n        tables.migrations.add_row(float(\"inf\"), 1, time=0, node=0, source=0, dest=1)\n        with pytest.raises(_tskit.LibraryError):\n            tables.tree_sequence()\n\n        tables = ts.dump_tables()\n        tables.populations.add_row()\n        tables.migrations.add_row(0, float(\"nan\"), time=0, node=0, source=0, dest=1)\n        with pytest.raises(_tskit.LibraryError):\n            tables.tree_sequence()\n\n        tables = ts.dump_tables()\n        tables.populations.add_row()\n        tables.migrations.add_row(0, 1, time=float(\"nan\"), node=0, source=0, dest=1)\n        with pytest.raises(_tskit.LibraryError):\n            tables.tree_sequence()\n\n    def test_site_positions(self):\n        ts = msprime.simulate(5, mutation_rate=1, random_seed=42)\n        tables = ts.dump_tables()\n        bad_pos = tables.sites.position.copy()\n        bad_pos[-1] = np.inf\n        tables.sites.position = bad_pos\n        with pytest.raises(_tskit.LibraryError):\n            tables.tree_sequence()\n\n    def test_node_times(self):\n        ts = msprime.simulate(5, mutation_rate=1, random_seed=42)\n        tables = ts.dump_tables()\n        bad_times = tables.nodes.time.copy()\n        bad_times[-1] = np.inf\n        tables.nodes.time = bad_times\n        with pytest.raises(_tskit.LibraryError, match=\"Times must be finite\"):\n            tables.tree_sequence()\n        bad_times[-1] = math.nan\n        tables.nodes.time = bad_times\n        with pytest.raises(_tskit.LibraryError, match=\"Times must be finite\"):\n            tables.tree_sequence()\n\n    def test_mutation_times(self):\n        ts = msprime.simulate(5, mutation_rate=1, random_seed=42)\n        tables = ts.dump_tables()\n        bad_times = tables.mutations.time.copy()\n        bad_times[-1] = np.inf\n        tables.mutations.time = bad_times\n        with pytest.raises(_tskit.LibraryError, match=\"Times 
must be finite\"):\n            tables.tree_sequence()\n        bad_times = tables.mutations.time.copy()\n        bad_times[-1] = math.nan\n        tables.mutations.time = bad_times\n        with pytest.raises(_tskit.LibraryError, match=\"Times must be finite\"):\n            tables.tree_sequence()\n\n    def test_individual(self):\n        ts = msprime.simulate(12, mutation_rate=1, random_seed=42)\n        ts = tsutil.insert_random_ploidy_individuals(ts, seed=42)\n        assert ts.num_individuals > 1\n        tables = ts.dump_tables()\n        bad_locations = tables.individuals.location.copy()\n        bad_locations[0] = np.inf\n        tables.individuals.location = bad_locations\n        ts = tables.tree_sequence()\n\n\nclass TestSimplifyTables:\n    \"\"\"\n    Tests for the simplify_tables function.\n    \"\"\"\n\n    random_seed = 42\n\n    def test_deprecated_zero_mutation_sites(self):\n        ts = msprime.simulate(10, mutation_rate=1, random_seed=self.random_seed)\n        tables = ts.dump_tables()\n        with warnings.catch_warnings(record=True) as w:\n            warnings.simplefilter(\"always\")\n            tables.simplify(ts.samples(), filter_zero_mutation_sites=True)\n            assert len(w) == 1\n            assert issubclass(w[-1].category, FutureWarning)\n\n    def test_zero_mutation_sites(self):\n        ts = msprime.simulate(10, mutation_rate=1, random_seed=self.random_seed)\n        for filter_sites in [True, False]:\n            t1 = ts.dump_tables()\n            with pytest.warns(FutureWarning):\n                t1.simplify([0, 1], filter_zero_mutation_sites=filter_sites)\n            t2 = ts.dump_tables()\n            t2.simplify([0, 1], filter_sites=filter_sites)\n            t1.assert_equals(t2, ignore_provenance=True)\n            if filter_sites:\n                assert ts.num_sites > len(t1.sites)\n\n    def test_full_samples(self):\n        for n in [2, 10, 100, 1000]:\n            ts = msprime.simulate(\n                n, 
recombination_rate=1, mutation_rate=1, random_seed=self.random_seed\n            )\n            tables = ts.dump_tables()\n            nodes_before = tables.nodes.copy()\n            edges_before = tables.edges.copy()\n            sites_before = tables.sites.copy()\n            mutations_before = tables.mutations.copy()\n            for samples in [None, list(ts.samples()), ts.samples()]:\n                node_map = tables.simplify(samples=samples)\n                assert node_map.shape == (len(nodes_before),)\n                assert nodes_before == tables.nodes\n                assert edges_before == tables.edges\n                assert sites_before == tables.sites\n                assert mutations_before == tables.mutations\n\n    def test_bad_samples(self):\n        n = 10\n        ts = msprime.simulate(n, random_seed=self.random_seed)\n        for bad_node in [-1, ts.num_nodes, 2**31 - 1]:\n            tables = ts.dump_tables()\n            with pytest.raises(_tskit.LibraryError):\n                tables.simplify(samples=[0, bad_node])\n\n    def test_bad_edge_ordering(self):\n        ts = msprime.simulate(10, random_seed=self.random_seed)\n        tables = ts.dump_tables()\n        edges = tables.edges\n        # Reversing the edges violates the ordering constraints.\n        edges.set_columns(\n            left=edges.left[::-1],\n            right=edges.right[::-1],\n            parent=edges.parent[::-1],\n            child=edges.child[::-1],\n        )\n        with pytest.raises(_tskit.LibraryError):\n            tables.simplify(samples=[0, 1])\n\n    def test_bad_edges(self):\n        ts = msprime.simulate(10, random_seed=self.random_seed)\n        for bad_node in [-1, ts.num_nodes, ts.num_nodes + 1, 2**31 - 1]:\n            # Bad parent node\n            tables = ts.dump_tables()\n            edges = tables.edges\n            parent = edges.parent\n            parent[0] = bad_node\n            edges.set_columns(\n                left=edges.left, 
right=edges.right, parent=parent, child=edges.child\n            )\n            with pytest.raises(_tskit.LibraryError):\n                tables.simplify(samples=[0, 1])\n            # Bad child node\n            tables = ts.dump_tables()\n            edges = tables.edges\n            child = edges.child\n            child[0] = bad_node\n            edges.set_columns(\n                left=edges.left, right=edges.right, parent=edges.parent, child=child\n            )\n            with pytest.raises(_tskit.LibraryError):\n                tables.simplify(samples=[0, 1])\n            # child == parent\n            tables = ts.dump_tables()\n            edges = tables.edges\n            child = edges.child\n            child[0] = edges.parent[0]\n            edges.set_columns(\n                left=edges.left, right=edges.right, parent=edges.parent, child=child\n            )\n            with pytest.raises(_tskit.LibraryError):\n                tables.simplify(samples=[0, 1])\n            # left == right\n            tables = ts.dump_tables()\n            edges = tables.edges\n            left = edges.left\n            left[0] = edges.right[0]\n            edges.set_columns(\n                left=left, right=edges.right, parent=edges.parent, child=edges.child\n            )\n            with pytest.raises(_tskit.LibraryError):\n                tables.simplify(samples=[0, 1])\n            # left > right\n            tables = ts.dump_tables()\n            edges = tables.edges\n            left = edges.left\n            left[0] = edges.right[0] + 1\n            edges.set_columns(\n                left=left, right=edges.right, parent=edges.parent, child=edges.child\n            )\n            with pytest.raises(_tskit.LibraryError):\n                tables.simplify(samples=[0, 1])\n\n    def test_bad_mutation_nodes(self):\n        ts = msprime.simulate(10, random_seed=self.random_seed, mutation_rate=1)\n        assert ts.num_mutations > 0\n        for bad_node in [-1, 
ts.num_nodes, 2**31 - 1]:\n            tables = ts.dump_tables()\n            mutations = tables.mutations\n            node = mutations.node\n            node[0] = bad_node\n            mutations.set_columns(\n                site=mutations.site,\n                node=node,\n                derived_state=mutations.derived_state,\n                derived_state_offset=mutations.derived_state_offset,\n            )\n            with pytest.raises(_tskit.LibraryError):\n                tables.simplify(samples=[0, 1])\n\n    def test_bad_mutation_sites(self):\n        ts = msprime.simulate(10, random_seed=self.random_seed, mutation_rate=1)\n        assert ts.num_mutations > 0\n        for bad_site in [-1, ts.num_sites, 2**31 - 1]:\n            tables = ts.dump_tables()\n            mutations = tables.mutations\n            site = mutations.site\n            site[0] = bad_site\n            mutations.set_columns(\n                site=site,\n                node=mutations.node,\n                derived_state=mutations.derived_state,\n                derived_state_offset=mutations.derived_state_offset,\n            )\n            with pytest.raises(_tskit.LibraryError):\n                tables.simplify(samples=[0, 1])\n\n    def test_bad_site_positions(self):\n        ts = msprime.simulate(10, random_seed=self.random_seed, mutation_rate=1)\n        assert ts.num_mutations > 0\n        # Positions > sequence_length are valid, as we can have gaps at the end of\n        # a tree sequence.\n        for bad_position in [-1, -1e-6]:\n            tables = ts.dump_tables()\n            sites = tables.sites\n            position = sites.position\n            position[0] = bad_position\n            sites.set_columns(\n                position=position,\n                ancestral_state=sites.ancestral_state,\n                ancestral_state_offset=sites.ancestral_state_offset,\n            )\n            with pytest.raises(_tskit.LibraryError):\n                
tables.simplify(samples=[0, 1])\n\n    def test_duplicate_positions(self):\n        tables = tskit.TableCollection(sequence_length=1)\n        tables.sites.add_row(0, ancestral_state=\"0\")\n        tables.sites.add_row(0, ancestral_state=\"0\")\n        with pytest.raises(_tskit.LibraryError):\n            tables.simplify([])\n\n    def test_samples_interface(self):\n        ts = msprime.simulate(50, random_seed=1)\n        for good_form in [[], [0, 1], (0, 1), np.array([0, 1], dtype=np.int32)]:\n            tables = ts.dump_tables()\n            tables.simplify(good_form)\n        tables = ts.dump_tables()\n        for bad_values in [[[[]]], np.array([[0, 1], [2, 3]], dtype=np.int32)]:\n            with pytest.raises(ValueError):\n                tables.simplify(bad_values)\n        for bad_type in [[0.1], [\"string\"], {}, [{}]]:\n            with pytest.raises(TypeError):\n                tables.simplify(bad_type)\n        # We only convert to int if we don't overflow\n        for bad_node in [np.iinfo(np.int32).min - 1, np.iinfo(np.int32).max + 1]:\n            with pytest.raises(OverflowError):\n                tables.simplify(samples=np.array([0, bad_node]))\n\n    @pytest.fixture(scope=\"session\")\n    def wf_sim_with_individual_metadata(self):\n        tables = wf.wf_sim(\n            9,\n            10,\n            seed=1,\n            deep_history=False,\n            initial_generation_samples=False,\n            num_loci=5,\n        )\n        assert tables.individuals.num_rows > 50\n        assert np.all(tables.nodes.individual >= 0)\n        individuals_copy = tables.copy().individuals\n        tables.individuals.clear()\n        tables.individuals.metadata_schema = tskit.MetadataSchema({\"codec\": \"json\"})\n        for i, individual in enumerate(individuals_copy):\n            tables.individuals.add_row(\n                flags=individual.flags,\n                location=individual.location,\n                parents=individual.parents,\n           
     metadata={\n                    \"original_id\": i,\n                    \"original_parents\": [int(p) for p in individual.parents],\n                },\n            )\n        tables.sort()\n        return tables\n\n    def test_individual_parent_mapping(self, wf_sim_with_individual_metadata):\n        tables = wf_sim_with_individual_metadata.copy()\n        tables.simplify()\n        ts = tables.tree_sequence()\n        for individual in tables.individuals:\n            for parent, original_parent in zip(\n                individual.parents, individual.metadata[\"original_parents\"]\n            ):\n                if parent != tskit.NULL:\n                    assert (\n                        ts.individual(parent).metadata[\"original_id\"] == original_parent\n                    )\n        assert set(tables.individuals.parents) != {tskit.NULL}\n\n    def verify_complete_genetic_pedigree(self, tables):\n        ts = tables.tree_sequence()\n        for edge in ts.edges():\n            child = ts.individual(ts.node(edge.child).individual)\n            parent = ts.individual(ts.node(edge.parent).individual)\n            assert parent.id in child.parents\n            assert parent.metadata[\"original_id\"] in child.metadata[\"original_parents\"]\n\n    def test_no_complete_genetic_pedigree(self, wf_sim_with_individual_metadata):\n        tables = wf_sim_with_individual_metadata.copy()\n        tables.simplify()  # Will remove intermediate individuals\n        with pytest.raises(AssertionError):\n            self.verify_complete_genetic_pedigree(tables)\n\n    def test_complete_genetic_pedigree(self, wf_sim_with_individual_metadata):\n        for params in [{\"keep_unary\": True}, {\"keep_unary_in_individuals\": True}]:\n            tables = wf_sim_with_individual_metadata.copy()\n            tables.simplify(**params)  # Keep intermediate individuals\n            self.verify_complete_genetic_pedigree(tables)\n\n    def 
test_shuffled_individual_parent_mapping(self, wf_sim_with_individual_metadata):\n        tables = wf_sim_with_individual_metadata.copy()\n        tsutil.shuffle_tables(\n            tables,\n            42,\n            shuffle_edges=False,\n            shuffle_populations=False,\n            shuffle_individuals=True,\n            shuffle_sites=False,\n            shuffle_mutations=False,\n            shuffle_migrations=False,\n        )\n        # Check we have a mixed up order\n        tables2 = tables.copy()\n        tables2.sort_individuals()\n        with pytest.raises(AssertionError, match=\"IndividualTable row 0 differs\"):\n            tables.assert_equals(tables2)\n\n        tables.simplify()\n        metadata = [\n            tables.individuals.metadata_schema.decode_row(m)\n            for m in tskit.unpack_bytes(\n                tables.individuals.metadata, tables.individuals.metadata_offset\n            )\n        ]\n        for individual in tables.individuals:\n            for parent, original_parent in zip(\n                individual.parents, individual.metadata[\"original_parents\"]\n            ):\n                if parent != tskit.NULL:\n                    assert metadata[parent][\"original_id\"] == original_parent\n        assert set(tables.individuals.parents) != {tskit.NULL}\n\n    def test_individual_mapping(self):\n        tables = wf.wf_sim(\n            9,\n            10,\n            seed=1,\n            deep_history=True,\n            initial_generation_samples=False,\n            num_loci=5,\n        )\n        assert tables.individuals.num_rows > 50\n        node_md = []\n        individual_md = [b\"\"] * tables.individuals.num_rows\n        for i, node in enumerate(tables.nodes):\n            node_md.append(struct.pack(\"i\", i))\n            individual_md[node.individual] = struct.pack(\"i\", i)\n        tables.nodes.packset_metadata(node_md)\n        tables.individuals.packset_metadata(individual_md)\n        tables.sort()\n    
    tables.simplify()\n        ts = tables.tree_sequence()\n        for node in tables.nodes:\n            if node.individual != tskit.NULL:\n                assert ts.individual(node.individual).metadata == node.metadata\n        assert set(tables.individuals.parents) != {tskit.NULL}\n\n    def test_bad_individuals(self, simple_degree1_ts_fixture):\n        tables = simple_degree1_ts_fixture.dump_tables()\n        tables.individuals.clear()\n        tables.individuals.add_row(parents=[-2])\n        with pytest.raises(tskit.LibraryError, match=\"Individual out of bounds\"):\n            tables.simplify()\n        tables.individuals.clear()\n        tables.individuals.add_row(parents=[0])\n        with pytest.raises(\n            tskit.LibraryError, match=\"Individuals cannot be their own parents\"\n        ):\n            tables.simplify()\n\n    def test_unsorted_individuals_ok(self, simple_degree1_ts_fixture):\n        tables = simple_degree1_ts_fixture.dump_tables()\n        tables.individuals.clear()\n        tables.individuals.add_row(parents=[1])\n        tables.individuals.add_row(parents=[-1])\n        # we really just want to check that no error is thrown here\n        tables.simplify()\n        assert tables.individuals.num_rows == 0\n\n    def test_filter_none(self, simple_degree1_ts_fixture):\n        tables = simple_degree1_ts_fixture.simplify().dump_tables()\n        tables.populations.add_row()\n        tables.individuals.add_row()\n        tables.sites.add_row(\n            position=np.nextafter(tables.sequence_length, 0),\n            ancestral_state=\"XXX\",\n        )\n        orig_num_populations = len(tables.populations)\n        orig_num_individuals = len(tables.individuals)\n        orig_num_sites = len(tables.sites)\n\n        tables.simplify(\n            filter_populations=False, filter_individuals=False, filter_sites=False\n        )\n        assert len(tables.populations) == orig_num_populations\n        assert len(tables.individuals) == 
orig_num_individuals\n        assert len(tables.sites) == orig_num_sites\n\n        tables.simplify(\n            filter_populations=None, filter_individuals=None, filter_sites=None\n        )\n        assert len(tables.populations) < orig_num_populations\n        assert len(tables.individuals) < orig_num_individuals\n        assert len(tables.sites) < orig_num_sites\n\n\nclass TestTableCollection:\n    \"\"\"\n    Tests for the convenience wrapper around a collection of related tables.\n    \"\"\"\n\n    @pytest.fixture(params=[\"tables\", \"dump_tables\"], ids=[\"immutable\", \"mutable\"])\n    def tc(self, request, ts_fixture):\n        if request.param == \"tables\":\n            return ts_fixture.tables\n        else:\n            return ts_fixture.dump_tables()\n\n    def test_table_references(self):\n        ts = msprime.simulate(10, mutation_rate=2, random_seed=1)\n        tables = ts.tables\n        before_individuals = str(tables.individuals)\n        individuals = tables.individuals\n        before_nodes = str(tables.nodes)\n        nodes = tables.nodes\n        before_edges = str(tables.edges)\n        edges = tables.edges\n        before_migrations = str(tables.migrations)\n        migrations = tables.migrations\n        before_sites = str(tables.sites)\n        sites = tables.sites\n        before_mutations = str(tables.mutations)\n        mutations = tables.mutations\n        before_populations = str(tables.populations)\n        populations = tables.populations\n        before_provenances = str(tables.provenances)\n        provenances = tables.provenances\n        del tables\n        assert str(individuals) == before_individuals\n        assert str(nodes) == before_nodes\n        assert str(edges) == before_edges\n        assert str(migrations) == before_migrations\n        assert str(sites) == before_sites\n        assert str(mutations) == before_mutations\n        assert str(populations) == before_populations\n        assert str(provenances) == 
before_provenances\n\n    def test_compute_mutation_parents_ignores_existing_values(self):\n        tables = tskit.TableCollection(sequence_length=1.0)\n        parent = tables.nodes.add_row(time=1.0)\n        child = tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE)\n        tables.edges.add_row(left=0.0, right=1.0, parent=parent, child=child)\n        site = tables.sites.add_row(position=0.0, ancestral_state=\"A\")\n        tables.mutations.add_row(site=site, node=child, derived_state=\"C\")\n        tables.build_index()\n        tables.mutations.parent[:] = 42\n        tables.compute_mutation_parents()\n        assert tables.mutations.parent[0] == tskit.NULL\n\n    def test_compute_mutation_parents_restores_on_index_error(self):\n        tables = tskit.TableCollection(sequence_length=1.0)\n        parent = tables.nodes.add_row(time=1.0)\n        child = tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE)\n        tables.edges.add_row(left=0.0, right=1.0, parent=parent, child=child)\n        site = tables.sites.add_row(position=0.0, ancestral_state=\"A\")\n        tables.mutations.add_row(site=site, node=child, derived_state=\"C\")\n\n        mutation_columns = tables.mutations.asdict()\n        mutation_columns[\"parent\"] = np.array([123], dtype=np.int32)\n        tables.mutations.set_columns(**mutation_columns)\n\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_TABLES_NOT_INDEXED\"):\n            tables.compute_mutation_parents()\n        assert tables.mutations.parent[0] == 123\n\n    def test_compute_mutation_parents_tolerates_various_invalid_values(self):\n        tables = tskit.TableCollection(sequence_length=1.0)\n        parent = tables.nodes.add_row(time=1.0)\n        child = tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE)\n        tables.edges.add_row(left=0.0, right=1.0, parent=parent, child=child)\n        site = tables.sites.add_row(position=0.0, ancestral_state=\"A\")\n        tables.mutations.add_row(site=site, 
node=child, derived_state=\"C\")\n        tables.build_index()\n\n        # A range of nonsensical parent values should be ignored\n        invalid_values = [\n            -2,  # less than NULL sentinel\n            0,  # equal to self for single-row case\n            1,  # out of bounds (>= num_rows)\n            42,  # arbitrary out of bounds\n            np.iinfo(np.int32).max,\n        ]\n        for val in invalid_values:\n            tables.mutations.parent[:] = val\n            tables.compute_mutation_parents()\n            assert tables.mutations.parent[0] == tskit.NULL\n\n    def test_compute_mutation_parents_tolerates_cross_site_and_loops(self):\n        # Build a simple tree with 2 samples under a common parent\n        tables = tskit.TableCollection(sequence_length=1.0)\n        root = tables.nodes.add_row(time=2.0)\n        a = tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE)\n        b = tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE)\n        tables.edges.add_row(0.0, 1.0, root, a)\n        tables.edges.add_row(0.0, 1.0, root, b)\n        s0 = tables.sites.add_row(0.0, \"A\")\n        s1 = tables.sites.add_row(0.5, \"A\")\n        m0 = tables.mutations.add_row(site=s0, node=a, derived_state=\"C\")\n        m1 = tables.mutations.add_row(site=s1, node=b, derived_state=\"G\")\n        assert m0 == 0 and m1 == 1\n        tables.build_index()\n\n        # Cross-site parent should be ignored by compute_mutation_parents\n        tables.mutations.parent[:] = np.array([tskit.NULL, 0], dtype=np.int32)\n        tables.compute_mutation_parents()\n        assert np.array_equal(\n            tables.mutations.parent, np.array([tskit.NULL, tskit.NULL])\n        )\n\n        # Explicit loop in parents should be ignored by compute_mutation_parents\n        tables.mutations.parent[:] = np.array([1, 0], dtype=np.int32)\n        tables.compute_mutation_parents()\n        assert np.array_equal(\n            tables.mutations.parent, np.array([tskit.NULL, 
tskit.NULL])\n        )\n\n    def test_str(self):\n        ts = msprime.simulate(10, random_seed=1)\n        tables = ts.tables\n        s = str(tables)\n        assert len(s) > 0\n\n    def test_nbytes_empty_tables(self):\n        tables = tskit.TableCollection(1)\n        assert tables.nbytes == 119\n\n    def test_nbytes(self, tmp_path, tc):\n        tc.dump(tmp_path / \"tables\")\n        store = kastore.load(tmp_path / \"tables\")\n        for v in store.values():\n            # Check we really have data in every field\n            assert v.nbytes > 0\n        nbytes = sum(\n            array.nbytes * 2 if \"_offset\" in name else array.nbytes\n            for name, array in store.items()\n            # nbytes is the size of asdict, so exclude file format items\n            if name not in [\"format/version\", \"format/name\", \"uuid\"]\n        )\n        assert nbytes == tc.nbytes\n\n    def test_asdict(self, tc):\n        d1 = {\n            \"encoding_version\": (1, 6),\n            \"sequence_length\": tc.sequence_length,\n            \"metadata_schema\": repr(tc.metadata_schema),\n            \"metadata\": tc.metadata_schema.encode_row(tc.metadata),\n            \"time_units\": tc.time_units,\n            \"individuals\": tc.individuals.asdict(),\n            \"populations\": tc.populations.asdict(),\n            \"nodes\": tc.nodes.asdict(),\n            \"edges\": tc.edges.asdict(),\n            \"sites\": tc.sites.asdict(),\n            \"mutations\": tc.mutations.asdict(),\n            \"migrations\": tc.migrations.asdict(),\n            \"provenances\": tc.provenances.asdict(),\n            \"indexes\": tc.indexes.asdict(),\n            \"reference_sequence\": tc.reference_sequence.asdict(),\n        }\n        d2 = tc.asdict()\n        assert set(d1.keys()) == set(d2.keys())\n        t1 = tskit.TableCollection.fromdict(d1)\n        t2 = tskit.TableCollection.fromdict(d2)\n        t1.assert_equals(t2)\n        assert t1.has_index()\n        assert 
t2.has_index()\n\n    @pytest.mark.parametrize(\"force_offset_64\", [True, False])\n    def test_asdict_force_offset_64(self, tc, force_offset_64):\n        d = tc.asdict(force_offset_64=force_offset_64)\n        for table in tc.table_name_map:\n            for name, column in d[table].items():\n                if name.endswith(\"_offset\"):\n                    if force_offset_64:\n                        assert column.dtype == np.uint64\n                    else:\n                        assert column.dtype == np.uint32\n\n    def test_asdict_force_offset_64_default(self, tc):\n        d = tc.asdict()\n        for table in tc.table_name_map:\n            for name, column in d[table].items():\n                if name.endswith(\"_offset\"):\n                    assert column.dtype == np.uint32\n\n    def test_asdict_lifecycle(self, tc, ts_fixture):\n        tables_dict = tc.asdict()\n        del tc\n        tskit.TableCollection.fromdict(tables_dict).assert_equals(\n            ts_fixture.dump_tables()\n        )\n\n    def test_from_dict(self, tc):\n        t1 = tc\n        d = {\n            \"encoding_version\": (1, 1),\n            \"sequence_length\": t1.sequence_length,\n            \"metadata_schema\": repr(t1.metadata_schema),\n            \"metadata\": t1.metadata_schema.encode_row(t1.metadata),\n            \"time_units\": t1.time_units,\n            \"individuals\": t1.individuals.asdict(),\n            \"populations\": t1.populations.asdict(),\n            \"nodes\": t1.nodes.asdict(),\n            \"edges\": t1.edges.asdict(),\n            \"sites\": t1.sites.asdict(),\n            \"mutations\": t1.mutations.asdict(),\n            \"migrations\": t1.migrations.asdict(),\n            \"provenances\": t1.provenances.asdict(),\n            \"indexes\": t1.indexes.asdict(),\n            \"reference_sequence\": t1.reference_sequence.asdict(),\n        }\n        t2 = tskit.TableCollection.fromdict(d)\n        t1.assert_equals(t2)\n\n    def 
test_roundtrip_dict(self, tc):\n        t1 = tc\n        t2 = tskit.TableCollection.fromdict(t1.asdict())\n        t1.assert_equals(t2)\n\n    def test_table_name_map(self, tc):\n        td1 = {\n            \"individuals\": tc.individuals,\n            \"populations\": tc.populations,\n            \"nodes\": tc.nodes,\n            \"edges\": tc.edges,\n            \"sites\": tc.sites,\n            \"mutations\": tc.mutations,\n            \"migrations\": tc.migrations,\n            \"provenances\": tc.provenances,\n        }\n        td2 = tc.table_name_map\n        assert isinstance(td2, dict)\n        assert set(td1.keys()) == set(td2.keys())\n        for name in td2.keys():\n            assert td1[name] == td2[name]\n        assert td1 == td2\n\n        # Deprecated in 0.4.1 - only test for mutable TableCollection\n        if isinstance(tc, tskit.TableCollection):\n            with pytest.warns(FutureWarning):\n                td1 = tc.name_map\n            assert td1 == td2\n\n    def test_equals_empty(self):\n        assert tskit.TableCollection() == tskit.TableCollection()\n\n    def test_equals_sequence_length(self):\n        assert tskit.TableCollection(sequence_length=1) != tskit.TableCollection(\n            sequence_length=2\n        )\n\n    def test_copy(self, ts_fixture):\n        t1 = ts_fixture.dump_tables()\n        t2 = t1.copy()\n        assert t1 is not t2\n        t1.assert_equals(t2)\n        t1.edges.clear()\n        assert t1 != t2\n\n    def test_clear_table(self, ts_fixture):\n        tables = ts_fixture.dump_tables()\n        tables.clear()\n        data_tables = [t for t in tskit.TABLE_NAMES if t != \"provenances\"]\n        for table in data_tables:\n            assert getattr(tables, f\"{table}\").num_rows == 0\n            assert repr(getattr(tables, f\"{table}\").metadata_schema) != \"\"\n        assert tables.provenances.num_rows > 0\n        assert len(tables.metadata) > 0\n        assert repr(tables.metadata_schema) != \"\"\n\n   
     tables.clear(clear_provenance=True)\n        assert tables.provenances.num_rows == 0\n        for table in data_tables:\n            assert repr(getattr(tables, f\"{table}\").metadata_schema) != \"\"\n        assert len(tables.metadata) > 0\n        assert repr(tables.metadata_schema) != \"\"\n\n        tables.clear(clear_metadata_schemas=True)\n        for table in data_tables:\n            assert repr(getattr(tables, f\"{table}\").metadata_schema) == \"\"\n        assert len(tables.metadata) > 0\n        assert repr(tables.metadata_schema) != 0\n\n        tables.clear(clear_ts_metadata_and_schema=True)\n        assert len(tables.metadata) == 0\n        assert repr(tables.metadata_schema) == \"\"\n\n    def test_equals(self):\n        # Here we don't use the fixture as we'd like to run the same sim twice\n        pop_configs = [msprime.PopulationConfiguration(5) for _ in range(2)]\n        migration_matrix = [[0, 1], [1, 0]]\n        t1 = msprime.simulate(\n            population_configurations=pop_configs,\n            migration_matrix=migration_matrix,\n            mutation_rate=1,\n            record_migrations=True,\n            random_seed=1,\n        ).dump_tables()\n        t2 = msprime.simulate(\n            population_configurations=pop_configs,\n            migration_matrix=migration_matrix,\n            mutation_rate=1,\n            record_migrations=True,\n            random_seed=1,\n        ).dump_tables()\n        assert t1 == t1\n        assert t1 == t1.copy()\n        assert t1.copy() == t1\n\n        # The provenances may or may not be equal depending on the clock\n        # precision for record. 
So clear them first.\n        t1.provenances.clear()\n        t2.provenances.clear()\n        assert t1 == t2\n        assert t2 == t1\n        assert not (t1 != t2)\n\n        t1.nodes.clear()\n        assert t1 != t2\n        t2.nodes.clear()\n        assert t1 == t2\n\n        t1.edges.clear()\n        assert t1 != t2\n        t2.edges.clear()\n        assert t1 == t2\n\n        t1.migrations.clear()\n        assert t1 != t2\n        t2.migrations.clear()\n        assert t1 == t2\n\n        t1.sites.clear()\n        assert t1 != t2\n        t2.sites.clear()\n        assert t1 == t2\n\n        t1.mutations.clear()\n        assert t1 != t2\n        t2.mutations.clear()\n        assert t1 == t2\n\n        t1.populations.clear()\n        assert t1 != t2\n        t2.populations.clear()\n        assert t1 == t2\n\n    def test_equals_options(self, ts_fixture):\n        t1 = ts_fixture.dump_tables()\n        t2 = t1.copy()\n\n        t1.provenances.add_row(\"random stuff\")\n        assert not (t1 == t2)\n        t1.assert_equals(t2, ignore_provenance=True)\n        t2.assert_equals(t1, ignore_provenance=True)\n        assert not (t1.equals(t2))\n        assert not (t2.equals(t1))\n        t1.provenances.clear()\n        t2.provenances.clear()\n        t1.assert_equals(t2)\n        t2.assert_equals(t1)\n\n        t1.metadata_schema = tskit.MetadataSchema({\"codec\": \"json\", \"type\": \"object\"})\n        t1.metadata = {\"hello\": \"world\"}\n        assert not t1.equals(t2)\n        t1.assert_equals(t2, ignore_ts_metadata=True)\n        assert not t2.equals(t1)\n        t2.assert_equals(t1, ignore_ts_metadata=True)\n        t2.metadata_schema = t1.metadata_schema\n        assert not t1.equals(t2)\n        t1.assert_equals(t2, ignore_ts_metadata=True)\n        assert not t2.equals(t1)\n        t2.assert_equals(t1, ignore_ts_metadata=True)\n\n        t1.provenances.add_row(\"random stuff\")\n        assert not t1.equals(t2)\n        assert not t1.equals(t2, 
ignore_ts_metadata=True)\n        assert not t1.equals(t2, ignore_provenance=True)\n        t1.assert_equals(t2, ignore_ts_metadata=True, ignore_provenance=True)\n\n        t1.provenances.clear()\n        t2.metadata = t1.metadata\n        t1.assert_equals(t2)\n        t2.assert_equals(t1)\n\n        with pytest.raises(TypeError):\n            t1.equals(t2, True)\n\n        # When two tables differ, check that the right reason is given\n        t1 = tskit.TableCollection(sequence_length=1.0)\n        t2 = tskit.TableCollection(sequence_length=1.0)\n        t1.assert_equals(t2)\n        t1.metadata = b\"\"\n        t2.metadata = b\"abc\"\n        t1.assert_equals(t2, ignore_ts_metadata=True)\n        t1.edges.add_row(0, 1, 0, 1)\n        with pytest.raises(AssertionError, match=\"EdgeTable number of rows\"):\n            t1.assert_equals(t2, ignore_ts_metadata=True)\n        t2.metadata = b\"\"\n        t2.edges.add_row(0, 1, 0, 1, metadata=b\"abc\")\n        t1.assert_equals(t2, ignore_metadata=True)\n        t1.edges.add_row(0, 1, 0, 1)\n        with pytest.raises(AssertionError, match=\"EdgeTable number of rows\"):\n            t1.assert_equals(t2, ignore_metadata=True)\n        with pytest.raises(AssertionError, match=\"EdgeTable row 0 differs\"):\n            t1.assert_equals(t2)\n\n    def test_equals_cross_type(self, tc):\n        \"\"\"Test that ImmutableTableCollection and TableCollection can compare\"\"\"\n        # Get both mutable and immutable versions of the same data\n        mutable_tc = tc.copy()  # Always returns TableCollection\n        immutable_tc = (\n            mutable_tc.tree_sequence().tables\n        )  # Always returns ImmutableTableCollection\n\n        # Test all cross-type comparisons\n        assert mutable_tc == immutable_tc\n        assert immutable_tc == mutable_tc\n        assert mutable_tc.equals(immutable_tc)\n        assert immutable_tc.equals(mutable_tc)\n        mutable_tc.assert_equals(immutable_tc)\n        
immutable_tc.assert_equals(mutable_tc)\n\n        # Test that tc (which might be either type) equals both versions\n        assert tc == mutable_tc\n        assert tc == immutable_tc\n        tc.assert_equals(mutable_tc)\n        tc.assert_equals(immutable_tc)\n\n        # Test ignore_provenance across types\n        mutable_tc.provenances.add_row(\"extra provenance\")\n        assert not mutable_tc.equals(immutable_tc)\n        assert not immutable_tc.equals(mutable_tc)\n        mutable_tc.assert_equals(immutable_tc, ignore_provenance=True)\n        immutable_tc.assert_equals(mutable_tc, ignore_provenance=True)\n\n        # Test ignore_ts_metadata across types\n        # Start fresh to avoid provenance confusion\n        mutable_tc2 = tc.copy()\n        mutable_tc2.metadata_schema = tskit.MetadataSchema(\n            {\"codec\": \"json\", \"type\": \"object\"}\n        )\n        mutable_tc2.metadata = {\"hello\": \"world\"}\n        immutable_tc2 = (\n            tc.copy().tree_sequence().tables\n        )  # Original without metadata changes\n        assert not mutable_tc2.equals(immutable_tc2)\n        assert not immutable_tc2.equals(mutable_tc2)\n        mutable_tc2.assert_equals(immutable_tc2, ignore_ts_metadata=True)\n        immutable_tc2.assert_equals(mutable_tc2, ignore_ts_metadata=True)\n\n        # Test ignore_metadata across types (table-level metadata like edges)\n        # Start fresh again\n        mutable_tc3 = tc.copy()\n        child = mutable_tc3.nodes.add_row(time=0)\n        parent = mutable_tc3.nodes.add_row(time=1)\n        mutable_tc3.edges.add_row(0, 1, parent, child, metadata={\"key\": \"extra\"})\n        mutable_tc3.sort()\n\n        mutable_tc4 = tc.copy()\n        child = mutable_tc4.nodes.add_row(time=0)\n        parent = mutable_tc4.nodes.add_row(time=1)\n        mutable_tc4.edges.add_row(0, 1, parent, child, metadata={\"key\": \"different\"})\n        mutable_tc4.sort()\n        immutable_tc4 = 
mutable_tc4.tree_sequence().tables\n\n        assert not mutable_tc3.equals(immutable_tc4)\n        assert not immutable_tc4.equals(mutable_tc3)\n        mutable_tc3.assert_equals(immutable_tc4, ignore_metadata=True)\n        immutable_tc4.assert_equals(mutable_tc3, ignore_metadata=True)\n\n    def test_sequence_length(self):\n        for sequence_length in [0, 1, 100.1234]:\n            tables = tskit.TableCollection(sequence_length=sequence_length)\n            assert tables.sequence_length == sequence_length\n\n    def test_uuid_simulation(self, tc):\n        assert tc.file_uuid is None, None\n\n    def test_uuid_empty(self):\n        tables = tskit.TableCollection(sequence_length=1)\n        assert tables.file_uuid is None, None\n\n    def test_empty_indexes(self):\n        tables = tskit.TableCollection(sequence_length=1)\n        assert not tables.has_index()\n        tables.build_index()\n        assert tables.has_index()\n        tables.drop_index()\n        assert not tables.has_index()\n\n    def test_index_unsorted(self):\n        tables = tskit.TableCollection(sequence_length=1)\n        tables.nodes.add_row(flags=1, time=0)\n        tables.nodes.add_row(flags=1, time=0)\n        tables.nodes.add_row(flags=1, time=0)\n        tables.nodes.add_row(flags=0, time=1)\n        tables.nodes.add_row(flags=0, time=2)\n        tables.edges.add_row(0, 1, 3, 0)\n        tables.edges.add_row(0, 1, 3, 1)\n        tables.edges.add_row(0, 1, 4, 3)\n        tables.edges.add_row(0, 1, 4, 2)\n\n        assert not tables.has_index()\n        with pytest.raises(tskit.LibraryError):\n            tables.build_index()\n        assert not tables.has_index()\n        tables.sort()\n        tables.build_index()\n        assert tables.has_index()\n        ts = tables.tree_sequence()\n        assert ts.tables == tables\n\n    def test_index_from_ts(self, tc):\n        assert tc.has_index()\n\n        # For mutable tables, test that tree_sequence() rebuilds the index\n        if 
isinstance(tc, tskit.TableCollection):\n            # Save a copy with index for comparison\n            indexed_tc = tc.copy()\n            tc.drop_index()\n            assert not tc.has_index()\n            ts = tc.tree_sequence()\n            assert ts.tables == indexed_tc\n            assert tc.has_index()\n\n    def test_set_sequence_length_errors(self):\n        tables = tskit.TableCollection(1)\n        with pytest.raises(AttributeError):\n            del tables.sequence_length\n        for bad_value in [\"asdf\", None, []]:\n            with pytest.raises(TypeError):\n                tables.sequence_length = bad_value\n\n    def test_set_sequence_length(self):\n        tables = tskit.TableCollection(1)\n        for value in [-1, 100, 2**32, 1e-6]:\n            tables.sequence_length = value\n            assert tables.sequence_length == value\n\n    def test_bad_sequence_length(self, ts_fixture):\n        tables = ts_fixture.dump_tables()\n        assert tables.sequence_length == 5\n        for value in [-1, 0, -0.99, 0.9999]:\n            tables.sequence_length = value\n            with pytest.raises(tskit.LibraryError):\n                tables.tree_sequence()\n            with pytest.raises(tskit.LibraryError):\n                tables.sort()\n            with pytest.raises(tskit.LibraryError):\n                tables.build_index()\n            with pytest.raises(tskit.LibraryError):\n                tables.compute_mutation_parents()\n            with pytest.raises(tskit.LibraryError):\n                tables.simplify()\n            assert tables.sequence_length == value\n\n    def test_sequence_length_longer_than_edges(self, ts_fixture):\n        tables = ts_fixture.dump_tables()\n        tables.sequence_length = 20\n        ts = tables.tree_sequence()\n        assert ts.sequence_length == 20\n        assert ts.num_trees == 6\n        trees = ts.trees()\n        tree = next(trees)\n        assert len(tree.parent_dict) > 0\n        for _ in range(5):\n      
      tree = next(trees)\n        assert len(tree.parent_dict) == 0\n\n    def test_indexes(self, simple_degree1_ts_fixture):\n        tc = tskit.TableCollection(sequence_length=1)\n        assert tc.indexes == tskit.TableCollectionIndexes()\n        tc = simple_degree1_ts_fixture.dump_tables()\n        assert np.array_equal(\n            tc.indexes.edge_insertion_order, np.arange(18, dtype=np.int32)\n        )\n        assert np.array_equal(\n            tc.indexes.edge_removal_order, np.arange(18, dtype=np.int32)[::-1]\n        )\n        tc.drop_index()\n        assert tc.indexes == tskit.TableCollectionIndexes()\n        tc.build_index()\n        assert np.array_equal(\n            tc.indexes.edge_insertion_order, np.arange(18, dtype=np.int32)\n        )\n        assert np.array_equal(\n            tc.indexes.edge_removal_order, np.arange(18, dtype=np.int32)[::-1]\n        )\n\n        modify_indexes = tskit.TableCollectionIndexes(\n            edge_insertion_order=np.arange(42, 42 + 18, dtype=np.int32),\n            edge_removal_order=np.arange(4242, 4242 + 18, dtype=np.int32),\n        )\n        tc.indexes = modify_indexes\n        assert np.array_equal(\n            tc.indexes.edge_insertion_order, np.arange(42, 42 + 18, dtype=np.int32)\n        )\n        assert np.array_equal(\n            tc.indexes.edge_removal_order, np.arange(4242, 4242 + 18, dtype=np.int32)\n        )\n\n    def test_indexes_roundtrip(self, simple_degree1_ts_fixture):\n        # Indexes shouldn't be made by roundtripping\n        tables = tskit.TableCollection(sequence_length=1)\n        assert not tables.has_index()\n        assert not tskit.TableCollection.fromdict(tables.asdict()).has_index()\n\n        tables = simple_degree1_ts_fixture.dump_tables()\n        tables.drop_index()\n        assert not tskit.TableCollection.fromdict(tables.asdict()).has_index()\n\n    def test_asdict_lwt_concordance(self, tc):\n        def check_concordance(d1, d2):\n            assert set(d1.keys()) 
== set(d2.keys())\n            for k1, v1 in d1.items():\n                v2 = d2[k1]\n                assert type(v1) is type(v2)\n                if type(v1) is dict:\n                    assert set(v1.keys()) == set(v2.keys())\n                    for sk1, sv1 in v1.items():\n                        sv2 = v2[sk1]\n                        assert type(sv1) is type(sv2)\n                        if isinstance(sv1, np.ndarray):\n                            assert np.array_equal(sv1, sv2) or (\n                                np.all(tskit.is_unknown_time(sv1))\n                                and np.all(tskit.is_unknown_time(sv2))\n                            )\n                        elif type(sv1) in [bytes, str]:\n                            assert sv1 == sv2\n                        else:\n                            raise AssertionError()\n\n                else:\n                    assert v1 == v2\n\n        # Test with index\n        assert tc.has_index()\n        lwt = _tskit.LightweightTableCollection()\n        lwt.fromdict(tc.asdict())\n        check_concordance(lwt.asdict(), tc.asdict())\n\n        # Test without index - only for mutable\n        tables = tc.copy()\n        tables.drop_index()\n        lwt = _tskit.LightweightTableCollection()\n        lwt.fromdict(tables.asdict())\n        check_concordance(lwt.asdict(), tables.asdict())\n\n    def test_dump_pathlib(self, tc, tmp_path):\n        path = pathlib.Path(tmp_path) / \"tmp.trees\"\n        assert path.exists\n        assert path.is_file\n        tc.dump(path)\n        other_tc = tskit.TableCollection.load(path)\n        tc.assert_equals(other_tc)\n\n    @pytest.mark.skipif(platform.system() == \"Windows\", reason=\"Windows doesn't raise\")\n    def test_dump_load_errors(self, tc):\n        # Try to dump/load files we don't have access to or don't exist.\n        for func in [tc.dump, tskit.TableCollection.load]:\n            for f in [\"/\", \"/test.trees\", \"/dir_does_not_exist/x.trees\"]:\n 
               with pytest.raises(OSError):\n                    func(f)\n                try:\n                    func(f)\n                except OSError as e:\n                    message = str(e)\n                    assert len(message) > 0\n            f = \"/\" + 4000 * \"x\"\n            with pytest.raises(OSError):\n                func(f)\n            try:\n                func(f)\n            except OSError as e:\n                message = str(e)\n            assert \"File name too long\" in message\n            for bad_filename in [[], None, {}]:\n                with pytest.raises(TypeError):\n                    func(bad_filename)\n\n    def test_set_table(self):\n        tc = tskit.TableCollection()\n        for name, table in tc.table_name_map.items():\n            with pytest.raises(AttributeError, match=\"replace_with\"):\n                setattr(tc, name, table)\n\n\nclass TestEqualityOptions:\n    def test_equals_provenance(self):\n        t1 = msprime.simulate(10, random_seed=42).tables\n        time.sleep(0.1)\n        t2 = msprime.simulate(10, random_seed=42).tables\n        # Timestamps should differ\n        assert t1.provenances[-1].timestamp != t2.provenances[-1].timestamp\n        assert not t1.equals(t2)\n        t1.assert_equals(t2, ignore_timestamps=True)\n        t1.assert_equals(t2, ignore_provenance=True)\n        t1.assert_equals(t2, ignore_provenance=True, ignore_timestamps=True)\n\n    def test_equals_node_metadata(self, ts_fixture):\n        t1 = ts_fixture.dump_tables()\n        t2 = t1.copy()\n        t1.assert_equals(t2)\n        t1.nodes.add_row(time=0, metadata={\"a\": \"a\"})\n        t2.nodes.add_row(time=0, metadata={\"a\": \"b\"})\n        assert not t1.nodes.equals(t2.nodes)\n        assert not t1.equals(t2)\n        assert t1.nodes.equals(t2.nodes, ignore_metadata=True)\n\n    def test_equals_edge_metadata(self, ts_fixture):\n        t1 = ts_fixture.dump_tables()\n        child = t1.nodes.add_row(time=0)\n        
parent = t1.nodes.add_row(time=1)\n        t2 = t1.copy()\n        t1.assert_equals(t2)\n        t1.edges.add_row(0, 1, parent, child, metadata={\"a\": \"a\"})\n        t2.edges.add_row(0, 1, parent, child, metadata={\"a\": \"b\"})\n        assert not t1.edges.equals(t2.edges)\n        assert not t1.equals(t2)\n        assert t1.edges.equals(t2.edges, ignore_metadata=True)\n        t1.assert_equals(t2, ignore_metadata=True)\n\n    def test_equals_migration_metadata(self, ts_fixture):\n        t1 = ts_fixture.dump_tables()\n        t2 = t1.copy()\n        t1.assert_equals(t2)\n        t1.migrations.add_row(\n            0, 1, source=0, dest=1, node=0, time=0, metadata={\"a\": \"a\"}\n        )\n        t2.migrations.add_row(\n            0, 1, source=0, dest=1, node=0, time=0, metadata={\"a\": \"b\"}\n        )\n        assert not t1.migrations.equals(t2.migrations)\n        assert not t1.equals(t2)\n        assert t1.migrations.equals(t2.migrations, ignore_metadata=True)\n        t1.assert_equals(t2, ignore_metadata=True)\n\n    def test_equals_site_metadata(self, ts_fixture):\n        t1 = ts_fixture.dump_tables()\n        t2 = t1.copy()\n        t1.assert_equals(t2)\n        t1.sites.add_row(0, \"A\", metadata={\"a\": \"a\"})\n        t2.sites.add_row(0, \"A\", metadata={\"a\": \"b\"})\n        assert not t1.sites.equals(t2.sites)\n        assert not t1.equals(t2)\n        assert t1.sites.equals(t2.sites, ignore_metadata=True)\n        t1.assert_equals(t2, ignore_metadata=True)\n\n    def test_equals_mutation_metadata(self, ts_fixture):\n        t1 = ts_fixture.dump_tables()\n        t2 = t1.copy()\n        t1.assert_equals(t2)\n        t1.mutations.add_row(0, 0, \"A\", metadata={\"a\": \"a\"})\n        t2.mutations.add_row(0, 0, \"A\", metadata={\"a\": \"b\"})\n        assert not t1.mutations.equals(t2.mutations)\n        assert not t1.equals(t2)\n        assert t1.mutations.equals(t2.mutations, ignore_metadata=True)\n        t1.assert_equals(t2, 
ignore_metadata=True)\n\n    def test_equals_population_metadata(self, ts_fixture):\n        t1 = ts_fixture.dump_tables()\n        t2 = t1.copy()\n        t1.assert_equals(t2)\n        t1.populations.add_row({\"a\": \"a\"})\n        t2.populations.add_row({\"a\": \"b\"})\n        assert not t1.populations.equals(t2.populations)\n        assert not t1.equals(t2)\n        t1.assert_equals(t2, ignore_metadata=True)\n\n\nclass TestTableCollectionAssertEquals:\n    @pytest.fixture\n    def t1(self, ts_fixture):\n        return ts_fixture.dump_tables()\n\n    @pytest.fixture\n    def t2(self, ts_fixture):\n        return ts_fixture.dump_tables()\n\n    def test_equal(self, t1, t2):\n        assert t1 is not t2\n        t1.assert_equals(t2)\n\n    def test_type(self, t1):\n        with pytest.raises(\n            AssertionError,\n            match=re.escape(\n                \"Types differ: self=<class 'tskit.tables.TableCollection'> \"\n                \"other=<class 'int'>\"\n            ),\n        ):\n            t1.assert_equals(42)\n\n    def test_sequence_length(self, t1, t2):\n        t2.sequence_length = 42\n        with pytest.raises(\n            AssertionError, match=\"Sequence Length differs: self=5.0 other=42.0\"\n        ):\n            t1.assert_equals(t2)\n\n    def test_metadata_schema(self, t1, t2):\n        t2.metadata_schema = tskit.MetadataSchema(None)\n        with pytest.raises(\n            AssertionError,\n            match=re.escape(\"Metadata schemas differ:\"),\n        ):\n            t1.assert_equals(t2)\n        t1.assert_equals(t2, ignore_metadata=True)\n        t1.assert_equals(t2, ignore_ts_metadata=True)\n\n    def test_metadata(self, t1, t2):\n        t2.metadata = {\"foo\": \"bar\"}\n        with pytest.raises(\n            AssertionError,\n            match=re.escape(\"Metadata differs: self=Test metadata other={'foo': 'bar'}\"),\n        ):\n            t1.assert_equals(t2)\n        t1.assert_equals(t2, ignore_metadata=True)\n      
  t1.assert_equals(t2, ignore_ts_metadata=True)\n\n    def test_time_units(self, t1, t2):\n        t2.time_units = \"microseconds\"\n        with pytest.raises(\n            AssertionError,\n            match=re.escape(\n                \"Time units differs: self=Test time units other=microseconds\"\n            ),\n        ):\n            t1.assert_equals(t2)\n\n    @pytest.mark.parametrize(\"table_name\", tskit.TableCollection(1).table_name_map)\n    def test_tables(self, t1, t2, table_name):\n        table = getattr(t2, table_name)\n        table.truncate(0)\n        with pytest.raises(\n            AssertionError,\n            match=f\"{type(table).__name__} number of rows differ: \"\n            f\"self={len(getattr(t1, table_name))} other=0\",\n        ):\n            t1.assert_equals(t2)\n\n    @pytest.mark.parametrize(\"table_name\", tskit.TableCollection(1).table_name_map)\n    def test_ignore_metadata(self, t1, t2, table_name):\n        table = getattr(t2, table_name)\n        if hasattr(table, \"metadata_schema\"):\n            table.metadata_schema = tskit.MetadataSchema(None)\n            with pytest.raises(\n                AssertionError,\n                match=re.escape(f\"{type(table).__name__} metadata schemas differ:\"),\n            ):\n                t1.assert_equals(t2)\n            t1.assert_equals(t2, ignore_metadata=True)\n\n    def test_ignore_provenance(self, t1, t2):\n        t2.provenances.truncate(0)\n        with pytest.raises(\n            AssertionError,\n            match=\"ProvenanceTable number of rows differ: self=5 other=0\",\n        ):\n            t1.assert_equals(t2)\n        with pytest.raises(\n            AssertionError,\n            match=\"ProvenanceTable number of rows differ: self=5 other=0\",\n        ):\n            t1.assert_equals(t2, ignore_timestamps=True)\n\n        t1.assert_equals(t2, ignore_provenance=True)\n\n    def test_ignore_timestamps(self, t1, t2):\n        table = t2.provenances\n        timestamp 
= table.timestamp\n        timestamp[0] = ord(\"F\")\n        table.set_columns(\n            timestamp=timestamp,\n            timestamp_offset=table.timestamp_offset,\n            record=table.record,\n            record_offset=table.record_offset,\n        )\n        with pytest.raises(\n            AssertionError,\n            match=\"ProvenanceTable row 0 differs:\\n\"\n            \"self.timestamp=.* other.timestamp=F.*\",\n        ):\n            t1.assert_equals(t2)\n        t1.assert_equals(t2, ignore_provenance=True)\n        t1.assert_equals(t2, ignore_timestamps=True)\n\n    def test_ignore_tables(self, t1, t2):\n        t2.individuals.truncate(0)\n        t2.nodes.truncate(0)\n        t2.edges.truncate(0)\n        t2.migrations.truncate(0)\n        t2.sites.truncate(0)\n        t2.mutations.truncate(0)\n        t2.populations.truncate(0)\n        with pytest.raises(\n            AssertionError,\n            match=\"EdgeTable number of rows differ: self=390 other=0\",\n        ):\n            t1.assert_equals(t2)\n        t1.assert_equals(t2, ignore_tables=True)\n\n    def test_ignore_reference_sequence(self, t1, t2):\n        t2.reference_sequence.clear()\n        with pytest.raises(\n            AssertionError,\n            match=re.escape(\"Metadata schemas differ: \"),\n        ):\n            t1.assert_equals(t2)\n        t1.assert_equals(t2, ignore_reference_sequence=True)\n\n    def test_degenerate_metadata_schema(self, t1, t2):\n        t1._ll_object.metadata_schema = (\n            '{\"codec\": \"json\", \"properties\": '\n            '{\"A\": {\"type\": \"integer\"}, \"B\": {\"type\": \"number\"}}}'\n        )\n        t2._ll_object.metadata_schema = (\n            '{\"codec\": \"json\", \"properties\": '\n            '{\"B\": {\"type\": \"number\"}, \"A\": {\"type\": \"integer\"}}}'\n        )\n        t1.assert_equals(t2)\n\n\nclass TestTableCollectionMethodSignatures:\n    tc = msprime.simulate(10, random_seed=1234).dump_tables()\n\n    def 
test_kwargs_only(self):\n        with pytest.raises(TypeError):\n            self.tc.simplify([], True)\n\n\nclass TestTableCollectionMetadata:\n    metadata_schema = metadata.MetadataSchema(\n        {\n            \"codec\": \"json\",\n            \"title\": \"Example Metadata\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"one\": {\"type\": \"string\"},\n                \"two\": {\"type\": \"number\"},\n                \"three\": {\"type\": \"array\"},\n                \"four\": {\"type\": \"boolean\"},\n            },\n            \"required\": [\"one\", \"two\", \"three\", \"four\"],\n            \"additionalProperties\": False,\n        },\n    )\n\n    def metadata_example_data(self, val=0):\n        return {\n            \"one\": \"val one\",\n            \"two\": val,\n            \"three\": list(range(val, val + 10)),\n            \"four\": True,\n        }\n\n    def test_set_metadata_schema(self):\n        tc = tskit.TableCollection(1)\n        metadata_schema2 = metadata.MetadataSchema({\"codec\": \"json\"})\n        # Default is no-op metadata codec\n        assert repr(tc.metadata_schema) == repr(metadata.MetadataSchema(None))\n        # Set\n        tc.metadata_schema = self.metadata_schema\n        assert repr(tc.metadata_schema) == repr(self.metadata_schema)\n        # Overwrite\n        tc.metadata_schema = metadata_schema2\n        assert repr(tc.metadata_schema) == repr(metadata_schema2)\n        # Remove\n        tc.metadata_schema = metadata.MetadataSchema(None)\n        assert repr(tc.metadata_schema) == repr(metadata.MetadataSchema(None))\n        # Set after remove\n        tc.metadata_schema = self.metadata_schema\n        assert repr(tc.metadata_schema) == repr(self.metadata_schema)\n        # Del should fail\n        with pytest.raises(AttributeError):\n            del tc.metadata_schema\n        # None should fail\n        with pytest.raises(ValueError):\n            tc.metadata_schema = None\n\n 
   def test_set_metadata(self):\n        tc = tskit.TableCollection(1)\n        # Default is empty bytes\n        assert tc.metadata == b\"\"\n        assert tc.metadata_bytes == b\"\"\n\n        tc.metadata_schema = self.metadata_schema\n        md1 = self.metadata_example_data()\n        md2 = self.metadata_example_data(val=2)\n        # Set\n        tc.metadata = md1\n        assert tc.metadata == md1\n        assert tc.metadata_bytes == tskit.canonical_json(md1).encode()\n        # Overwrite\n        tc.metadata = md2\n        assert tc.metadata == md2\n        assert tc.metadata_bytes == tskit.canonical_json(md2).encode()\n        # Del should fail\n        with pytest.raises(AttributeError):\n            del tc.metadata\n        with pytest.raises(AttributeError):\n            del tc.metadata_bytes\n        # None should fail\n        with pytest.raises(exceptions.MetadataValidationError):\n            tc.metadata = None\n        # Setting bytes should fail\n        with pytest.raises(AttributeError):\n            tc.metadata_bytes = b\"123\"\n\n    def test_set_time_units(self):\n        tc = tskit.TableCollection(1)\n        assert tc.time_units == tskit.TIME_UNITS_UNKNOWN\n\n        ex1 = \"years\"\n        ex2 = \"generations\"\n        # Set\n        tc.time_units = ex1\n        assert tc.time_units == ex1\n        # Overwrite\n        tc.time_units = ex2\n        assert tc.time_units == ex2\n        # Del should fail\n        with pytest.raises(AttributeError):\n            del tc.time_units\n        # None should fail\n        with pytest.raises(TypeError):\n            tc.time_units = None\n\n    def test_default_metadata_schema(self):\n        # Default should allow bytes\n        tc = tskit.TableCollection(1)\n        tc.metadata = b\"acceptable bytes\"\n        assert tc.metadata == b\"acceptable bytes\"\n        # Adding non-bytes metadata should error\n        with pytest.raises(TypeError):\n            tc.metadata = 
self.metadata_example_data()\n\n    def test_round_trip_metadata(self):\n        data = self.metadata_example_data()\n        tc = tskit.TableCollection(1)\n        tc.metadata_schema = self.metadata_schema\n        tc.metadata = data\n        assert tc.metadata == data\n        assert tc.metadata_bytes == tskit.canonical_json(data).encode()\n\n    def test_bad_metadata(self):\n        metadata = self.metadata_example_data()\n        metadata[\"I really shouldn't be here\"] = 6\n        tc = tskit.TableCollection(1)\n        tc.metadata_schema = self.metadata_schema\n        with pytest.raises(exceptions.MetadataValidationError):\n            tc.metadata = metadata\n        assert tc._ll_tables.metadata == b\"\"\n\n\ndef add_tc_metadata(tc):\n    tc.metadata_schema = tskit.MetadataSchema(\n        {\n            \"codec\": \"struct\",\n            \"type\": \"object\",\n            \"properties\": {\"top-level\": {\"type\": \"string\", \"binaryFormat\": \"50p\"}},\n        }\n    )\n    tc.metadata = {\"top-level\": \"top-level-metadata\"}\n    for table in tskit.TABLE_NAMES:\n        t = getattr(tc, table)\n        if hasattr(t, \"metadata_schema\"):\n            t.packset_metadata([f\"{table}-{i:10}\".encode() for i in range(t.num_rows)])\n            t.metadata_schema = tskit.MetadataSchema(\n                {\n                    \"codec\": \"struct\",\n                    \"type\": \"object\",\n                    \"properties\": {table: {\"type\": \"string\", \"binaryFormat\": \"16p\"}},\n                }\n            )\n\n\nclass TestTableCollectionPickle:\n    \"\"\"\n    Tests that we can round-trip table collections through pickle.\n    \"\"\"\n\n    def verify(self, tables):\n        add_tc_metadata(tables)\n        other_tables = pickle.loads(pickle.dumps(tables))\n        tables.assert_equals(other_tables)\n\n    def test_simple_simulation(self):\n        ts = msprime.simulate(2, random_seed=1)\n        self.verify(ts.dump_tables())\n\n    def 
test_simulation_populations(self):\n        ts = msprime.simulate(\n            population_configurations=[\n                msprime.PopulationConfiguration(10),\n                msprime.PopulationConfiguration(10),\n            ],\n            migration_matrix=[[0, 1], [1, 0]],\n            record_migrations=True,\n            random_seed=1,\n        )\n        self.verify(ts.dump_tables())\n\n    def test_simulation_sites(self):\n        ts = msprime.simulate(12, random_seed=1, mutation_rate=5)\n        assert ts.num_sites > 1\n        self.verify(ts.dump_tables())\n\n    def test_simulation_individuals(self):\n        ts = msprime.simulate(100, random_seed=1)\n        ts = tsutil.insert_random_ploidy_individuals(ts, seed=1)\n        assert ts.num_individuals > 1\n        self.verify(ts.dump_tables())\n\n    def test_empty_tables(self):\n        self.verify(tskit.TableCollection())\n\n\nclass TestDeduplicateSites:\n    \"\"\"\n    Tests for the TableCollection.deduplicate_sites method.\n    \"\"\"\n\n    def test_empty(self):\n        tables = tskit.TableCollection(1)\n        tables.deduplicate_sites()\n        tables.assert_equals(tskit.TableCollection(1))\n\n    def test_unsorted(self):\n        tables = msprime.simulate(10, mutation_rate=1, random_seed=1).dump_tables()\n        assert len(tables.sites) > 0\n        position = tables.sites.position\n        for _ in range(len(position) - 1):\n            position = np.roll(position, 1)\n            tables.sites.set_columns(\n                position=position,\n                ancestral_state=tables.sites.ancestral_state,\n                ancestral_state_offset=tables.sites.ancestral_state_offset,\n            )\n            with pytest.raises(_tskit.LibraryError):\n                tables.deduplicate_sites()\n\n    def test_bad_position(self):\n        for bad_position in [-1, -0.001]:\n            tables = tskit.TableCollection()\n            tables.sites.add_row(bad_position, \"0\")\n            with 
pytest.raises(_tskit.LibraryError):\n                tables.deduplicate_sites()\n\n    def test_no_effect(self):\n        t1 = msprime.simulate(10, mutation_rate=1, random_seed=1).dump_tables()\n        t2 = msprime.simulate(10, mutation_rate=1, random_seed=1).dump_tables()\n        assert len(t1.sites) > 0\n        t1.deduplicate_sites()\n        t1.assert_equals(t2, ignore_provenance=True)\n\n    def test_same_sites(self):\n        t1 = msprime.simulate(10, mutation_rate=1, random_seed=1).dump_tables()\n        t2 = msprime.simulate(10, mutation_rate=1, random_seed=1).dump_tables()\n        assert len(t1.sites) > 0\n        t1.sites.append_columns(\n            position=t1.sites.position,\n            ancestral_state=t1.sites.ancestral_state,\n            ancestral_state_offset=t1.sites.ancestral_state_offset,\n        )\n        assert len(t1.sites) == 2 * len(t2.sites)\n        t1.sort()\n        t1.deduplicate_sites()\n        t1.assert_equals(t2, ignore_provenance=True)\n\n    def test_order_maintained(self):\n        t1 = tskit.TableCollection(1)\n        t1.sites.add_row(position=0, ancestral_state=\"first\")\n        t1.sites.add_row(position=0, ancestral_state=\"second\")\n        t1.deduplicate_sites()\n        assert len(t1.sites) == 1\n        assert t1.sites.ancestral_state.tobytes() == b\"first\"\n\n    def test_multichar_ancestral_state(self):\n        ts = msprime.simulate(8, random_seed=3, mutation_rate=1)\n        assert ts.num_sites > 2\n        tables = ts.dump_tables()\n        tables.sites.clear()\n        tables.mutations.clear()\n        for site in ts.sites():\n            site_id = tables.sites.add_row(\n                position=site.position, ancestral_state=\"A\" * site.id\n            )\n            tables.sites.add_row(position=site.position, ancestral_state=\"0\")\n            for mutation in site.mutations:\n                tables.mutations.add_row(\n                    site=site_id, node=mutation.node, derived_state=\"T\" * 
site.id\n                )\n        tables.deduplicate_sites()\n        new_ts = tables.tree_sequence()\n        assert new_ts.num_sites == ts.num_sites\n        for site in new_ts.sites():\n            assert site.ancestral_state == site.id * \"A\"\n\n    def test_multichar_metadata(self):\n        ts = msprime.simulate(8, random_seed=3, mutation_rate=1)\n        assert ts.num_sites > 2\n        tables = ts.dump_tables()\n        tables.sites.clear()\n        tables.mutations.clear()\n        for site in ts.sites():\n            site_id = tables.sites.add_row(\n                position=site.position, ancestral_state=\"0\", metadata=b\"A\" * site.id\n            )\n            tables.sites.add_row(position=site.position, ancestral_state=\"0\")\n            for mutation in site.mutations:\n                tables.mutations.add_row(\n                    site=site_id,\n                    node=mutation.node,\n                    derived_state=\"1\",\n                    metadata=b\"T\" * site.id,\n                )\n        tables.deduplicate_sites()\n        new_ts = tables.tree_sequence()\n        assert new_ts.num_sites == ts.num_sites\n        for site in new_ts.sites():\n            assert site.metadata == site.id * b\"A\"\n\n\nclass TestMutableBaseTable:\n    \"\"\"\n    Tests of the table superclass.\n    \"\"\"\n\n    def test_set_columns_not_implemented(self):\n        t = tskit.MutableBaseTable(None, None)\n        with pytest.raises(NotImplementedError):\n            t.set_columns()\n\n    def test_replace_with(self, ts_fixture):\n        # Although replace_with is a MutableBaseTable method, it is simpler to test it\n        # on the subclasses directly, as some differ e.g. 
in having metadata schemas\n        original_tables = ts_fixture.dump_tables()\n        original_tables.nodes.metadata_schema = tskit.MetadataSchema.permissive_json()\n        new_tables = ts_fixture.dump_tables()\n        new_tables.clear(clear_provenance=True, clear_metadata_schemas=True)\n\n        # write all the data back in again\n        for name, table in new_tables.table_name_map.items():\n            new_table = getattr(original_tables, name)\n            table.replace_with(new_table)\n        new_tables.assert_equals(original_tables)\n\n\nclass TestSubsetTables:\n    \"\"\"\n    Tests for the TableCollection.subset method.\n    \"\"\"\n\n    def get_msprime_example(self, sample_size=10, seed=1234):\n        M = [[0.0, 0.1], [1.0, 0.0]]\n        population_configurations = [\n            msprime.PopulationConfiguration(sample_size=sample_size),\n            msprime.PopulationConfiguration(sample_size=sample_size),\n        ]\n        ts = msprime.simulate(\n            population_configurations=population_configurations,\n            migration_matrix=M,\n            length=2e5,\n            recombination_rate=1e-8,\n            mutation_rate=1e-7,\n            record_migrations=False,\n            random_seed=seed,\n        )\n        # adding metadata and locations\n        ts = tsutil.add_random_metadata(ts, seed)\n        ts = tsutil.insert_random_ploidy_individuals(ts, max_ploidy=1)\n        return ts.dump_tables()\n\n    def get_wf_example(self, N=5, ngens=2, seed=1249):\n        tables = wf.wf_sim(N, N, num_pops=2, seed=seed)\n        tables.sort()\n        ts = tables.tree_sequence()\n        ts = tsutil.jukes_cantor(ts, 1, 10, seed=seed)\n        ts = tsutil.add_random_metadata(ts, seed)\n        ts = tsutil.insert_random_ploidy_individuals(ts, max_ploidy=2)\n        return ts.dump_tables()\n\n    def get_examples(self, seed):\n        yield self.get_msprime_example(seed=seed)\n        yield self.get_wf_example(seed=seed)\n\n    def 
verify_subset_equality(self, tables, nodes):\n        for rp in [True, False]:\n            for ru in [True, False]:\n                py_sub = tables.copy()\n                tsk_sub = tables.copy()\n                tsutil.py_subset(\n                    py_sub,\n                    nodes,\n                    record_provenance=False,\n                    reorder_populations=rp,\n                    remove_unreferenced=ru,\n                )\n                tsk_sub.subset(\n                    nodes,\n                    record_provenance=False,\n                    reorder_populations=rp,\n                    remove_unreferenced=ru,\n                )\n                py_sub.assert_equals(tsk_sub)\n\n    def verify_subset(self, tables, nodes):\n        self.verify_subset_equality(tables, nodes)\n        subset = tables.copy()\n        subset.subset(nodes, record_provenance=False)\n        # adding one so the last element always maps to NULL (-1 -> -1)\n        node_map = np.repeat(tskit.NULL, tables.nodes.num_rows + 1)\n        indivs = []\n        pops = []\n        for k, n in enumerate(nodes):\n            node_map[n] = k\n            ind = tables.nodes[n].individual\n            pop = tables.nodes[n].population\n            if ind not in indivs and ind != tskit.NULL:\n                indivs.append(ind)\n            if pop not in pops and pop != tskit.NULL:\n                pops.append(pop)\n        indivs.sort()  # keep individuals in the same order\n        ind_map = np.repeat(tskit.NULL, tables.individuals.num_rows + 1)\n        ind_map[indivs] = np.arange(len(indivs), dtype=\"int32\")\n        pop_map = np.repeat(tskit.NULL, tables.populations.num_rows + 1)\n        pop_map[pops] = np.arange(len(pops), dtype=\"int32\")\n        assert subset.nodes.num_rows == len(nodes)\n        for k, n in zip(nodes, subset.nodes):\n            nn = tables.nodes[k]\n            assert nn.time == n.time\n            assert nn.flags == n.flags\n            assert nn.metadata 
== n.metadata\n            assert ind_map[nn.individual] == n.individual\n            assert pop_map[nn.population] == n.population\n        assert subset.individuals.num_rows == len(indivs)\n        for k, i in zip(indivs, subset.individuals):\n            ii = tables.individuals[k]\n            assert np.all(np.equal(ii.location, i.location))\n            assert ii.metadata == i.metadata\n            sub_parents = []\n            for p in ii.parents:\n                if p == tskit.NULL or ind_map[p] != tskit.NULL:\n                    sub_parents.append(ind_map[p])\n            assert np.all(np.equal(sub_parents, i.parents))\n        assert subset.populations.num_rows == len(pops)\n        for k, p in zip(pops, subset.populations):\n            pp = tables.populations[k]\n            assert pp == p\n        # subset can reorder the edges: we need to check we have the same set\n        edges = {\n            e.replace(parent=node_map[e.parent], child=node_map[e.child])\n            for e in tables.edges\n            if e.parent in nodes and e.child in nodes\n        }\n        assert subset.edges.num_rows == len(edges)\n        for e in edges:\n            assert e in subset.edges\n        muts = []\n        sites = []\n        for k, m in enumerate(tables.mutations):\n            if m.node in nodes:\n                muts.append(k)\n                if m.site not in sites:\n                    sites.append(m.site)\n        site_map = np.repeat(-1, tables.sites.num_rows)\n        site_map[sites] = np.arange(len(sites), dtype=\"int32\")\n        mutation_map = np.repeat(tskit.NULL, tables.mutations.num_rows + 1)\n        mutation_map[muts] = np.arange(len(muts), dtype=\"int32\")\n        assert subset.sites.num_rows == len(sites)\n        for k, s in zip(sites, subset.sites):\n            ss = tables.sites[k]\n            assert ss == s\n\n        # subset can reorder the mutations: we need to check we have the same set\n        def normalize_time(time):\n            
return -42.0 if tskit.is_unknown_time(time) else time\n\n        expected_mutations = {\n            (\n                site_map[tables.mutations[k].site],\n                node_map[tables.mutations[k].node],\n                normalize_time(tables.mutations[k].time),\n                tables.mutations[k].metadata,\n            )\n            for k in muts\n        }\n        actual_mutations = {\n            (m.site, m.node, normalize_time(m.time), m.metadata)\n            for m in subset.mutations\n        }\n        assert len(expected_mutations) == len(actual_mutations)\n        assert expected_mutations == actual_mutations\n        assert tables.migrations == subset.migrations\n        assert tables.provenances == subset.provenances\n\n    def test_ts_subset(self):\n        nodes = np.array([0, 1])\n        for tables in self.get_examples(83592):\n            ts = tables.tree_sequence()\n            tables2 = ts.subset(nodes, record_provenance=False).dump_tables()\n            tables.subset(nodes, record_provenance=False)\n            tables.assert_equals(tables2)\n\n    def test_subset_all(self):\n        # subsetting to everything shouldn't change things except the\n        # individual and population ids in the node tables if there are gaps\n        for tables in self.get_examples(123583):\n            tables.sort()\n            tables2 = tables.copy()\n            tables2.subset(np.arange(tables.nodes.num_rows))\n            tables.individuals.clear()\n            tables2.individuals.clear()\n            assert np.all(tables.nodes.time == tables2.nodes.time)\n            assert np.all(tables.nodes.flags == tables2.nodes.flags)\n            assert np.all(tables.nodes.population == tables2.nodes.population)\n            assert np.all(tables.nodes.metadata == tables2.nodes.metadata)\n            tables.nodes.clear()\n            tables2.nodes.clear()\n            tables.assert_equals(tables2, ignore_provenance=True)\n\n    def test_shuffled_tables(self):\n      
  # subset should work on even unsorted tables\n        # (tested more thoroughly in TestSortTables)\n        for tables in self.get_examples(95521):\n            tables2 = tables.copy()\n            tsutil.shuffle_tables(tables2, 7000)\n            tables2.subset(\n                np.arange(tables.nodes.num_rows),\n                remove_unreferenced=False,\n            )\n            assert tables.nodes.num_rows == tables2.nodes.num_rows\n            assert tables.individuals.num_rows == tables2.individuals.num_rows\n            assert tables.populations.num_rows == tables2.populations.num_rows\n            assert tables.edges.num_rows == tables2.edges.num_rows\n            assert tables.sites.num_rows == tables2.sites.num_rows\n            assert tables.mutations.num_rows == tables2.mutations.num_rows\n            tables2 = tables.copy()\n            tsutil.shuffle_tables(tables2, 7001)\n            tables2.subset([])\n            assert tables2.nodes.num_rows == 0\n            assert tables2.individuals.num_rows == 0\n            assert tables2.populations.num_rows == 0\n            assert tables2.edges.num_rows == 0\n            assert tables2.sites.num_rows == 0\n            assert tables2.mutations.num_rows == 0\n\n    def test_doesnt_reorder_individuals(self):\n        tables = wf.wf_sim(N=5, ngens=5, num_pops=2, seed=123)\n        tsutil.shuffle_tables(tables, 7000)\n        tables2 = tables.copy()\n        tables2.subset(np.arange(tables.nodes.num_rows))\n        assert tables.individuals == tables2.individuals\n\n    def test_random_subsets(self):\n        rng = np.random.default_rng(1542)\n        for tables in self.get_examples(9412):\n            for n in [2, tables.nodes.num_rows - 10]:\n                nodes = rng.choice(np.arange(tables.nodes.num_rows), n, replace=False)\n                self.verify_subset(tables, nodes)\n\n    def test_empty_nodes(self):\n        for tables in self.get_examples(8724):\n            subset = tables.copy()\n          
  subset.subset(np.array([]), record_provenance=False)\n            assert subset.nodes.num_rows == 0\n            assert subset.edges.num_rows == 0\n            assert subset.populations.num_rows == 0\n            assert subset.individuals.num_rows == 0\n            assert subset.migrations.num_rows == 0\n            assert subset.sites.num_rows == 0\n            assert subset.mutations.num_rows == 0\n            assert subset.provenances == tables.provenances\n\n    def test_no_remove_unreferenced(self):\n        tables = tskit.TableCollection(sequence_length=10)\n        tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE)\n        tables.nodes.add_row(time=1)\n        tables.edges.add_row(parent=1, child=0, left=0, right=10)\n        for k in range(5):\n            tables.sites.add_row(position=k, ancestral_state=str(k))\n        # these are all unused, so should remain unchanged\n        for k in range(5):\n            tables.populations.add_row(metadata=str(k).encode())\n        for k in range(5):\n            tables.individuals.add_row(metadata=str(k).encode())\n        sub_tables = tables.copy()\n        sub_tables.subset([], remove_unreferenced=False)\n        assert tables.sites == sub_tables.sites\n        assert tables.populations == sub_tables.populations\n        assert tables.individuals == sub_tables.individuals\n        ts = tables.tree_sequence()\n        sub_tables = ts.subset([], remove_unreferenced=False).dump_tables()\n        assert tables.sites == sub_tables.sites\n        assert tables.populations == sub_tables.populations\n        assert tables.individuals == sub_tables.individuals\n\n    def test_subset_reverse_all_nodes(self):\n        ts = tskit.Tree.generate_comb(5).tree_sequence\n        assert np.all(ts.samples() == np.arange(ts.num_samples))\n        tables = ts.dump_tables()\n        flipped_ids = np.flip(np.arange(tables.nodes.num_rows))\n        self.verify_subset(tables, flipped_ids)\n        # Now test the topology is the 
same\n        tables.subset(flipped_ids)\n        new_ts = tables.tree_sequence()\n        assert set(new_ts.samples()) == set(flipped_ids[np.arange(ts.num_samples)])\n        r1 = ts.first().rank()\n        r2 = new_ts.first().rank()\n        assert r1.shape == r2.shape\n        assert r1.label != r2.label\n\n    def test_subset_reverse_internal_nodes(self):\n        ts = tskit.Tree.generate_balanced(5).tree_sequence\n        internal_nodes = np.ones(ts.num_nodes, dtype=bool)\n        internal_nodes[ts.samples()] = False\n        tables = ts.dump_tables()\n        node_ids = np.arange(tables.nodes.num_rows)\n        node_ids[internal_nodes] = np.flip(node_ids[internal_nodes])\n        self.verify_subset(tables, node_ids)\n        # Now test the topology and the sample labels are the same\n        tables.subset(node_ids)\n        new_ts = tables.tree_sequence()\n        assert np.any(new_ts.nodes_time != ts.nodes_time)\n        assert new_ts.first().rank() == ts.first().rank()\n\n\nclass TestUnionTables(unittest.TestCase):\n    \"\"\"\n    Tests for the TableCollection.union method\n    \"\"\"\n\n    def get_msprime_example(self, sample_size, T, seed):\n        # we assume after the split the ts are completely independent\n        M = [[0, 0], [0, 0]]\n        population_configurations = [\n            msprime.PopulationConfiguration(sample_size=sample_size),\n            msprime.PopulationConfiguration(sample_size=sample_size),\n        ]\n        demographic_events = [\n            msprime.CensusEvent(time=T),\n            msprime.MassMigration(T, source=1, dest=0, proportion=1),\n        ]\n        ts = msprime.simulate(\n            population_configurations=population_configurations,\n            demographic_events=demographic_events,\n            migration_matrix=M,\n            length=2e5,\n            recombination_rate=1e-8,\n            mutation_rate=1e-7,\n            record_migrations=False,\n            random_seed=seed,\n        )\n        ts = 
tsutil.add_random_metadata(ts, seed)\n        ts = tsutil.insert_random_ploidy_individuals(ts, max_ploidy=1, samples_only=True)\n        return ts\n\n    def get_wf_example(self, N, T, seed):\n        twopop_tables = wf.wf_sim(N, T, num_pops=2, seed=seed, deep_history=True)\n        twopop_tables.sort()\n        ts = twopop_tables.tree_sequence()\n        ts = ts.simplify()\n        ts = tsutil.jukes_cantor(ts, 1, 10, seed=seed)\n        ts = tsutil.add_random_metadata(ts, seed)\n        ts = tsutil.insert_random_ploidy_individuals(ts, max_ploidy=2, samples_only=True)\n        return ts\n\n    def split_example(self, ts, T):\n        # splitting two pop ts *with no migration* into disjoint ts\n        shared_nodes = [n.id for n in ts.nodes() if n.time >= T]\n        pop1 = list(ts.samples(population=0))\n        pop2 = list(ts.samples(population=1))\n        tables1 = ts.simplify(shared_nodes + pop1, record_provenance=False).dump_tables()\n        tables2 = ts.simplify(shared_nodes + pop2, record_provenance=False).dump_tables()\n        node_mapping = [\n            i if i < len(shared_nodes) else tskit.NULL\n            for i in range(tables2.nodes.num_rows)\n        ]\n        # adding some metadata to one of the tables\n        # union should disregard differences in metadata\n        tables1.metadata_schema = tskit.MetadataSchema(\n            {\"codec\": \"json\", \"type\": \"object\"}\n        )\n        tables1.metadata = {\"hello\": \"world\"}\n        return tables1, tables2, node_mapping\n\n    def verify_union(self, tables, other, node_mapping, add_populations=True):\n        self.verify_union_consistency(tables, other, node_mapping)\n        self.verify_union_equality(\n            tables, other, node_mapping, add_populations=add_populations\n        )\n\n    def verify_union_equality(self, tables, other, node_mapping, add_populations=True):\n        uni1 = tables.copy()\n        uni2 = tables.copy()\n        uni1.union(\n            other,\n            
node_mapping,\n            record_provenance=False,\n            add_populations=add_populations,\n        )\n        tsutil.py_union(\n            uni2,\n            other,\n            node_mapping,\n            record_provenance=False,\n            add_populations=add_populations,\n        )\n        uni1.assert_equals(uni2, ignore_provenance=True)\n        # verifying that subsetting to original nodes return the same table\n        orig_nodes = [j for i, j in enumerate(node_mapping) if j != tskit.NULL]\n        uni1.subset(orig_nodes)\n        # subsetting tables just to make sure order is the same\n        tables.subset(orig_nodes)\n        uni1.assert_equals(tables, ignore_provenance=True)\n\n    def verify_union_consistency(self, tables, other, node_mapping):\n        ts1 = tsutil.insert_unique_metadata(tables)\n        ts2 = tsutil.insert_unique_metadata(other, offset=1000000)\n        tsu = ts1.union(ts2, node_mapping, check_shared_equality=False)\n        mapu = tsutil.metadata_map(tsu)\n        for j, n1 in enumerate(ts1.nodes()):\n            # nodes in ts1 should be preserved, in the same order\n            nu = tsu.node(j)\n            assert n1.metadata == nu.metadata\n            if n1.individual == tskit.NULL:\n                assert nu.individual == tskit.NULL\n            else:\n                assert (\n                    ts1.individual(n1.individual).metadata\n                    == tsu.individual(nu.individual).metadata\n                )\n        for j, k in enumerate(node_mapping):\n            # nodes in ts2 should match if they are not in node mapping\n            if k == tskit.NULL:\n                n2 = ts2.node(j)\n                md2 = n2.metadata\n                assert md2 in mapu[\"nodes\"]\n                nu = tsu.node(mapu[\"nodes\"][md2])\n                if n2.individual == tskit.NULL:\n                    assert nu.individual == tskit.NULL\n                else:\n                    assert (\n                        
ts2.individual(n2.individual).metadata\n                        == tsu.individual(nu.individual).metadata\n                    )\n        for e1 in ts1.edges():\n            # relationships between nodes in ts1 should be preserved\n            p1, c1 = e1.parent, e1.child\n            assert e1.metadata in mapu[\"edges\"]\n            eu = tsu.edge(mapu[\"edges\"][e1.metadata])\n            pu, cu = eu.parent, eu.child\n            assert ts1.node(p1).metadata == tsu.node(pu).metadata\n            assert ts1.node(c1).metadata == tsu.node(cu).metadata\n        for e2 in ts2.edges():\n            # relationships between nodes in ts2 should be preserved\n            # if both are new nodes\n            p2, c2 = e2.parent, e2.child\n            if node_mapping[p2] == tskit.NULL and node_mapping[c2] == tskit.NULL:\n                assert e2.metadata in mapu[\"edges\"]\n                eu = tsu.edge(mapu[\"edges\"][e2.metadata])\n                pu, cu = eu.parent, eu.child\n                assert ts2.node(p2).metadata == tsu.node(pu).metadata\n                assert ts2.node(c2).metadata == tsu.node(cu).metadata\n\n        for i1 in ts1.individuals():\n            # individuals in ts1 should be preserved\n            assert i1.metadata in mapu[\"individuals\"]\n            iu = tsu.individual(mapu[\"individuals\"][i1.metadata])\n            assert len(i1.parents) == len(iu.parents)\n            for p1, pu in zip(i1.parents, iu.parents):\n                if p1 == tskit.NULL:\n                    assert pu == tskit.NULL\n                else:\n                    assert ts1.individual(p1).metadata == tsu.individual(pu).metadata\n        # how should individual metadata from ts2 map to ts1\n        # and only individuals without shared nodes should be added\n        indivs21 = {}\n        new_indivs2 = [True for _ in ts2.individuals()]\n        for j, k in enumerate(node_mapping):\n            n = ts2.node(j)\n            if n.individual != tskit.NULL:\n                i2 
= ts2.individual(n.individual)\n                if k == tskit.NULL:\n                    indivs21[i2.metadata] = i2.metadata\n                else:\n                    new_indivs2[n.individual] = False\n                    i1 = ts1.individual(ts1.node(k).individual)\n                    if i2.metadata in indivs21:\n                        assert indivs21[i2.metadata] == i1.metadata\n                    else:\n                        indivs21[i2.metadata] = i1.metadata\n        for i2 in ts2.individuals():\n            if new_indivs2[i2.id]:\n                assert i2.metadata in mapu[\"individuals\"]\n                iu = tsu.individual(mapu[\"individuals\"][i2.metadata])\n                assert np.sum(i2.parents == tskit.NULL) == np.sum(\n                    iu.parents == tskit.NULL\n                )\n                md2 = [ts2.individual(p).metadata for p in i2.parents if p != tskit.NULL]\n                md2u = [indivs21[md] for md in md2]\n                mdu = [tsu.individual(p).metadata for p in iu.parents if p != tskit.NULL]\n                assert set(md2u) == set(mdu)\n            else:\n                # the individual *should* be there, but by a different name\n                assert i2.metadata not in mapu[\"individuals\"]\n                assert indivs21[i2.metadata] in mapu[\"individuals\"]\n        for m1 in ts1.mutations():\n            # all mutations in ts1 should be present\n            assert m1.metadata in mapu[\"mutations\"]\n            mu = tsu.mutation(mapu[\"mutations\"][m1.metadata])\n            assert m1.derived_state == mu.derived_state\n            assert m1.node == mu.node\n            if tskit.is_unknown_time(m1.time):\n                assert tskit.is_unknown_time(mu.time)\n            else:\n                assert m1.time == mu.time\n            assert ts1.site(m1.site).position == tsu.site(mu.site).position\n        for m2 in ts2.mutations():\n            # and those in ts2 if their node has been added\n            if 
node_mapping[m2.node] == tskit.NULL:\n                assert m2.metadata in mapu[\"mutations\"]\n                mu = tsu.mutation(mapu[\"mutations\"][m2.metadata])\n                assert m2.derived_state == mu.derived_state\n                assert ts2.node(m2.node).metadata == tsu.node(mu.node).metadata\n                if tskit.is_unknown_time(m2.time):\n                    assert tskit.is_unknown_time(mu.time)\n                else:\n                    assert m2.time == mu.time\n                assert ts2.site(m2.site).position == tsu.site(mu.site).position\n        for s1 in ts1.sites():\n            assert s1.metadata in mapu[\"sites\"]\n            su = tsu.site(mapu[\"sites\"][s1.metadata])\n            assert s1.position == su.position\n            assert s1.ancestral_state == su.ancestral_state\n        for s2 in ts2.sites():\n            if s2.position not in ts1.tables.sites.position:\n                assert s2.metadata in mapu[\"sites\"]\n                su = tsu.site(mapu[\"sites\"][s2.metadata])\n                assert s2.position == su.position\n                assert s2.ancestral_state == su.ancestral_state\n        # check mutation parents\n        expected_tables = tsu.dump_tables()\n        tables_union = expected_tables.copy()\n        tables_union.compute_mutation_parents()\n        assert tables_union.mutations == expected_tables.mutations\n\n    def test_union_empty(self):\n        tables = self.get_msprime_example(sample_size=3, T=2, seed=9328).dump_tables()\n        tables.sort()\n        empty_tables = tables.copy()\n        for table in empty_tables.table_name_map.keys():\n            getattr(empty_tables, table).clear()\n        uni = tables.copy()\n        uni.union(empty_tables, [])\n        tables.assert_equals(uni, ignore_provenance=True)\n\n    def test_contradictory_children(self):\n        # these are identical\n        ts1 = tskit.Tree.generate_comb(2, span=2).tree_sequence\n        ts2 = tskit.Tree.generate_comb(2, 
span=2).tree_sequence\n        with pytest.raises(_tskit.LibraryError, match=\"contradictory children\"):\n            _ = ts1.union(ts2, node_mapping=[0, 1, -1])\n\n    def test_noshared_example(self):\n        ts1 = self.get_msprime_example(sample_size=3, T=2, seed=9328)\n        ts2 = self.get_msprime_example(sample_size=3, T=2, seed=2125)\n        node_mapping = np.full(ts2.num_nodes, tskit.NULL, dtype=\"int32\")\n        uni1 = ts1.union(ts2, node_mapping, record_provenance=False)\n        uni2_tables = ts1.dump_tables()\n        tsutil.py_union(\n            uni2_tables, ts2.dump_tables(), node_mapping, record_provenance=False\n        )\n        assert uni1.tables == uni2_tables\n\n    def test_all_shared_example(self):\n        tables = self.get_wf_example(N=5, T=5, seed=11349).dump_tables()\n        tables.sort()\n        uni = tables.copy()\n        node_mapping = np.arange(tables.nodes.num_rows)\n        uni.union(tables, node_mapping, record_provenance=False)\n        uni.assert_equals(tables)\n\n    @pytest.mark.slow\n    def test_no_add_pop(self):\n        self.verify_union(\n            *self.split_example(self.get_msprime_example(10, 10, seed=135), 10),\n            add_populations=False,\n        )\n        self.verify_union(\n            *self.split_example(self.get_wf_example(10, 10, seed=157), 10),\n            add_populations=False,\n        )\n\n    def test_provenance(self):\n        tables, other, node_mapping = self.split_example(\n            self.get_msprime_example(5, T=2, seed=928), 2\n        )\n        tables_copy = tables.copy()\n        tables.union(other, node_mapping)\n        uni_other_dict = json.loads(tables.provenances[-1].record)[\"parameters\"][\"other\"]\n        recovered_prov_table = tskit.ProvenanceTable()\n        assert len(uni_other_dict[\"timestamp\"]) == len(uni_other_dict[\"record\"])\n        for timestamp, record in zip(\n            uni_other_dict[\"timestamp\"], uni_other_dict[\"record\"]\n        ):\n          
  recovered_prov_table.add_row(record, timestamp)\n        assert recovered_prov_table == other.provenances\n        tables.provenances.truncate(tables.provenances.num_rows - 1)\n        assert tables.provenances == tables_copy.provenances\n\n    @pytest.mark.slow\n    def test_examples(self):\n        for N in [2, 4, 5]:\n            for T in [2, 5, 20]:\n                for mut_times in [True, False]:\n                    with self.subTest(N=N, T=T):\n                        ts = self.get_msprime_example(N, T=T, seed=888)\n                        if mut_times:\n                            tables = ts.dump_tables()\n                            tables.compute_mutation_times()\n                            ts = tables.tree_sequence()\n                        self.verify_union(*self.split_example(ts, T))\n                        ts = self.get_wf_example(N=N, T=T, seed=827)\n                        if mut_times:\n                            tables = ts.dump_tables()\n                            tables.compute_mutation_times()\n                            ts = tables.tree_sequence()\n                        self.verify_union(*self.split_example(ts, T))\n\n    def test_split_and_rejoin(self):\n        ts = self.get_msprime_example(5, T=2, seed=928)\n        cutpoints = np.array([0, 0.25, 0.5, 0.75, 1]) * ts.sequence_length\n        tables1 = ts.dump_tables()\n        tables1.delete_intervals([cutpoints[0:2], cutpoints[2:4]], simplify=False)\n        tables2 = ts.dump_tables()\n        tables2.delete_intervals([cutpoints[1:3], cutpoints[3:]], simplify=False)\n        tables1.union(\n            tables2,\n            all_edges=True,\n            all_mutations=True,\n            node_mapping=np.arange(ts.num_nodes),\n            check_shared_equality=False,\n        )\n        tables1.edges.squash()\n        tables1.sort()\n        tables1.assert_equals(ts.tables, ignore_provenance=True)\n\n    def test_both_empty(self):\n        tables = 
tskit.TableCollection(sequence_length=1)\n        t1 = tables.copy()\n        t2 = tables.copy()\n        t1.union(t2, node_mapping=np.arange(0), all_edges=True, all_mutations=True)\n        t1.assert_equals(tables, ignore_provenance=True)\n\n    def test_one_empty(self):\n        ts = self.get_msprime_example(5, T=2, seed=928)\n        ts = ts.simplify()  # the example has a load of unreferenced individuals\n        tables = ts.dump_tables()\n        empty = tskit.TableCollection(sequence_length=tables.sequence_length)\n        empty.time_units = tables.time_units\n\n        # union with empty should be no-op\n        tables.union(\n            empty, node_mapping=np.arange(0), all_edges=True, all_mutations=True\n        )\n        tables.assert_equals(ts.dump_tables(), ignore_provenance=True)\n\n        # empty union with tables should be tables\n        empty.union(\n            tables,\n            node_mapping=np.full(tables.nodes.num_rows, tskit.NULL),\n            all_edges=True,\n            all_mutations=True,\n            check_shared_equality=False,\n        )\n        empty.assert_equals(tables, ignore_provenance=True)\n\n    def test_reciprocal_empty(self):\n        # reciprocally add mutations from one table and edges from the other\n        edges_table = tskit.Tree.generate_comb(6, span=6).tree_sequence.dump_tables()\n        muts_table = tskit.TableCollection(sequence_length=6)\n        muts_table.nodes.replace_with(edges_table.nodes)  # same nodes, no edges\n        for j in range(0, 6):\n            site_id = muts_table.sites.add_row(position=j, ancestral_state=\"0\")\n            if j % 2 == 0:\n                # Some sites empty\n                muts_table.mutations.add_row(site=site_id, node=j, derived_state=\"1\")\n        identity_map = np.arange(len(muts_table.nodes), dtype=\"int32\")\n        params = {\"node_mapping\": identity_map, \"check_shared_equality\": False}\n\n        test_table = edges_table.copy()\n        
test_table.union(muts_table, **params, all_edges=True)  # null op\n        assert len(test_table.sites) == 0\n        assert len(test_table.mutations) == 0\n        test_table.union(muts_table, **params, all_mutations=True)\n        assert test_table.sites == muts_table.sites\n        assert test_table.mutations == muts_table.mutations\n\n        muts_table.union(edges_table, **params, all_mutations=True)  # null op\n        assert len(muts_table.edges) == 0\n        muts_table.union(edges_table, **params, all_edges=True)\n        assert muts_table.edges == edges_table.edges\n\n        muts_table.assert_equals(test_table, ignore_provenance=True)\n\n\nclass TestTableSetitemMetadata:\n    @pytest.mark.parametrize(\"table_name\", tskit.TABLE_NAMES)\n    def test_setitem_metadata(self, ts_fixture, table_name):\n        table = getattr(ts_fixture.dump_tables(), table_name)\n        if hasattr(table, \"metadata_schema\"):\n            assert table.metadata_schema == tskit.MetadataSchema({\"codec\": \"json\"})\n            assert table[0].metadata != table[1].metadata\n            table[0] = table[1]\n            assert table[0] == table[1]\n\n\ndef keep_rows_definition(table, keep):\n    id_map = np.full(len(table), -1, np.int32)\n    copy = table.copy()\n    table.clear()\n    for j, row in enumerate(copy):\n        if keep[j]:\n            id_map[j] = len(table)\n            table.append(row)\n    return id_map\n\n\nclass KeepRowsBaseTest:\n    # Simple tests assuming that rows aren't self-referential\n\n    def test_keep_all(self, ts_fixture):\n        table = self.get_table(ts_fixture)\n        before = table.copy()\n        table.keep_rows(np.ones(len(table), dtype=bool))\n        assert table.equals(before)\n\n    def test_keep_none(self, ts_fixture):\n        table = self.get_table(ts_fixture)\n        table.keep_rows(np.zeros(len(table), dtype=bool))\n        assert len(table) == 0\n\n    def check_keep_rows(self, table, keep):\n        copy = table.copy()\n      
  id_map1 = keep_rows_definition(copy, keep)\n        id_map2 = table.keep_rows(keep)\n        table.assert_equals(copy)\n        np.testing.assert_array_equal(id_map1, id_map2)\n\n    def test_keep_even(self, ts_fixture):\n        table = self.get_table(ts_fixture)\n        keep = np.ones(len(table), dtype=bool)\n        keep[1::2] = 0\n        self.check_keep_rows(table, keep)\n\n    def test_keep_odd(self, ts_fixture):\n        table = self.get_table(ts_fixture)\n        keep = np.ones(len(table), dtype=bool)\n        keep[::2] = 0\n        self.check_keep_rows(table, keep)\n\n    def test_keep_first(self, ts_fixture):\n        table = self.get_table(ts_fixture)\n        keep = np.zeros(len(table), dtype=bool)\n        keep[0] = 1\n        self.check_keep_rows(table, keep)\n        assert len(table) == 1\n\n    def test_keep_last(self, ts_fixture):\n        table = self.get_table(ts_fixture)\n        keep = np.zeros(len(table), dtype=bool)\n        keep[-1] = 1\n        self.check_keep_rows(table, keep)\n        assert len(table) == 1\n\n    @pytest.mark.parametrize(\"dtype\", [np.int32, int, np.float32])\n    def test_bad_array_dtype(self, ts_fixture, dtype):\n        table = self.get_table(ts_fixture)\n        keep = np.zeros(len(table), dtype=dtype)\n        with pytest.raises(TypeError, match=\"Cannot cast array\"):\n            table.keep_rows(keep)\n\n    @pytest.mark.parametrize(\"truthy\", [False, 0, \"\", None])\n    def test_python_falsey_input(self, ts_fixture, truthy):\n        table = self.get_table(ts_fixture)\n        keep = [truthy] * len(table)\n        self.check_keep_rows(table, keep)\n        assert len(table) == 0\n\n    @pytest.mark.parametrize(\"truthy\", [True, 1, \"string\", 1e-6])\n    def test_python_truey_input(self, ts_fixture, truthy):\n        table = self.get_table(ts_fixture)\n        n = len(table)\n        keep = [truthy] * len(table)\n        self.check_keep_rows(table, keep)\n        assert len(table) == n\n\n    
@pytest.mark.parametrize(\"offset\", [-1, 1, 100])\n    def test_bad_length(self, ts_fixture, offset):\n        table = self.get_table(ts_fixture)\n        keep = [True] * (len(table) + offset)\n        match_str = f\"need:{len(table)}, got:{len(table) + offset}\"\n        with pytest.raises(ValueError, match=match_str):\n            table.keep_rows(keep)\n\n    @pytest.mark.parametrize(\"bad_type\", [False, 0, None])\n    def test_non_list_input(self, ts_fixture, bad_type):\n        table = self.get_table(ts_fixture)\n        with pytest.raises(TypeError, match=\"has no len\"):\n            table.keep_rows(bad_type)\n\n\nclass TestNodeTableKeepRows(KeepRowsBaseTest):\n    def get_table(self, ts):\n        return ts.dump_tables().nodes\n\n\nclass TestEdgeTableKeepRows(KeepRowsBaseTest):\n    def get_table(self, ts):\n        return ts.dump_tables().edges\n\n\nclass TestSiteTableKeepRows(KeepRowsBaseTest):\n    def get_table(self, ts):\n        return ts.dump_tables().sites\n\n\nclass TestMigrationTableKeepRows(KeepRowsBaseTest):\n    def get_table(self, ts):\n        return ts.dump_tables().migrations\n\n\nclass TestPopulationTableKeepRows(KeepRowsBaseTest):\n    def get_table(self, ts):\n        return ts.dump_tables().populations\n\n\nclass TestProvenanceTableKeepRows(KeepRowsBaseTest):\n    def get_table(self, ts):\n        return ts.dump_tables().provenances\n\n\n# Null out the self-referential columns (this is why the tests are structured via\n# classes rather than pytest parametrize).\n\n\nclass TestIndividualTableKeepRows(KeepRowsBaseTest):\n    def get_table(self, ts):\n        table = ts.dump_tables().individuals\n        table.parents = np.zeros_like(table.parents) - 1\n        return table\n\n    def check_keep_rows(self, table, keep):\n        copy = table.copy()\n        id_map1 = keep_rows_definition(copy, keep)\n        for j, row in enumerate(copy):\n            parents = [p if p == tskit.NULL else id_map1[p] for p in row.parents]\n            copy[j] 
= row.replace(parents=parents)\n        id_map2 = table.keep_rows(keep)\n        table.assert_equals(copy)\n        np.testing.assert_array_equal(id_map1, id_map2)\n\n    def test_delete_unreferenced(self, ts_fixture):\n        table = ts_fixture.dump_tables().individuals\n        ref_count = np.zeros(len(table))\n        for row in table:\n            for parent in row.parents:\n                ref_count[parent] += 1\n        self.check_keep_rows(table, ref_count > 0)\n\n\nclass TestMutationTableKeepRows(KeepRowsBaseTest):\n    def get_table(self, ts):\n        table = ts.dump_tables().mutations\n        table.parent = np.zeros_like(table.parent) - 1\n        return table\n\n    def check_keep_rows(self, table, keep):\n        copy = table.copy()\n        id_map1 = keep_rows_definition(copy, keep)\n        for j, row in enumerate(copy):\n            if row.parent != tskit.NULL:\n                copy[j] = row.replace(parent=id_map1[row.parent])\n        id_map2 = table.keep_rows(keep)\n        table.assert_equals(copy)\n        np.testing.assert_array_equal(id_map1, id_map2)\n\n    def test_delete_unreferenced(self, ts_fixture):\n        table = ts_fixture.dump_tables().mutations\n        parent = table.parent.copy()\n        parent[parent == tskit.NULL] = len(table)\n        references = np.bincount(parent)\n        self.check_keep_rows(table, references[:-1] > 0)\n\n    def test_error_on_bad_ids(self, ts_fixture):\n        table = ts_fixture.dump_tables().mutations\n        table.add_row(site=0, node=0, derived_state=\"A\", parent=10000)\n        before = table.copy()\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_MUTATION_OUT_OF_BOUNDS\"):\n            table.keep_rows(np.ones(len(table), dtype=bool))\n        table.assert_equals(before)\n\n\nclass TestKeepRowsExamples:\n    \"\"\"\n    Some examples of how to use the keep_rows method in an idiomatic\n    and efficient way.\n\n    TODO these should be converted into documentation examples when 
we\n    write an \"examples\" section for table editing.\n    \"\"\"\n\n    def test_detach_subtree(self):\n        # 2.00┊   4   ┊\n        #     ┊ ┏━┻┓  ┊\n        # 1.00┊ ┃  3  ┊\n        #     ┊ ┃ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        ts = tskit.Tree.generate_balanced(3).tree_sequence\n        tables = ts.dump_tables()\n        tables.edges.keep_rows(tables.edges.child != 3)\n\n        # 2.00┊ 4     ┊\n        #     ┊ ┃     ┊\n        # 1.00┊ ┃  3  ┊\n        #     ┊ ┃ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        ts = tables.tree_sequence()\n        assert ts.num_trees == 1\n        assert ts.first().parent_dict == {0: 4, 1: 3, 2: 3}\n\n    def test_delete_older_edges(self):\n        # 2.00┊   4   ┊\n        #     ┊ ┏━┻┓  ┊\n        # 1.00┊ ┃  3  ┊\n        #     ┊ ┃ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        ts = tskit.Tree.generate_balanced(3).tree_sequence\n        tables = ts.dump_tables()\n        tables.edges.keep_rows(tables.nodes.time[tables.edges.parent] <= 1)\n\n        # 2.00┊       ┊\n        #     ┊       ┊\n        # 1.00┊    3  ┊\n        #     ┊   ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        ts = tables.tree_sequence()\n        assert ts.num_trees == 1\n        assert ts.first().parent_dict == {1: 3, 2: 3}\n\n    def test_delete_unreferenced_nodes(self):\n        # 2.00┊   4   ┊\n        #     ┊ ┏━┻┓  ┊\n        # 1.00┊ ┃  3  ┊\n        #     ┊ ┃ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        ts = tskit.Tree.generate_balanced(3).tree_sequence\n        tables = ts.dump_tables()\n        edges = tables.edges\n        nodes = tables.nodes\n        edges.keep_rows(nodes.time[edges.parent] <= 1)\n        # 2.00┊       ┊\n        #     ┊       ┊\n        # 1.00┊    3  ┊\n        #     ┊   ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        ref_count = np.bincount(edges.child, minlength=len(nodes))\n        ref_count += 
np.bincount(edges.parent, minlength=len(nodes))\n        assert list(ref_count) == [0, 1, 1, 2, 0]\n        id_map = nodes.keep_rows(ref_count > 0)\n        assert list(id_map) == [-1, 0, 1, 2, -1]\n        assert len(nodes) == 3\n        # Remap the edges IDs\n        edges.child = id_map[edges.child]\n        edges.parent = id_map[edges.parent]\n        ts = tables.tree_sequence()\n        assert ts.num_trees == 1\n        assert ts.first().parent_dict == {0: 2, 1: 2}\n\n    def test_mutation_ids_auto_remapped(self):\n        mutations = tskit.MutationTable()\n        # Add 5 initial rows with no parents\n        for j in range(5):\n            mutations.add_row(site=j, node=j, derived_state=f\"{j}\")\n        # Now 5 more in a chain\n        last = -1\n        for j in range(5):\n            last = mutations.add_row(\n                site=10 + j, node=10 + j, parent=last, derived_state=f\"{j}\"\n            )\n\n        # ╔══╤════╤════╤════╤═════════════╤══════╤════════╗\n        # ║id│site│node│time│derived_state│parent│metadata║\n        # ╠══╪════╪════╪════╪═════════════╪══════╪════════╣\n        # ║0 │   0│   0│ nan│            0│    -1│        ║\n        # ║1 │   1│   1│ nan│            1│    -1│        ║\n        # ║2 │   2│   2│ nan│            2│    -1│        ║\n        # ║3 │   3│   3│ nan│            3│    -1│        ║\n        # ║4 │   4│   4│ nan│            4│    -1│        ║\n        # ║5 │  10│  10│ nan│            0│    -1│        ║\n        # ║6 │  11│  11│ nan│            1│     5│        ║\n        # ║7 │  12│  12│ nan│            2│     6│        ║\n        # ║8 │  13│  13│ nan│            3│     7│        ║\n        # ║9 │  14│  14│ nan│            4│     8│        ║\n        # ╚══╧════╧════╧════╧═════════════╧══════╧════════╝\n\n        keep = np.ones(len(mutations), dtype=bool)\n        keep[:5] = False\n        mutations.keep_rows(keep)\n\n        # ╔══╤════╤════╤════╤═════════════╤══════╤════════╗\n        # 
║id│site│node│time│derived_state│parent│metadata║\n        # ╠══╪════╪════╪════╪═════════════╪══════╪════════╣\n        # ║0 │  10│  10│ nan│            0│    -1│        ║\n        # ║1 │  11│  11│ nan│            1│     0│        ║\n        # ║2 │  12│  12│ nan│            2│     1│        ║\n        # ║3 │  13│  13│ nan│            3│     2│        ║\n        # ║4 │  14│  14│ nan│            4│     3│        ║\n        # ╚══╧════╧════╧════╧═════════════╧══════╧════════╝\n        assert list(mutations.site) == [10, 11, 12, 13, 14]\n        assert list(mutations.node) == [10, 11, 12, 13, 14]\n        assert list(mutations.parent) == [-1, 0, 1, 2, 3]\n\n    def test_individual_ids_auto_remapped(self):\n        individuals = tskit.IndividualTable()\n        # Add some rows with missing parents in different forms\n        individuals.add_row()\n        individuals.add_row(parents=[-1])\n        individuals.add_row(parents=[-1, -1])\n        # Now 5 more in a chain\n        last = -1\n        for _ in range(5):\n            last = individuals.add_row(parents=[last])\n        last = individuals.add_row(parents=[last, last])\n\n        # ╔══╤═════╤════════╤═══════╤════════╗\n        # ║id│flags│location│parents│metadata║\n        # ╠══╪═════╪════════╪═══════╪════════╣\n        # ║0 │    0│        │       │        ║\n        # ║1 │    0│        │     -1│        ║\n        # ║2 │    0│        │ -1, -1│        ║\n        # ║3 │    0│        │     -1│        ║\n        # ║4 │    0│        │      3│        ║\n        # ║5 │    0│        │      4│        ║\n        # ║6 │    0│        │      5│        ║\n        # ║7 │    0│        │      6│        ║\n        # ║8 │    0│        │   7, 7│        ║\n        # ╚══╧═════╧════════╧═══════╧════════╝\n\n        keep = np.ones(len(individuals), dtype=bool)\n        # Only delete one row\n        keep[1] = False\n        individuals.keep_rows(keep)\n\n        # ╔══╤═════╤════════╤═══════╤════════╗\n        # 
║id│flags│location│parents│metadata║\n        # ╠══╪═════╪════════╪═══════╪════════╣\n        # ║0 │    0│        │       │        ║\n        # ║1 │    0│        │ -1, -1│        ║\n        # ║2 │    0│        │     -1│        ║\n        # ║3 │    0│        │      2│        ║\n        # ║4 │    0│        │      3│        ║\n        # ║5 │    0│        │      4│        ║\n        # ║6 │    0│        │      5│        ║\n        # ║7 │    0│        │   6, 6│        ║\n        # ╚══╧═════╧════════╧═══════╧════════╝\n        parents = [list(ind.parents) for ind in individuals]\n        assert parents == [[], [-1, -1], [-1], [2], [3], [4], [5], [6, 6]]\n\n\ndef test_ragged_selection_indices_with_lengths():\n    indexed_offsets = np.array([0, 3], dtype=np.uint32)\n    lengths64 = np.array([3, 2], dtype=np.int64)\n    gather = _ragged_selection_indices(indexed_offsets, lengths64)\n    expected = np.array([0, 1, 2, 3, 4], dtype=np.int64)\n    assert np.array_equal(gather, expected)\n\n\ndef test_ragged_selection_indices_with_zeros():\n    indexed_offsets = np.array([0, 2, 2, 5], dtype=np.uint32)\n    lengths64 = np.array([2, 0, 3, 0], dtype=np.int64)\n    gather = _ragged_selection_indices(indexed_offsets, lengths64)\n    expected = np.array([0, 1, 2, 3, 4], dtype=np.int64)\n    assert np.array_equal(gather, expected)\n\n\ndef test_ragged_selection_indices_non_monotonic():\n    indexed_offsets = np.array([5, 0], dtype=np.uint32)\n    lengths64 = np.array([1, 2], dtype=np.int64)\n    gather = _ragged_selection_indices(indexed_offsets, lengths64)\n    expected = np.array([5, 0, 1], dtype=np.int64)\n    assert np.array_equal(gather, expected)\n\n\nclass TestMutationParentValidation:\n    def _two_leaf_tree(self):\n        tables = tskit.TableCollection(sequence_length=1.0)\n        root = tables.nodes.add_row(time=2.0)\n        a = tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE)\n        b = tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE)\n        
tables.edges.add_row(0.0, 1.0, root, a)\n        tables.edges.add_row(0.0, 1.0, root, b)\n        return tables, a, b\n\n    def _chain_tree(self):\n        tables = tskit.TableCollection(sequence_length=1.0)\n        root = tables.nodes.add_row(time=2.0)\n        mid = tables.nodes.add_row(time=1.0)\n        leaf = tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE)\n        tables.edges.add_row(0.0, 1.0, root, mid)\n        tables.edges.add_row(0.0, 1.0, mid, leaf)\n        return tables, mid, leaf\n\n    def test_tree_sequence_bad_mutation_parent_topology(self):\n        tables, a, b = self._two_leaf_tree()\n        s = tables.sites.add_row(0.0, \"A\")\n        tables.mutations.add_row(site=s, node=a, derived_state=\"C\")  # id 0\n        tables.mutations.add_row(site=s, node=b, derived_state=\"G\")  # id 1\n        # Make a mutation on a parallel branch the parent\n        mut_cols = tables.mutations.asdict()\n        mut_cols[\"parent\"] = np.array([tskit.NULL, 0], dtype=np.int32)\n        tables.mutations.set_columns(**mut_cols)\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_BAD_MUTATION_PARENT\"):\n            tables.tree_sequence()\n\n    def test_tree_sequence_mutation_parent_after_child(self):\n        tables, mid, leaf = self._chain_tree()\n        s = tables.sites.add_row(0.0, \"A\")\n        tables.mutations.add_row(site=s, node=leaf, derived_state=\"C\")  # id 0 (child)\n        tables.mutations.add_row(site=s, node=mid, derived_state=\"G\")  # id 1 (parent)\n        tables.sort()\n        mut_cols = tables.mutations.asdict()\n        mut_cols[\"parent\"] = np.array([1, tskit.NULL], dtype=np.int32)\n        tables.mutations.set_columns(**mut_cols)\n        with pytest.raises(\n            tskit.LibraryError, match=\"TSK_ERR_MUTATION_PARENT_AFTER_CHILD\"\n        ):\n            tables.tree_sequence()\n\n    def test_tree_sequence_mutation_parent_different_site(self):\n        tables, a, _ = self._two_leaf_tree()\n        s0 = 
tables.sites.add_row(0.0, \"A\")\n        s1 = tables.sites.add_row(0.5, \"A\")\n        tables.mutations.add_row(site=s0, node=a, derived_state=\"C\")  # id 0\n        tables.mutations.add_row(site=s1, node=a, derived_state=\"G\")  # id 1\n        mut_cols = tables.mutations.asdict()\n        mut_cols[\"parent\"] = np.array([tskit.NULL, 0], dtype=np.int32)\n        tables.mutations.set_columns(**mut_cols)\n        with pytest.raises(\n            tskit.LibraryError, match=\"TSK_ERR_MUTATION_PARENT_DIFFERENT_SITE\"\n        ):\n            tables.tree_sequence()\n\n    def test_tree_sequence_mutation_parent_equal(self):\n        tables, a, _ = self._two_leaf_tree()\n        s = tables.sites.add_row(0.0, \"A\")\n        tables.mutations.add_row(site=s, node=a, derived_state=\"C\")  # id 0\n        mut_cols = tables.mutations.asdict()\n        mut_cols[\"parent\"] = np.array([0], dtype=np.int32)\n        tables.mutations.set_columns(**mut_cols)\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_MUTATION_PARENT_EQUAL\"):\n            tables.tree_sequence()\n\n    def test_tree_sequence_mutation_parent_out_of_bounds(self):\n        tables, a, _ = self._two_leaf_tree()\n        s = tables.sites.add_row(0.0, \"A\")\n        tables.mutations.add_row(site=s, node=a, derived_state=\"C\")  # id 0\n        # >= num_rows\n        mut_cols = tables.mutations.asdict()\n        mut_cols[\"parent\"] = np.array([1], dtype=np.int32)\n        tables.mutations.set_columns(**mut_cols)\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_MUTATION_OUT_OF_BOUNDS\"):\n            tables.tree_sequence()\n        # < NULL\n        mut_cols = tables.mutations.asdict()\n        mut_cols[\"parent\"] = np.array([-2], dtype=np.int32)\n        tables.mutations.set_columns(**mut_cols)\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_MUTATION_OUT_OF_BOUNDS\"):\n            tables.tree_sequence()\n"
  },
  {
    "path": "python/tests/test_text_formats.py",
    "content": "# MIT License\n#\n# Copyright (c) 2021-2024 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTest cases for converting fam file to tskit\n\"\"\"\n\nimport dataclasses\nimport tempfile\nfrom dataclasses import asdict\n\nimport numpy as np\nimport pytest\n\nimport tskit\n\n\n@dataclasses.dataclass\nclass FamEntry:\n    fid: str = \"0\"\n    iid: str = \"0\"\n    pat: str = \"0\"\n    mat: str = \"0\"\n    sex: str = \"0\"\n    phen: str = None\n\n    def get_row(self, delimiter=\"\\t\"):\n        return delimiter.join([x for x in asdict(self).values() if x is not None])\n\n\nclass TestParseFam:\n    \"\"\"\n    Tests for the parse_fam function.\n    \"\"\"\n\n    def get_parsed_fam(self, entries, delimiter=\"\\t\"):\n        content = \"\\n\".join([entry.get_row(delimiter=delimiter) for entry in entries])\n        with tempfile.TemporaryFile() as f:\n            f.write(bytes(content, \"utf-8\"))\n            
f.seek(0)\n            return tskit.parse_fam(f)\n\n    def test_empty_file(self):\n        entries = []\n        with pytest.warns(UserWarning):\n            tb = self.get_parsed_fam(entries=entries)\n        assert len(tb) == 0\n\n    @pytest.mark.parametrize(\"iid\", [\"1\", \"a\", \"100\", \"abc\"])\n    def test_single_line(self, iid):\n        entries = [FamEntry(iid=iid)]\n        tb = self.get_parsed_fam(entries=entries)\n        assert len(tb) == 1\n        assert np.array_equal(tb[0].parents, [-1, -1])\n        assert tb[0].metadata[\"plink_fid\"] == \"0\"\n        assert tb[0].metadata[\"plink_iid\"] == str(iid)\n        assert tb[0].metadata[\"sex\"] == 0\n\n    @pytest.mark.parametrize(\"iids\", [(\"1\", \"2\"), (\"a\", \"b\")])\n    def test_multiple_line_file(self, iids):\n        # test both integer and string IIDs\n        iid1, iid2 = iids\n        entries = [FamEntry(iid=iid1), FamEntry(iid=iid2)]\n        tb = self.get_parsed_fam(entries=entries)\n        assert len(tb) == 2\n        for idx in range(2):\n            assert np.array_equal(tb[idx].parents, [-1, -1])\n            assert tb[idx].metadata[\"plink_fid\"] == \"0\"\n            assert tb[idx].metadata[\"plink_iid\"] == str(entries[idx].iid)\n            assert tb[idx].metadata[\"sex\"] == 0\n\n    @pytest.mark.parametrize(\"n_cols\", range(1, 5))\n    def test_insufficient_cols(self, n_cols):\n        fields = list(asdict(FamEntry()))\n        entry = FamEntry(iid=\"1\")\n        for field in fields[n_cols:]:\n            entry.__setattr__(field, None)\n        with pytest.raises(  # noqa B017\n            Exception\n        ):  # Have to be non-specific here as numpy 1.23 changed the exception type\n            self.get_parsed_fam(entries=[entry])\n\n    def test_unrelated_duplicate_iids(self):\n        # Individuals have the same IID, but are in different families\n        entries = [FamEntry(iid=\"1\"), FamEntry(fid=\"1\", iid=\"1\")]\n        tb = 
self.get_parsed_fam(entries=entries)\n        assert len(tb) == 2\n        assert tb[0].metadata[\"plink_fid\"] == \"0\"\n        assert tb[0].metadata[\"plink_iid\"] == \"1\"\n        assert tb[1].metadata[\"plink_fid\"] == \"1\"\n        assert tb[1].metadata[\"plink_iid\"] == \"1\"\n\n    def test_duplicate_rows(self):\n        entries = [FamEntry(iid=\"1\"), FamEntry(iid=\"1\")]\n        with pytest.raises(ValueError):\n            self.get_parsed_fam(entries=entries)\n\n    def test_space_delimited(self):\n        entries = [FamEntry(iid=\"1\")]\n        tb = self.get_parsed_fam(entries=entries, delimiter=\" \")\n        assert np.array_equal(tb[0].parents, [-1, -1])\n        assert tb[0].metadata[\"plink_fid\"] == \"0\"\n        assert tb[0].metadata[\"plink_iid\"] == \"1\"\n        assert tb[0].metadata[\"sex\"] == 0\n\n    def test_missing_phen_col(self):\n        entries = [FamEntry(iid=\"1\", phen=\"1\")]\n        tb = self.get_parsed_fam(entries=entries)\n\n        entries = [FamEntry(iid=\"1\")]  # remove last column (PHEN column)\n        tb_missing = self.get_parsed_fam(entries=entries)\n\n        assert tb == tb_missing\n\n    @pytest.mark.parametrize(\"sex\", [-2, 3, \"F\"])\n    def test_bad_sex_value(self, sex):\n        entries = [FamEntry(iid=\"1\", sex=str(sex))]\n        with pytest.raises(ValueError):\n            self.get_parsed_fam(entries=entries)\n\n    def test_empty_sex_value(self):\n        entries = [FamEntry(iid=\"1\", sex=\"\")]\n        with pytest.raises(  # noqa B017\n            Exception\n        ):  # Have to be non-specific here as numpy 1.23 changed the exception type\n            self.get_parsed_fam(entries=entries)\n\n    def test_single_family_map_parent_ids(self):\n        # PAT is mapped if the individual exists in the dataset\n        entries = [FamEntry(iid=\"1\"), FamEntry(iid=\"2\", pat=\"1\")]\n        tb = self.get_parsed_fam(entries=entries)\n        assert np.array_equal(tb[1].parents, [0, -1])\n\n        # MAT 
is mapped if the individual exists in the dataset\n        entries = [FamEntry(iid=\"1\"), FamEntry(iid=\"2\", mat=\"1\")]\n        tb = self.get_parsed_fam(entries=entries)\n        assert np.array_equal(tb[1].parents, [-1, 0])\n\n        # both parent IDs are remapped if the both parents exist in the dataset\n        entries = [\n            FamEntry(iid=\"1\"),\n            FamEntry(iid=\"2\"),\n            FamEntry(iid=\"3\", pat=\"1\", mat=\"2\"),\n        ]\n        tb = self.get_parsed_fam(entries=entries)\n        assert np.array_equal(tb[2].parents, [0, 1])\n\n    def test_missing_parent_id(self):\n        # KeyError raised if at least one parent (PAT) does not exist in dataset\n        entries = [\n            FamEntry(iid=\"2\"),\n            FamEntry(iid=\"3\", pat=\"1\", mat=\"2\"),\n        ]\n        with pytest.raises(KeyError):\n            self.get_parsed_fam(entries=entries)\n\n        # KeyError raised if at least one parent (MAT) does not exist in dataset\n        entries = [\n            FamEntry(iid=\"1\"),\n            FamEntry(iid=\"3\", pat=\"1\", mat=\"2\"),\n        ]\n        with pytest.raises(KeyError):\n            self.get_parsed_fam(entries=entries)\n\n        # KeyError raised if both parents do not exist in dataset\n        entries = [FamEntry(iid=\"1\", pat=\"2\", mat=\"3\")]\n        with pytest.raises(KeyError):\n            self.get_parsed_fam(entries=entries)\n\n    def test_multiple_family_map_parent_ids(self):\n        # parents mapped correctly when the same parent ID is used in different families\n        entries = [\n            FamEntry(iid=\"2\"),\n            FamEntry(iid=\"1\"),\n            FamEntry(fid=\"1\", iid=\"2\"),\n            FamEntry(fid=\"1\", iid=\"1\"),\n            FamEntry(iid=\"3\", pat=\"1\", mat=\"2\"),\n            FamEntry(fid=\"1\", iid=\"3\", pat=\"1\", mat=\"2\"),\n        ]\n        tb = self.get_parsed_fam(entries=entries)\n        for idx in range(4):\n            assert 
np.array_equal(tb[idx].parents, [-1, -1])\n        assert np.array_equal(tb[4].parents, [1, 0])\n        assert np.array_equal(tb[5].parents, [3, 2])\n\n        # KeyError raised when FID does not match, even if parent ID matches\n        entries = [\n            FamEntry(iid=\"2\"),\n            FamEntry(iid=\"1\"),\n            FamEntry(iid=\"3\", pat=\"1\", mat=\"2\"),\n            FamEntry(\n                fid=\"1\", iid=\"1\", pat=\"2\", mat=\"3\"\n            ),  # there is no parent with FID=1, IID=3\n            FamEntry(fid=\"1\", iid=\"2\"),\n        ]\n        with pytest.raises(KeyError):\n            self.get_parsed_fam(entries)\n\n    def test_grandparents(self):\n        entries = [\n            FamEntry(iid=\"4\"),\n            FamEntry(iid=\"3\"),\n            FamEntry(iid=\"2\"),\n            FamEntry(iid=\"1\"),\n            FamEntry(iid=\"6\", pat=\"3\", mat=\"4\"),\n            FamEntry(iid=\"5\", pat=\"1\", mat=\"2\"),\n            FamEntry(iid=\"7\", pat=\"5\", mat=\"6\"),\n        ]\n        tb = self.get_parsed_fam(entries=entries)\n        assert np.array_equal(tb[4].parents, [1, 0])\n        assert np.array_equal(tb[5].parents, [3, 2])\n        assert np.array_equal(tb[6].parents, [5, 4])\n\n    def test_children_before_parents(self, tmp_path):\n        entries = [\n            FamEntry(iid=\"1\", pat=\"2\", mat=\"3\"),\n            FamEntry(iid=\"2\"),\n            FamEntry(iid=\"3\"),\n        ]\n        content = \"\\n\".join([entry.get_row() for entry in entries])\n        fam_path = f\"{tmp_path}/test.fam\"\n        with open(fam_path, \"w+\") as f:\n            f.write(content)\n            f.seek(0)\n            tb = tskit.parse_fam(f)\n\n        tc = tskit.TableCollection(1)\n        # Issue 1489 will make this better\n        tc.individuals.metadata_schema = tb.metadata_schema\n        for row in tb:\n            tc.individuals.append(row)\n        tc.tree_sequence()  # creating tree sequence should succeed\n"
  },
  {
    "path": "python/tests/test_threads.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2021 Tskit Developers\n# Copyright (c) 2016-2017 University of Oxford\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTest cases for threading enabled aspects of the API.\n\"\"\"\n\nimport platform\nimport threading\n\nimport msprime\nimport numpy as np\nimport pytest\n\nimport tests.tsutil as tsutil\nimport tskit\n\nIS_WINDOWS = platform.system() == \"Windows\"\nIS_OSX = platform.system() == \"Darwin\"\n\n\ndef run_threads(worker, num_threads):\n    results = [None for _ in range(num_threads)]\n    threads = [\n        threading.Thread(target=worker, args=(j, results)) for j in range(num_threads)\n    ]\n    for t in threads:\n        t.start()\n    for t in threads:\n        t.join()\n    return results\n\n\nclass TestLdCalculatorReplicates:\n    \"\"\"\n    Tests the LdCalculator object to ensure we get correct results\n    when using threads.\n    \"\"\"\n\n    num_test_sites = 25\n\n    
def get_tree_sequence(self):\n        ts = msprime.simulate(20, mutation_rate=10, recombination_rate=10, random_seed=8)\n        return tsutil.subsample_sites(ts, self.num_test_sites)\n\n    def test_get_r2_multiple_instances(self):\n        # This is the nominal case where we have a separate LdCalculator\n        # instance in each thread.\n        ts = self.get_tree_sequence()\n        ld_calc = tskit.LdCalculator(ts)\n        A = ld_calc.get_r2_matrix()\n        del ld_calc\n        m = A.shape[0]\n\n        def worker(thread_index, results):\n            ld_calc = tskit.LdCalculator(ts)\n            row = np.zeros(m)\n            results[thread_index] = row\n            for j in range(m):\n                row[j] = ld_calc.get_r2(thread_index, j)\n\n        results = run_threads(worker, m)\n        for j in range(m):\n            assert np.allclose(results[j], A[j])\n\n    def test_get_r2_single_instance(self):\n        # This is the degenerate case where we have a single LdCalculator\n        # instance shared by the threads. 
We should have only one thread\n        # actually executing get_r2() at one time.\n        ts = self.get_tree_sequence()\n        ld_calc = tskit.LdCalculator(ts)\n        A = ld_calc.get_r2_matrix()\n        m = A.shape[0]\n\n        def worker(thread_index, results):\n            row = np.zeros(m)\n            results[thread_index] = row\n            for j in range(m):\n                row[j] = ld_calc.get_r2(thread_index, j)\n\n        results = run_threads(worker, m)\n        for j in range(m):\n            assert np.allclose(results[j], A[j])\n\n    def test_get_r2_array_multiple_instances(self):\n        # This is the nominal case where we have a separate LdCalculator\n        # instance in each thread.\n        ts = self.get_tree_sequence()\n        ld_calc = tskit.LdCalculator(ts)\n        A = ld_calc.get_r2_matrix()\n        m = A.shape[0]\n        del ld_calc\n\n        def worker(thread_index, results):\n            ld_calc = tskit.LdCalculator(ts)\n            results[thread_index] = np.array(ld_calc.get_r2_array(thread_index))\n\n        results = run_threads(worker, m)\n        for j in range(m):\n            assert np.allclose(results[j], A[j, j + 1 :])\n\n    def test_get_r2_array_single_instance(self):\n        # This is the degenerate case where we have a single LdCalculator\n        # instance shared by the threads. We should have only one thread\n        # actually executing get_r2_array() at one time. 
Because the buffer\n        # is shared by many different instances, we can't make any assertions\n        # about the returned values --- they are essentially gibberish.\n        # However, we shouldn't crash and burn, which is what this test\n        # is here to check for.\n        ts = self.get_tree_sequence()\n        ld_calc = tskit.LdCalculator(ts)\n        m = ts.get_num_mutations()\n\n        def worker(thread_index, results):\n            results[thread_index] = ld_calc.get_r2_array(thread_index).shape\n\n        results = run_threads(worker, m)\n        for j in range(m):\n            assert results[j][0] == m - j - 1\n\n\n# Temporarily skipping these on Windows and OSX See\n# https://github.com/tskit-dev/tskit/issues/344\n# https://github.com/tskit-dev/tskit/issues/1041\n@pytest.mark.skipif(IS_WINDOWS or IS_OSX, reason=\"Can't test thread support on Windows.\")\nclass TestTables:\n    \"\"\"\n    Tests to ensure that attempts to access tables in threads correctly\n    raise an exception.\n    \"\"\"\n\n    def get_tables(self):\n        # TODO include migrations here.\n        ts = msprime.simulate(\n            100, mutation_rate=10, recombination_rate=10, random_seed=8\n        )\n        return ts.dump_tables()\n\n    def run_multiple_writers(self, writer, num_writers=32):\n        barrier = threading.Barrier(num_writers)\n\n        def writer_proxy(thread_index, results):\n            barrier.wait()\n            # Attempts to operate on a table while locked should raise a RuntimeError\n            try:\n                writer(thread_index, results)\n                results[thread_index] = 0\n            except RuntimeError:\n                results[thread_index] = 1\n\n        results = run_threads(writer_proxy, num_writers)\n        failures = sum(results)\n        successes = num_writers - failures\n        # Note: we would like to insist that #failures is > 0, but this is too\n        # stochastic to guarantee for test purposes.\n        assert 
failures >= 0\n        assert successes > 0\n\n    def run_failing_reader(self, writer, reader, num_readers=32):\n        \"\"\"\n        Runs a test in which a single writer acceses some tables\n        and a bunch of other threads try to read the data.\n        \"\"\"\n        barrier = threading.Barrier(num_readers + 1)\n\n        def writer_proxy():\n            barrier.wait()\n            writer()\n\n        def reader_proxy(thread_index, results):\n            barrier.wait()\n            # Attempts to operate on a table while locked should raise a RuntimeError\n            results[thread_index] = 0\n            try:\n                reader(thread_index, results)\n            except RuntimeError:\n                results[thread_index] = 1\n\n        writer_thread = threading.Thread(target=writer_proxy)\n        writer_thread.start()\n        results = run_threads(reader_proxy, num_readers)\n        writer_thread.join()\n\n        failures = sum(results)\n        successes = num_readers - failures\n        # Note: we would like to insist that #failures is > 0, but this is too\n        # stochastic to guarantee for test purposes.\n        assert failures >= 0\n        assert successes > 0\n\n    def test_many_simplify_all_tables(self):\n        tables = self.get_tables()\n\n        def writer(thread_index, results):\n            tables.simplify([0, 1])\n\n        self.run_multiple_writers(writer)\n\n    def test_many_sort(self):\n        tables = self.get_tables()\n\n        def writer(thread_index, results):\n            tables.sort()\n\n        self.run_multiple_writers(writer)\n\n    def run_simplify_access_table(self, table_name, col_name):\n        tables = self.get_tables()\n\n        def writer():\n            tables.simplify([0, 1])\n\n        table = getattr(tables, table_name)\n\n        def reader(thread_index, results):\n            for _ in range(100):\n                x = getattr(table, col_name)\n                assert x.shape[0] == len(table)\n\n 
       self.run_failing_reader(writer, reader)\n\n    def run_sort_access_table(self, table_name, col_name):\n        tables = self.get_tables()\n\n        def writer():\n            tables.sort()\n\n        table = getattr(tables, table_name)\n\n        def reader(thread_index, results):\n            for _ in range(100):\n                x = getattr(table, col_name)\n                assert x.shape[0] == len(table)\n\n        self.run_failing_reader(writer, reader)\n\n    def test_simplify_access_nodes(self):\n        self.run_simplify_access_table(\"nodes\", \"time\")\n\n    def test_simplify_access_edges(self):\n        self.run_simplify_access_table(\"edges\", \"left\")\n\n    def test_simplify_access_sites(self):\n        self.run_simplify_access_table(\"sites\", \"position\")\n\n    def test_simplify_access_mutations(self):\n        self.run_simplify_access_table(\"mutations\", \"site\")\n\n    def test_sort_access_nodes(self):\n        self.run_sort_access_table(\"nodes\", \"time\")\n\n    def test_sort_access_edges(self):\n        self.run_sort_access_table(\"edges\", \"left\")\n\n    def test_sort_access_sites(self):\n        self.run_sort_access_table(\"sites\", \"position\")\n\n    def test_sort_access_mutations(self):\n        self.run_sort_access_table(\"mutations\", \"site\")\n"
  },
  {
    "path": "python/tests/test_topology.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2024 Tskit Developers\n# Copyright (c) 2016-2017 University of Oxford\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTest cases for the supported topological variations and operations.\n\"\"\"\n\nimport functools\nimport io\nimport itertools\nimport json\nimport random\nimport sys\nimport unittest\n\nimport msprime\nimport numpy as np\nimport pytest\n\nimport _tskit\nimport tests as tests\nimport tests.test_wright_fisher as wf\nimport tests.tsutil as tsutil\nimport tskit\nimport tskit.provenance as provenance\n\n\ndef simple_keep_intervals(tables, intervals, simplify=True, record_provenance=True):\n    \"\"\"\n    Simple Python implementation of keep_intervals.\n    \"\"\"\n    ts = tables.tree_sequence()\n    last_stop = 0\n    for start, stop in intervals:\n        if start < 0 or stop > ts.sequence_length:\n            raise ValueError(\"Slice bounds must be within the existing tree 
sequence\")\n        if start >= stop:\n            raise ValueError(\"Interval error: start must be < stop\")\n        if start < last_stop:\n            raise ValueError(\"Intervals must be disjoint\")\n        last_stop = stop\n    tables.edges.clear()\n    tables.sites.clear()\n    tables.mutations.clear()\n    for edge in ts.edges():\n        for interval_left, interval_right in intervals:\n            if not (edge.right <= interval_left or edge.left >= interval_right):\n                left = max(interval_left, edge.left)\n                right = min(interval_right, edge.right)\n                tables.edges.append(edge.replace(left=left, right=right))\n    for site in ts.sites():\n        for interval_left, interval_right in intervals:\n            if interval_left <= site.position < interval_right:\n                site_id = tables.sites.append(site)\n                for m in site.mutations:\n                    tables.mutations.append(m.replace(site=site_id, parent=tskit.NULL))\n    tables.build_index()\n    tables.compute_mutation_parents()\n    tables.sort()\n    if simplify:\n        tables.simplify(record_provenance=False)\n    if record_provenance:\n        parameters = {\"command\": \"keep_intervals\", \"TODO\": \"add parameters\"}\n        tables.provenances.add_row(\n            record=json.dumps(provenance.get_provenance_dict(parameters))\n        )\n\n\ndef generate_segments(n, sequence_length=100, seed=None):\n    rng = random.Random(seed)\n    segs = []\n    for j in range(n):\n        left = rng.randint(0, sequence_length - 1)\n        right = rng.randint(left + 1, sequence_length)\n        assert left < right\n        segs.append(tests.Segment(left, right, j))\n    return segs\n\n\nclass ExampleTopologyMixin:\n    \"\"\"\n    Some example topologies for tests cases.\n    \"\"\"\n\n    def test_single_coalescent_tree(self):\n        ts = msprime.simulate(10, random_seed=1, length=10)\n        self.verify(ts)\n\n    def 
test_coalescent_trees(self):\n        ts = msprime.simulate(8, recombination_rate=5, random_seed=1, length=2)\n        assert ts.num_trees > 2\n        self.verify(ts)\n\n    def test_coalescent_trees_internal_samples(self):\n        ts = msprime.simulate(8, recombination_rate=5, random_seed=10, length=2)\n        assert ts.num_trees > 2\n        self.verify(tsutil.jiggle_samples(ts))\n\n    def test_coalescent_trees_all_samples(self):\n        ts = msprime.simulate(8, recombination_rate=5, random_seed=10, length=2)\n        assert ts.num_trees > 2\n        tables = ts.dump_tables()\n        flags = np.zeros_like(tables.nodes.flags) + tskit.NODE_IS_SAMPLE\n        tables.nodes.flags = flags\n        self.verify(tables.tree_sequence())\n\n    def test_wright_fisher_trees_unsimplified(self):\n        tables = wf.wf_sim(10, 5, deep_history=False, seed=2)\n        tables.sort()\n        ts = tables.tree_sequence()\n        self.verify(ts)\n\n    def test_wright_fisher_trees_simplified(self):\n        tables = wf.wf_sim(10, 5, deep_history=False, seed=1)\n        tables.sort()\n        ts = tables.tree_sequence()\n        ts = ts.simplify()\n        self.verify(ts)\n\n    def test_wright_fisher_trees_simplified_one_gen(self):\n        tables = wf.wf_sim(10, 1, deep_history=False, seed=1)\n        tables.sort()\n        ts = tables.tree_sequence()\n        ts = ts.simplify()\n        self.verify(ts)\n\n    def test_nonbinary_trees(self):\n        demographic_events = [\n            msprime.SimpleBottleneck(time=1.0, population=0, proportion=0.95)\n        ]\n        ts = msprime.simulate(\n            20,\n            recombination_rate=10,\n            mutation_rate=5,\n            demographic_events=demographic_events,\n            random_seed=7,\n        )\n        found = False\n        for e in ts.edgesets():\n            if len(e.children) > 2:\n                found = True\n        assert found\n        self.verify(ts)\n\n    def test_many_multiroot_trees(self):\n 
       ts = msprime.simulate(7, recombination_rate=1, random_seed=10)\n        assert ts.num_trees > 3\n        ts = ts.decapitate(np.max(ts.tables.nodes.time) / 2)\n        self.verify(ts)\n\n    def test_multiroot_tree(self):\n        ts = msprime.simulate(15, random_seed=10)\n        ts = ts.decapitate(np.max(ts.tables.nodes.time) / 2)\n        self.verify(ts)\n\n    def test_all_missing_data(self):\n        tables = tskit.TableCollection(1)\n        for _ in range(10):\n            tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        self.verify(tables.tree_sequence())\n\n\nclass TestOverlappingSegments:\n    \"\"\"\n    Tests for the overlapping segments algorithm required for simplify.\n    This test probably belongs somewhere else.\n    \"\"\"\n\n    def test_random(self):\n        segs = generate_segments(10, 20, 1)\n        for left, right, X in tests.overlapping_segments(segs):\n            assert right > left\n            assert len(X) > 0\n\n    def test_empty(self):\n        ret = list(tests.overlapping_segments([]))\n        assert len(ret) == 0\n\n    def test_single_interval(self):\n        for j in range(1, 10):\n            segs = [tests.Segment(0, 1, j) for _ in range(j)]\n            ret = list(tests.overlapping_segments(segs))\n            assert len(ret) == 1\n            left, right, X = ret[0]\n            assert left == 0\n            assert right == 1\n            assert sorted(segs) == sorted(X)\n\n    def test_stairs_down(self):\n        segs = [tests.Segment(0, 1, 0), tests.Segment(0, 2, 1), tests.Segment(0, 3, 2)]\n        ret = list(tests.overlapping_segments(segs))\n        assert len(ret) == 3\n\n        left, right, X = ret[0]\n        assert left == 0\n        assert right == 1\n        assert sorted(X) == sorted(segs)\n\n        left, right, X = ret[1]\n        assert left == 1\n        assert right == 2\n        assert sorted(X) == sorted(segs[1:])\n\n        left, right, X = ret[2]\n        assert left == 2\n       
 assert right == 3\n        assert sorted(X) == sorted(segs[2:])\n\n    def test_stairs_up(self):\n        segs = [tests.Segment(0, 3, 0), tests.Segment(1, 3, 1), tests.Segment(2, 3, 2)]\n        ret = list(tests.overlapping_segments(segs))\n        assert len(ret) == 3\n\n        left, right, X = ret[0]\n        assert left == 0\n        assert right == 1\n        assert X == segs[:1]\n\n        left, right, X = ret[1]\n        assert left == 1\n        assert right == 2\n        assert sorted(X) == sorted(segs[:2])\n\n        left, right, X = ret[2]\n        assert left == 2\n        assert right == 3\n        assert sorted(X) == sorted(segs)\n\n    def test_pyramid(self):\n        segs = [tests.Segment(0, 5, 0), tests.Segment(1, 4, 1), tests.Segment(2, 3, 2)]\n        ret = list(tests.overlapping_segments(segs))\n        assert len(ret) == 5\n\n        left, right, X = ret[0]\n        assert left == 0\n        assert right == 1\n        assert X == segs[:1]\n\n        left, right, X = ret[1]\n        assert left == 1\n        assert right == 2\n        assert sorted(X) == sorted(segs[:2])\n\n        left, right, X = ret[2]\n        assert left == 2\n        assert right == 3\n        assert sorted(X) == sorted(segs)\n\n        left, right, X = ret[3]\n        assert left == 3\n        assert right == 4\n        assert sorted(X) == sorted(segs[:2])\n\n        left, right, X = ret[4]\n        assert left == 4\n        assert right == 5\n        assert sorted(X) == sorted(segs[:1])\n\n    def test_gap(self):\n        segs = [tests.Segment(0, 2, 0), tests.Segment(3, 4, 1)]\n        ret = list(tests.overlapping_segments(segs))\n        assert len(ret) == 2\n\n        left, right, X = ret[0]\n        assert left == 0\n        assert right == 2\n        assert X == segs[:1]\n\n        left, right, X = ret[1]\n        assert left == 3\n        assert right == 4\n        assert X == segs[1:]\n\n\nclass TopologyTestCase:\n    \"\"\"\n    Superclass of test cases 
containing common utilities.\n    \"\"\"\n\n    random_seed = 123456\n\n    def assert_haplotypes_equal(self, ts1, ts2):\n        h1 = list(ts1.haplotypes())\n        h2 = list(ts2.haplotypes())\n        assert h1 == h2\n\n    def assert_variants_equal(self, ts1, ts2):\n        for v1, v2 in zip(\n            ts1.variants(copy=False),\n            ts2.variants(copy=False),\n        ):\n            assert v1.alleles == v2.alleles\n            assert np.array_equal(v1.genotypes, v2.genotypes)\n\n    def check_num_samples(self, ts, x):\n        \"\"\"\n        Compare against x, a list of tuples of the form\n        `(tree number, parent, number of samples)`.\n        \"\"\"\n        k = 0\n        tss = ts.trees()\n        t = next(tss)\n        for j, node, nl in x:\n            while k < j:\n                t = next(tss)\n                k += 1\n            assert nl == t.num_samples(node)\n\n    def check_num_tracked_samples(self, ts, tracked_samples, x):\n        k = 0\n        tss = ts.trees(tracked_samples=tracked_samples)\n        t = next(tss)\n        for j, node, nl in x:\n            while k < j:\n                t = next(tss)\n                k += 1\n            assert nl == t.num_tracked_samples(node)\n\n    def check_sample_iterator(self, ts, x):\n        \"\"\"\n        Compare against x, a list of tuples of the form\n        `(tree number, node, sample ID list)`.\n        \"\"\"\n        k = 0\n        tss = ts.trees(sample_lists=True)\n        t = next(tss)\n        for j, node, samples in x:\n            while k < j:\n                t = next(tss)\n                k += 1\n            for u, v in zip(samples, t.samples(node)):\n                assert u == v\n\n\nclass TestZeroRoots:\n    \"\"\"\n    Tests that for the case in which we have zero samples and therefore\n    zero roots in our trees.\n    \"\"\"\n\n    def remove_samples(self, ts):\n        tables = ts.dump_tables()\n        tables.nodes.flags = np.zeros_like(tables.nodes.flags)\n        
return tables.tree_sequence()\n\n    def verify(self, ts, no_root_ts):\n        assert ts.num_trees == no_root_ts.num_trees\n        for tree, no_root in zip(ts.trees(), no_root_ts.trees()):\n            assert no_root.num_roots == 0\n            assert no_root.left_root == tskit.NULL\n            assert no_root.roots == []\n            assert tree.parent_dict == no_root.parent_dict\n\n    def test_single_tree(self):\n        ts = msprime.simulate(10, random_seed=1)\n        no_root_ts = self.remove_samples(ts)\n        assert ts.num_trees == 1\n        self.verify(ts, no_root_ts)\n\n    def test_multiple_trees(self):\n        ts = msprime.simulate(10, recombination_rate=2, random_seed=1)\n        no_root_ts = self.remove_samples(ts)\n        assert ts.num_trees > 1\n        self.verify(ts, no_root_ts)\n\n\nclass TestEmptyTreeSequences(TopologyTestCase):\n    \"\"\"\n    Tests covering tree sequences that have zero edges.\n    \"\"\"\n\n    def test_zero_nodes(self):\n        tables = tskit.TableCollection(1)\n        ts = tables.tree_sequence()\n        assert ts.sequence_length == 1\n        assert ts.num_trees == 1\n        assert ts.num_nodes == 0\n        assert ts.num_edges == 0\n        t = next(ts.trees())\n        assert t.index == 0\n        assert t.left_root == tskit.NULL\n        assert t.interval == (0, 1)\n        assert t.roots == []\n        assert t.root == tskit.NULL\n        assert t.parent_dict == {}\n        assert t.virtual_root == 0\n        assert t.left_child(t.virtual_root) == -1\n        assert t.right_child(t.virtual_root) == -1\n        assert list(t.nodes()) == []\n        assert list(ts.haplotypes()) == []\n        assert list(ts.variants()) == []\n        methods = [\n            t.parent,\n            t.left_child,\n            t.right_child,\n            t.left_sib,\n            t.right_sib,\n            t.num_children,\n        ]\n        for method in methods:\n            for u in [-1, 1, 100]:\n                with 
pytest.raises(ValueError):\n                    method(u)\n        tsp = ts.simplify()\n        assert tsp.num_nodes == 0\n        assert tsp.num_edges == 0\n\n    def test_one_node_zero_samples(self):\n        tables = tskit.TableCollection(sequence_length=1)\n        tables.nodes.add_row(time=0, flags=0)\n        # Without a sequence length this should fail.\n        ts = tables.tree_sequence()\n        assert ts.sequence_length == 1\n        assert ts.num_trees == 1\n        assert ts.num_nodes == 1\n        assert ts.sample_size == 0\n        assert ts.num_edges == 0\n        assert ts.num_sites == 0\n        assert ts.num_mutations == 0\n        t = next(ts.trees())\n        assert t.index == 0\n        assert t.left_root == tskit.NULL\n        assert t.interval == (0, 1)\n        assert t.roots == []\n        assert t.root == tskit.NULL\n        assert t.virtual_root == 1\n        assert t.parent_dict == {}\n        assert list(t.nodes()) == []\n        assert list(ts.haplotypes()) == []\n        assert list(ts.variants()) == []\n        methods = [\n            t.parent,\n            t.left_child,\n            t.right_child,\n            t.left_sib,\n            t.right_sib,\n            t.num_children,\n        ]\n        for method in methods:\n            expected = tskit.NULL if method != t.num_children else 0\n            assert method(0) == expected\n            for u in [-1, 2, 100]:\n                with pytest.raises(ValueError):\n                    method(u)\n\n    def test_one_node_zero_samples_sites(self):\n        tables = tskit.TableCollection(sequence_length=1)\n        tables.nodes.add_row(time=0, flags=0)\n        tables.sites.add_row(position=0.5, ancestral_state=\"0\")\n        tables.mutations.add_row(site=0, derived_state=\"1\", node=0)\n        ts = tables.tree_sequence()\n        assert ts.sequence_length == 1\n        assert ts.num_trees == 1\n        assert ts.num_nodes == 1\n        assert ts.sample_size == 0\n        assert 
ts.num_edges == 0\n        assert ts.num_sites == 1\n        assert ts.num_mutations == 1\n        t = next(ts.trees())\n        assert t.index == 0\n        assert t.left_root == tskit.NULL\n        assert t.interval == (0, 1)\n        assert t.roots == []\n        assert t.root == tskit.NULL\n        assert t.parent_dict == {}\n        assert len(list(t.sites())) == 1\n        assert list(t.nodes()) == []\n        assert list(ts.haplotypes()) == []\n        assert len(list(ts.variants())) == 1\n        tsp = ts.simplify()\n        assert tsp.num_nodes == 0\n        assert tsp.num_edges == 0\n\n    def test_one_node_one_sample(self):\n        tables = tskit.TableCollection(sequence_length=1)\n        tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE)\n        ts = tables.tree_sequence()\n        assert ts.sequence_length == 1\n        assert ts.num_trees == 1\n        assert ts.num_nodes == 1\n        assert ts.sample_size == 1\n        assert ts.num_edges == 0\n        t = next(ts.trees())\n        assert t.index == 0\n        assert t.left_root == 0\n        assert t.interval == (0, 1)\n        assert t.roots == [0]\n        assert t.root == 0\n        assert t.virtual_root == 1\n        assert t.parent_dict == {}\n        assert list(t.nodes()) == [0]\n        assert list(ts.haplotypes(isolated_as_missing=False)) == [\"\"]\n        assert list(ts.variants()) == []\n        methods = [\n            t.parent,\n            t.left_child,\n            t.right_child,\n            t.left_sib,\n            t.right_sib,\n            t.num_children,\n        ]\n        for method in methods:\n            expected = tskit.NULL if method != t.num_children else 0\n            assert method(0) == expected\n            for u in [-1, 2, 100]:\n                with pytest.raises(ValueError):\n                    method(u)\n        tsp = ts.simplify()\n        assert tsp.num_nodes == 1\n        assert tsp.num_edges == 0\n\n    def test_one_node_one_sample_sites(self):\n    
    tables = tskit.TableCollection(sequence_length=1)\n        tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE)\n        tables.sites.add_row(position=0.5, ancestral_state=\"0\")\n        tables.mutations.add_row(site=0, derived_state=\"1\", node=0)\n        ts = tables.tree_sequence()\n        assert ts.sequence_length == 1\n        assert ts.num_trees == 1\n        assert ts.num_nodes == 1\n        assert ts.sample_size == 1\n        assert ts.num_edges == 0\n        assert ts.num_sites == 1\n        assert ts.num_mutations == 1\n        t = next(ts.trees())\n        assert t.index == 0\n        assert t.left_root == 0\n        assert t.interval == (0, 1)\n        assert t.roots == [0]\n        assert t.root == 0\n        assert t.virtual_root == 1\n        assert t.parent_dict == {}\n        assert list(t.nodes()) == [0]\n        assert list(ts.haplotypes(isolated_as_missing=False)) == [\"1\"]\n        assert len(list(ts.variants())) == 1\n        methods = [\n            t.parent,\n            t.left_child,\n            t.right_child,\n            t.left_sib,\n            t.right_sib,\n            t.num_children,\n        ]\n        for method in methods:\n            expected = tskit.NULL if method != t.num_children else 0\n            assert method(0) == expected\n            for u in [-1, 2, 100]:\n                with pytest.raises(ValueError):\n                    method(u)\n        tsp = ts.simplify(filter_sites=False)\n        assert tsp.num_nodes == 1\n        assert tsp.num_edges == 0\n        assert tsp.num_sites == 1\n\n\nclass TestHoleyTreeSequences(TopologyTestCase):\n    \"\"\"\n    Tests for tree sequences in which we have partial (or no) trees defined\n    over some of the sequence.\n    \"\"\"\n\n    def verify_trees(self, ts, expected):\n        observed = []\n        for t in ts.trees():\n            observed.append((t.interval, t.parent_dict))\n        assert expected == observed\n        # Test simple algorithm also.\n        
observed = []\n        for interval, parent in tsutil.algorithm_T(ts):\n            parent_dict = {j: parent[j] for j in range(ts.num_nodes) if parent[j] >= 0}\n            observed.append((interval, parent_dict))\n        assert expected == observed\n\n    def verify_zero_roots(self, ts):\n        for tree in ts.trees():\n            assert tree.num_roots == 0\n            assert tree.left_root == tskit.NULL\n            assert tree.roots == []\n\n    def test_simple_hole(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time\n        0   1           0\n        1   1           0\n        2   0           1\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       1       2       0\n        2       3       2       0\n        0       1       2       1\n        2       3       2       1\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes, edges, strict=False)\n        expected = [((0, 1), {0: 2, 1: 2}), ((1, 2), {}), ((2, 3), {0: 2, 1: 2})]\n        self.verify_trees(ts, expected)\n\n    def test_simple_hole_zero_roots(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time\n        0   0           0\n        1   0           0\n        2   0           1\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       1       2       0\n        2       3       2       0\n        0       1       2       1\n        2       3       2       1\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes, edges, strict=False)\n        expected = [((0, 1), {0: 2, 1: 2}), ((1, 2), {}), ((2, 3), {0: 2, 1: 2})]\n        self.verify_trees(ts, expected)\n        self.verify_zero_roots(ts)\n\n    def test_initial_gap(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time\n        0   1           0\n        1   1           0\n 
       2   0           1\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        1       2       2       0,1\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes, edges, strict=False)\n        expected = [((0, 1), {}), ((1, 2), {0: 2, 1: 2})]\n        self.verify_trees(ts, expected)\n\n    def test_initial_gap_zero_roots(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time\n        0   0           0\n        1   0           0\n        2   0           1\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        1       2       2       0,1\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes, edges, strict=False)\n        expected = [((0, 1), {}), ((1, 2), {0: 2, 1: 2})]\n        self.verify_trees(ts, expected)\n        self.verify_zero_roots(ts)\n\n    def test_final_gap(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time\n        0   1           0\n        1   1           0\n        2   0           1\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       2       2       0,1\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes, edges, sequence_length=3, strict=False)\n        expected = [((0, 2), {0: 2, 1: 2}), ((2, 3), {})]\n        self.verify_trees(ts, expected)\n\n    def test_final_gap_zero_roots(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time\n        0   0           0\n        1   0           0\n        2   0           1\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       2       2       0,1\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes, edges, sequence_length=3, strict=False)\n        expected = [((0, 
2), {0: 2, 1: 2}), ((2, 3), {})]\n        self.verify_trees(ts, expected)\n        self.verify_zero_roots(ts)\n\n    def test_initial_and_final_gap(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time\n        0   1           0\n        1   1           0\n        2   0           1\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        1       2       2       0,1\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes, edges, sequence_length=3, strict=False)\n        expected = [((0, 1), {}), ((1, 2), {0: 2, 1: 2}), ((2, 3), {})]\n        self.verify_trees(ts, expected)\n\n    def test_initial_and_final_gap_zero_roots(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time\n        0   0           0\n        1   0           0\n        2   0           1\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        1       2       2       0,1\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes, edges, sequence_length=3, strict=False)\n        expected = [((0, 1), {}), ((1, 2), {0: 2, 1: 2}), ((2, 3), {})]\n        self.verify_trees(ts, expected)\n        self.verify_zero_roots(ts)\n\n\nclass TestTsinferExamples(TopologyTestCase):\n    \"\"\"\n    Test cases on troublesome topology examples that arose from tsinfer.\n    \"\"\"\n\n    def test_no_last_tree(self):\n        # The last tree was not being generated here because of a bug in\n        # the low-level tree generation code.\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   population      time\n        0       1       -1              3.00000000000000\n        1       1       -1              2.00000000000000\n        2       1       -1              2.00000000000000\n        3       1       -1              2.00000000000000\n        4       1       -1              
2.00000000000000\n        5       1       -1              1.00000000000000\n        6       1       -1              1.00000000000000\n        7       1       -1              1.00000000000000\n        8       1       -1              1.00000000000000\n        9       1       -1              1.00000000000000\n        10      1       -1              1.00000000000000\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        id      left            right           parent  child\n        0       62291.41659631  79679.17408763  1       5\n        1       62291.41659631  62374.60889677  1       6\n        2       122179.36037089 138345.43104411 1       7\n        3       67608.32330402  79679.17408763  1       8\n        4       122179.36037089 138345.43104411 1       8\n        5       62291.41659631  79679.17408763  1       9\n        6       126684.47550333 138345.43104411 1       10\n        7       23972.05905068  62291.41659631  2       5\n        8       79679.17408763  82278.53390076  2       5\n        9       23972.05905068  62291.41659631  2       6\n        10      79679.17408763  110914.43816806 2       7\n        11      145458.28890561 189765.31932273 2       7\n        12      79679.17408763  110914.43816806 2       8\n        13      145458.28890561 200000.00000000 2       8\n        14      23972.05905068  62291.41659631  2       9\n        15      79679.17408763  110914.43816806 2       9\n        16      145458.28890561 145581.18329797 2       10\n        17      4331.62138785   23972.05905068  3       6\n        18      4331.62138785   23972.05905068  3       9\n        19      110914.43816806 122179.36037089 4       7\n        20      138345.43104411 145458.28890561 4       7\n        21      110914.43816806 122179.36037089 4       8\n        22      138345.43104411 145458.28890561 4       8\n        23      110914.43816806 112039.30503475 4       9\n        24      138345.43104411 145458.28890561 4       10\n        25    
  0.00000000      200000.00000000 0       1\n        26      0.00000000      200000.00000000 0       2\n        27      0.00000000      200000.00000000 0       3\n        28      0.00000000      200000.00000000 0       4\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes, edges, sequence_length=200000, strict=False)\n        pts = tests.PythonTreeSequence(ts)\n        num_trees = 0\n        for _ in pts.trees():\n            num_trees += 1\n        assert num_trees == ts.num_trees\n        n = 0\n        for pt, t in zip(pts.trees(), ts.trees()):\n            assert (pt.left, pt.right) == t.interval\n            for j in range(ts.num_nodes):\n                assert pt.parent[j] == t.parent(j)\n                assert pt.left_child[j] == t.left_child(j)\n                assert pt.right_child[j] == t.right_child(j)\n                assert pt.left_sib[j] == t.left_sib(j)\n                assert pt.right_sib[j] == t.right_sib(j)\n                assert pt.num_children[j] == t.num_children(j)\n            n += 1\n        assert n == num_trees\n        intervals = [t.interval for t in ts.trees()]\n        assert intervals[0][0] == 0\n        assert intervals[-1][-1] == ts.sequence_length\n\n\nclass TestRecordSquashing(TopologyTestCase):\n    \"\"\"\n    Tests that we correctly squash adjacent equal records together.\n    \"\"\"\n\n    def test_single_record(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time\n        0   1           0\n        1   1           1\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       1       1       0\n        1       2       1       0\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes, edges, strict=False)\n        tss, node_map = ts.simplify(map_nodes=True)\n        assert list(node_map) == [0, 1]\n        assert tss.tables.nodes == ts.tables.nodes\n        simplified_edges = list(tss.edges())\n      
  assert len(simplified_edges) == 1\n        e = simplified_edges[0]\n        assert e.left == 0\n        assert e.right == 2\n\n    def test_single_tree(self):\n        ts = msprime.simulate(10, random_seed=self.random_seed)\n        ts_redundant = tsutil.insert_redundant_breakpoints(ts)\n        tss = ts_redundant.simplify()\n        assert tss.tables.nodes == ts.tables.nodes\n        assert tss.tables.edges == ts.tables.edges\n\n    def test_many_trees(self):\n        ts = msprime.simulate(20, recombination_rate=5, random_seed=self.random_seed)\n        assert ts.num_trees > 2\n        ts_redundant = tsutil.insert_redundant_breakpoints(ts)\n        tss = ts_redundant.simplify()\n        assert tss.tables.nodes == ts.tables.nodes\n        assert tss.tables.edges == ts.tables.edges\n\n\nclass TestRedundantBreakpoints(TopologyTestCase):\n    \"\"\"\n    Tests for dealing with redundant breakpoints within the tree sequence.\n    These are records that may be squashed together into a single record.\n    \"\"\"\n\n    def test_single_tree(self):\n        ts = msprime.simulate(10, random_seed=self.random_seed)\n        ts_redundant = tsutil.insert_redundant_breakpoints(ts)\n        assert ts.sample_size == ts_redundant.sample_size\n        assert ts.sequence_length == ts_redundant.sequence_length\n        assert ts_redundant.num_trees == 2\n        trees = [t.parent_dict for t in ts_redundant.trees()]\n        assert len(trees) == 2\n        assert trees[0] == trees[1]\n        assert [t.parent_dict for t in ts.trees()][0] == trees[0]\n\n    def test_many_trees(self):\n        ts = msprime.simulate(20, recombination_rate=5, random_seed=self.random_seed)\n        assert ts.num_trees > 2\n        ts_redundant = tsutil.insert_redundant_breakpoints(ts)\n        assert ts.sample_size == ts_redundant.sample_size\n        assert ts.sequence_length == ts_redundant.sequence_length\n        assert ts_redundant.num_trees > ts.num_trees\n        assert ts_redundant.num_edges > 
ts.num_edges\n        redundant_trees = ts_redundant.trees()\n        redundant_t = next(redundant_trees)\n        comparisons = 0\n        for t in ts.trees():\n            while (\n                redundant_t is not None\n                and redundant_t.interval.right <= t.interval.right\n            ):\n                assert t.parent_dict == redundant_t.parent_dict\n                comparisons += 1\n                redundant_t = next(redundant_trees, None)\n        assert comparisons == ts_redundant.num_trees\n\n\nclass TestUnaryNodes(TopologyTestCase):\n    \"\"\"\n    Tests for situations in which we have unary nodes in the tree sequence.\n    \"\"\"\n\n    def test_simple_case(self):\n        # Simple case where we have n = 2 and some unary nodes.\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       0           1\n        3       0           1\n        4       0           2\n        5       0           3\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       1       2       0\n        0       1       3       1\n        0       1       4       2,3\n        0       1       5       4\n        \"\"\"\n        )\n        sites = \"position    ancestral_state\\n\"\n        mutations = \"site    node    derived_state\\n\"\n        for j in range(5):\n            position = j * 1 / 5\n            sites += f\"{position} 0\\n\"\n            mutations += f\"{j} {j} 1\\n\"\n        ts = tskit.load_text(\n            nodes=nodes,\n            edges=edges,\n            sites=io.StringIO(sites),\n            mutations=io.StringIO(mutations),\n            strict=False,\n        )\n\n        assert ts.sample_size == 2\n        assert ts.num_nodes == 6\n        assert ts.num_trees == 1\n        assert ts.num_sites == 5\n        assert ts.num_mutations == 5\n        assert 
len(list(ts.edge_diffs())) == ts.num_trees\n        t = next(ts.trees())\n        assert t.parent_dict == {0: 2, 1: 3, 2: 4, 3: 4, 4: 5}\n        assert t.mrca(0, 1) == 4\n        assert t.mrca(0, 2) == 2\n        assert t.mrca(0, 4) == 4\n        assert t.mrca(0, 5) == 5\n        assert t.mrca(0, 3) == 4\n        H = list(ts.haplotypes())\n        assert H[0] == \"10101\"\n        assert H[1] == \"01011\"\n\n    def test_ladder_tree(self):\n        # We have a single tree with a long ladder of unary nodes along a path\n        num_unary_nodes = 30\n        n = 2\n        nodes = \"\"\"\\\n            is_sample   time\n            1           0\n            1           0\n        \"\"\"\n        edges = \"\"\"\\\n            left right parent child\n            0    1     2      0\n        \"\"\"\n        for j in range(num_unary_nodes + 2):\n            nodes += f\"0 {j + 2}\\n\"\n        for j in range(num_unary_nodes):\n            edges += f\"0 1 {n + j + 1} {n + j}\\n\"\n        root = num_unary_nodes + 3\n        root_time = num_unary_nodes + 3\n        edges += f\"0    1     {root}      1,{num_unary_nodes + 2}\\n\"\n        ts = tskit.load_text(io.StringIO(nodes), io.StringIO(edges), strict=False)\n        t = ts.first()\n        assert t.mrca(0, 1) == root\n        assert t.tmrca(0, 1) == root_time\n        ts_simplified, node_map = ts.simplify(map_nodes=True)\n        test_map = [tskit.NULL for _ in range(ts.num_nodes)]\n        test_map[0] = 0\n        test_map[1] = 1\n        test_map[root] = 2\n        assert list(node_map) == test_map\n        assert ts_simplified.num_edges == 2\n        t = ts_simplified.first()\n        assert t.mrca(0, 1) == 2\n        assert t.tmrca(0, 1) == root_time\n        ts_simplified = ts.simplify(keep_unary=True, record_provenance=False)\n        assert ts_simplified.tables == ts.tables\n\n    def verify_unary_tree_sequence(self, ts):\n        \"\"\"\n        Take the specified tree sequence and produce an equivalent in 
which\n        unary records have been interspersed, every other with an associated individual\n        \"\"\"\n        assert ts.num_trees > 2\n        assert ts.num_mutations > 2\n        tables = ts.dump_tables()\n        next_node = ts.num_nodes\n        node_times = {j: node.time for j, node in enumerate(ts.nodes())}\n        edges = []\n        for i, e in enumerate(ts.edges()):\n            node = ts.node(e.parent)\n            t = node.time - 1e-14  # Arbitrary small value.\n            next_node = len(tables.nodes)\n            indiv = tables.individuals.add_row() if i % 2 == 0 else tskit.NULL\n            tables.nodes.add_row(time=t, population=node.population, individual=indiv)\n            edges.append(\n                tskit.Edge(left=e.left, right=e.right, parent=next_node, child=e.child)\n            )\n            node_times[next_node] = t\n            edges.append(\n                tskit.Edge(left=e.left, right=e.right, parent=e.parent, child=next_node)\n            )\n        edges.sort(key=lambda e: node_times[e.parent])\n        tables.edges.reset()\n        for e in edges:\n            tables.edges.append(e)\n        ts_new = tables.tree_sequence()\n        assert ts_new.num_edges > ts.num_edges\n        self.assert_haplotypes_equal(ts, ts_new)\n        self.assert_variants_equal(ts, ts_new)\n        ts_simplified = ts_new.simplify()\n        assert list(ts_simplified.records()) == list(ts.records())\n        self.assert_haplotypes_equal(ts, ts_simplified)\n        self.assert_variants_equal(ts, ts_simplified)\n        assert len(list(ts.edge_diffs())) == ts.num_trees\n        assert 0 < ts_new.num_individuals < ts_new.num_nodes\n\n        for params in [\n            {\"keep_unary\": False, \"keep_unary_in_individuals\": False},\n            {\"keep_unary\": True, \"keep_unary_in_individuals\": False},\n            {\"keep_unary\": False, \"keep_unary_in_individuals\": True},\n        ]:\n            s = tests.Simplifier(ts_new, 
ts_new.samples(), **params)\n            py_ts, py_node_map = s.simplify()\n            lib_ts, lib_node_map = ts_new.simplify(map_nodes=True, **params)\n            py_tables = py_ts.dump_tables()\n            lib_tables = lib_ts.dump_tables()\n            lib_tables.assert_equals(py_tables, ignore_provenance=True)\n            assert np.all(lib_node_map == py_node_map)\n\n    def test_binary_tree_sequence_unary_nodes(self):\n        ts = msprime.simulate(\n            20, recombination_rate=5, mutation_rate=5, random_seed=self.random_seed\n        )\n        self.verify_unary_tree_sequence(ts)\n\n    def test_nonbinary_tree_sequence_unary_nodes(self):\n        demographic_events = [\n            msprime.SimpleBottleneck(time=1.0, population=0, proportion=0.95)\n        ]\n        ts = msprime.simulate(\n            20,\n            recombination_rate=10,\n            mutation_rate=5,\n            demographic_events=demographic_events,\n            random_seed=self.random_seed,\n        )\n        found = False\n        for r in ts.edgesets():\n            if len(r.children) > 2:\n                found = True\n        assert found\n        self.verify_unary_tree_sequence(ts)\n\n\nclass TestGeneralSamples(TopologyTestCase):\n    \"\"\"\n    Test cases in which we have samples at arbitrary nodes (i.e., not at\n    {0,...,n - 1}).\n    \"\"\"\n\n    def test_simple_case(self):\n        # Simple case where we have n = 3 and samples starting at n.\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       0           2\n        1       0           1\n        2       1           0\n        3       1           0\n        4       1           0\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       1       1       2,3\n        0       1       0       1,4\n        \"\"\"\n        )\n        sites = io.StringIO(\n            \"\"\"\\\n        position   
 ancestral_state\n        0.1     0\n        0.2     0\n        0.3     0\n        0.4     0\n        \"\"\"\n        )\n        mutations = io.StringIO(\n            \"\"\"\\\n        site    node    derived_state\n        0       2       1\n        1       3       1\n        2       4       1\n        3       1       1\n        \"\"\"\n        )\n        ts = tskit.load_text(\n            nodes=nodes, edges=edges, sites=sites, mutations=mutations, strict=False\n        )\n\n        assert ts.sample_size == 3\n        assert list(ts.samples()) == [2, 3, 4]\n        assert ts.num_nodes == 5\n        assert ts.num_nodes == 5\n        assert ts.num_sites == 4\n        assert ts.num_mutations == 4\n        assert len(list(ts.edge_diffs())) == ts.num_trees\n        t = next(ts.trees())\n        assert t.root == 0\n        assert t.parent_dict == {1: 0, 2: 1, 3: 1, 4: 0}\n        H = list(ts.haplotypes())\n        assert H[0] == \"1001\"\n        assert H[1] == \"0101\"\n        assert H[2] == \"0010\"\n\n        tss, node_map = ts.simplify(map_nodes=True)\n        assert list(node_map) == [4, 3, 0, 1, 2]\n        # We should have the same tree sequence just with canonicalised nodes.\n        assert tss.sample_size == 3\n        assert list(tss.samples()) == [0, 1, 2]\n        assert tss.num_nodes == 5\n        assert tss.num_trees == 1\n        assert tss.num_sites == 4\n        assert tss.num_mutations == 4\n        assert len(list(ts.edge_diffs())) == ts.num_trees\n        t = next(tss.trees())\n        assert t.root == 4\n        assert t.parent_dict == {0: 3, 1: 3, 2: 4, 3: 4}\n        H = list(tss.haplotypes())\n        assert H[0] == \"1001\"\n        assert H[1] == \"0101\"\n        assert H[2] == \"0010\"\n\n    def verify_permuted_nodes(self, ts):\n        \"\"\"\n        Take the specified tree sequence and permute the nodes, verifying that we\n        get back a tree sequence with the correct properties.\n        \"\"\"\n        # Mapping from the original 
nodes into nodes in the new tree sequence.\n        node_map = list(range(ts.num_nodes))\n        random.shuffle(node_map)\n        # Change the permutation so that the relative order of samples is maintained.\n        # Then, we should get back exactly the same tree sequence after simplify\n        # and haplotypes and variants are also equal.\n        samples = sorted(node_map[: ts.sample_size])\n        node_map = samples + node_map[ts.sample_size :]\n        permuted = tsutil.permute_nodes(ts, node_map)\n        assert ts.sequence_length == permuted.sequence_length\n        assert list(permuted.samples()) == samples\n        assert list(permuted.haplotypes()) == list(ts.haplotypes())\n        for v1, v2 in zip(\n            permuted.variants(copy=False),\n            ts.variants(copy=False),\n        ):\n            assert np.array_equal(v1.genotypes, v2.genotypes)\n\n        assert ts.num_trees == permuted.num_trees\n        j = 0\n        for t1, t2 in zip(ts.trees(), permuted.trees()):\n            t1_dict = {node_map[k]: node_map[v] for k, v in t1.parent_dict.items()}\n            assert node_map[t1.root] == t2.root\n            assert t1_dict == t2.parent_dict\n            for u1 in t1.nodes():\n                u2 = node_map[u1]\n                assert sorted(node_map[v] for v in t1.samples(u1)) == sorted(\n                    list(t2.samples(u2))\n                )\n            j += 1\n        assert j == ts.num_trees\n\n        # The simplified version of the permuted tree sequence should be in canonical\n        # form, and identical to the original.\n        simplified, s_node_map = permuted.simplify(map_nodes=True)\n\n        for u, v in enumerate(node_map):\n            assert s_node_map[v] == u\n        ts.tables.assert_equals(simplified.tables, ignore_provenance=True)\n\n    def test_single_tree_permuted_nodes(self):\n        ts = msprime.simulate(10, mutation_rate=5, random_seed=self.random_seed)\n        self.verify_permuted_nodes(ts)\n\n    def 
test_binary_tree_sequence_permuted_nodes(self):\n        ts = msprime.simulate(\n            20, recombination_rate=5, mutation_rate=5, random_seed=self.random_seed\n        )\n        self.verify_permuted_nodes(ts)\n\n    def test_nonbinary_tree_sequence_permuted_nodes(self):\n        demographic_events = [\n            msprime.SimpleBottleneck(time=1.0, population=0, proportion=0.95)\n        ]\n        ts = msprime.simulate(\n            20,\n            recombination_rate=10,\n            mutation_rate=5,\n            demographic_events=demographic_events,\n            random_seed=self.random_seed,\n        )\n        found = False\n        for e in ts.edgesets():\n            if len(e.children) > 2:\n                found = True\n        assert found\n        self.verify_permuted_nodes(ts)\n\n\nclass TestSimplifyExamples(TopologyTestCase):\n    \"\"\"\n    Tests for simplify where we write out the input and expected output\n    or we detect expected errors.\n    \"\"\"\n\n    def verify_simplify(\n        self,\n        samples,\n        *,\n        filter_sites=True,\n        keep_input_roots=False,\n        filter_nodes=True,\n        nodes_before=None,\n        edges_before=None,\n        sites_before=None,\n        mutations_before=None,\n        nodes_after=None,\n        edges_after=None,\n        sites_after=None,\n        mutations_after=None,\n        debug=False,\n    ):\n        \"\"\"\n        Verifies that if we run simplify on the specified input we get the\n        required output.\n        \"\"\"\n        before = tskit.load_text(\n            nodes=io.StringIO(nodes_before),\n            edges=io.StringIO(edges_before),\n            sites=io.StringIO(sites_before) if sites_before is not None else None,\n            mutations=(\n                io.StringIO(mutations_before) if mutations_before is not None else None\n            ),\n            strict=False,\n        )\n\n        after = tskit.load_text(\n            
nodes=io.StringIO(nodes_after),\n            edges=io.StringIO(edges_after),\n            sites=io.StringIO(sites_after) if sites_after is not None else None,\n            mutations=(\n                io.StringIO(mutations_after) if mutations_after is not None else None\n            ),\n            strict=False,\n            sequence_length=before.sequence_length,\n        )\n\n        result, _ = do_simplify(\n            before,\n            samples=samples,\n            filter_sites=filter_sites,\n            keep_input_roots=keep_input_roots,\n            filter_nodes=filter_nodes,\n            compare_lib=True,\n        )\n        if debug:\n            print(\"before\")\n            print(before)\n            print(before.draw_text())\n            print(\"after\")\n            print(after)\n            print(after.draw_text())\n            print(\"result\")\n            print(result)\n            print(result.draw_text())\n        after.tables.assert_equals(result.tables)\n\n    def test_unsorted_edges(self):\n        # We have two nodes at the same time and interleave edges for\n        # these nodes together. 
This is an error because all edges for\n        # a given parent must be contiguous.\n        nodes_before = \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       0           1\n        3       0           1\n        \"\"\"\n        edges_before = \"\"\"\\\n        left    right   parent  child\n        0       1       2       0,1\n        0       1       3       0,1\n        1       2       2       0,1\n        1       2       3       0,1\n        \"\"\"\n        nodes = tskit.parse_nodes(io.StringIO(nodes_before), strict=False)\n        edges = tskit.parse_edges(io.StringIO(edges_before), strict=False)\n        # Cannot use load_text here because it calls sort()\n        tables = tskit.TableCollection(sequence_length=2)\n        tables.nodes.set_columns(**nodes.asdict())\n        tables.edges.set_columns(**edges.asdict())\n        with pytest.raises(_tskit.LibraryError):\n            tables.simplify(samples=[0, 1])\n\n    def test_single_binary_tree(self):\n        #\n        # 2        4\n        #         / \\\n        # 1      3   \\\n        #       / \\   \\\n        # 0   (0)(1)  (2)\n        nodes_before = \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       1           0\n        3       0           1\n        4       0           2\n        \"\"\"\n        edges_before = \"\"\"\\\n        left    right   parent  child\n        0       1       3       0,1\n        0       1       4       2,3\n        \"\"\"\n        # We sample 0 and 2, so we get\n        nodes_after = \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       0           2\n        \"\"\"\n        edges_after = \"\"\"\\\n        left    right   parent  child\n        0       1       2       0,1\n        \"\"\"\n        self.verify_simplify(\n            samples=[0, 2],\n            
nodes_before=nodes_before,\n            edges_before=edges_before,\n            nodes_after=nodes_after,\n            edges_after=edges_after,\n        )\n\n    def test_single_binary_tree_no_sample_nodes(self):\n        #\n        # 2        4\n        #         / \\\n        # 1      3   \\\n        #       / \\   \\\n        # 0   (0)(1)  (2)\n        nodes_before = \"\"\"\\\n        id      is_sample   time\n        0       0           0\n        1       0           0\n        2       0           0\n        3       0           1\n        4       0           2\n        \"\"\"\n        edges_before = \"\"\"\\\n        left    right   parent  child\n        0       1       3       0,1\n        0       1       4       2,3\n        \"\"\"\n        # We sample 0 and 2, so we get\n        nodes_after = \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       0           2\n        \"\"\"\n        edges_after = \"\"\"\\\n        left    right   parent  child\n        0       1       2       0,1\n        \"\"\"\n        self.verify_simplify(\n            samples=[0, 2],\n            nodes_before=nodes_before,\n            edges_before=edges_before,\n            nodes_after=nodes_after,\n            edges_after=edges_after,\n        )\n\n    def test_single_binary_tree_keep_input_root(self):\n        #\n        # 2        4\n        #         / \\\n        # 1      3   \\\n        #       / \\   \\\n        # 0   (0)(1)  (2)\n        nodes_before = \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       1           0\n        3       0           1\n        4       0           2\n        \"\"\"\n        edges_before = \"\"\"\\\n        left    right   parent  child\n        0       1       3       0,1\n        0       1       4       2,3\n        \"\"\"\n        nodes_after = \"\"\"\\\n        id      is_sample   time\n        0       1        
   0\n        1       1           0\n        2       0           1\n        3       0           2\n        \"\"\"\n        edges_after = \"\"\"\\\n        left    right   parent  child\n        0       1       2       0,1\n        0       1       3       2\n        \"\"\"\n        self.verify_simplify(\n            samples=[0, 1],\n            nodes_before=nodes_before,\n            edges_before=edges_before,\n            nodes_after=nodes_after,\n            edges_after=edges_after,\n            keep_input_roots=True,\n        )\n\n    def test_single_binary_tree_internal_sample(self):\n        #\n        # 2        4\n        #         / \\\n        # 1     (3)  \\\n        #       / \\   \\\n        # 0   (0)  1  (2)\n        nodes_before = \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       0           0\n        3       1           1\n        4       0           2\n        \"\"\"\n        edges_before = \"\"\"\\\n        left    right   parent  child\n        0       1       3       0,1\n        0       1       4       2,3\n        \"\"\"\n        # We sample 0 and 3, so we get\n        nodes_after = \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           1\n        \"\"\"\n        edges_after = \"\"\"\\\n        left    right   parent  child\n        0       1       1       0\n        \"\"\"\n        self.verify_simplify(\n            samples=[0, 3],\n            nodes_before=nodes_before,\n            edges_before=edges_before,\n            nodes_after=nodes_after,\n            edges_after=edges_after,\n        )\n\n    def test_single_binary_tree_internal_sample_meet_at_root(self):\n        # 3          5\n        #           / \\\n        # 2        4  (6)\n        #         / \\\n        # 1     (3)  \\\n        #       / \\   \\\n        # 0   (0)  1   2\n        nodes_before = \"\"\"\\\n        id      is_sample   time\n        0       1  
         0\n        1       1           0\n        2       0           0\n        3       1           1\n        4       0           2\n        5       0           3\n        6       1           2\n        \"\"\"\n        edges_before = \"\"\"\\\n        left    right   parent  child\n        0       1       3       0,1\n        0       1       4       2,3\n        0       1       5       4,6\n        \"\"\"\n        # We sample 0 and 3 and 6, so we get\n        nodes_after = \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           1\n        2       1           2\n        3       0           3\n        \"\"\"\n        edges_after = \"\"\"\\\n        left    right   parent  child\n        0       1       1       0\n        0       1       3       1,2\n        \"\"\"\n        self.verify_simplify(\n            samples=[0, 3, 6],\n            nodes_before=nodes_before,\n            edges_before=edges_before,\n            nodes_after=nodes_after,\n            edges_after=edges_after,\n        )\n\n    def test_single_binary_tree_simple_mutations(self):\n        # 3          5\n        #           / \\\n        # 2        4   \\\n        #         / \\   s0\n        # 1      3   s1  \\\n        #       / \\   \\   \\\n        # 0   (0) (1)  2  (6)\n        nodes_before = \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       0           0\n        3       0           1\n        4       0           2\n        5       0           3\n        6       1           0\n        \"\"\"\n        edges_before = \"\"\"\\\n        left    right   parent  child\n        0       1       3       0,1\n        0       1       4       2,3\n        0       1       5       4,6\n        \"\"\"\n        sites_before = \"\"\"\\\n        id  position    ancestral_state\n        0   0.1         0\n        1   0.2         0\n        \"\"\"\n        mutations_before = \"\"\"\\\n        
site    node    derived_state\n        0       6       1\n        1       2       1\n        \"\"\"\n\n        # We sample 0 and 1 and 6, so we get\n        nodes_after = \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       1           0\n        3       0           1\n        4       0           3\n        \"\"\"\n        edges_after = \"\"\"\\\n        left    right   parent  child\n        0       1       3       0,1\n        0       1       4       2,3\n        \"\"\"\n        sites_after = \"\"\"\\\n        id  position    ancestral_state\n        0   0.1         0\n        \"\"\"\n        mutations_after = \"\"\"\\\n        site    node    derived_state\n        0       2       1\n        \"\"\"\n        self.verify_simplify(\n            samples=[0, 1, 6],\n            nodes_before=nodes_before,\n            edges_before=edges_before,\n            sites_before=sites_before,\n            mutations_before=mutations_before,\n            nodes_after=nodes_after,\n            edges_after=edges_after,\n            sites_after=sites_after,\n            mutations_after=mutations_after,\n        )\n        # If we don't filter the fixed sites, we should get the same\n        # mutations and the original sites table back.\n        self.verify_simplify(\n            samples=[0, 1, 6],\n            filter_sites=False,\n            nodes_before=nodes_before,\n            edges_before=edges_before,\n            sites_before=sites_before,\n            mutations_before=mutations_before,\n            nodes_after=nodes_after,\n            edges_after=edges_after,\n            sites_after=sites_before,\n            mutations_after=mutations_after,\n        )\n\n    def test_single_binary_tree_keep_roots_mutations(self):\n        # 3          5\n        #        m0 / \\\n        # 2        4   \\\n        #      m1 / \\   \\\n        # 1      3   \\   \\\n        #       / \\   \\   \\\n        # 0   (0) (1)  
2   6\n        nodes_before = \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       0           0\n        3       0           1\n        4       0           2\n        5       0           3\n        6       0           0\n        \"\"\"\n        edges_before = \"\"\"\\\n        left    right   parent  child\n        0       1       3       0,1\n        0       1       4       2,3\n        0       1       5       4,6\n        \"\"\"\n        sites_before = \"\"\"\\\n        id  position    ancestral_state\n        0   0.1         0\n        \"\"\"\n        mutations_before = \"\"\"\\\n        site    node    derived_state parent\n        0       4       1             -1\n        0       3       2             0\n        \"\"\"\n\n        # We sample 0 and 1\n        nodes_after = \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       0           1\n        3       0           3\n        \"\"\"\n        edges_after = \"\"\"\\\n        left    right   parent  child\n        0       1       2       0,1\n        0       1       3       2\n        \"\"\"\n        sites_after = \"\"\"\\\n        id  position    ancestral_state\n        0   0.1         0\n        \"\"\"\n        mutations_after = \"\"\"\\\n        site    node    derived_state parent\n        0       2       1             -1\n        0       2       2             0\n        \"\"\"\n        self.verify_simplify(\n            samples=[0, 1],\n            nodes_before=nodes_before,\n            edges_before=edges_before,\n            sites_before=sites_before,\n            mutations_before=mutations_before,\n            nodes_after=nodes_after,\n            edges_after=edges_after,\n            sites_after=sites_after,\n            mutations_after=mutations_after,\n            keep_input_roots=True,\n        )\n\n    def test_place_mutations_with_and_without_roots(self):\n     
   nodes_before = \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       0           1\n        2       0           2\n        \"\"\"\n        edges_before = \"\"\"\\\n        left    right   parent  child\n        0       2       1       0\n        0       2       2       1\n        \"\"\"\n        sites = \"\"\"\\\n        id  position    ancestral_state\n        0   1.0         0\n        \"\"\"\n        mutations_before = \"\"\"\\\n        site    node    derived_state time parent\n        0       2       3             2    -1\n        0       1       1             1    0\n        0       0       2             0    1\n        \"\"\"\n        # expected result without keep_input_roots\n        nodes_after = \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        \"\"\"\n        edges_after = \"\"\"\\\n        left    right   parent  child\n        \"\"\"\n        mutations_after = \"\"\"\\\n        site    node    derived_state time parent\n        0       0       3             2    -1\n        0       0       1             1    0\n        0       0       2             0    1\n        \"\"\"\n        # expected result with keep_input_roots\n        nodes_after_keep = \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       0           2\n        \"\"\"\n        edges_after_keep = \"\"\"\\\n        left    right   parent  child\n        0       2       1       0\n        \"\"\"\n        mutations_after_keep = \"\"\"\\\n        site    node    derived_state time parent\n        0       1       3             2    -1\n        0       0       1             1    0\n        0       0       2             0    1\n        \"\"\"\n        self.verify_simplify(\n            samples=[0],\n            nodes_before=nodes_before,\n            edges_before=edges_before,\n            sites_before=sites,\n            mutations_before=mutations_before,\n            
nodes_after=nodes_after,\n            edges_after=edges_after,\n            sites_after=sites,\n            mutations_after=mutations_after,\n            keep_input_roots=False,\n        )\n        self.verify_simplify(\n            samples=[0],\n            nodes_before=nodes_before,\n            edges_before=edges_before,\n            sites_before=sites,\n            mutations_before=mutations_before,\n            nodes_after=nodes_after_keep,\n            edges_after=edges_after_keep,\n            sites_after=sites,\n            mutations_after=mutations_after_keep,\n            keep_input_roots=True,\n        )\n\n    def test_overlapping_edges(self):\n        nodes = \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       0           1\n        \"\"\"\n        edges_before = \"\"\"\\\n        left    right   parent  child\n        0       2       2       0\n        1       3       2       1\n        \"\"\"\n        # We resolve the overlapping edges here. 
Since the flanking regions\n        # have no interesting edges, these are left out of the output.\n        edges_after = \"\"\"\\\n        left    right   parent  child\n        1       2       2       0,1\n        \"\"\"\n        self.verify_simplify(\n            samples=[0, 1],\n            nodes_before=nodes,\n            edges_before=edges_before,\n            nodes_after=nodes,\n            edges_after=edges_after,\n        )\n\n    def test_overlapping_edges_internal_samples(self):\n        nodes = \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       1           1\n        \"\"\"\n        edges = \"\"\"\\\n        left    right   parent  child\n        0       2       2       0\n        1       3       2       1\n        \"\"\"\n        self.verify_simplify(\n            samples=[0, 1, 2],\n            nodes_before=nodes,\n            edges_before=edges,\n            nodes_after=nodes,\n            edges_after=edges,\n        )\n\n    def test_unary_edges_no_overlap(self):\n        nodes_before = \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       0           1\n        \"\"\"\n        edges_before = \"\"\"\\\n        left    right   parent  child\n        0       2       2       0\n        2       3       2       1\n        \"\"\"\n        # Because there is no overlap between the samples, we just get an\n        # empty set of output edges.\n        nodes_after = \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        \"\"\"\n        edges_after = \"\"\"\\\n        left    right   parent  child\n        \"\"\"\n        self.verify_simplify(\n            samples=[0, 1],\n            nodes_before=nodes_before,\n            edges_before=edges_before,\n            nodes_after=nodes_after,\n            edges_after=edges_after,\n        )\n\n    def 
test_unary_edges_no_overlap_internal_sample(self):\n        nodes_before = \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       1           1\n        \"\"\"\n        edges_before = \"\"\"\\\n        left    right   parent  child\n        0       1       2       0\n        1       2       2       1\n        \"\"\"\n        self.verify_simplify(\n            samples=[0, 1, 2],\n            nodes_before=nodes_before,\n            edges_before=edges_before,\n            nodes_after=nodes_before,\n            edges_after=edges_before,\n        )\n\n    def test_keep_nodes(self):\n        nodes_before = \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       0           1\n        3       0           2\n        4       0           3\n        \"\"\"\n        edges_before = \"\"\"\\\n        left    right   parent  child\n        0       1       2       0\n        0       1       2       1\n        0       1       3       2\n        0       1       4       3\n        \"\"\"\n        edges_after = \"\"\"\\\n        left    right   parent  child\n        0       1       2       0\n        0       1       2       1\n        0       1       4       2\n        \"\"\"\n        self.verify_simplify(\n            samples=[0, 1],\n            nodes_before=nodes_before,\n            edges_before=edges_before,\n            nodes_after=nodes_before,\n            edges_after=edges_after,\n            filter_nodes=False,\n            keep_input_roots=True,\n        )\n\n\nclass TestNonSampleExternalNodes(TopologyTestCase):\n    \"\"\"\n    Tests for situations in which we have tips that are not samples.\n    \"\"\"\n\n    def test_simple_case(self):\n        # Simplest case where we have n = 2 and external non-sample nodes.\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1     
  1           0\n        2       0           1\n        3       0           0\n        4       0           0\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       1       2       0,1,3,4\n        \"\"\"\n        )\n        sites = io.StringIO(\n            \"\"\"\\\n        id  position    ancestral_state\n        0   0.1         0\n        1   0.2         0\n        2   0.3         0\n        3   0.4         0\n        \"\"\"\n        )\n        mutations = io.StringIO(\n            \"\"\"\\\n        site    node    derived_state\n        0       0       1\n        1       1       1\n        2       3       1\n        3       4       1\n        \"\"\"\n        )\n        ts = tskit.load_text(\n            nodes=nodes, edges=edges, sites=sites, mutations=mutations, strict=False\n        )\n        assert ts.sample_size == 2\n        assert ts.num_trees == 1\n        assert ts.num_nodes == 5\n        assert ts.num_sites == 4\n        assert ts.num_mutations == 4\n        t = next(ts.trees())\n        assert t.parent_dict == {0: 2, 1: 2, 3: 2, 4: 2}\n        assert t.root == 2\n        ts_simplified, node_map = ts.simplify(map_nodes=True)\n        assert list(node_map) == [0, 1, 2, -1, -1]\n        assert ts_simplified.num_nodes == 3\n        assert ts_simplified.num_trees == 1\n        t = next(ts_simplified.trees())\n        assert t.parent_dict == {0: 2, 1: 2}\n        assert t.root == 2\n        # We should have removed the two non-sample mutations.\n        assert [s.position for s in t.sites()] == [0.1, 0.2]\n\n    def test_unary_non_sample_external_nodes(self):\n        # Take an ordinary tree sequence and put a bunch of external non\n        # sample nodes on it.\n        ts = msprime.simulate(\n            15, recombination_rate=5, random_seed=self.random_seed, mutation_rate=5\n        )\n        assert ts.num_trees > 2\n        assert ts.num_mutations > 2\n        tables = 
ts.dump_tables()\n        next_node = ts.num_nodes\n        tables.edges.reset()\n        for e in ts.edges():\n            tables.edges.append(e)\n            tables.edges.append(e.replace(child=next_node))\n            tables.nodes.add_row(time=0)\n            next_node += 1\n        tables.sort()\n        ts_new = tables.tree_sequence()\n        assert ts_new.num_nodes == next_node\n        assert ts_new.sample_size == ts.sample_size\n        self.assert_haplotypes_equal(ts, ts_new)\n        self.assert_variants_equal(ts, ts_new)\n        ts_simplified = ts_new.simplify()\n        assert ts_simplified.num_nodes == ts.num_nodes\n        assert ts_simplified.sample_size == ts.sample_size\n        assert list(ts_simplified.records()) == list(ts.records())\n        self.assert_haplotypes_equal(ts, ts_simplified)\n        self.assert_variants_equal(ts, ts_simplified)\n\n\nclass TestMultipleRoots(TopologyTestCase):\n    \"\"\"\n    Tests for situations where we have multiple roots for the samples.\n    \"\"\"\n\n    def test_simplest_degenerate_case(self):\n        # Simplest case where we have n = 2 and no edges.\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        \"\"\"\n        )\n        sites = io.StringIO(\n            \"\"\"\\\n        id  position    ancestral_state\n        0   0.1         0\n        1   0.2         0\n        \"\"\"\n        )\n        mutations = io.StringIO(\n            \"\"\"\\\n        site    node    derived_state\n        0       0         1\n        1       1         1\n        \"\"\"\n        )\n        ts = tskit.load_text(\n            nodes=nodes,\n            edges=edges,\n            sites=sites,\n            mutations=mutations,\n            sequence_length=1,\n            strict=False,\n        )\n        
assert ts.num_nodes == 2\n        assert ts.num_trees == 1\n        assert ts.num_sites == 2\n        assert ts.num_mutations == 2\n        t = next(ts.trees())\n        assert t.parent_dict == {}\n        assert sorted(t.roots) == [0, 1]\n        assert list(ts.haplotypes(isolated_as_missing=False)) == [\"10\", \"01\"]\n        assert np.array_equal(\n            np.stack([v.genotypes for v in ts.variants(isolated_as_missing=False)]),\n            [[1, 0], [0, 1]],\n        )\n        simplified = ts.simplify()\n        t1 = ts.dump_tables()\n        t2 = simplified.dump_tables()\n        assert t1.nodes == t2.nodes\n        assert t1.edges == t2.edges\n\n    def test_simplest_non_degenerate_case(self):\n        # Simplest case where we have n = 4 and two trees.\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       1           0\n        3       1           0\n        4       0           1\n        5       0           2\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       1       4       0,1\n        0       1       5       2,3\n        \"\"\"\n        )\n        sites = io.StringIO(\n            \"\"\"\\\n        id  position    ancestral_state\n        0   0.1         0\n        1   0.2         0\n        2   0.3         0\n        3   0.4         0\n        \"\"\"\n        )\n        mutations = io.StringIO(\n            \"\"\"\\\n        site    node    derived_state\n        0       0       1\n        1       1       1\n        2       2       1\n        3       3       1\n        \"\"\"\n        )\n        ts = tskit.load_text(\n            nodes=nodes, edges=edges, sites=sites, mutations=mutations, strict=False\n        )\n        assert ts.num_nodes == 6\n        assert ts.num_trees == 1\n        assert ts.num_sites == 4\n        assert ts.num_mutations == 4\n        
t = next(ts.trees())\n        assert t.parent_dict == {0: 4, 1: 4, 2: 5, 3: 5}\n        assert list(ts.haplotypes()) == [\"1000\", \"0100\", \"0010\", \"0001\"]\n        assert np.array_equal(\n            np.stack([v.genotypes for v in ts.variants()]),\n            [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]],\n        )\n        assert t.mrca(0, 1) == 4\n        assert t.mrca(0, 4) == 4\n        assert t.mrca(2, 3) == 5\n        assert t.mrca(0, 2) == tskit.NULL\n        assert t.mrca(0, 3) == tskit.NULL\n        assert t.mrca(2, 4) == tskit.NULL\n        ts_simplified, node_map = ts.simplify(map_nodes=True)\n        for j in range(4):\n            assert node_map[j] == j\n        assert ts_simplified.num_nodes == 6\n        assert ts_simplified.num_trees == 1\n        assert ts_simplified.num_sites == 4\n        assert ts_simplified.num_mutations == 4\n        t = next(ts_simplified.trees())\n        assert t.parent_dict == {0: 4, 1: 4, 2: 5, 3: 5}\n\n    def test_two_reducible_trees(self):\n        # We have n = 4 and two trees, with some unary nodes and non-sample leaves\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       1           0\n        3       1           0\n        4       0           1\n        5       0           1\n        6       0           2\n        7       0           3\n        8       0           0   # Non sample leaf\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       1      4         0\n        0       1      5         1\n        0       1      6         4,5\n        0       1      7         2,3,8\n        \"\"\"\n        )\n        sites = io.StringIO(\n            \"\"\"\\\n        id  position    ancestral_state\n        0   0.1         0\n        1   0.2         0\n        2   0.3         0\n        3   0.4         0\n        4   
0.5         0\n        \"\"\"\n        )\n        mutations = io.StringIO(\n            \"\"\"\\\n        site    node    derived_state\n        0       0       1\n        1       1       1\n        2       2       1\n        3       3       1\n        4       8       1\n        \"\"\"\n        )\n        ts = tskit.load_text(\n            nodes=nodes, edges=edges, sites=sites, mutations=mutations, strict=False\n        )\n        assert ts.num_nodes == 9\n        assert ts.num_trees == 1\n        assert ts.num_sites == 5\n        assert ts.num_mutations == 5\n        t = next(ts.trees())\n        assert t.parent_dict == {0: 4, 1: 5, 2: 7, 3: 7, 4: 6, 5: 6, 8: 7}\n        assert list(ts.haplotypes()) == [\"10000\", \"01000\", \"00100\", \"00010\"]\n        assert np.array_equal(\n            np.stack([v.genotypes for v in ts.variants()]),\n            [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1], [0, 0, 0, 0]],\n        )\n        assert t.mrca(0, 1) == 6\n        assert t.mrca(2, 3) == 7\n        assert t.mrca(2, 8) == 7\n        assert t.mrca(0, 2) == tskit.NULL\n        assert t.mrca(0, 3) == tskit.NULL\n        assert t.mrca(0, 8) == tskit.NULL\n        ts_simplified, node_map = ts.simplify(map_nodes=True)\n        for j in range(4):\n            assert node_map[j] == j\n        assert ts_simplified.num_nodes == 6\n        assert ts_simplified.num_trees == 1\n        t = next(ts_simplified.trees())\n        assert list(ts_simplified.haplotypes()) == [\"1000\", \"0100\", \"0010\", \"0001\"]\n        assert np.array_equal(\n            np.stack([v.genotypes for v in ts_simplified.variants()]),\n            [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]],\n        )\n        # The site over the non-sample external node should have been discarded.\n        sites = list(t.sites())\n        assert sites[-1].position == 0.4\n        assert t.parent_dict == {0: 4, 1: 4, 2: 5, 3: 5}\n\n    def test_one_reducible_tree(self):\n        # We have n = 4 
and two trees. One tree is reducible and the other isn't.\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       1           0\n        3       1           0\n        4       0           1\n        5       0           1\n        6       0           2\n        7       0           3\n        8       0           0   # Non sample leaf\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       1      4         0\n        0       1      5         1\n        0       1      6         4,5\n        0       1      7         2,3,8\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes=nodes, edges=edges, strict=False)\n        assert ts.num_nodes == 9\n        assert ts.num_trees == 1\n        t = next(ts.trees())\n        assert t.parent_dict == {0: 4, 1: 5, 2: 7, 3: 7, 4: 6, 5: 6, 8: 7}\n        assert t.mrca(0, 1) == 6\n        assert t.mrca(2, 3) == 7\n        assert t.mrca(2, 8) == 7\n        assert t.mrca(0, 2) == tskit.NULL\n        assert t.mrca(0, 3) == tskit.NULL\n        assert t.mrca(0, 8) == tskit.NULL\n        ts_simplified = ts.simplify()\n        assert ts_simplified.num_nodes == 6\n        assert ts_simplified.num_trees == 1\n        t = next(ts_simplified.trees())\n        assert t.parent_dict == {0: 4, 1: 4, 2: 5, 3: 5}\n\n    # NOTE: This test has not been checked since updating to the text representation\n    # so there might be other problems with it.\n    def test_mutations_over_roots(self):\n        # Mutations over root nodes should be ok when we have multiple roots.\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       1           0\n        3       0           1\n        4       0           2\n        5       0           2\n        \"\"\"\n        )\n 
       edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       1       3       0,1\n        0       1       4       3\n        0       1       5       2\n        \"\"\"\n        )\n        sites = io.StringIO(\n            \"\"\"\\\n        id  position    ancestral_state\n        0   0.1         0\n        1   0.2         0\n        2   0.3         0\n        3   0.4         0\n        4   0.5         0\n        5   0.6         0\n        \"\"\"\n        )\n        mutations = io.StringIO(\n            \"\"\"\\\n        site    node    derived_state\n        0       0       1\n        1       1       1\n        2       3       1\n        3       4       1\n        4       2       1\n        5       5       1\n        \"\"\"\n        )\n        ts = tskit.load_text(\n            nodes=nodes, edges=edges, sites=sites, mutations=mutations, strict=False\n        )\n        assert ts.num_nodes == 6\n        assert ts.num_trees == 1\n        assert ts.num_sites == 6\n        assert ts.num_mutations == 6\n        t = next(ts.trees())\n        assert len(list(t.sites())) == 6\n        haplotypes = [\"101100\", \"011100\", \"000011\"]\n        variants = [[1, 0, 0], [0, 1, 0], [1, 1, 0], [1, 1, 0], [0, 0, 1], [0, 0, 1]]\n        assert list(ts.haplotypes()) == haplotypes\n        assert np.array_equal(np.stack([v.genotypes for v in ts.variants()]), variants)\n        ts_simplified = ts.simplify(filter_sites=False)\n        assert list(ts_simplified.haplotypes(isolated_as_missing=False)) == haplotypes\n        assert np.array_equal(\n            np.stack(\n                [v.genotypes for v in ts_simplified.variants(isolated_as_missing=False)]\n            ),\n            variants,\n        )\n\n    def test_break_single_tree(self):\n        # Take a single largish tree from tskit, and remove the oldest record.\n        # This breaks it into two subtrees.\n        ts = msprime.simulate(20, random_seed=self.random_seed, 
mutation_rate=4)\n        assert ts.num_mutations > 5\n        tables = ts.dump_tables()\n        tables.edges.set_columns(\n            left=tables.edges.left[:-1],\n            right=tables.edges.right[:-1],\n            parent=tables.edges.parent[:-1],\n            child=tables.edges.child[:-1],\n        )\n        ts_new = tables.tree_sequence()\n        assert ts.sample_size == ts_new.sample_size\n        assert ts.num_edges == ts_new.num_edges + 1\n        assert ts.num_trees == ts_new.num_trees\n        self.assert_haplotypes_equal(ts, ts_new)\n        self.assert_variants_equal(ts, ts_new)\n        roots = set()\n        t_new = next(ts_new.trees())\n        for u in ts_new.samples():\n            while t_new.parent(u) != tskit.NULL:\n                u = t_new.parent(u)\n            roots.add(u)\n        assert len(roots) == 2\n        assert sorted(roots) == sorted(t_new.roots)\n\n\nclass TestWithVisuals(TopologyTestCase):\n    \"\"\"\n    Some pedantic tests with ascii depictions of what's supposed to happen.\n    \"\"\"\n\n    def verify_simplify_topology(self, ts, sample, haplotypes=False):\n        # copies from test_highlevel.py\n        new_ts, node_map = ts.simplify(sample, map_nodes=True)\n        old_trees = ts.trees()\n        old_tree = next(old_trees)\n        assert ts.get_num_trees() >= new_ts.get_num_trees()\n        for new_tree in new_ts.trees():\n            new_left, new_right = new_tree.get_interval()\n            old_left, old_right = old_tree.get_interval()\n            # Skip ahead on the old tree until new_left is within its interval\n            while old_right <= new_left:\n                old_tree = next(old_trees)\n                old_left, old_right = old_tree.get_interval()\n            # If the TMRCA of all pairs of samples is the same, then we have the\n            # same information. 
We limit this to at most 500 pairs\n            pairs = itertools.islice(itertools.combinations(sample, 2), 500)\n            for pair in pairs:\n                mapped_pair = [node_map[u] for u in pair]\n                mrca1 = old_tree.get_mrca(*pair)\n                mrca2 = new_tree.get_mrca(*mapped_pair)\n                assert mrca2 == node_map[mrca1]\n        if haplotypes:\n            orig_haps = list(ts.haplotypes())\n            simp_haps = list(new_ts.haplotypes())\n            for i, j in enumerate(sample):\n                assert orig_haps[j] == simp_haps[i]\n\n    def test_partial_non_sample_external_nodes(self):\n        # A somewhat more complicated test case with a partially specified,\n        # non-sampled tip.\n        #\n        # Here is the situation:\n        #\n        # 1.0             7\n        # 0.7            / \\                                            6\n        #               /   \\                                          / \\\n        # 0.5          /     5                      5                 /   5\n        #             /     / \\                    / \\               /   / \\\n        # 0.4        /     /   4                  /   4             /   /   4\n        #           /     /   / \\                /   / \\           /   /   / \\\n        #          /     /   3   \\              /   /   \\         /   /   3   \\\n        #         /     /         \\            /   /     \\       /   /         \\\n        # 0.0    0     1           2          1   0       2     0   1           2\n        #\n        #          (0.0, 0.2),                 (0.2, 0.8),         (0.8, 1.0)\n\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       1           0\n        3       0           0.2  # Non sample leaf\n        4       0           0.4\n        5       0           0.5\n        6       0           0.7\n        7       0        
   1.0\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.0     0.2     4       2,3\n        0.2     0.8     4       0,2\n        0.8     1.0     4       2,3\n        0.0     1.0     5       1,4\n        0.8     1.0     6       0,5\n        0.0     0.2     7       0,5\n        \"\"\"\n        )\n        true_trees = [\n            {0: 7, 1: 5, 2: 4, 3: 4, 4: 5, 5: 7, 6: -1, 7: -1},\n            {0: 4, 1: 5, 2: 4, 3: -1, 4: 5, 5: -1, 6: -1, 7: -1},\n            {0: 6, 1: 5, 2: 4, 3: 4, 4: 5, 5: 6, 6: -1, 7: -1},\n        ]\n        ts = tskit.load_text(nodes=nodes, edges=edges, strict=False)\n        tree_dicts = [t.parent_dict for t in ts.trees()]\n        assert ts.sample_size == 3\n        assert ts.num_trees == 3\n        assert ts.num_nodes == 8\n        # check topologies agree:\n        for a, t in zip(true_trees, tree_dicts):\n            for k in a.keys():\n                if k in t.keys():\n                    assert t[k] == a[k]\n                else:\n                    assert a[k] == tskit.NULL\n        # check .simplify() works here\n        self.verify_simplify_topology(ts, [0, 1, 2])\n\n    def test_partial_non_sample_external_nodes_2(self):\n        # The same situation as above, but partial tip is labeled '7' not '3':\n        #\n        # 1.0          6\n        # 0.7         / \\                                       5\n        #            /   \\                                     / \\\n        # 0.5       /     4                 4                 /   4\n        #          /     / \\               / \\               /   / \\\n        # 0.4     /     /   3             /   3             /   /   3\n        #        /     /   / \\           /   / \\           /   /   / \\\n        #       /     /   7   \\         /   /   \\         /   /   7   \\\n        #      /     /         \\       /   /     \\       /   /         \\\n        # 0.0 0     1           2     1   0      
 2     0   1           2\n        #\n        #          (0.0, 0.2),         (0.2, 0.8),         (0.8, 1.0)\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       1           0\n        3       0           0.4\n        4       0           0.5\n        5       0           0.7\n        6       0           1.0\n        7       0           0    # Non sample leaf\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.0     0.2     3       2,7\n        0.2     0.8     3       0,2\n        0.8     1.0     3       2,7\n        0.0     0.2     4       1,3\n        0.2     0.8     4       1,3\n        0.8     1.0     4       1,3\n        0.8     1.0     5       0,4\n        0.0     0.2     6       0,4\n        \"\"\"\n        )\n        true_trees = [\n            {0: 6, 1: 4, 2: 3, 3: 4, 4: 6, 5: -1, 6: -1, 7: 3},\n            {0: 3, 1: 4, 2: 3, 3: 4, 4: -1, 5: -1, 6: -1, 7: -1},\n            {0: 5, 1: 4, 2: 3, 3: 4, 4: 5, 5: -1, 6: -1, 7: 3},\n        ]\n        ts = tskit.load_text(nodes=nodes, edges=edges, strict=False)\n        tree_dicts = [t.parent_dict for t in ts.trees()]\n        # sample size check works here since 7 > 3\n        assert ts.sample_size == 3\n        assert ts.num_trees == 3\n        assert ts.num_nodes == 8\n        # check topologies agree:\n        for a, t in zip(true_trees, tree_dicts):\n            for k in a.keys():\n                if k in t.keys():\n                    assert t[k] == a[k]\n                else:\n                    assert a[k] == tskit.NULL\n        self.verify_simplify_topology(ts, [0, 1, 2])\n\n    def test_single_offspring_records(self):\n        # Here we have inserted a single-offspring record\n        # (for 6 on the left segment):\n        #\n        # 1.0             7\n        # 0.7            / 6                                        
          6\n        #               /   \\                                                / \\\n        # 0.5          /     5                       5                      /   5\n        #             /     / \\                     / \\                    /   / \\\n        # 0.4        /     /   4                   /   4                  /   /   4\n        # 0.3       /     /   / \\                 /   / \\                /   /   / \\\n        #          /     /   3   \\               /   /   \\              /   /   3   \\\n        #         /     /         \\             /   /     \\            /   /         \\\n        # 0.0    0     1           2           1   0       2          0   1           2\n        #\n        #          (0.0, 0.2),               (0.2, 0.8),              (0.8, 1.0)\n        nodes = io.StringIO(\n            \"\"\"\\\n        id  is_sample   time\n        0   1           0\n        1   1           0\n        2   1           0\n        3   0           0       # Non sample leaf\n        4   0           0.4\n        5   0           0.5\n        6   0           0.7\n        7   0           1.0\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.0     0.2     4       2,3\n        0.2     0.8     4       0,2\n        0.8     1.0     4       2,3\n        0.0     1.0     5       1,4\n        0.8     1.0     6       0,5\n        0.0     0.2     6       5\n        0.0     0.2     7       0,6\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes, edges, strict=False)\n        true_trees = [\n            {0: 7, 1: 5, 2: 4, 3: 4, 4: 5, 5: 6, 6: 7, 7: -1},\n            {0: 4, 1: 5, 2: 4, 3: -1, 4: 5, 5: -1, 6: -1, 7: -1},\n            {0: 6, 1: 5, 2: 4, 3: 4, 4: 5, 5: 6, 6: -1, 7: -1},\n        ]\n        tree_dicts = [t.parent_dict for t in ts.trees()]\n        assert ts.sample_size == 3\n        assert ts.num_trees == 3\n        assert ts.num_nodes == 8\n        # 
check topologies agree:\n        for a, t in zip(true_trees, tree_dicts):\n            for k in a.keys():\n                if k in t.keys():\n                    assert t[k] == a[k]\n                else:\n                    assert a[k] == tskit.NULL\n        self.verify_simplify_topology(ts, [0, 1, 2])\n\n    def test_many_single_offspring(self):\n        # a more complex test with single offspring\n        # With `(i,j,x)->k` denoting that individual `k` inherits from `i` on `[0,x)`\n        #    and from `j` on `[x,1)`:\n        # 1. Begin with an individual `3` (and another anonymous one) at `t=0`.\n        # 2. `(3,?,1.0)->4` and `(3,?,1.0)->5` at `t=1`\n        # 3. `(4,3,0.9)->6` and `(3,5,0.1)->7` and then `3` dies at `t=2`\n        # 4. `(6,7,0.7)->8` at `t=3`\n        # 5. `(8,6,0.8)->9` and `(7,8,0.2)->10` at `t=4`.\n        # 6. `(3,9,0.6)->0` and `(9,10,0.5)->1` and `(10,4,0.4)->2` at `t=5`.\n        # 7. We sample `0`, `1`, and `2`.\n        # Here are the trees:\n        # t                  |              |              |             |\n        #\n        # 0       --3--      |     --3--    |     --3--    |    --3--    |    --3--\n        #        /  |  \\     |    /  |  \\   |    /     \\   |   /     \\   |   /     \\\n        # 1     4   |   5    |   4   *   5  |   4       5  |  4       5  |  4       5\n        #       |\\ / \\ /|    |   |\\   \\     |   |\\     /   |  |\\     /   |  |\\     /|\n        # 2     | 6   7 |    |   | 6   7    |   | 6   7    |  | 6   7    |  | 6   7 |\n        #       | |\\ /| |    |   |  \\  *    |   |  \\  |    |  |  *       |  |  *    | ...\n        # 3     | | 8 | |    |   |   8 |    |   *   8 *    |  |   8      |  |   8   |\n        #       | |/ \\| |    |   |  /  |    |   |  /  |    |  |  * *     |  |  / \\  |\n        # 4     | 9  10 |    |   | 9  10    |   | 9  10    |  | 9  10    |  | 9  10 |\n        #       |/ \\ / \\|    |   |  \\   *   |   |  \\   \\   |  |  \\   *   |  |  \\    |\n        # 5     0   1   
2    |   0   1   2  |   0   1   2  |  0   1   2  |  0   1   2\n        #\n        #                    |   0.0 - 0.1  |   0.1 - 0.2  |  0.2 - 0.4  |  0.4 - 0.5\n        # ... continued:\n        # t                  |             |             |             |\n        #\n        # 0         --3--    |    --3--    |    --3--    |    --3--    |    --3--\n        #          /     \\   |   /     \\   |   /     \\   |   /     \\   |   /  |  \\\n        # 1       4       5  |  4       5  |  4       5  |  4       5  |  4   |   5\n        #         |\\     /|  |   \\     /|  |   \\     /|  |   \\     /|  |     /   /|\n        # 2       | 6   7 |  |    6   7 |  |    6   7 |  |    6   7 |  |    6   7 |\n        #         |  \\    |  |     \\    |  |       /  |  |    |  /  |  |    |  /  |\n        # 3  ...  |   8   |  |      8   |  |      8   |  |    | 8   |  |    | 8   |\n        #         |  / \\  |  |     / \\  |  |     / \\  |  |    |  \\  |  |    |  \\  |\n        # 4       | 9  10 |  |    9  10 |  |    9  10 |  |    9  10 |  |    9  10 |\n        #         |    /  |  |   /   /  |  |   /   /  |  |   /   /  |  |   /   /  |\n        # 5       0   1   2  |  0   1   2  |  0   1   2  |  0   1   2  |  0   1   2\n        #\n        #         0.5 - 0.6  |  0.6 - 0.7  |  0.7 - 0.8  |  0.8 - 0.9  |  0.9 - 1.0\n\n        true_trees = [\n            {0: 4, 1: 9, 2: 10, 3: -1, 4: 3, 5: 3, 6: 4, 7: 3, 8: 6, 9: 8, 10: 7},\n            {0: 4, 1: 9, 2: 10, 3: -1, 4: 3, 5: 3, 6: 4, 7: 5, 8: 6, 9: 8, 10: 7},\n            {0: 4, 1: 9, 2: 10, 3: -1, 4: 3, 5: 3, 6: 4, 7: 5, 8: 6, 9: 8, 10: 8},\n            {0: 4, 1: 9, 2: 5, 3: -1, 4: 3, 5: 3, 6: 4, 7: 5, 8: 6, 9: 8, 10: 8},\n            {0: 4, 1: 10, 2: 5, 3: -1, 4: 3, 5: 3, 6: 4, 7: 5, 8: 6, 9: 8, 10: 8},\n            {0: 9, 1: 10, 2: 5, 3: -1, 4: 3, 5: 3, 6: 4, 7: 5, 8: 6, 9: 8, 10: 8},\n            {0: 9, 1: 10, 2: 5, 3: -1, 4: 3, 5: 3, 6: 4, 7: 5, 8: 7, 9: 8, 10: 8},\n            {0: 9, 1: 10, 2: 5, 3: -1, 4: 3, 5: 3, 6: 4, 7: 5, 8: 7, 
9: 6, 10: 8},\n            {0: 9, 1: 10, 2: 5, 3: -1, 4: 3, 5: 3, 6: 3, 7: 5, 8: 7, 9: 6, 10: 8},\n        ]\n        true_haplotypes = [\"0100\", \"0001\", \"1110\"]\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       1           0\n        3       0           5\n        4       0           4\n        5       0           4\n        6       0           3\n        7       0           3\n        8       0           2\n        9       0           1\n        10      0           1\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.5     1.0     10      1\n        0.0     0.4     10      2\n        0.6     1.0     9       0\n        0.0     0.5     9       1\n        0.8     1.0     8       10\n        0.2     0.8     8       9,10\n        0.0     0.2     8       9\n        0.7     1.0     7       8\n        0.0     0.2     7       10\n        0.8     1.0     6       9\n        0.0     0.7     6       8\n        0.4     1.0     5       2,7\n        0.1     0.4     5       7\n        0.6     0.9     4       6\n        0.0     0.6     4       0,6\n        0.9     1.0     3       4,5,6\n        0.1     0.9     3       4,5\n        0.0     0.1     3       4,5,7\n        \"\"\"\n        )\n        sites = io.StringIO(\n            \"\"\"\\\n        position    ancestral_state\n        0.05        0\n        0.15        0\n        0.25        0\n        0.4         0\n        \"\"\"\n        )\n        mutations = io.StringIO(\n            \"\"\"\\\n        site    node    derived_state   parent\n        0       7       1               -1\n        0      10       0               0\n        0       2       1               1\n        1       0       1               -1\n        1      10       1               -1\n        2       8       1               -1\n        2       9       0            
   5\n        2      10       0               5\n        2       2       1               7\n        3       8       1               -1\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes, edges, sites, mutations, strict=False)\n        tree_dicts = [t.parent_dict for t in ts.trees()]\n        assert ts.sample_size == 3\n        assert ts.num_trees == len(true_trees)\n        assert ts.num_nodes == 11\n        assert len(list(ts.edge_diffs())) == ts.num_trees\n        # check topologies agree:\n        for a, t in zip(true_trees, tree_dicts):\n            for k in a.keys():\n                if k in t.keys():\n                    assert t[k] == a[k]\n                else:\n                    assert a[k] == tskit.NULL\n        for j, x in enumerate(ts.haplotypes()):\n            assert x == true_haplotypes[j]\n        self.verify_simplify_topology(ts, [0, 1, 2], haplotypes=True)\n        self.verify_simplify_topology(ts, [1, 0, 2], haplotypes=True)\n        self.verify_simplify_topology(ts, [0, 1], haplotypes=False)\n        self.verify_simplify_topology(ts, [1, 2], haplotypes=False)\n        self.verify_simplify_topology(ts, [2, 0], haplotypes=False)\n\n    def test_tricky_switches(self):\n        # suppose the topology has:\n        # left right parent child\n        #  0.0   0.5      6      0,1\n        #  0.5   1.0      6      4,5\n        #  0.0   0.4      7      2,3\n        #\n        # --------------------------\n        #\n        #        12         .        12         .        12         .\n        #       /  \\        .       /  \\        .       /  \\        .\n        #     11    \\       .      /    \\       .      /    \\       .\n        #     / \\    \\      .     /     10      .     /     10      .\n        #    /   \\    \\     .    /     /  \\     .    /     /  \\     .\n        #   6     7    8    .   6     9    8    .   6     9    8    .\n        #  / \\   / \\   /\\   .  / \\   / \\   /\\   .  
/ \\   / \\   /\\   .\n        # 0   1 2   3 4  5  . 0   1 2   3 4  5  . 4   5 2   3 0  1  .\n        #                   .                   .                   .\n        # 0.0              0.4                 0.5                 1.0\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       1           0\n        3       1           0\n        4       1           0\n        5       1           0\n        6       0           1\n        7       0           1\n        8       0           1\n        9       0           1\n        10      0           2\n        11      0           3\n        12      0           4\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left right parent child\n        0.0  0.5   6      0\n        0.0  0.5   6      1\n        0.5  1.0   6      4\n        0.5  1.0   6      5\n        0.0  0.4   7      2,3\n        0.5  1.0   8      0\n        0.5  1.0   8      1\n        0.0  0.5   8      4\n        0.0  0.5   8      5\n        0.4  1.0   9      2,3\n        0.4  1.0   10     8,9\n        0.0  0.4   11     6,7\n        0.4  1.0   12     6\n        0.0  0.4   12     8\n        0.4  1.0   12     10\n        0.0  0.4   12     11\n        \"\"\"\n        )\n        true_trees = [\n            {\n                0: 6,\n                1: 6,\n                2: 7,\n                3: 7,\n                4: 8,\n                5: 8,\n                6: 11,\n                7: 11,\n                8: 12,\n                9: -1,\n                10: -1,\n                11: 12,\n                12: -1,\n            },\n            {\n                0: 6,\n                1: 6,\n                2: 9,\n                3: 9,\n                4: 8,\n                5: 8,\n                6: 12,\n                7: -1,\n                8: 10,\n                9: 10,\n                10: 12,\n                11: 
-1,\n                12: -1,\n            },\n            {\n                0: 8,\n                1: 8,\n                2: 9,\n                3: 9,\n                4: 6,\n                5: 6,\n                6: 12,\n                7: -1,\n                8: 10,\n                9: 10,\n                10: 12,\n                11: -1,\n                12: -1,\n            },\n        ]\n        ts = tskit.load_text(nodes, edges, strict=False)\n        tree_dicts = [t.parent_dict for t in ts.trees()]\n        assert ts.sample_size == 6\n        assert ts.num_trees == len(true_trees)\n        assert ts.num_nodes == 13\n        assert len(list(ts.edge_diffs())) == ts.num_trees\n        # check topologies agree:\n        for a, t in zip(true_trees, tree_dicts):\n            for k in a.keys():\n                if k in t.keys():\n                    assert t[k] == a[k]\n                else:\n                    assert a[k] == tskit.NULL\n        self.verify_simplify_topology(ts, [0, 2])\n        self.verify_simplify_topology(ts, [0, 4])\n        self.verify_simplify_topology(ts, [2, 4])\n\n    def test_tricky_simplify(self):\n        # Continue as above but invoke simplify:\n        #\n        #         12         .          12         .\n        #        /  \\        .         /  \\        .\n        #      11    \\       .       11    \\       .\n        #      / \\    \\      .       / \\    \\      .\n        #    13   \\    \\     .      /  15    \\     .\n        #    / \\   \\    \\    .     /   / \\    \\    .\n        #   6  14   7    8   .    6  14   7    8   .\n        #  / \\     / \\   /\\  .   / \\     / \\   /\\  .\n        # 0   1   2   3 4  5 .  0   1   2   3 4  5 .\n        #                    .                     .\n        # 0.0               0.1                   0.4\n        #\n        #  .        12         .        12         .\n        #  .       /  \\        .       /  \\        .\n        #  .      
/    \\       .\n        #  .     /     10      .     /     10      .\n        #  .    /     /  \\     .    /     /  \\     .\n        #  .   6     9    8    .   6     9    8    .\n        #  .  / \\   / \\   /\\   .  / \\   / \\   /\\   .\n        #  . 0   1 2   3 4  5  . 4   5 2   3 0  1  .\n        #  .                   .                   .\n        # 0.4                 0.5                 1.0\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       1           0\n        3       1           0\n        4       1           0\n        5       1           0\n        6       0           1\n        7       0           1\n        8       0           1\n        9       0           1\n        10      0           2\n        11      0           3\n        12      0           4\n        13      0           2\n        14      0           1\n        15      0           2\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left right parent child\n        0.0  0.5   6      0,1\n        0.5  1.0   6      4,5\n        0.0  0.4   7      2,3\n        0.0  0.5   8      4,5\n        0.5  1.0   8      0,1\n        0.4  1.0   9      2,3\n        0.4  1.0   10     8,9\n        0.0  0.1   13     6,14\n        0.1  0.4   15     7,14\n        0.0  0.1   11     7,13\n        0.1  0.4   11     6,15\n        0.0  0.4   12     8,11\n        0.4  1.0   12     6,10\n        \"\"\"\n        )\n        true_trees = [\n            {\n                0: 6,\n                1: 6,\n                2: 7,\n                3: 7,\n                4: 8,\n                5: 8,\n                6: 11,\n                7: 11,\n                8: 12,\n                9: -1,\n                10: -1,\n                11: 12,\n                12: -1,\n            },\n            {\n                0: 6,\n                1: 6,\n                2: 9,\n                
3: 9,\n                4: 8,\n                5: 8,\n                6: 12,\n                7: -1,\n                8: 10,\n                9: 10,\n                10: 12,\n                11: -1,\n                12: -1,\n            },\n            {\n                0: 8,\n                1: 8,\n                2: 9,\n                3: 9,\n                4: 6,\n                5: 6,\n                6: 12,\n                7: -1,\n                8: 10,\n                9: 10,\n                10: 12,\n                11: -1,\n                12: -1,\n            },\n        ]\n        big_ts = tskit.load_text(nodes, edges, strict=False)\n        assert big_ts.num_trees == 1 + len(true_trees)\n        assert big_ts.num_nodes == 16\n        ts, node_map = big_ts.simplify(map_nodes=True)\n        assert list(node_map[:6]) == list(range(6))\n        assert ts.sample_size == 6\n        assert ts.num_nodes == 13\n\n    def test_ancestral_samples(self):\n        # Check that specifying samples to be not at time 0.0 works.\n        #\n        # 1.0             7\n        # 0.7            / \\                      8                     6\n        #               /   \\                    / \\                   / \\\n        # 0.5          /     5                  /   5                 /   5\n        #             /     / \\                /   / \\               /   / \\\n        # 0.4        /     /   4              /   /   4             /   /   4\n        #           /     /   / \\            /   /   / \\           /   /   / \\\n        # 0.2      /     /   3   \\          3   /   /   \\         /   /   3   \\\n        #         /     /    *    \\         *  /   /     \\       /   /    *    \\\n        # 0.0    0     1           2          1   0       2     0   1           2\n        #              *           *          *           *         *           *\n        #          (0.0, 0.2),                 (0.2, 0.8),         (0.8, 1.0)\n        #\n        # 
Simplified, keeping [1,2,3]\n        #\n        # 1.0\n        # 0.7                                     5\n        #                                        / \\\n        # 0.5                4                  /   4                     4\n        #                   / \\                /   / \\                   / \\\n        # 0.4              /   3              /   /   3                 /   3\n        #                 /   / \\            /   /     \\               /   / \\\n        # 0.2            /   2   \\          2   /       \\             /   2   \\\n        #               /    *    \\         *  /         \\           /    *    \\\n        # 0.0          0           1          0           1         0           1\n        #              *           *          *           *         *           *\n        #          (0.0, 0.2),                 (0.2, 0.8),         (0.8, 1.0)\n\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       0           0\n        1       1           0\n        2       1           0\n        3       1           0.2\n        4       0           0.4\n        5       0           0.5\n        6       0           0.7\n        7       0           1.0\n        8       0           0.8\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.0     0.2     4       2,3\n        0.2     0.8     4       0,2\n        0.8     1.0     4       2,3\n        0.0     1.0     5       1,4\n        0.8     1.0     6       0,5\n        0.2     0.8     8       3,5\n        0.0     0.2     7       0,5\n        \"\"\"\n        )\n        first_ts = tskit.load_text(nodes=nodes, edges=edges, strict=False)\n        ts, node_map = first_ts.simplify(map_nodes=True)\n        true_trees = [\n            {0: 7, 1: 5, 2: 4, 3: 4, 4: 5, 5: 7, 6: -1, 7: -1},\n            {0: 4, 1: 5, 2: 4, 3: 8, 4: 5, 5: 8, 6: -1, 7: -1},\n            {0: 6, 1: 5, 2: 4, 
3: 4, 4: 5, 5: 6, 6: -1, 7: -1},\n        ]\n        # maps [1,2,3] -> [0,1,2]\n        assert node_map[1] == 0\n        assert node_map[2] == 1\n        assert node_map[3] == 2\n        true_simplified_trees = [\n            {0: 4, 1: 3, 2: 3, 3: 4},\n            {0: 4, 1: 4, 2: 5, 4: 5},\n            {0: 4, 1: 3, 2: 3, 3: 4},\n        ]\n        assert first_ts.sample_size == 3\n        assert ts.sample_size == 3\n        assert first_ts.num_trees == 3\n        assert ts.num_trees == 3\n        assert first_ts.num_nodes == 9\n        assert ts.num_nodes == 6\n        assert first_ts.node(3).time == 0.2\n        assert ts.node(2).time == 0.2\n        # check topologies agree:\n        tree_dicts = [t.parent_dict for t in first_ts.trees()]\n        for a, t in zip(true_trees, tree_dicts):\n            for k in a.keys():\n                if k in t.keys():\n                    assert t[k] == a[k]\n                else:\n                    assert a[k] == tskit.NULL\n        tree_simplified_dicts = [t.parent_dict for t in ts.trees()]\n        for a, t in zip(true_simplified_trees, tree_simplified_dicts):\n            for k in a.keys():\n                if k in t.keys():\n                    assert t[k] == a[k]\n                else:\n                    assert a[k] == tskit.NULL\n        # check .simplify() works here\n        self.verify_simplify_topology(first_ts, [1, 2, 3])\n\n    def test_all_ancestral_samples(self):\n        # Check that specifying samples all to be not at time 0.0 works.\n        #\n        # 1.0             7\n        # 0.7            / \\                      8                     6\n        #               /   \\                    / \\                   / \\\n        # 0.5          /     5                  /   5                 /   5\n        #             /     / \\                /   / \\               /   / \\\n        # 0.4        /     /   4              /   /   4             /   /   4\n        #           /     /   / \\            /   
/   / \\           /   /   / \\\n        # 0.2      /     /   3   \\          3   /   /   \\         /   /   3   \\\n        #         /     1    *    2         *  1   /     2       /   1    *    2\n        # 0.0    0      *         *            *  0      *      0    *         *\n        #\n        #          (0.0, 0.2),                 (0.2, 0.8),         (0.8, 1.0)\n\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       0           0\n        1       1           0.1\n        2       1           0.1\n        3       1           0.2\n        4       0           0.4\n        5       0           0.5\n        6       0           0.7\n        7       0           1.0\n        8       0           0.8\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.0     0.2     4       2,3\n        0.2     0.8     4       0,2\n        0.8     1.0     4       2,3\n        0.0     1.0     5       1,4\n        0.8     1.0     6       0,5\n        0.2     0.8     8       3,5\n        0.0     0.2     7       0,5\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes=nodes, edges=edges, strict=False)\n        true_trees = [\n            {0: 7, 1: 5, 2: 4, 3: 4, 4: 5, 5: 7, 6: -1, 7: -1},\n            {0: 4, 1: 5, 2: 4, 3: 8, 4: 5, 5: 8, 6: -1, 7: -1},\n            {0: 6, 1: 5, 2: 4, 3: 4, 4: 5, 5: 6, 6: -1, 7: -1},\n        ]\n        assert ts.sample_size == 3\n        assert ts.num_trees == 3\n        assert ts.num_nodes == 9\n        assert ts.node(0).time == 0.0\n        assert ts.node(1).time == 0.1\n        assert ts.node(2).time == 0.1\n        assert ts.node(3).time == 0.2\n        # check topologies agree:\n        tree_dicts = [t.parent_dict for t in ts.trees()]\n        for a, t in zip(true_trees, tree_dicts):\n            for k in a.keys():\n                if k in t.keys():\n                    assert t[k] == a[k]\n                else:\n        
            assert a[k] == tskit.NULL\n        # check .simplify() works here\n        self.verify_simplify_topology(ts, [1, 2, 3])\n\n    def test_internal_sampled_node(self):\n        # 1.0             7\n        # 0.7            / \\                      8                     6\n        #               /   \\                    / \\                   / \\\n        # 0.5          /     5                  /   5                 /   5\n        #             /     /*\\                /   /*\\               /   /*\\\n        # 0.4        /     /   4              /   /   4             /   /   4\n        #           /     /   / \\            /   /   / \\           /   /   / \\\n        # 0.2      /     /   3   \\          3   /   /   \\         /   /   3   \\\n        #         /     1    *    2         *  1   /     2       /   1    *    2\n        # 0.0    0      *         *            *  0      *      0    *         *\n        #\n        #          (0.0, 0.2),                 (0.2, 0.8),         (0.8, 1.0)\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       0           0\n        1       1           0.1\n        2       1           0.1\n        3       1           0.2\n        4       0           0.4\n        5       1           0.5\n        6       0           0.7\n        7       0           1.0\n        8       0           0.8\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.0     0.2     4       2,3\n        0.2     0.8     4       0,2\n        0.8     1.0     4       2,3\n        0.0     1.0     5       1,4\n        0.8     1.0     6       0,5\n        0.2     0.8     8       3,5\n        0.0     0.2     7       0,5\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes=nodes, edges=edges, strict=False)\n        true_trees = [\n            {0: 7, 1: 5, 2: 4, 3: 4, 4: 5, 5: 7, 6: -1, 7: -1},\n            {0: 4, 1: 5, 2: 4, 3: 
8, 4: 5, 5: 8, 6: -1, 7: -1},\n            {0: 6, 1: 5, 2: 4, 3: 4, 4: 5, 5: 6, 6: -1, 7: -1},\n        ]\n        assert ts.sample_size == 4\n        assert ts.num_trees == 3\n        assert ts.num_nodes == 9\n        assert ts.node(0).time == 0.0\n        assert ts.node(1).time == 0.1\n        assert ts.node(2).time == 0.1\n        assert ts.node(3).time == 0.2\n        # check topologies agree:\n        tree_dicts = [t.parent_dict for t in ts.trees()]\n        for a, t in zip(true_trees, tree_dicts):\n            for k in a.keys():\n                if k in t.keys():\n                    assert t[k] == a[k]\n                else:\n                    assert a[k] == tskit.NULL\n        # check .simplify() works here\n        self.verify_simplify_topology(ts, [1, 2, 3])\n        self.check_num_samples(\n            ts,\n            [\n                (0, 5, 4),\n                (0, 2, 1),\n                (0, 7, 4),\n                (0, 4, 2),\n                (1, 4, 1),\n                (1, 5, 3),\n                (1, 8, 4),\n                (1, 0, 0),\n                (2, 5, 4),\n                (2, 1, 1),\n            ],\n        )\n        self.check_num_tracked_samples(\n            ts,\n            [1, 2, 5],\n            [\n                (0, 5, 3),\n                (0, 2, 1),\n                (0, 7, 3),\n                (0, 4, 1),\n                (1, 4, 1),\n                (1, 5, 3),\n                (1, 8, 3),\n                (1, 0, 0),\n                (2, 5, 3),\n                (2, 1, 1),\n            ],\n        )\n        self.check_sample_iterator(\n            ts,\n            [\n                (0, 0, []),\n                (0, 5, [5, 1, 2, 3]),\n                (0, 4, [2, 3]),\n                (1, 5, [5, 1, 2]),\n                (2, 4, [2, 3]),\n            ],\n        )\n        # pedantically check the Tree methods on the second tree\n        tst = ts.trees()\n        t = next(tst)\n        t = next(tst)\n        assert t.branch_length(1) == 
0.4\n        assert not t.is_internal(0)\n        assert t.is_leaf(0)\n        assert not t.is_sample(0)\n        assert not t.is_internal(1)\n        assert t.is_leaf(1)\n        assert t.is_sample(1)\n        assert t.is_internal(5)\n        assert not t.is_leaf(5)\n        assert t.is_sample(5)\n        assert t.is_internal(4)\n        assert not t.is_leaf(4)\n        assert not t.is_sample(4)\n        assert t.root == 8\n        assert t.mrca(0, 1) == 5\n        assert t.sample_size == 4\n\n\nclass TestBadTrees:\n    \"\"\"\n    Tests for bad tree sequence topologies that can only be detected when we\n    try to create trees.\n    \"\"\"\n\n    def test_simplest_contradictory_children(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       0           1\n        3       0           2\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.0     1.0     2       0\n        0.0     1.0     3       0\n        \"\"\"\n        )\n        with pytest.raises(_tskit.LibraryError):\n            tskit.load_text(nodes=nodes, edges=edges, strict=False)\n\n    def test_partial_overlap_contradictory_children(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       0           1\n        3       0           2\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.0     1.0     2       0,1\n        0.5     1.0     3       0\n        \"\"\"\n        )\n        with pytest.raises(_tskit.LibraryError):\n            tskit.load_text(nodes=nodes, edges=edges, strict=False)\n\n\nclass TestCoiteration:\n    \"\"\"\n    Test ability to iterate over multiple (currently 2) tree sequences simultaneously\n    \"\"\"\n\n 
   def test_identical_ts(self):\n        ts = msprime.simulate(4, recombination_rate=1, random_seed=123)\n        assert ts.num_trees > 1\n        total_iterations = 0\n        for tree, (_, t1, t2) in zip(ts.trees(), ts.coiterate(ts)):\n            total_iterations += 1\n            assert tree == t1 == t2\n        assert ts.num_trees == total_iterations\n\n    def test_intervals(self):\n        ts1 = msprime.simulate(4, recombination_rate=1, random_seed=1)\n        assert ts1.num_trees > 1\n        one_tree_ts = msprime.simulate(5, random_seed=2)\n        multi_tree_ts = msprime.simulate(5, recombination_rate=1, random_seed=2)\n        assert multi_tree_ts.num_trees > 1\n        for ts2 in (one_tree_ts, multi_tree_ts):\n            bp1 = set(ts1.breakpoints())\n            bp2 = set(ts2.breakpoints())\n            assert bp1 != bp2\n            breaks = set()\n            for interval, t1, t2 in ts1.coiterate(ts2):\n                assert set(interval) <= set(t1.interval) | set(t2.interval)\n                breaks.add(interval.left)\n                breaks.add(interval.right)\n                assert t1.tree_sequence == ts1\n                assert t2.tree_sequence == ts2\n            assert breaks == bp1 | bp2\n\n    def test_simple_ts(self):\n        nodes = \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       1           0\n        3       0           1\n        4       0           2\n        \"\"\"\n        edges1 = \"\"\"\\\n        left    right   parent  child\n        0       0.2       3       0,1\n        0       0.2       4       2,3\n        0.2     1         3       2,1\n        0.2     1         4       0,3\n        \"\"\"\n        edges2 = \"\"\"\\\n        left    right   parent  child\n        0       0.8       3       2,1\n        0       0.8       4       0,3\n        0.8     1         3       0,1\n        0.8     1         4       2,3\n        \"\"\"\n        ts1 = 
tskit.load_text(io.StringIO(nodes), io.StringIO(edges1), strict=False)\n        ts2 = tskit.load_text(io.StringIO(nodes), io.StringIO(edges2), strict=False)\n        coiterator = ts1.coiterate(ts2)\n        interval, tree1, tree2 = next(coiterator)\n        assert interval.left == 0\n        assert interval.right == 0.2\n        assert tree1 == ts1.at_index(0)\n        assert tree2 == ts2.at_index(0)\n        interval, tree1, tree2 = next(coiterator)\n        assert interval.left == 0.2\n        assert interval.right == 0.8\n        assert tree1 == ts1.at_index(1)\n        assert tree2 == ts2.at_index(0)\n        interval, tree1, tree2 = next(coiterator)\n        assert interval.left == 0.8\n        assert interval.right == 1\n        assert tree1 == ts1.at_index(1)\n        assert tree2 == ts2.at_index(1)\n\n    def test_nonequal_lengths(self):\n        ts1 = msprime.simulate(4, random_seed=1, length=2)\n        ts2 = msprime.simulate(4, random_seed=1)\n        with pytest.raises(ValueError, match=\"equal sequence length\"):\n            next(ts1.coiterate(ts2))\n\n    def test_kwargs(self):\n        ts = msprime.simulate(4, recombination_rate=1, random_seed=123)\n        for _, t1, t2 in ts.coiterate(ts):\n            assert t1.num_tracked_samples() == t2.num_tracked_samples() == 0\n        for _, t1, t2 in ts.coiterate(ts, tracked_samples=ts.samples()):\n            assert t1.num_tracked_samples() == t2.num_tracked_samples() == 4\n\n\ndef do_simplify(\n    ts,\n    samples=None,\n    compare_lib=True,\n    filter_sites=True,\n    filter_populations=True,\n    filter_individuals=True,\n    filter_nodes=True,\n    keep_unary=False,\n    keep_input_roots=False,\n    update_sample_flags=True,\n):\n    \"\"\"\n    Runs the Python test implementation of simplify.\n    \"\"\"\n    if samples is None:\n        samples = ts.samples()\n    s = tests.Simplifier(\n        ts,\n        samples,\n        filter_sites=filter_sites,\n        
filter_populations=filter_populations,\n        filter_individuals=filter_individuals,\n        filter_nodes=filter_nodes,\n        keep_unary=keep_unary,\n        keep_input_roots=keep_input_roots,\n        update_sample_flags=update_sample_flags,\n    )\n    new_ts, node_map = s.simplify()\n    if compare_lib:\n        sts, lib_node_map1 = ts.simplify(\n            samples,\n            filter_sites=filter_sites,\n            filter_individuals=filter_individuals,\n            filter_populations=filter_populations,\n            filter_nodes=filter_nodes,\n            update_sample_flags=update_sample_flags,\n            keep_unary=keep_unary,\n            keep_input_roots=keep_input_roots,\n            map_nodes=True,\n        )\n        lib_tables1 = sts.dump_tables()\n\n        py_tables = new_ts.dump_tables()\n        # Compare all tables except mutations\n        py_tables_no_mut = py_tables.copy()\n        lib_tables1_no_mut = lib_tables1.copy()\n        py_tables_no_mut.mutations.clear()\n        lib_tables1_no_mut.mutations.clear()\n        py_tables_no_mut.assert_equals(lib_tables1_no_mut, ignore_provenance=True)\n\n        # For mutations, check functional equivalence by comparing mutation properties\n        # but handling parent relationships that may differ due to reordering\n        def normalize_time(time):\n            return -42.0 if tskit.is_unknown_time(time) else time\n\n        def mutation_signature(m, mutations):\n            # Create a signature that identifies a mutation by its properties\n            # and its parent's properties (to handle parent ID remapping)\n            def make_hashable(metadata):\n                # Convert unhashable metadata (like dicts) to hashable form\n                if isinstance(metadata, dict):\n                    return tuple(sorted(metadata.items()))\n                elif isinstance(metadata, list):\n                    return tuple(metadata)\n                else:\n                    return metadata\n\n 
           parent_sig = None\n            if m.parent != -1 and m.parent < len(mutations):\n                parent = mutations[m.parent]\n                parent_sig = (\n                    parent.site,\n                    parent.node,\n                    parent.derived_state,\n                    make_hashable(parent.metadata),\n                    normalize_time(parent.time),\n                )\n            return (\n                m.site,\n                m.node,\n                m.derived_state,\n                make_hashable(m.metadata),\n                normalize_time(m.time),\n                parent_sig,\n            )\n\n        py_mut_sigs = {\n            mutation_signature(m, py_tables.mutations) for m in py_tables.mutations\n        }\n        lib_mut_sigs = {\n            mutation_signature(m, lib_tables1.mutations) for m in lib_tables1.mutations\n        }\n\n        assert py_mut_sigs == lib_mut_sigs\n        assert all(node_map == lib_node_map1)\n    return new_ts, node_map\n\n\nclass SimplifyTestBase:\n    \"\"\"\n    Base class for simplify tests.\n    \"\"\"\n\n\nclass TestSimplify(SimplifyTestBase):\n    \"\"\"\n    Tests that the implementations of simplify() do what they are supposed to.\n    \"\"\"\n\n    random_seed = 23\n    #\n    #          8\n    #         / \\\n    #        /   \\\n    #       /     \\\n    #      7       \\\n    #     / \\       6\n    #    /   5     / \\\n    #   /   / \\   /   \\\n    #  4   0   1 2     3\n    small_tree_ex_nodes = \"\"\"\\\n    id      is_sample   population      time\n    0       1       0               0.00000000000000\n    1       1       0               0.00000000000000\n    2       1       0               0.00000000000000\n    3       1       0               0.00000000000000\n    4       1       0               0.00000000000000\n    5       0       0               0.14567111023387\n    6       0       0               0.21385545626353\n    7       0       0               0.43508024345063\n    
8       0       0               1.60156352971203\n    \"\"\"\n    small_tree_ex_edges = \"\"\"\\\n    id      left            right           parent  child\n    0       0.00000000      1.00000000      5       0,1\n    1       0.00000000      1.00000000      6       2,3\n    2       0.00000000      1.00000000      7       4,5\n    3       0.00000000      1.00000000      8       6,7\n    \"\"\"\n\n    def verify_no_samples(self, ts, keep_unary=False):\n        \"\"\"\n        Zero out the flags column and verify that we get back the correct\n        tree sequence when we run simplify.\n        \"\"\"\n        t1 = ts.dump_tables()\n        t1.nodes.flags = np.zeros_like(t1.nodes.flags)\n        ts1, node_map1 = do_simplify(ts, samples=ts.samples(), keep_unary=keep_unary)\n        t1 = ts1.dump_tables()\n        ts2, node_map2 = do_simplify(ts, keep_unary=keep_unary)\n        t2 = ts2.dump_tables()\n        t1.assert_equals(t2)\n\n    def verify_single_childified(self, ts, keep_unary=False):\n        \"\"\"\n        Modify the specified tree sequence so that it has lots of unary\n        nodes. Run simplify and verify we get the same tree sequence back\n        if keep_unary is False. 
If keep_unary is True, the simplification\n        won't do anything to the original tree sequence.\n        \"\"\"\n        ts_single = tsutil.single_childify(ts)\n\n        tss, node_map = do_simplify(ts_single, keep_unary=keep_unary)\n        # All original nodes should still be present.\n        for u in range(ts.num_samples):\n            assert u == node_map[u]\n        # All introduced nodes should be mapped to null.\n        for u in range(ts.num_samples, ts_single.num_samples):\n            assert node_map[u] == tskit.NULL\n        t1 = ts.dump_tables()\n        t2 = tss.dump_tables()\n        t3 = ts_single.dump_tables()\n        if keep_unary:\n            assert set(t3.nodes.time) == set(t2.nodes.time)\n            assert len(t3.edges) == len(t2.edges)\n            assert t3.sites == t2.sites\n            assert len(t3.mutations) == len(t2.mutations)\n        else:\n            assert t1.nodes == t2.nodes\n            assert t1.edges == t2.edges\n            assert t1.sites == t2.sites\n            assert t1.mutations == t2.mutations\n\n    def verify_multiroot_internal_samples(self, ts, keep_unary=False):\n        ts_multiroot = ts.decapitate(np.max(ts.tables.nodes.time) / 2)\n        ts1 = tsutil.jiggle_samples(ts_multiroot)\n        ts2, node_map = do_simplify(ts1, keep_unary=keep_unary)\n        assert ts1.num_trees >= ts2.num_trees\n        trees2 = ts2.trees()\n        t2 = next(trees2)\n        for t1 in ts1.trees():\n            assert t2.interval.left <= t1.interval.left\n            assert t2.interval.right >= t1.interval.right\n            pairs = itertools.combinations(ts1.samples(), 2)\n            for pair in pairs:\n                mapped_pair = [node_map[u] for u in pair]\n                mrca1 = t1.get_mrca(*pair)\n                mrca2 = t2.get_mrca(*mapped_pair)\n                if mrca1 == tskit.NULL:\n                    assert mrca2 == tskit.NULL\n                else:\n                    assert node_map[mrca1] == mrca2\n            
if t2.interval.right == t1.interval.right:\n                t2 = next(trees2, None)\n\n    def test_single_tree(self):\n        ts = msprime.simulate(10, random_seed=self.random_seed)\n        self.verify_no_samples(ts)\n        self.verify_single_childified(ts)\n        self.verify_multiroot_internal_samples(ts)\n        # Now with keep_unary=True.\n        self.verify_no_samples(ts, keep_unary=True)\n        self.verify_single_childified(ts, keep_unary=True)\n        self.verify_multiroot_internal_samples(ts, keep_unary=True)\n\n    def test_single_tree_mutations(self):\n        ts = msprime.simulate(10, mutation_rate=1, random_seed=self.random_seed)\n        assert ts.num_sites > 1\n        do_simplify(ts)\n        self.verify_single_childified(ts)\n        # Also with keep_unary == True.\n        do_simplify(ts, keep_unary=True)\n        self.verify_single_childified(ts, keep_unary=True)\n\n    def test_many_trees_mutations(self):\n        ts = msprime.simulate(\n            10, recombination_rate=1, mutation_rate=10, random_seed=self.random_seed\n        )\n        assert ts.num_trees > 2\n        assert ts.num_sites > 2\n        self.verify_no_samples(ts)\n        do_simplify(ts)\n        self.verify_single_childified(ts)\n        # Also with keep_unary == True.\n        do_simplify(ts, keep_unary=True)\n        self.verify_single_childified(ts, keep_unary=True)\n\n    def test_many_trees(self):\n        ts = msprime.simulate(5, recombination_rate=4, random_seed=self.random_seed)\n        assert ts.num_trees > 2\n        self.verify_no_samples(ts)\n        self.verify_single_childified(ts)\n        self.verify_multiroot_internal_samples(ts)\n        # Also with keep_unary == True.\n        self.verify_no_samples(ts, keep_unary=True)\n        self.verify_single_childified(ts, keep_unary=True)\n        self.verify_multiroot_internal_samples(ts, keep_unary=True)\n\n    def test_small_tree_internal_samples(self):\n        ts = tskit.load_text(\n            
nodes=io.StringIO(self.small_tree_ex_nodes),\n            edges=io.StringIO(self.small_tree_ex_edges),\n            strict=False,\n        )\n        tables = ts.dump_tables()\n        nodes = tables.nodes\n        flags = nodes.flags\n        # The parent of samples 0 and 1 is 5. Change this to an internal sample\n        # and set 0 and 1 to be unsampled.\n        flags[0] = 0\n        flags[0] = 0\n        flags[5] = tskit.NODE_IS_SAMPLE\n        nodes.flags = flags\n        ts = tables.tree_sequence()\n        assert ts.sample_size == 5\n        tss, node_map = do_simplify(ts, [3, 5])\n        assert node_map[3] == 0\n        assert node_map[5] == 1\n        assert tss.num_nodes == 3\n        assert tss.num_edges == 2\n        self.verify_no_samples(ts)\n        # with keep_unary == True\n        tss, node_map = do_simplify(ts, [3, 5], keep_unary=True)\n        assert node_map[3] == 0\n        assert node_map[5] == 1\n        assert tss.num_nodes == 5\n        assert tss.num_edges == 4\n        self.verify_no_samples(ts, keep_unary=True)\n\n    def test_small_tree_linear_samples(self):\n        ts = tskit.load_text(\n            nodes=io.StringIO(self.small_tree_ex_nodes),\n            edges=io.StringIO(self.small_tree_ex_edges),\n            strict=False,\n        )\n        tables = ts.dump_tables()\n        nodes = tables.nodes\n        flags = nodes.flags\n        # 7 is above 0. 
These are the only two samples\n        flags[:] = 0\n        flags[0] = tskit.NODE_IS_SAMPLE\n        flags[7] = tskit.NODE_IS_SAMPLE\n        nodes.flags = flags\n        ts = tables.tree_sequence()\n        assert ts.sample_size == 2\n        tss, node_map = do_simplify(ts, [0, 7])\n        assert node_map[0] == 0\n        assert node_map[7] == 1\n        assert tss.num_nodes == 2\n        assert tss.num_edges == 1\n        t = next(tss.trees())\n        assert t.parent_dict == {0: 1}\n        # with keep_unary == True\n        tss, node_map = do_simplify(ts, [0, 7], keep_unary=True)\n        assert node_map[0] == 0\n        assert node_map[7] == 1\n        assert tss.num_nodes == 4\n        assert tss.num_edges == 3\n        t = next(tss.trees())\n\n    def test_small_tree_internal_and_external_samples(self):\n        ts = tskit.load_text(\n            nodes=io.StringIO(self.small_tree_ex_nodes),\n            edges=io.StringIO(self.small_tree_ex_edges),\n            strict=False,\n        )\n        tables = ts.dump_tables()\n        nodes = tables.nodes\n        flags = nodes.flags\n        # 7 is above 0 and 1.\n        flags[:] = 0\n        flags[0] = tskit.NODE_IS_SAMPLE\n        flags[1] = tskit.NODE_IS_SAMPLE\n        flags[7] = tskit.NODE_IS_SAMPLE\n        nodes.flags = flags\n        ts = tables.tree_sequence()\n        assert ts.sample_size == 3\n        tss, node_map = do_simplify(ts, [0, 1, 7])\n        assert node_map[0] == 0\n        assert node_map[1] == 1\n        assert node_map[7] == 2\n        assert tss.num_nodes == 4\n        assert tss.num_edges == 3\n        t = next(tss.trees())\n        assert t.parent_dict == {0: 3, 1: 3, 3: 2}\n        # with keep_unary == True\n        tss, node_map = do_simplify(ts, [0, 1, 7], keep_unary=True)\n        assert node_map[0] == 0\n        assert node_map[1] == 1\n        assert node_map[7] == 2\n        assert tss.num_nodes == 5\n        assert tss.num_edges == 4\n        t = next(tss.trees())\n        
assert t.parent_dict == {0: 3, 1: 3, 3: 2, 2: 4}\n\n    def test_small_tree_mutations(self):\n        ts = tskit.load_text(\n            nodes=io.StringIO(self.small_tree_ex_nodes),\n            edges=io.StringIO(self.small_tree_ex_edges),\n            strict=False,\n        )\n        tables = ts.dump_tables()\n        # Add some simple mutations here above the nodes we're keeping.\n        tables.sites.add_row(position=0.25, ancestral_state=\"0\")\n        tables.sites.add_row(position=0.5, ancestral_state=\"0\")\n        tables.sites.add_row(position=0.75, ancestral_state=\"0\")\n        tables.sites.add_row(position=0.8, ancestral_state=\"0\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"1\")\n        tables.mutations.add_row(site=1, node=2, derived_state=\"1\")\n        tables.mutations.add_row(site=2, node=7, derived_state=\"1\")\n        tables.mutations.add_row(site=3, node=0, derived_state=\"1\")\n        ts = tables.tree_sequence()\n        assert ts.num_sites == 4\n        assert ts.num_mutations == 4\n        for keep in [True, False]:\n            tss = do_simplify(ts, [0, 2], keep_unary=keep)[0]\n            assert tss.sample_size == 2\n            assert tss.num_mutations == 4\n            assert list(tss.haplotypes()) == [\"1011\", \"0100\"]\n\n    def test_small_tree_filter_zero_mutations(self):\n        ts = tskit.load_text(\n            nodes=io.StringIO(self.small_tree_ex_nodes),\n            edges=io.StringIO(self.small_tree_ex_edges),\n            strict=False,\n        )\n        ts = tsutil.insert_branch_sites(ts)\n        assert ts.num_sites == 8\n        assert ts.num_mutations == 8\n        for keep in [True, False]:\n            tss, _ = do_simplify(ts, [4, 0, 1], filter_sites=True, keep_unary=keep)\n            assert tss.num_sites == 5\n            assert tss.num_mutations == 5\n            tss, _ = do_simplify(ts, [4, 0, 1], filter_sites=False, keep_unary=keep)\n            assert tss.num_sites == 8\n            
assert tss.num_mutations == 5\n\n    def test_small_tree_fixed_sites(self):\n        ts = tskit.load_text(\n            nodes=io.StringIO(self.small_tree_ex_nodes),\n            edges=io.StringIO(self.small_tree_ex_edges),\n            strict=False,\n        )\n        tables = ts.dump_tables()\n        # Add some simple mutations that will be fixed after simplify\n        tables.sites.add_row(position=0.25, ancestral_state=\"0\")\n        tables.sites.add_row(position=0.5, ancestral_state=\"0\")\n        tables.sites.add_row(position=0.75, ancestral_state=\"0\")\n        tables.mutations.add_row(site=0, node=2, derived_state=\"1\")\n        tables.mutations.add_row(site=1, node=3, derived_state=\"1\")\n        tables.mutations.add_row(site=2, node=6, derived_state=\"1\")\n        ts = tables.tree_sequence()\n        assert ts.num_sites == 3\n        assert ts.num_mutations == 3\n        for keep in [True, False]:\n            tss, _ = do_simplify(ts, [4, 1], keep_unary=keep)\n            assert tss.sample_size == 2\n            assert tss.num_mutations == 0\n            assert list(tss.haplotypes()) == [\"\", \"\"]\n\n    def test_small_tree_mutations_over_root(self):\n        ts = tskit.load_text(\n            nodes=io.StringIO(self.small_tree_ex_nodes),\n            edges=io.StringIO(self.small_tree_ex_edges),\n            strict=False,\n        )\n        tables = ts.dump_tables()\n        tables.sites.add_row(position=0.25, ancestral_state=\"0\")\n        tables.mutations.add_row(site=0, node=8, derived_state=\"1\")\n        ts = tables.tree_sequence()\n        assert ts.num_sites == 1\n        assert ts.num_mutations == 1\n        for keep_unary, filter_sites in itertools.product([True, False], repeat=2):\n            tss, _ = do_simplify(\n                ts, [0, 1], filter_sites=filter_sites, keep_unary=keep_unary\n            )\n            assert tss.num_sites == 1\n            assert tss.num_mutations == 1\n\n    def 
test_small_tree_recurrent_mutations(self):\n        ts = tskit.load_text(\n            nodes=io.StringIO(self.small_tree_ex_nodes),\n            edges=io.StringIO(self.small_tree_ex_edges),\n            strict=False,\n        )\n        tables = ts.dump_tables()\n        # Add recurrent mutation on the root branches\n        tables.sites.add_row(position=0.25, ancestral_state=\"0\")\n        tables.mutations.add_row(site=0, node=6, derived_state=\"1\")\n        tables.mutations.add_row(site=0, node=7, derived_state=\"1\")\n        ts = tables.tree_sequence()\n        assert ts.num_sites == 1\n        assert ts.num_mutations == 2\n        for keep in [True, False]:\n            tss = do_simplify(ts, [4, 3], keep_unary=keep)[0]\n            assert tss.sample_size == 2\n            assert tss.num_sites == 1\n            assert tss.num_mutations == 2\n            assert list(tss.haplotypes()) == [\"1\", \"1\"]\n\n    def test_small_tree_back_mutations(self):\n        ts = tskit.load_text(\n            nodes=io.StringIO(self.small_tree_ex_nodes),\n            edges=io.StringIO(self.small_tree_ex_edges),\n            strict=False,\n        )\n        tables = ts.dump_tables()\n        # Add a chain of mutations\n        tables.sites.add_row(position=0.25, ancestral_state=\"0\")\n        tables.mutations.add_row(site=0, node=7, derived_state=\"1\")\n        tables.mutations.add_row(site=0, node=5, derived_state=\"0\")\n        tables.mutations.add_row(site=0, node=1, derived_state=\"1\")\n        tables.compute_mutation_parents()\n        ts = tables.tree_sequence()\n        assert ts.num_sites == 1\n        assert ts.num_mutations == 3\n        assert list(ts.haplotypes()) == [\"0\", \"1\", \"0\", \"0\", \"1\"]\n        # First check if we simplify for all samples and keep original state.\n        for keep in [True, False]:\n            tss = do_simplify(ts, [0, 1, 2, 3, 4], keep_unary=keep)[0]\n            assert tss.sample_size == 5\n            assert tss.num_sites == 
1\n            assert tss.num_mutations == 3\n            assert list(tss.haplotypes()) == [\"0\", \"1\", \"0\", \"0\", \"1\"]\n\n        # The ancestral state above 5 should be 0.\n        for keep in [True, False]:\n            tss = do_simplify(ts, [0, 1], keep_unary=keep)[0]\n            assert tss.sample_size == 2\n            assert tss.num_sites == 1\n            assert tss.num_mutations == 3\n            assert list(tss.haplotypes()) == [\"0\", \"1\"]\n\n        # The ancestral state above 7 should be 1.\n        for keep in [True, False]:\n            tss = do_simplify(ts, [4, 0, 1], keep_unary=keep)[0]\n            assert tss.sample_size == 3\n            assert tss.num_sites == 1\n            assert tss.num_mutations == 3\n            assert list(tss.haplotypes()) == [\"1\", \"0\", \"1\"]\n\n    def test_overlapping_unary_edges(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       0           1\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       2       2       0\n        1       3       2       1\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes, edges, strict=False)\n        assert ts.sample_size == 2\n        assert ts.num_trees == 3\n        assert ts.sequence_length == 3\n        for keep in [True, False]:\n            tss, node_map = do_simplify(ts, samples=[0, 1, 2], keep_unary=keep)\n            assert list(node_map) == [0, 1, 2]\n            trees = [{0: 2}, {0: 2, 1: 2}, {1: 2}]\n            for t in tss.trees():\n                assert t.parent_dict == trees[t.index]\n\n    def test_overlapping_unary_edges_internal_samples(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       1           1\n        \"\"\"\n        )\n 
       edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       2       2       0\n        1       3       2       1\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes, edges, strict=False)\n        assert ts.sample_size == 3\n        assert ts.num_trees == 3\n        trees = [{0: 2}, {0: 2, 1: 2}, {1: 2}]\n        for t in ts.trees():\n            assert t.parent_dict == trees[t.index]\n        tss, node_map = do_simplify(ts)\n        assert list(node_map) == [0, 1, 2]\n\n    def test_isolated_samples(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           1\n        2       1           2\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes, edges, sequence_length=1, strict=False)\n        assert ts.num_samples == 3\n        assert ts.num_trees == 1\n        assert ts.num_nodes == 3\n        for keep in [True, False]:\n            tss, node_map = do_simplify(ts, keep_unary=keep)\n            assert ts.tables.nodes == tss.tables.nodes\n            assert ts.tables.edges == tss.tables.edges\n            assert list(node_map) == [0, 1, 2]\n\n    def test_internal_samples(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   population      time\n        0       1       -1              1.00000000000000\n        1       0       -1              1.00000000000000\n        2       1       -1              1.00000000000000\n        3       0       -1              1.31203521181726\n        4       0       -1              2.26776380586006\n        5       1       -1              0.00000000000000\n        6       0       -1              0.50000000000000\n        7       0       -1              1.50000000000000\n\n        \"\"\"\n        )\n        edges = io.StringIO(\n  
          \"\"\"\\\n        id      left            right           parent  child\n        0       0.62185118      1.00000000      1       6\n        1       0.00000000      0.62185118      2       6\n        2       0.00000000      1.00000000      3       0,2\n        3       0.00000000      1.00000000      4       7,3\n        4       0.00000000      1.00000000      6       5\n        5       0.00000000      1.00000000      7       1\n        \"\"\"\n        )\n\n        ts = tskit.load_text(nodes, edges, strict=False)\n        tss, node_map = do_simplify(ts, [5, 2, 0])\n        assert node_map[0] == 2\n        assert node_map[1] == -1\n        assert node_map[2] == 1\n        assert node_map[3] == 3\n        assert node_map[4] == 4\n        assert node_map[5] == 0\n        assert node_map[6] == -1\n        assert node_map[7] == -1\n        assert tss.sample_size == 3\n        assert tss.num_trees == 2\n        trees = [{0: 1, 1: 3, 2: 3}, {0: 4, 1: 3, 2: 3, 3: 4}]\n        for t in tss.trees():\n            assert t.parent_dict == trees[t.index]\n        # with keep_unary == True\n        tss, node_map = do_simplify(ts, [5, 2, 0], keep_unary=True)\n        assert node_map[0] == 2\n        assert node_map[1] == 4\n        assert node_map[2] == 1\n        assert node_map[3] == 5\n        assert node_map[4] == 7\n        assert node_map[5] == 0\n        assert node_map[6] == 3\n        assert node_map[7] == 6\n        assert tss.sample_size == 3\n        assert tss.num_trees == 2\n        trees = [\n            {0: 3, 1: 5, 2: 5, 3: 1, 5: 7},\n            {0: 3, 1: 5, 2: 5, 3: 4, 4: 6, 5: 7, 6: 7},\n        ]\n        for t in tss.trees():\n            assert t.parent_dict == trees[t.index]\n\n    def test_many_mutations_over_single_sample_ancestral_state(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       0           1\n        \"\"\"\n        )\n        edges = io.StringIO(\n 
           \"\"\"\\\n        left    right   parent  child\n        0       1       1       0\n        \"\"\"\n        )\n        sites = io.StringIO(\n            \"\"\"\\\n        position    ancestral_state\n        0           0\n        \"\"\"\n        )\n        mutations = io.StringIO(\n            \"\"\"\\\n        site    node    derived_state   parent\n        0       0       1               -1\n        0       0       0               0\n        \"\"\"\n        )\n        ts = tskit.load_text(\n            nodes, edges, sites=sites, mutations=mutations, strict=False\n        )\n        assert ts.sample_size == 1\n        assert ts.num_trees == 1\n        assert ts.num_sites == 1\n        assert ts.num_mutations == 2\n        for keep in [True, False]:\n            tss, node_map = do_simplify(ts, keep_unary=keep)\n            assert tss.num_sites == 1\n            assert tss.num_mutations == 2\n            assert list(tss.haplotypes(isolated_as_missing=False)) == [\"0\"]\n\n    def test_many_mutations_over_single_sample_derived_state(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       0           1\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0       1       1       0\n        \"\"\"\n        )\n        sites = io.StringIO(\n            \"\"\"\\\n        position    ancestral_state\n        0           0\n        \"\"\"\n        )\n        mutations = io.StringIO(\n            \"\"\"\\\n        site    node    derived_state   parent\n        0       0       1               -1\n        0       0       0               0\n        0       0       1               1\n        \"\"\"\n        )\n        ts = tskit.load_text(\n            nodes, edges, sites=sites, mutations=mutations, strict=False\n        )\n        assert ts.sample_size == 1\n        assert ts.num_trees == 1\n        assert 
ts.num_sites == 1\n        assert ts.num_mutations == 3\n        for keep in [True, False]:\n            tss, node_map = do_simplify(ts, keep_unary=keep)\n            assert tss.num_sites == 1\n            assert tss.num_mutations == 3\n            assert list(tss.haplotypes(isolated_as_missing=False)) == [\"1\"]\n\n    def test_many_trees_filter_zero_mutations(self):\n        ts = msprime.simulate(5, recombination_rate=1, random_seed=10)\n        assert ts.num_trees > 3\n        ts = tsutil.insert_branch_sites(ts)\n        assert ts.num_sites == ts.num_mutations\n        assert ts.num_sites > ts.num_trees\n        for keep in [True, False]:\n            for filter_sites in [True, False]:\n                tss, _ = do_simplify(\n                    ts, samples=None, filter_sites=filter_sites, keep_unary=keep\n                )\n                assert ts.num_sites == tss.num_sites\n                assert ts.num_mutations == tss.num_mutations\n\n    def test_many_trees_filter_zero_multichar_mutations(self):\n        ts = msprime.simulate(5, recombination_rate=1, random_seed=10)\n        assert ts.num_trees > 3\n        ts = tsutil.insert_multichar_mutations(ts)\n        assert ts.num_sites == ts.num_trees\n        assert ts.num_mutations == ts.num_trees\n        for keep in [True, False]:\n            for filter_sites in [True, False]:\n                tss, _ = do_simplify(\n                    ts, samples=None, filter_sites=filter_sites, keep_unary=keep\n                )\n                assert ts.num_sites == tss.num_sites\n                assert ts.num_mutations == tss.num_mutations\n\n    def test_simple_population_filter(self):\n        ts = msprime.simulate(10, random_seed=2)\n        tables = ts.dump_tables()\n        tables.populations.add_row(metadata=b\"unreferenced\")\n        assert len(tables.populations) == 2\n        for keep in [True, False]:\n            tss, _ = do_simplify(\n                tables.tree_sequence(), filter_populations=True, 
keep_unary=keep\n            )\n            assert tss.num_populations == 1\n            tss, _ = do_simplify(\n                tables.tree_sequence(), filter_populations=False, keep_unary=keep\n            )\n            assert tss.num_populations == 2\n\n    def test_interleaved_populations_filter(self):\n        ts = msprime.simulate(\n            population_configurations=[\n                msprime.PopulationConfiguration(),\n                msprime.PopulationConfiguration(10),\n                msprime.PopulationConfiguration(),\n                msprime.PopulationConfiguration(),\n            ],\n            random_seed=2,\n        )\n        assert ts.num_populations == 4\n        tables = ts.dump_tables()\n        # Edit the populations so we can identify the rows.\n        tables.populations.clear()\n        for j in range(4):\n            tables.populations.add_row(metadata=bytes([j]))\n        ts = tables.tree_sequence()\n        id_map = np.array([-1, 0, -1, -1], dtype=np.int32)\n        for keep in [True, False]:\n            tss, _ = do_simplify(ts, filter_populations=True, keep_unary=keep)\n            assert tss.num_populations == 1\n            population = tss.population(0)\n            assert population.metadata == bytes([1])\n            assert np.array_equal(\n                id_map[ts.tables.nodes.population], tss.tables.nodes.population\n            )\n            tss, _ = do_simplify(ts, filter_populations=False, keep_unary=keep)\n            assert tss.num_populations == 4\n\n    def test_removed_node_population_filter(self):\n        tables = tskit.TableCollection(1)\n        tables.populations.add_row(metadata=bytes(0))\n        tables.populations.add_row(metadata=bytes(1))\n        tables.populations.add_row(metadata=bytes(2))\n        tables.nodes.add_row(flags=1, population=0)\n        # Because flags=0 here, this node will be simplified out and the node\n        # will disappear.\n        tables.nodes.add_row(flags=0, population=1)\n    
    tables.nodes.add_row(flags=1, population=2)\n        for keep in [True, False]:\n            tss, _ = do_simplify(\n                tables.tree_sequence(), filter_populations=True, keep_unary=keep\n            )\n            assert tss.num_nodes == 2\n            assert tss.num_populations == 2\n            assert tss.population(0).metadata == bytes(0)\n            assert tss.population(1).metadata == bytes(2)\n            assert tss.node(0).population == 0\n            assert tss.node(1).population == 1\n\n            tss, _ = do_simplify(\n                tables.tree_sequence(), filter_populations=False, keep_unary=keep\n            )\n            assert tss.tables.populations == tables.populations\n\n    def test_simple_individual_filter(self):\n        tables = tskit.TableCollection(1)\n        tables.individuals.add_row(flags=0)\n        tables.individuals.add_row(flags=1)\n        tables.nodes.add_row(flags=1, individual=0)\n        tables.nodes.add_row(flags=1, individual=0)\n        for keep in [True, False]:\n            tss, _ = do_simplify(\n                tables.tree_sequence(), filter_individuals=True, keep_unary=keep\n            )\n            assert tss.num_nodes == 2\n            assert tss.num_individuals == 1\n            assert tss.individual(0).flags == 0\n\n        tss, _ = do_simplify(tables.tree_sequence(), filter_individuals=False)\n        assert tss.tables.individuals == tables.individuals\n\n    def test_interleaved_individual_filter(self):\n        tables = tskit.TableCollection(1)\n        tables.individuals.add_row(flags=0)\n        tables.individuals.add_row(flags=1)\n        tables.individuals.add_row(flags=2)\n        tables.nodes.add_row(flags=1, individual=1)\n        tables.nodes.add_row(flags=1, individual=-1)\n        tables.nodes.add_row(flags=1, individual=1)\n        for keep in [True, False]:\n            tss, _ = do_simplify(\n                tables.tree_sequence(), filter_individuals=True, keep_unary=keep\n          
  )\n            assert tss.num_nodes == 3\n            assert tss.num_individuals == 1\n            assert tss.individual(0).flags == 1\n\n            tss, _ = do_simplify(\n                tables.tree_sequence(), filter_individuals=False, keep_unary=keep\n            )\n            assert tss.tables.individuals == tables.individuals\n\n    def test_removed_node_individual_filter(self):\n        tables = tskit.TableCollection(1)\n        tables.individuals.add_row(flags=0)\n        tables.individuals.add_row(flags=1)\n        tables.individuals.add_row(flags=2)\n        tables.nodes.add_row(flags=1, individual=0)\n        # Because flags=0 here, this node will be simplified out and the node\n        # will disappear.\n        tables.nodes.add_row(flags=0, individual=1)\n        tables.nodes.add_row(flags=1, individual=2)\n        for keep in [True, False]:\n            tss, _ = do_simplify(\n                tables.tree_sequence(), filter_individuals=True, keep_unary=keep\n            )\n            assert tss.num_nodes == 2\n            assert tss.num_individuals == 2\n            assert tss.individual(0).flags == 0\n            assert tss.individual(1).flags == 2\n            assert tss.node(0).individual == 0\n            assert tss.node(1).individual == 1\n\n            tss, _ = do_simplify(\n                tables.tree_sequence(), filter_individuals=False, keep_unary=keep\n            )\n            assert tss.tables.individuals == tables.individuals\n\n    def verify_simplify_haplotypes(self, ts, samples, keep_unary=False):\n        sub_ts, node_map = do_simplify(\n            ts, samples, filter_sites=False, keep_unary=keep_unary\n        )\n        assert ts.num_sites == sub_ts.num_sites\n        sub_haplotypes = list(sub_ts.haplotypes(isolated_as_missing=False))\n        all_samples = list(ts.samples())\n        k = 0\n        for j, h in enumerate(ts.haplotypes(isolated_as_missing=False)):\n            if k == len(samples):\n                break\n        
    if samples[k] == all_samples[j]:\n                assert h == sub_haplotypes[k]\n                k += 1\n\n    def test_single_tree_recurrent_mutations(self):\n        ts = msprime.simulate(6, random_seed=10)\n        for mutations_per_branch in [1, 2, 3]:\n            ts = tsutil.insert_branch_mutations(ts, mutations_per_branch)\n            for num_samples in range(1, ts.num_samples):\n                for samples in itertools.combinations(ts.samples(), num_samples):\n                    for keep in [True, False]:\n                        self.verify_simplify_haplotypes(ts, samples, keep_unary=keep)\n\n    def test_many_trees_recurrent_mutations(self):\n        ts = msprime.simulate(5, recombination_rate=1, random_seed=10)\n        assert ts.num_trees > 3\n        for mutations_per_branch in [1, 2, 3]:\n            ts = tsutil.insert_branch_mutations(ts, mutations_per_branch)\n            for num_samples in range(1, ts.num_samples):\n                for samples in itertools.combinations(ts.samples(), num_samples):\n                    for keep in [True, False]:\n                        self.verify_simplify_haplotypes(ts, samples, keep_unary=keep)\n\n    def test_single_multiroot_tree_recurrent_mutations(self):\n        ts = msprime.simulate(6, random_seed=10)\n        ts = ts.decapitate(np.max(ts.tables.nodes.time) / 2)\n        for mutations_per_branch in [1, 2, 3]:\n            ts = tsutil.insert_branch_mutations(ts, mutations_per_branch)\n            for num_samples in range(1, ts.num_samples):\n                for samples in itertools.combinations(ts.samples(), num_samples):\n                    for keep in [True, False]:\n                        self.verify_simplify_haplotypes(ts, samples, keep_unary=keep)\n\n    @pytest.mark.slow\n    def test_many_multiroot_trees_recurrent_mutations(self):\n        ts = msprime.simulate(7, recombination_rate=1, random_seed=10)\n        assert ts.num_trees > 3\n        ts = ts.decapitate(np.max(ts.tables.nodes.time) / 
2)\n        for mutations_per_branch in [1, 2, 3]:\n            ts = tsutil.insert_branch_mutations(ts, mutations_per_branch)\n            for num_samples in range(1, ts.num_samples):\n                for samples in itertools.combinations(ts.samples(), num_samples):\n                    for keep in [True, False]:\n                        self.verify_simplify_haplotypes(ts, samples, keep_unary=keep)\n\n    def test_single_tree_recurrent_mutations_internal_samples(self):\n        ts = msprime.simulate(6, random_seed=10)\n        ts = tsutil.jiggle_samples(ts)\n        for mutations_per_branch in [1, 2, 3]:\n            ts = tsutil.insert_branch_mutations(ts, mutations_per_branch)\n            for num_samples in range(1, ts.num_samples):\n                for samples in itertools.combinations(ts.samples(), num_samples):\n                    for keep in [True, False]:\n                        self.verify_simplify_haplotypes(ts, samples, keep_unary=keep)\n\n    def test_many_trees_recurrent_mutations_internal_samples(self):\n        ts = msprime.simulate(5, recombination_rate=1, random_seed=10)\n        ts = tsutil.jiggle_samples(ts)\n        assert ts.num_trees > 3\n        for mutations_per_branch in [1, 2, 3]:\n            ts = tsutil.insert_branch_mutations(ts, mutations_per_branch)\n            for num_samples in range(1, ts.num_samples):\n                for samples in itertools.combinations(ts.samples(), num_samples):\n                    for keep in [True, False]:\n                        self.verify_simplify_haplotypes(ts, samples, keep_unary=keep)\n\n\nclass TestSimplifyUnreferencedPopulations:\n    def example(self):\n        tables = tskit.TableCollection(1)\n        tables.populations.add_row()\n        tables.populations.add_row()\n        # No references to population 0\n        tables.nodes.add_row(time=0, population=1, flags=1)\n        tables.nodes.add_row(time=0, population=1, flags=1)\n        tables.nodes.add_row(time=1, population=1, flags=0)\n      
  # Unreferenced node\n        tables.nodes.add_row(time=1, population=1, flags=0)\n        tables.edges.add_row(0, 1, parent=2, child=0)\n        tables.edges.add_row(0, 1, parent=2, child=1)\n        tables.sort()\n        return tables\n\n    def test_no_filter_populations(self):\n        tables = self.example()\n        tables.simplify(filter_populations=False)\n        assert len(tables.populations) == 2\n        assert len(tables.nodes) == 3\n        assert np.all(tables.nodes.population == 1)\n\n    def test_no_filter_populations_nodes(self):\n        tables = self.example()\n        tables.simplify(filter_populations=False, filter_nodes=False)\n        assert len(tables.populations) == 2\n        assert len(tables.nodes) == 4\n        assert np.all(tables.nodes.population == 1)\n\n    def test_filter_populations_no_filter_nodes(self):\n        tables = self.example()\n        tables.simplify(filter_populations=True, filter_nodes=False)\n        assert len(tables.populations) == 1\n        assert len(tables.nodes) == 4\n        assert np.all(tables.nodes.population == 0)\n\n    def test_remapped_default(self):\n        tables = self.example()\n        tables.simplify()\n        assert len(tables.populations) == 1\n        assert len(tables.nodes) == 3\n        assert np.all(tables.nodes.population == 0)\n\n\nclass TestSimplifyUnreferencedIndividuals:\n    def example(self):\n        tables = tskit.TableCollection(1)\n        tables.individuals.add_row()\n        tables.individuals.add_row()\n        # No references to individual 0\n        tables.nodes.add_row(time=0, individual=1, flags=1)\n        tables.nodes.add_row(time=0, individual=1, flags=1)\n        tables.nodes.add_row(time=1, individual=1, flags=0)\n        # Unreferenced node\n        tables.nodes.add_row(time=1, individual=1, flags=0)\n        tables.edges.add_row(0, 1, parent=2, child=0)\n        tables.edges.add_row(0, 1, parent=2, child=1)\n        tables.sort()\n        return tables\n\n    
def test_no_filter_individuals(self):\n        tables = self.example()\n        tables.simplify(filter_individuals=False)\n        assert len(tables.individuals) == 2\n        assert len(tables.nodes) == 3\n        assert np.all(tables.nodes.individual == 1)\n\n    def test_no_filter_individuals_nodes(self):\n        tables = self.example()\n        tables.simplify(filter_individuals=False, filter_nodes=False)\n        assert len(tables.individuals) == 2\n        assert len(tables.nodes) == 4\n        assert np.all(tables.nodes.individual == 1)\n\n    def test_filter_individuals_no_filter_nodes(self):\n        tables = self.example()\n        tables.simplify(filter_individuals=True, filter_nodes=False)\n        assert len(tables.individuals) == 1\n        assert len(tables.nodes) == 4\n        assert np.all(tables.nodes.individual == 0)\n\n    def test_remapped_default(self):\n        tables = self.example()\n        tables.simplify()\n        assert len(tables.individuals) == 1\n        assert len(tables.nodes) == 3\n        assert np.all(tables.nodes.individual == 0)\n\n\nclass TestSimplifyKeepInputRoots(SimplifyTestBase, ExampleTopologyMixin):\n    \"\"\"\n    Tests for the keep_input_roots option to simplify.\n    \"\"\"\n\n    def verify(self, ts):\n        # Called by the examples in ExampleTopologyMixin\n        samples = ts.samples()\n        self.verify_keep_input_roots(ts, samples[:2])\n        self.verify_keep_input_roots(ts, samples[:3])\n        self.verify_keep_input_roots(ts, samples[:-1])\n        self.verify_keep_input_roots(ts, samples)\n\n    def verify_keep_input_roots(self, ts, samples):\n        ts = tsutil.insert_unique_metadata(ts, [\"individuals\"])\n        ts_with_roots, node_map = do_simplify(\n            ts, samples, keep_input_roots=True, filter_sites=False, compare_lib=True\n        )\n        new_to_input_map = {\n            value: key for key, value in enumerate(node_map) if value != tskit.NULL\n        }\n        for (left, 
right), input_tree, tree_with_roots in ts.coiterate(ts_with_roots):\n            input_roots = input_tree.roots\n            assert len(tree_with_roots.roots) > 0\n            for root in tree_with_roots.roots:\n                # Check that each root in the current tree maps to an input root\n                input_root = new_to_input_map[root]\n                assert input_root in input_roots\n                input_node = ts.node(input_root)\n                new_node = ts_with_roots.node(root)\n                assert new_node.time == input_node.time\n                assert new_node.population == input_node.population\n                if new_node.individual == tskit.NULL:\n                    assert new_node.individual == input_node.individual\n                else:\n                    assert (\n                        ts_with_roots.individual(new_node.individual).metadata\n                        == ts.individual(input_node.individual).metadata\n                    )\n                assert new_node.metadata == input_node.metadata\n                # This should only be marked as a sample if it's an\n                # element of the samples list.\n                assert new_node.is_sample() == (input_root in samples)\n                # Find the MRCA of the samples below this root.\n                root_samples = list(tree_with_roots.samples(root))\n                mrca = functools.reduce(tree_with_roots.mrca, root_samples)\n                if mrca != root:\n                    # If the MRCA is not equal to the root, then there should\n                    # be a unary branch joining them.\n                    assert tree_with_roots.parent(mrca) == root\n                    assert tree_with_roots.children(root) == (mrca,)\n\n                    # Any mutations that were on the path from the old MRCA\n                    # to the root should be mapped to this node, and any mutations\n                    # above the root should still be there.\n                    u = new_to_input_map[mrca]\n    
                root_path = []\n                    while u != tskit.NULL:\n                        root_path.append(u)\n                        u = input_tree.parent(u)\n                    input_sites = {\n                        site.position: site\n                        for site in input_tree.sites()\n                        if site.position >= left and site.position < right\n                    }\n                    new_sites = {\n                        site.position: site\n                        for site in tree_with_roots.sites()\n                        if site.position >= left and site.position < right\n                    }\n                    assert set(input_sites.keys()) == set(new_sites.keys())\n                    positions = input_sites.keys()\n                    for position in positions:\n                        assert left <= position < right\n                        new_site = new_sites[position]\n                        # We assume the metadata contains a unique key for each mutation.\n                        new_mutations = {mut.metadata: mut for mut in new_site.mutations}\n                        # Just make sure the metadata is actually unique.\n                        assert len(new_mutations) == len(new_site.mutations)\n                        input_site = input_sites[position]\n                        for input_mutation in input_site.mutations:\n                            if input_mutation.node in root_path:\n                                new_node = (\n                                    mrca if input_mutation.node != input_root else root\n                                )\n                                # The same mutation should exist and be mapped to\n                                # new_node\n                                new_mutation = new_mutations[input_mutation.metadata]\n                                # We have turned filter sites off, so sites should\n                                # be comparable\n               
                 assert new_mutation.site == input_mutation.site\n                                assert (\n                                    new_mutation.derived_state\n                                    == input_mutation.derived_state\n                                )\n                                assert new_mutation.node == new_node\n\n        return ts_with_roots\n\n    def test_many_trees(self):\n        ts = msprime.simulate(5, recombination_rate=1, random_seed=10)\n        assert ts.num_trees > 3\n        for num_samples in range(1, ts.num_samples):\n            for samples in itertools.combinations(ts.samples(), num_samples):\n                self.verify_keep_input_roots(ts, samples)\n\n    def test_many_trees_internal_samples(self):\n        ts = msprime.simulate(5, recombination_rate=1, random_seed=10)\n        ts = tsutil.jiggle_samples(ts)\n        assert ts.num_trees > 3\n        for num_samples in range(1, ts.num_samples):\n            for samples in itertools.combinations(ts.samples(), num_samples):\n                self.verify_keep_input_roots(ts, samples)\n\n    def test_many_multiroot_trees(self):\n        ts = msprime.simulate(7, recombination_rate=1, random_seed=10)\n        assert ts.num_trees > 3\n        ts = ts.decapitate(np.max(ts.tables.nodes.time) / 2)\n        for num_samples in range(1, ts.num_samples):\n            for samples in itertools.combinations(ts.samples(), num_samples):\n                self.verify_keep_input_roots(ts, samples)\n\n    def test_wright_fisher_unsimplified(self):\n        num_generations = 10\n        tables = wf.wf_sim(10, num_generations, deep_history=False, seed=2)\n        tables.sort()\n        ts = tables.tree_sequence()\n        simplified = self.verify_keep_input_roots(ts, ts.samples())\n        roots = set()\n        for tree in simplified.trees():\n            for root in tree.roots:\n                roots.add(root)\n                assert tree.time(root) == num_generations\n        init_nodes = 
np.where(simplified.tables.nodes.time == num_generations)[0]\n        assert set(init_nodes) == roots\n\n    def test_single_tree_recurrent_mutations(self):\n        ts = msprime.simulate(6, random_seed=10)\n        for mutations_per_branch in [1, 2, 3]:\n            ts = tsutil.insert_branch_mutations(ts, mutations_per_branch)\n            for num_samples in range(1, ts.num_samples):\n                for samples in itertools.combinations(ts.samples(), num_samples):\n                    self.verify_keep_input_roots(ts, samples)\n\n    def test_many_trees_recurrent_mutations(self):\n        ts = msprime.simulate(5, recombination_rate=1, random_seed=8)\n        assert ts.num_trees > 2\n        for mutations_per_branch in [1, 2, 3]:\n            ts = tsutil.insert_branch_mutations(ts, mutations_per_branch)\n            for num_samples in range(1, ts.num_samples):\n                for samples in itertools.combinations(ts.samples(), num_samples):\n                    self.verify_keep_input_roots(ts, samples)\n\n\nclass TestSimplifyFilterNodes:\n    \"\"\"\n    Tests simplify when nodes are kept in the ts with filter_nodes=False\n    \"\"\"\n\n    def reverse_node_indexes(self, ts):\n        tables = ts.dump_tables()\n        nodes = tables.nodes\n        edges = tables.edges\n        mutations = tables.mutations\n        nodes.replace_with(nodes[::-1])\n        edges.parent = ts.num_nodes - edges.parent - 1\n        edges.child = ts.num_nodes - edges.child - 1\n        mutations.node = ts.num_nodes - mutations.node - 1\n        tables.sort()\n        return tables.tree_sequence()\n\n    def verify_nodes_unchanged(self, ts_in, resample_size=None, **kwargs):\n        if resample_size is None:\n            samples = None\n        else:\n            np.random.seed(42)\n            samples = np.sort(\n                np.random.choice(ts_in.num_nodes, resample_size, replace=False)\n            )\n\n        for ts in (ts_in, self.reverse_node_indexes(ts_in)):\n            
filtered, n_map = do_simplify(\n                ts, samples=samples, filter_nodes=False, compare_lib=True, **kwargs\n            )\n            assert np.array_equal(n_map, np.arange(ts.num_nodes, dtype=n_map.dtype))\n            referenced_nodes = set(filtered.samples())\n            referenced_nodes.update(filtered.edges_parent)\n            referenced_nodes.update(filtered.edges_child)\n            for n1, n2 in zip(ts.nodes(), filtered.nodes()):\n                # Ignore the tskit.NODE_IS_SAMPLE flag which can be changed by simplify\n                n1 = n1.replace(flags=n1.flags | tskit.NODE_IS_SAMPLE)\n                n2 = n2.replace(flags=n2.flags | tskit.NODE_IS_SAMPLE)\n                assert n1 == n2\n\n            # Check that edges are identical to the normal simplify(),\n            # with the normal \"simplify\" having altered IDs\n            simplified, node_map = ts.simplify(samples=samples, map_nodes=True, **kwargs)\n            simplified_edges = {e for e in simplified.tables.edges}\n            filtered_edges = {\n                e.replace(parent=node_map[e.parent], child=node_map[e.child])\n                for e in filtered.tables.edges\n            }\n            assert filtered_edges == simplified_edges\n\n    def test_empty(self):\n        ts = tskit.TableCollection(1).tree_sequence()\n        self.verify_nodes_unchanged(ts)\n\n    def test_all_samples(self):\n        ts = tskit.Tree.generate_comb(5).tree_sequence\n        tables = ts.dump_tables()\n        flags = tables.nodes.flags\n        flags |= tskit.NODE_IS_SAMPLE\n        tables.nodes.flags = flags\n        ts = tables.tree_sequence()\n        assert ts.num_samples == ts.num_nodes\n        self.verify_nodes_unchanged(ts)\n\n    @pytest.mark.parametrize(\"resample_size\", [None, 4])\n    def test_no_topology(self, resample_size):\n        ts = tskit.Tree.generate_comb(5).tree_sequence\n        ts = ts.keep_intervals([], simplify=False)\n        assert ts.num_nodes > 5  # has 
unreferenced nodes\n        self.verify_nodes_unchanged(ts, resample_size=resample_size)\n\n    @pytest.mark.parametrize(\"resample_size\", [None, 2])\n    def test_stick_tree(self, resample_size):\n        ts = tskit.Tree.generate_comb(2).tree_sequence\n        ts = ts.simplify([0], keep_unary=True)\n        assert ts.first().parent(0) != tskit.NULL\n        self.verify_nodes_unchanged(ts, resample_size=resample_size)\n\n        # switch to an internal sample\n        tables = ts.dump_tables()\n        flags = tables.nodes.flags\n        flags[0] = 0\n        flags[1] = tskit.NODE_IS_SAMPLE\n        tables.nodes.flags = flags\n        self.verify_nodes_unchanged(tables.tree_sequence(), resample_size=resample_size)\n\n    @pytest.mark.parametrize(\"resample_size\", [None, 4])\n    def test_internal_samples(self, resample_size):\n        ts = tskit.Tree.generate_comb(4).tree_sequence\n        tables = ts.dump_tables()\n        flags = tables.nodes.flags\n        flags ^= tskit.NODE_IS_SAMPLE\n        tables.nodes.flags = flags\n        ts = tables.tree_sequence()\n        assert np.all(ts.samples() >= ts.num_samples)\n        self.verify_nodes_unchanged(ts, resample_size=resample_size)\n\n    @pytest.mark.parametrize(\"resample_size\", [None, 4])\n    def test_blank_flanks(self, resample_size):\n        ts = tskit.Tree.generate_comb(4).tree_sequence\n        ts = ts.keep_intervals([[0.25, 0.75]], simplify=False)\n        self.verify_nodes_unchanged(ts, resample_size=resample_size)\n\n    @pytest.mark.parametrize(\"resample_size\", [None, 4])\n    def test_multiroot(self, resample_size):\n        ts = tskit.Tree.generate_balanced(6).tree_sequence\n        ts = ts.decapitate(2.5)\n        self.verify_nodes_unchanged(ts, resample_size=resample_size)\n\n    @pytest.mark.parametrize(\"resample_size\", [None, 10])\n    def test_with_metadata(self, ts_fixture_for_simplify, resample_size):\n        assert ts_fixture_for_simplify.num_nodes > 10\n        
self.verify_nodes_unchanged(ts_fixture_for_simplify, resample_size=resample_size)\n\n    @pytest.mark.parametrize(\"resample_size\", [None, 7])\n    def test_complex_ts_with_unary(self, resample_size):\n        ts = msprime.sim_ancestry(\n            3,\n            sequence_length=10,\n            recombination_rate=1,\n            record_full_arg=True,\n            random_seed=123,\n        )\n        assert ts.num_trees > 2\n        ts = msprime.sim_mutations(ts, rate=1, random_seed=123)\n        # Add some unreferenced nodes\n        tables = ts.dump_tables()\n        tables.nodes.add_row(flags=0)\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE)\n        ts = tables.tree_sequence()\n        self.verify_nodes_unchanged(ts, resample_size=resample_size)\n\n    def test_keeping_unary(self):\n        # Test interaction with keeping unary nodes\n        n_samples = 6\n        ts = tskit.Tree.generate_comb(n_samples).tree_sequence\n        num_nodes = ts.num_nodes\n        reduced_n_samples = [2, n_samples - 1]  # last sample is most deeply nested\n        ts_with_unary = ts.simplify(reduced_n_samples, keep_unary=True)\n        assert ts_with_unary.num_nodes == num_nodes - n_samples + len(reduced_n_samples)\n        tree = ts_with_unary.first()\n        assert any([tree.num_children(u) == 1 for u in tree.nodes()])\n        self.verify_nodes_unchanged(ts_with_unary, keep_unary=True)\n        self.verify_nodes_unchanged(ts_with_unary, keep_unary=False)\n\n    def test_find_unreferenced_nodes(self):\n        # Simple test to show we can find unreferenced nodes easily.\n        # 2.00┊    6    ┊\n        #     ┊  ┏━┻━┓  ┊\n        # 1.00┊  4   5  ┊\n        #     ┊ ┏┻┓ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 3 ┊\n        #     0         1\n        ts1 = tskit.Tree.generate_balanced(4).tree_sequence\n        ts2, node_map = do_simplify(\n            ts1,\n            [0, 1, 2],\n            filter_nodes=False,\n        )\n        assert np.array_equal(node_map, 
np.arange(ts1.num_nodes))\n        node_references = np.zeros(ts1.num_nodes, dtype=np.int32)\n        node_references[ts2.edges_parent] += 1\n        node_references[ts2.edges_child] += 1\n        # Simplifying for [0, 1, 2] should remove references to node 3 and 5\n        assert list(node_references) == [1, 1, 1, 0, 2, 0, 1]\n\n    def test_mutations_on_removed_branches(self):\n        # 2.00┊    6    ┊\n        #     ┊  ┏━┻━┓  ┊\n        # 1.00┊  4   5  ┊\n        #     ┊ ┏┻┓ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 3 ┊\n        #     0         1\n        tables = tskit.Tree.generate_balanced(4).tree_sequence.dump_tables()\n        # A mutation on a removed branch should get removed\n        tables.sites.add_row(0.5, \"A\")\n        tables.mutations.add_row(0, node=3, derived_state=\"T\")\n        ts2, node_map = do_simplify(\n            tables.tree_sequence(),\n            [0, 1, 2],\n            filter_nodes=False,\n        )\n        assert ts2.num_sites == 0\n        assert ts2.num_mutations == 0\n\n\nclass TestSimplifyNoUpdateSampleFlags:\n    \"\"\"\n    Tests for simplify when we don't update the sample flags.\n    \"\"\"\n\n    def test_simple_case_filter_nodes(self):\n        # 2.00┊    6    ┊\n        #     ┊  ┏━┻━┓  ┊\n        # 1.00┊  4   5  ┊\n        #     ┊ ┏┻┓ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 3 ┊\n        #     0         1\n        ts1 = tskit.Tree.generate_balanced(4).tree_sequence\n        ts2, node_map = do_simplify(\n            ts1,\n            [0, 1, 6],\n            update_sample_flags=False,\n        )\n        # Because we don't retain 2 and 3 here, they don't stay as\n        # samples. 
But, we specified 6 as a sample, so it's coming\n        # through where it would ordinarily be dropped.\n\n        # 2.00┊  2  ┊\n        #     ┊  ┃  ┊\n        # 1.00┊  3  ┊\n        #     ┊ ┏┻┓ ┊\n        # 0.00┊ 0 1 ┊\n        #     0     1\n        assert list(ts2.nodes_flags) == [1, 1, 0, 0]\n        tree = ts2.first()\n        assert list(tree.parent_array) == [3, 3, -1, 2, -1]\n\n    def test_simple_case_no_filter_nodes(self):\n        # 2.00┊    6    ┊\n        #     ┊  ┏━┻━┓  ┊\n        # 1.00┊  4   5  ┊\n        #     ┊ ┏┻┓ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 3 ┊\n        #     0         1\n        ts1 = tskit.Tree.generate_balanced(4).tree_sequence\n        ts2, node_map = do_simplify(\n            ts1,\n            [0, 1, 6],\n            update_sample_flags=False,\n            filter_nodes=False,\n        )\n\n        # 2.00┊  6      ┊\n        #     ┊  ┃      ┊\n        # 1.00┊  4      ┊\n        #     ┊ ┏┻┓     ┊\n        # 0.00┊ 0 1 2 3 ┊\n        #     0         1\n        assert list(ts2.nodes_flags) == list(ts1.nodes_flags)\n        tree = ts2.first()\n        assert list(tree.parent_array) == [4, 4, -1, -1, 6, -1, -1, -1]\n\n\nclass TestMapToAncestors:\n    \"\"\"\n    Tests the AncestorMap class.\n    \"\"\"\n\n    random_seed = 13\n    #\n    #          8\n    #         / \\\n    #        /   \\\n    #       /     \\\n    #      7       \\\n    #     / \\       6\n    #    /   5     / \\\n    #   /   / \\   /   \\\n    #  4   0   1 2     3\n    nodes = \"\"\"\\\n    id      is_sample   population      time\n    0       1       0               0.00000000000000\n    1       1       0               0.00000000000000\n    2       1       0               0.00000000000000\n    3       1       0               0.00000000000000\n    4       1       0               0.00000000000000\n    5       0       0               0.14567111023387\n    6       0       0               0.21385545626353\n    7       0       0               0.43508024345063\n    8       0      
 0               1.60156352971203\n    \"\"\"\n    edges = \"\"\"\\\n    id      left            right           parent  child\n    0       0.00000000      1.00000000      5       0,1\n    1       0.00000000      1.00000000      6       2,3\n    2       0.00000000      1.00000000      7       4,5\n    3       0.00000000      1.00000000      8       6,7\n    \"\"\"\n    #\n    #          9                        10\n    #         / \\                      / \\\n    #        /   \\                    /   8\n    #       /     \\                  /   / \\\n    #      7       \\                /   /   \\\n    #     / \\       6              /   /     6\n    #    /   5     / \\            /   5     / \\\n    #   /   / \\   /   \\          /   / \\   /   \\\n    #  4   0   1 2     3        4   0   1 2     3\n    #\n    # 0 ------------------ 0.5 ------------------ 1.0\n    nodes0 = \"\"\"\\\n    id      is_sample   population      time\n    0       1       0               0.00000000000000\n    1       1       0               0.00000000000000\n    2       1       0               0.00000000000000\n    3       1       0               0.00000000000000\n    4       1       0               0.00000000000000\n    5       0       0               0.14567111023387\n    6       0       0               0.21385545626353\n    7       0       0               0.43508024345063\n    8       0       0               0.60156352971203\n    9       0       0               0.90000000000000\n    10      0       0               1.20000000000000\n    \"\"\"\n    edges0 = \"\"\"\\\n    id      left            right           parent  child\n    0       0.00000000      1.00000000      5       0,1\n    1       0.00000000      1.00000000      6       2,3\n    2       0.00000000      0.50000000      7       4,5\n    3       0.50000000      1.00000000      8       5,6\n    4       0.00000000      0.50000000      9       6,7\n    5       0.50000000      1.00000000      10      4,8\n    \"\"\"\n    nodes1 = 
\"\"\"\\\n    id      is_sample   population      time\n    0       0           0           1.0\n    1       1           0           0.0\n    2       1           0           0.0\n    \"\"\"\n    edges1 = \"\"\"\\\n    id      left            right           parent  child\n    0       0.00000000      1.00000000      0       1,2\n    \"\"\"\n\n    def do_map(self, ts, ancestors, samples=None, compare_lib=True):\n        \"\"\"\n        Runs the Python test implementation of link_ancestors.\n        \"\"\"\n        if samples is None:\n            samples = ts.samples()\n        s = tests.AncestorMap(ts, samples, ancestors)\n        ancestor_table = s.link_ancestors()\n        if compare_lib:\n            lib_result = ts.dump_tables().link_ancestors(samples, ancestors)\n            assert ancestor_table == lib_result\n            ts_result = ts.link_ancestors(samples, ancestors)\n            assert ancestor_table == ts_result\n            tables_result = ts.tables.link_ancestors(samples, ancestors)\n            assert ancestor_table == tables_result\n        return ancestor_table\n\n    def test_deprecated_name(self):\n        # copied from test_single_tree_one_ancestor below\n        nodes = io.StringIO(self.nodes)\n        edges = io.StringIO(self.edges)\n        ts = tskit.load_text(nodes=nodes, edges=edges, strict=False)\n        samples = ts.samples()\n        ancestors = [8]\n        s = tests.AncestorMap(ts, samples, ancestors)\n        tss = s.link_ancestors()\n        lib_result = ts.dump_tables().map_ancestors(samples, ancestors)\n        assert tss == lib_result\n        ts_result = ts.link_ancestors(samples, ancestors)\n        assert tss == ts_result\n        immutable_result = ts.tables.map_ancestors(samples, ancestors)\n        assert tss == immutable_result\n        assert list(tss.parent) == [8, 8, 8, 8, 8]\n        assert list(tss.child) == [0, 1, 2, 3, 4]\n        assert all(tss.left) == 0\n        assert all(tss.right) == 1\n\n    def 
test_single_tree_one_ancestor(self):\n        nodes = io.StringIO(self.nodes)\n        edges = io.StringIO(self.edges)\n        ts = tskit.load_text(nodes=nodes, edges=edges, strict=False)\n        tss = self.do_map(ts, ancestors=[8])\n        assert list(tss.parent) == [8, 8, 8, 8, 8]\n        assert list(tss.child) == [0, 1, 2, 3, 4]\n        assert all(tss.left) == 0\n        assert all(tss.right) == 1\n\n    def test_single_tree_unordered_nodes(self):\n        nodes = io.StringIO(self.nodes1)\n        edges = io.StringIO(self.edges1)\n        ts = tskit.load_text(nodes=nodes, edges=edges, strict=False)\n        tss = self.do_map(ts, ancestors=[0])\n        assert list(tss.parent) == [0, 0]\n        assert list(tss.child) == [1, 2]\n        assert all(tss.left) == 0\n        assert all(tss.right) == 1\n\n    def test_single_tree_two_ancestors(self):\n        nodes = io.StringIO(self.nodes)\n        edges = io.StringIO(self.edges)\n        ts = tskit.load_text(nodes=nodes, edges=edges, strict=False)\n        tss = self.do_map(ts, ancestors=[6, 7])\n        assert list(tss.parent) == [6, 6, 7, 7, 7]\n        assert list(tss.child) == [2, 3, 0, 1, 4]\n        assert all(tss.left) == 0\n        assert all(tss.right) == 1\n\n    def test_single_tree_no_ancestors(self):\n        nodes = io.StringIO(self.nodes)\n        edges = io.StringIO(self.edges)\n        ts = tskit.load_text(nodes=nodes, edges=edges, strict=False)\n        tss = self.do_map(ts, samples=[2, 3], ancestors=[7])\n        assert tss.num_rows == 0\n\n    def test_single_tree_samples_or_ancestors_not_in_tree(self):\n        nodes = io.StringIO(self.nodes)\n        edges = io.StringIO(self.edges)\n        ts = tskit.load_text(nodes=nodes, edges=edges, strict=False)\n        with pytest.raises(AssertionError):\n            self.do_map(ts, samples=[-1, 3], ancestors=[5])\n        with pytest.raises(AssertionError):\n            self.do_map(ts, samples=[2, 3], ancestors=[10])\n\n    def 
test_single_tree_ancestors_descend_from_other_ancestors(self):\n        nodes = io.StringIO(self.nodes)\n        edges = io.StringIO(self.edges)\n        ts = tskit.load_text(nodes=nodes, edges=edges, strict=False)\n        tss = self.do_map(ts, ancestors=[7, 8])\n        assert list(tss.parent) == [7, 7, 7, 8, 8, 8]\n        assert list(tss.child) == [0, 1, 4, 2, 3, 7]\n        assert all(tss.left) == 0\n        assert all(tss.right) == 1\n\n    def test_single_tree_internal_samples(self):\n        nodes = io.StringIO(self.nodes)\n        edges = io.StringIO(self.edges)\n        ts = tskit.load_text(nodes=nodes, edges=edges, strict=False)\n        tss = self.do_map(ts, samples=[2, 3, 4, 5], ancestors=[7, 8])\n        assert list(tss.parent) == [7, 7, 8, 8, 8]\n        assert list(tss.child) == [4, 5, 2, 3, 7]\n        assert all(tss.left) == 0\n        assert all(tss.right) == 1\n\n    def test_single_tree_samples_and_ancestors_overlap(self):\n        nodes = io.StringIO(self.nodes)\n        edges = io.StringIO(self.edges)\n        ts = tskit.load_text(nodes=nodes, edges=edges, strict=False)\n        tss = self.do_map(ts, samples=[1, 2, 3, 5], ancestors=[5, 6, 7])\n        assert list(tss.parent) == [5, 6, 6, 7]\n        assert list(tss.child) == [1, 2, 3, 5]\n        assert all(tss.left) == 0\n        assert all(tss.right) == 1\n\n    def test_single_tree_unary_ancestor(self):\n        nodes = io.StringIO(self.nodes)\n        edges = io.StringIO(self.edges)\n        ts = tskit.load_text(nodes=nodes, edges=edges, strict=False)\n        tss = self.do_map(ts, samples=[1, 2, 4], ancestors=[5, 7, 8])\n        assert list(tss.parent) == [5, 7, 7, 8, 8]\n        assert list(tss.child) == [1, 4, 5, 2, 7]\n        assert all(tss.left) == 0\n        assert all(tss.right) == 1\n\n    def test_single_tree_ancestors_descend_from_samples(self):\n        nodes = io.StringIO(self.nodes)\n        edges = io.StringIO(self.edges)\n        ts = tskit.load_text(nodes=nodes, 
edges=edges, strict=False)\n        tss = self.do_map(ts, samples=[1, 7], ancestors=[5, 8])\n        assert list(tss.parent) == [5, 7, 8]\n        assert list(tss.child) == [1, 5, 7]\n        assert all(tss.left) == 0\n        assert all(tss.right) == 1\n\n    def test_single_tree_samples_descend_from_samples(self):\n        nodes = io.StringIO(self.nodes)\n        edges = io.StringIO(self.edges)\n        ts = tskit.load_text(nodes=nodes, edges=edges, strict=False)\n        tss = self.do_map(ts, samples=[3, 6], ancestors=[8])\n        assert list(tss.parent) == [6, 8]\n        assert list(tss.child) == [3, 6]\n        assert all(tss.left) == 0\n        assert all(tss.right) == 1\n\n    def test_multiple_trees_to_single_tree(self):\n        nodes = io.StringIO(self.nodes0)\n        edges = io.StringIO(self.edges0)\n        ts = tskit.load_text(nodes=nodes, edges=edges, strict=False)\n        tss = self.do_map(ts, ancestors=[5, 6])\n        assert list(tss.parent) == [5, 5, 6, 6]\n        assert list(tss.child) == [0, 1, 2, 3]\n        assert all(tss.left) == 0\n        assert all(tss.right) == 1\n\n    def test_multiple_trees_one_ancestor(self):\n        nodes = io.StringIO(self.nodes0)\n        edges = io.StringIO(self.edges0)\n        ts = tskit.load_text(nodes=nodes, edges=edges, strict=False)\n        tss = self.do_map(ts, ancestors=[9, 10])\n        assert list(tss.parent) == [9, 9, 9, 9, 9, 10, 10, 10, 10, 10]\n        assert list(tss.child) == [0, 1, 2, 3, 4, 0, 1, 2, 3, 4]\n        assert all(tss.left) == 0\n        assert all(tss.right) == 1\n\n    def verify(self, ts, sample_nodes, ancestral_nodes):\n        tss = self.do_map(ts, ancestors=ancestral_nodes, samples=sample_nodes)\n        # ancestors = list(set(tss.parent))\n        # Loop through the rows of the ancestral branch table.\n        current_ancestor = tss.parent[0]\n        current_descendants = [tss.child[0]]\n        current_left = tss.left[0]\n        current_right = tss.right[0]\n        for 
_, row in enumerate(tss):\n            if (\n                row.parent != current_ancestor\n                or row.left != current_left\n                or row.right != current_right\n            ):\n                # Loop through trees.\n                for tree in ts.trees():\n                    if tree.interval.left >= current_right:\n                        break\n                    while tree.interval.right <= current_left:\n                        tree.next()\n                    # Check that the most recent ancestor of the descendants is the\n                    # current_ancestor.\n                    current_descendants = list(set(current_descendants))\n                    for des in current_descendants:\n                        par = tree.get_parent(des)\n                        while par not in ancestral_nodes and par not in sample_nodes:\n                            par = tree.get_parent(par)\n                        assert par == current_ancestor\n                # Reset the current ancestor and descendants, left and right coords.\n                current_ancestor = row.parent\n                current_descendants = [row.child]\n                current_left = row.left\n                current_right = row.right\n            else:\n                # Collate a list of children corresponding to each ancestral node.\n                current_descendants.append(row.child)\n\n    def test_sim_single_coalescent_tree(self):\n        ts = msprime.simulate(30, random_seed=1, length=10)\n        ancestors = [3 * n for n in np.arange(0, ts.num_nodes // 3)]\n        self.verify(ts, ts.samples(), ancestors)\n        random_samples = [4 * n for n in np.arange(0, ts.num_nodes // 4)]\n        self.verify(ts, random_samples, ancestors)\n\n    def test_sim_coalescent_trees(self):\n        ts = msprime.simulate(8, recombination_rate=5, random_seed=1, length=2)\n        ancestors = [3 * n for n in np.arange(0, ts.num_nodes // 3)]\n        self.verify(ts, ts.samples(), 
ancestors)\n        random_samples = [4 * n for n in np.arange(0, ts.num_nodes // 4)]\n        self.verify(ts, random_samples, ancestors)\n\n    def test_sim_coalescent_trees_internal_samples(self):\n        ts = msprime.simulate(8, recombination_rate=5, random_seed=10, length=2)\n        assert ts.num_trees > 2\n        ancestors = [4 * n for n in np.arange(0, ts.num_nodes // 4)]\n        self.verify(tsutil.jiggle_samples(ts), ts.samples(), ancestors)\n        random_samples = [4 * n for n in np.arange(0, ts.num_nodes // 4)]\n        self.verify(tsutil.jiggle_samples(ts), random_samples, ancestors)\n\n    def test_sim_many_multiroot_trees(self):\n        ts = msprime.simulate(7, recombination_rate=1, random_seed=10)\n        assert ts.num_trees > 3\n        ts = ts.decapitate(np.max(ts.tables.nodes.time) / 2)\n        ancestors = [4 * n for n in np.arange(0, ts.num_nodes // 4)]\n        self.verify(ts, ts.samples(), ancestors)\n        random_samples = [4 * n for n in np.arange(0, ts.num_nodes // 4)]\n        self.verify(ts, random_samples, ancestors)\n\n    def test_sim_wright_fisher_generations(self):\n        number_of_gens = 5\n        tables = wf.wf_sim(10, number_of_gens, deep_history=False, seed=2)\n        tables.sort()\n        ts = tables.tree_sequence()\n        ancestors = [4 * n for n in np.arange(0, ts.num_nodes // 4)]\n        self.verify(ts, ts.samples(), ancestors)\n        for gen in range(1, number_of_gens):\n            ancestors = [u.id for u in ts.nodes() if u.time == gen]\n            self.verify(ts, ts.samples(), ancestors)\n\n        random_samples = [4 * n for n in np.arange(0, ts.num_nodes // 4)]\n        self.verify(ts, random_samples, ancestors)\n        for gen in range(1, number_of_gens):\n            ancestors = [u.id for u in ts.nodes() if u.time == gen]\n            self.verify(ts, random_samples, ancestors)\n\n\nclass TestMutationParent:\n    \"\"\"\n    Tests that mutation parent is correctly specified, and that we correctly\n   
 recompute it with compute_mutation_parent.\n    \"\"\"\n\n    seed = 42\n\n    def verify_parents(self, ts):\n        parent = tsutil.compute_mutation_parent(ts)\n        tables = ts.dump_tables()\n        assert np.array_equal(parent, tables.mutations.parent)\n        tables.mutations.parent = np.zeros_like(tables.mutations.parent) - 1\n        assert np.all(tables.mutations.parent == tskit.NULL)\n        tables.compute_mutation_parents()\n        assert np.array_equal(parent, tables.mutations.parent)\n\n    def test_example(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       0           2.0\n        1       0           1.0\n        2       0           1.0\n        3       1           0\n        4       1           0\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.0    0.5   2  3\n        0.0    0.8   2  4\n        0.5    1.0   1  3\n        0.0    1.0   0  1\n        0.0    1.0   0  2\n        0.8    1.0   0  4\n        \"\"\"\n        )\n        sites = io.StringIO(\n            \"\"\"\\\n        position    ancestral_state\n        0.1     0\n        0.5     0\n        0.9     0\n        \"\"\"\n        )\n        mutations = io.StringIO(\n            \"\"\"\\\n        site    node    derived_state   parent\n        0       1       1               -1\n        0       2       1               -1\n        0       3       2               1\n        1       0       1               -1\n        1       1       1               3\n        1       3       2               4\n        1       2       1               3\n        1       4       2               6\n        2       0       1               -1\n        2       1       1               8\n        2       2       1               8\n        2       4       1               8\n        \"\"\"\n        )\n        ts = tskit.load_text(\n            nodes=nodes, edges=edges, 
sites=sites, mutations=mutations, strict=False\n        )\n        self.verify_parents(ts)\n\n    def test_single_muts(self):\n        ts = msprime.simulate(\n            10, random_seed=self.seed, mutation_rate=3.0, recombination_rate=1.0\n        )\n        self.verify_parents(ts)\n\n    def test_with_jukes_cantor(self):\n        ts = msprime.simulate(\n            10, random_seed=self.seed, mutation_rate=0.0, recombination_rate=1.0\n        )\n        # make *lots* of recurrent mutations\n        mut_ts = tsutil.jukes_cantor(\n            ts, num_sites=10, mu=1, multiple_per_node=False, seed=self.seed\n        )\n        self.verify_parents(mut_ts)\n\n    def test_with_jukes_cantor_multiple_per_node(self):\n        ts = msprime.simulate(\n            10, random_seed=self.seed, mutation_rate=0.0, recombination_rate=1.0\n        )\n        # make *lots* of recurrent mutations\n        mut_ts = tsutil.jukes_cantor(\n            ts, num_sites=10, mu=1, multiple_per_node=True, seed=self.seed\n        )\n        self.verify_parents(mut_ts)\n\n    def verify_branch_mutations(self, ts, mutations_per_branch):\n        ts = tsutil.insert_branch_mutations(ts, mutations_per_branch)\n        assert ts.num_mutations > 1\n        self.verify_parents(ts)\n\n    def test_single_tree_one_mutation_per_branch(self):\n        ts = msprime.simulate(6, random_seed=10)\n        self.verify_branch_mutations(ts, 1)\n\n    def test_single_tree_two_mutations_per_branch(self):\n        ts = msprime.simulate(10, random_seed=9)\n        self.verify_branch_mutations(ts, 2)\n\n    def test_single_tree_three_mutations_per_branch(self):\n        ts = msprime.simulate(8, random_seed=9)\n        self.verify_branch_mutations(ts, 3)\n\n    def test_single_multiroot_tree_recurrent_mutations(self):\n        ts = msprime.simulate(6, random_seed=10)\n        ts = ts.decapitate(np.max(ts.tables.nodes.time) / 2)\n        for mutations_per_branch in [1, 2, 3]:\n            self.verify_branch_mutations(ts, 
mutations_per_branch)\n\n    def test_many_multiroot_trees_recurrent_mutations(self):\n        ts = msprime.simulate(7, recombination_rate=1, random_seed=10)\n        assert ts.num_trees > 3\n        ts = ts.decapitate(np.max(ts.tables.nodes.time) / 2)\n        for mutations_per_branch in [1, 2, 3]:\n            self.verify_branch_mutations(ts, mutations_per_branch)\n\n\nclass TestMutationEdge:\n    def verify_mutation_edge(self, ts):\n        # print(ts.tables)\n        for mutation in ts.mutations():\n            site = ts.site(mutation.site)\n            if mutation.edge == tskit.NULL:\n                edges = [\n                    edge\n                    for edge in ts.edges()\n                    if edge.left <= site.position < edge.right\n                    and mutation.node == edge.child\n                ]\n                assert len(edges) == 0\n            else:\n                edge = ts.edge(mutation.edge)\n                assert edge.left <= site.position < edge.right\n                assert edge.child == mutation.node\n\n        for tree in ts.trees():\n            for site in tree.sites():\n                for mutation in site.mutations:\n                    assert mutation.edge == ts.mutation(mutation.id).edge\n                    if mutation.edge == tskit.NULL:\n                        assert tree.parent(mutation.node) == tskit.NULL\n\n    def verify_branch_mutations(self, ts, mutations_per_branch):\n        ts = tsutil.insert_branch_mutations(ts, mutations_per_branch)\n        assert ts.num_mutations > 1\n        self.verify_mutation_edge(ts)\n\n    def test_single_tree_one_mutation_per_branch(self):\n        ts = msprime.simulate(6, random_seed=10)\n        self.verify_branch_mutations(ts, 1)\n\n    def test_single_tree_two_mutations_per_branch(self):\n        ts = msprime.simulate(10, random_seed=9)\n        self.verify_branch_mutations(ts, 2)\n\n    def test_single_tree_three_mutations_per_branch(self):\n        ts = msprime.simulate(8, 
random_seed=9)\n        self.verify_branch_mutations(ts, 3)\n\n    def test_single_multiroot_tree_recurrent_mutations(self):\n        ts = msprime.simulate(6, random_seed=10)\n        ts = ts.decapitate(np.max(ts.tables.nodes.time) / 2)\n        for mutations_per_branch in [1, 2, 3]:\n            self.verify_branch_mutations(ts, mutations_per_branch)\n\n    def test_many_multiroot_trees_recurrent_mutations(self):\n        ts = msprime.simulate(7, recombination_rate=1, random_seed=10)\n        assert ts.num_trees > 3\n        ts = ts.decapitate(np.max(ts.tables.nodes.time) / 2)\n        for mutations_per_branch in [1, 2, 3]:\n            self.verify_branch_mutations(ts, mutations_per_branch)\n\n    @pytest.mark.parametrize(\"n\", range(2, 5))\n    @pytest.mark.parametrize(\"mutations_per_branch\", range(3))\n    def test_balanced_binary_tree(self, n, mutations_per_branch):\n        ts = tskit.Tree.generate_balanced(4).tree_sequence\n        # These trees have a handy property\n        assert all(edge.id == edge.child for edge in ts.edges())\n        for mutation in ts.mutations():\n            assert mutation.edge == mutation.node\n        for site in ts.first().sites():\n            for mutation in site.mutations:\n                assert mutation.edge == mutation.node\n\n\nclass TestMutationTime:\n    \"\"\"\n    Tests that mutation time is correctly specified, and that we correctly\n    recompute it with compute_mutation_times.\n    \"\"\"\n\n    seed = 42\n\n    def verify_times(self, ts):\n        tables = ts.dump_tables()\n        # Clear out the existing mutations as they come from msprime\n        tables.mutations.time = np.full(\n            tables.mutations.time.shape, -1, dtype=np.float64\n        )\n        assert np.all(tables.mutations.time == -1)\n        # Compute times with C method and dumb python method\n        tables.compute_mutation_times()\n        python_time = tsutil.compute_mutation_times(ts)\n        assert np.allclose(python_time, 
tables.mutations.time, rtol=1e-15, atol=1e-15)\n\n    def test_example(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       0           2.0\n        1       0           1.0\n        2       0           1.0\n        3       1           0\n        4       1           0\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.0    0.5   2  3\n        0.0    0.8   2  4\n        0.5    1.0   1  3\n        0.0    1.0   0  1\n        0.0    1.0   0  2\n        0.8    1.0   0  4\n        \"\"\"\n        )\n        sites = io.StringIO(\n            \"\"\"\\\n        position    ancestral_state\n        0.1     0\n        0.5     0\n        0.9     0\n        \"\"\"\n        )\n        mutations = io.StringIO(\n            \"\"\"\\\n        site\tnode\ttime\tderived_state\tparent\n        0       1       1.5     1               -1\n        0       2       1.5     1               -1\n        0       3       0.5     2               1\n        1       0       2.0     1               -1\n        1       1       1.5     1               3\n        1       3       0.5     2               4\n        1       2       1.5     1               3\n        1       4       0.5     2               6\n        2       0       2.0     1               -1\n        2       1       1.5     1               8\n        2       2       1.5     1               8\n        2       4       1.0     1               8\n        \"\"\"\n        )\n        ts = tskit.load_text(\n            nodes=nodes, edges=edges, sites=sites, mutations=mutations, strict=False\n        )\n        # ts.dump_text(mutations=sys.stdout)\n        # self.assertFalse(True)\n        tables = ts.dump_tables()\n        python_time = tsutil.compute_mutation_times(ts)\n        assert np.allclose(python_time, tables.mutations.time, rtol=1e-15, atol=1e-15)\n        tables.mutations.time = np.full(\n            
tables.mutations.time.shape, -1, dtype=np.float64\n        )\n        assert np.all(tables.mutations.time == -1)\n        tables.compute_mutation_times()\n        assert np.allclose(python_time, tables.mutations.time, rtol=1e-15, atol=1e-15)\n\n    def test_single_muts(self):\n        ts = msprime.simulate(\n            10, random_seed=self.seed, mutation_rate=3.0, recombination_rate=1.0\n        )\n        self.verify_times(ts)\n\n    def test_with_jukes_cantor(self):\n        ts = msprime.simulate(\n            10, random_seed=self.seed, mutation_rate=0.0, recombination_rate=1.0\n        )\n        # make *lots* of recurrent mutations\n        mut_ts = tsutil.jukes_cantor(\n            ts, num_sites=10, mu=1, multiple_per_node=False, seed=self.seed\n        )\n        self.verify_times(mut_ts)\n\n    def test_with_jukes_cantor_multiple_per_node(self):\n        ts = msprime.simulate(\n            10, random_seed=self.seed, mutation_rate=0.0, recombination_rate=1.0\n        )\n        # make *lots* of recurrent mutations\n        mut_ts = tsutil.jukes_cantor(\n            ts, num_sites=10, mu=1, multiple_per_node=True, seed=self.seed\n        )\n        self.verify_times(mut_ts)\n\n    def verify_branch_mutations(self, ts, mutations_per_branch):\n        ts = tsutil.insert_branch_mutations(ts, mutations_per_branch)\n        assert ts.num_mutations > 1\n        self.verify_times(ts)\n\n    def test_single_tree_one_mutation_per_branch(self):\n        ts = msprime.simulate(6, random_seed=10)\n        self.verify_branch_mutations(ts, 1)\n\n    def test_single_tree_two_mutations_per_branch(self):\n        ts = msprime.simulate(10, random_seed=9)\n        self.verify_branch_mutations(ts, 2)\n\n    def test_single_tree_three_mutations_per_branch(self):\n        ts = msprime.simulate(8, random_seed=9)\n        self.verify_branch_mutations(ts, 3)\n\n    def test_single_multiroot_tree_recurrent_mutations(self):\n        ts = msprime.simulate(6, random_seed=10)\n        ts = 
ts.decapitate(np.max(ts.tables.nodes.time) / 2)\n        for mutations_per_branch in [1, 2, 3]:\n            self.verify_branch_mutations(ts, mutations_per_branch)\n\n    def test_many_multiroot_trees_recurrent_mutations(self):\n        ts = msprime.simulate(7, recombination_rate=1, random_seed=10)\n        assert ts.num_trees > 3\n        ts = ts.decapitate(np.max(ts.tables.nodes.time) / 2)\n        for mutations_per_branch in [1, 2, 3]:\n            self.verify_branch_mutations(ts, mutations_per_branch)\n\n\nclass TestSimpleTreeAlgorithm:\n    \"\"\"\n    Tests for the direct implementation of Algorithm T in tsutil.py.\n\n    See TestHoleyTreeSequences above for further tests on wacky topologies.\n    \"\"\"\n\n    def test_zero_nodes(self):\n        tables = tskit.TableCollection(1)\n        ts = tables.tree_sequence()\n        assert ts.sequence_length == 1\n        assert ts.num_trees == 1\n        # Test the simple tree iterator.\n        trees = list(tsutil.algorithm_T(ts))\n        assert len(trees) == 1\n        (left, right), parent = trees[0]\n        assert left == 0\n        assert right == 1\n        assert parent == []\n\n    def test_one_node(self):\n        tables = tskit.TableCollection(1)\n        tables.nodes.add_row()\n        ts = tables.tree_sequence()\n        assert ts.sequence_length == 1\n        assert ts.num_trees == 1\n        # Test the simple tree iterator.\n        trees = list(tsutil.algorithm_T(ts))\n        assert len(trees) == 1\n        (left, right), parent = trees[0]\n        assert left == 0\n        assert right == 1\n        assert parent == [-1]\n\n    def test_single_coalescent_tree(self):\n        ts = msprime.simulate(10, random_seed=1, length=10)\n        tree = ts.first()\n        p1 = [tree.parent(j) for j in range(ts.num_nodes)]\n        interval, p2 = next(tsutil.algorithm_T(ts))\n        assert interval == tree.interval\n        assert p1 == p2\n\n    def test_coalescent_trees(self):\n        ts = 
msprime.simulate(8, recombination_rate=5, random_seed=1, length=2)\n        assert ts.num_trees > 2\n        new_trees = tsutil.algorithm_T(ts)\n        for tree in ts.trees():\n            interval, p2 = next(new_trees)\n            p1 = [tree.parent(j) for j in range(ts.num_nodes)]\n            assert interval == tree.interval\n            assert p1 == p2\n        with pytest.raises(StopIteration):\n            next(new_trees)\n\n\nclass TestVirtualRootAPIs(ExampleTopologyMixin):\n    \"\"\"\n    Tests the APIs based on getting roots.\n    \"\"\"\n\n    def verify(self, ts):\n        for tree in ts.trees():\n            left_child = tree.left_child_array\n            right_child = tree.right_child_array\n            assert tree.virtual_root == ts.num_nodes\n            assert tree.left_root == tree.left_child(tree.virtual_root)\n            assert tree.right_root == tree.right_child(tree.virtual_root)\n            assert tree.left_root == left_child[-1]\n            assert tree.right_root == right_child[-1]\n            assert tree.parent(tree.virtual_root) == tskit.NULL\n            assert tree.left_sib(tree.virtual_root) == tskit.NULL\n            assert tree.right_sib(tree.virtual_root) == tskit.NULL\n            assert tree.num_children(tree.virtual_root) == tree.num_roots\n\n            u = tree.left_root\n            roots = []\n            while u != tskit.NULL:\n                roots.append(u)\n                u = tree.right_sib(u)\n            assert roots == list(tree.roots)\n\n            # The branch_length for roots is defined as 0, and it's consistent\n            # to have the same for the virtual root.\n            assert tree.branch_length(tree.virtual_root) == 0\n            # The virtual root has depth -1 from the root\n            assert tree.depth(tree.virtual_root) == -1\n            assert tree.num_children(tree.virtual_root) == tree.num_roots\n            assert tree.num_samples(tree.virtual_root) == tree.num_samples()\n            # We're 
not using tracked samples here.\n            assert tree.num_tracked_samples(tree.virtual_root) == 0\n            # The virtual_root is internal because it has children (the roots)\n            assert tree.is_internal(tree.virtual_root)\n            assert not tree.is_leaf(tree.virtual_root)\n            assert not tree.is_sample(tree.virtual_root)\n            # The mrca of the virtual_root and anything is itself\n            assert tree.mrca(0, tree.virtual_root) == tree.virtual_root\n            assert tree.mrca(tree.virtual_root, 0) == tree.virtual_root\n            assert tree.mrca(tree.virtual_root, tree.virtual_root) == tree.virtual_root\n            # The virtual_root is a descendant of nothing other than itself\n            assert not tree.is_descendant(0, tree.virtual_root)\n            assert tree.is_descendant(tree.virtual_root, tree.virtual_root)\n\n            assert list(tree.leaves(tree.virtual_root)) == list(tree.leaves())\n            assert list(tree.samples(tree.virtual_root)) == list(tree.samples())\n\n            orders = [\n                \"preorder\",\n                \"inorder\",\n                \"levelorder\",\n                \"breadthfirst\",\n                \"postorder\",\n                \"timeasc\",\n                \"timedesc\",\n                \"minlex_postorder\",\n            ]\n            for order in orders:\n                l_vr = list(tree.nodes(tree.virtual_root, order=order))\n                l_standard = list(tree.nodes(order=order))\n                assert len(l_vr) == 1 + len(l_standard)\n                assert tree.virtual_root in l_vr\n\n            # For pre-order, virtual_root should be first node visited:\n            assert next(tree.nodes(tree.virtual_root)) == tree.virtual_root\n\n            # Methods that imply looking up tree sequence properties of the\n            # node raise an error\n            # Some methods don't apply\n            for method in [tree.population]:\n                with 
pytest.raises(tskit.LibraryError, match=\"Node out of bounds\"):\n                    method(tree.virtual_root)\n\n\nclass TestSampleLists(ExampleTopologyMixin):\n    \"\"\"\n    Tests for the sample lists algorithm.\n    \"\"\"\n\n    def verify(self, ts):\n        tree1 = tsutil.SampleListTree(ts)\n        s = str(tree1)\n        assert s is not None\n        trees = ts.trees(sample_lists=True)\n        for left, right in tree1.sample_lists():\n            tree2 = next(trees)\n            assert (left, right) == tree2.interval\n            for u in tree2.nodes():\n                assert tree1.left_sample[u] == tree2.left_sample(u)\n                assert tree1.right_sample[u] == tree2.right_sample(u)\n            for j in range(ts.num_samples):\n                assert tree1.next_sample[j] == tree2.next_sample(j)\n        assert right == ts.sequence_length\n\n        tree1 = tsutil.SampleListTree(ts)\n        trees = ts.trees(sample_lists=False)\n        sample_index_map = ts.samples()\n        for _, _ in tree1.sample_lists():\n            tree2 = next(trees)\n            for u in range(ts.num_nodes):\n                samples2 = list(tree2.samples(u))\n                samples1 = []\n                index = tree1.left_sample[u]\n                if index != tskit.NULL:\n                    assert sample_index_map[tree1.left_sample[u]] == samples2[0]\n                    assert sample_index_map[tree1.right_sample[u]] == samples2[-1]\n                    stop = tree1.right_sample[u]\n                    while True:\n                        assert index != -1\n                        samples1.append(sample_index_map[index])\n                        if index == stop:\n                            break\n                        index = tree1.next_sample[index]\n                assert samples1 == samples2\n            # The python implementation here doesn't maintain roots\n            np.testing.assert_array_equal(tree1.parent, tree2.parent_array[:-1])\n            
np.testing.assert_array_equal(tree1.left_child, tree2.left_child_array[:-1])\n            np.testing.assert_array_equal(\n                tree1.right_child, tree2.right_child_array[:-1]\n            )\n        assert right == ts.sequence_length\n\n\nclass TestOneSampleRoot(ExampleTopologyMixin):\n    \"\"\"\n    Tests for the standard root threshold of subtending at least\n    one sample.\n    \"\"\"\n\n    def verify(self, ts):\n        tree2 = tskit.Tree(ts)\n        tree2.first()\n        for interval, tree1 in tsutil.algorithm_R(ts, root_threshold=1):\n            root_reachable_nodes = len(tree2.preorder())\n            size_bound = tree1.num_edges + ts.num_samples\n            assert size_bound >= root_reachable_nodes\n            assert interval == tree2.interval\n            assert tree1.roots() == tree2.roots\n            # Definition here is the set unique path ends from samples\n            roots = set()\n            for u in ts.samples():\n                while u != tskit.NULL:\n                    path_end = u\n                    u = tree2.parent(u)\n                roots.add(path_end)\n            assert set(tree1.roots()) == roots\n            np.testing.assert_array_equal(tree1.parent, tree2.parent_array)\n            np.testing.assert_array_equal(tree1.left_child, tree2.left_child_array)\n            np.testing.assert_array_equal(tree1.right_child, tree2.right_child_array)\n            np.testing.assert_array_equal(tree1.left_sib, tree2.left_sib_array)\n            np.testing.assert_array_equal(tree1.right_sib, tree2.right_sib_array)\n            np.testing.assert_array_equal(tree1.num_children, tree2.num_children_array)\n            tree2.next()\n        assert tree2.index == -1\n\n\nclass RootThreshold(ExampleTopologyMixin):\n    \"\"\"\n    Tests for the root criteria of subtending at least k samples.\n    \"\"\"\n\n    def verify(self, ts):\n        k = self.root_threshold\n        trees_py = tsutil.algorithm_R(ts, root_threshold=k)\n        
tree_lib = tskit.Tree(ts, root_threshold=k)\n        tree_lib.first()\n        tree_leg = tsutil.LegacyRootThresholdTree(ts, root_threshold=k)\n        for (interval_py, tree_py), interval_leg in itertools.zip_longest(\n            trees_py, tree_leg.iterate()\n        ):\n            assert interval_py == tree_lib.interval\n            assert interval_leg == tree_lib.interval\n\n            root_reachable_nodes = len(tree_lib.preorder())\n            size_bound = tree_py.num_edges + ts.num_samples\n            assert size_bound >= root_reachable_nodes\n            assert tree_py.num_edges == tree_lib.num_edges\n\n            # Definition here is the set unique path ends from samples\n            # that subtend at least k samples\n            roots = set()\n            for u in ts.samples():\n                while u != tskit.NULL:\n                    path_end = u\n                    u = tree_lib.parent(u)\n                if tree_lib.num_samples(path_end) >= k:\n                    roots.add(path_end)\n            assert set(tree_py.roots()) == roots\n            assert set(tree_lib.roots) == roots\n            assert set(tree_leg.roots()) == roots\n            assert len(tree_leg.roots()) == tree_lib.num_roots\n            assert tree_py.roots() == tree_lib.roots\n\n            # # The python class has identical behaviour to the lib version\n            assert tree_py.left_child[-1] == tree_lib.left_root\n            np.testing.assert_array_equal(tree_py.parent, tree_lib.parent_array)\n            np.testing.assert_array_equal(tree_py.left_child, tree_lib.left_child_array)\n            np.testing.assert_array_equal(\n                tree_py.right_child, tree_lib.right_child_array\n            )\n            np.testing.assert_array_equal(tree_py.left_sib, tree_lib.left_sib_array)\n            np.testing.assert_array_equal(tree_py.right_sib, tree_lib.right_sib_array)\n            np.testing.assert_array_equal(\n                tree_py.num_children, 
tree_lib.num_children_array\n            )\n\n            # NOTE: the legacy left_root value is *not* necessarily the same as the\n            # new left_root.\n            # assert tree_leg.left_root == tree_py.left_child[-1]\n\n            # The virtual root version is identical to the legacy tree\n            # except for the extra node and the details of the sib arrays.\n            np.testing.assert_array_equal(tree_py.parent[:-1], tree_leg.parent)\n            np.testing.assert_array_equal(tree_py.left_child[:-1], tree_leg.left_child)\n            np.testing.assert_array_equal(tree_py.right_child[:-1], tree_leg.right_child)\n            # The sib arrays are identical except for root nodes.\n            for u in range(ts.num_nodes):\n                if u not in roots:\n                    assert tree_py.left_sib[u] == tree_leg.left_sib[u]\n                    assert tree_py.right_sib[u] == tree_leg.right_sib[u]\n\n            tree_lib.next()\n        assert tree_lib.index == -1\n\n\nclass TestRootThreshold1(RootThreshold):\n    root_threshold = 1\n\n\nclass TestRootThreshold2(RootThreshold):\n    root_threshold = 2\n\n\nclass TestRootThreshold3(RootThreshold):\n    root_threshold = 3\n\n\nclass TestRootThreshold4(RootThreshold):\n    root_threshold = 4\n\n\nclass TestRootThreshold10(RootThreshold):\n    root_threshold = 10\n\n\nclass TestSquashEdges:\n    \"\"\"\n    Tests of the squash_edges function.\n    \"\"\"\n\n    def do_squash(self, ts, compare_lib=True):\n        squashed = ts.dump_tables().edges\n        squashed.squash()\n        if compare_lib:\n            squashed_list = squash_edges(ts)\n            squashed_py = tskit.EdgeTable()\n            for e in squashed_list:\n                squashed_py.append(e)\n            # Check the Python and C implementations produce the same output.\n            assert squashed_py == squashed\n        return squashed\n\n    def test_simple_case(self):\n        #   2\n        #  / \\\n        # 0   1\n        
nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   population      time\n        0       1       0               0.00000000000000\n        1       1       0               0.00000000000000\n        2       0       0               1.00000000000000\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        id      left            right           parent  child\n        0       0.00000000      0.50000000      2       0\n        1       0.00000000      0.50000000      2       1\n        2       0.50000000      1.00000000      2       0\n        3       0.50000000      1.00000000      2       1\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes=nodes, edges=edges, strict=False)\n        edges = self.do_squash(ts)\n        assert all(edges.left) == 0\n        assert all(edges.right) == 1\n        assert list(edges.parent) == [2, 2]\n        assert list(edges.child) == [0, 1]\n\n    def test_simple_case_unordered_intervals(self):\n        # 1\n        # |\n        # 0\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   population      time\n        0       1           0               0.0\n        1       0           0               1.0\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        id      left            right           parent  child\n        0       0.40            1.0             1       0\n        0       0.00            0.40            1       0\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes=nodes, edges=edges, strict=False)\n        edges = self.do_squash(ts)\n        assert edges.left[0] == 0\n        assert edges.right[0] == 1\n        assert edges.parent[0] == 1\n        assert edges.child[0] == 0\n\n    def test_simple_case_unordered_children(self):\n        #   2\n        #  / \\\n        # 0   1\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   population      time\n        0       1    
   0               0.00000000000000\n        1       1       0               0.00000000000000\n        2       0       0               1.00000000000000\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        id      left            right           parent  child\n        0       0.50000000      1.00000000      2       1\n        1       0.50000000      1.00000000      2       0\n        2       0.00000000      0.50000000      2       1\n        3       0.00000000      0.50000000      2       0\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes=nodes, edges=edges, strict=False)\n        edges = self.do_squash(ts)\n        assert all(edges.left) == 0\n        assert all(edges.right) == 1\n        assert list(edges.parent) == [2, 2]\n        assert list(edges.child) == [0, 1]\n\n    def test_simple_case_unordered_children_and_intervals(self):\n        #   2\n        #  / \\\n        # 0   1\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   population      time\n        0       1       0               0.00000000000000\n        1       1       0               0.00000000000000\n        2       0       0               1.00000000000000\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        id      left            right           parent  child\n        0       0.50000000      1.00000000      2       1\n        2       0.00000000      0.50000000      2       1\n        3       0.00000000      0.50000000      2       0\n        1       0.50000000      1.00000000      2       0\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes=nodes, edges=edges, strict=False)\n        edges = self.do_squash(ts)\n        assert all(edges.left) == 0\n        assert all(edges.right) == 1\n        assert list(edges.parent) == [2, 2]\n        assert list(edges.child) == [0, 1]\n\n    def test_squash_multiple_parents_and_children(self):\n        #   4       5\n        #  / \\     
/ \\\n        # 0   1   2   3\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   population      time\n        0       1       0               0.00000000000000\n        1       1       0               0.00000000000000\n        2       1       0               0.00000000000000\n        3       1       0               0.00000000000000\n        4       0       0               1.00000000000000\n        5       0       0               1.00000000000000\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        id      left            right           parent  child\n        5       0.50000000      1.00000000      5       3\n        6       0.50000000      1.00000000      5       2\n        7       0.00000000      0.50000000      5       3\n        8       0.00000000      0.50000000      5       2\n        9       0.40000000      1.00000000      4       1\n        10      0.00000000      0.40000000      4       1\n        11      0.40000000      1.00000000      4       0\n        12      0.00000000      0.40000000      4       0\n        \"\"\"\n        )\n        ts = tskit.load_text(nodes=nodes, edges=edges, strict=False)\n        edges = self.do_squash(ts)\n        assert all(edges.left) == 0\n        assert all(edges.right) == 1\n        assert list(edges.parent) == [4, 4, 5, 5]\n        assert list(edges.child) == [0, 1, 2, 3]\n\n    def test_squash_overlapping_intervals(self):\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   population      time\n        0       1           0               0.0\n        1       0           0               1.0\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        id      left            right           parent  child\n        0       0.00            0.50            1       0\n        1       0.40            0.80            1       0\n        2       0.60            1.00            1       0\n        \"\"\"\n        
)\n        with pytest.raises(tskit.LibraryError):\n            tskit.load_text(nodes=nodes, edges=edges, strict=False)\n\n    def verify_slice_and_squash(self, ts):\n        \"\"\"\n        Slices a tree sequence so that there are edge endpoints at\n        all integer locations, then squashes these edges and verifies\n        that the resulting edge table is the same as the input edge table.\n        \"\"\"\n        sliced_edges = []\n        # Create new sliced edge table.\n        for e in ts.edges():\n            left = e.left\n            right = e.right\n\n            if left == np.floor(left):\n                r_left = np.ceil(left) + 1\n            else:\n                r_left = np.ceil(left)\n            if right == np.floor(right):\n                r_right = np.floor(right)\n            else:\n                r_right = np.floor(right) + 1\n\n            new_range = [left]\n            for r in np.arange(r_left, r_right):\n                new_range.append(r)\n            new_range.append(right)\n            assert len(new_range) > 1\n\n            # Add new edges to the list.\n            for r in range(1, len(new_range)):\n                new = tskit.Edge(new_range[r - 1], new_range[r], e.parent, e.child)\n                sliced_edges.append(new)\n\n        # Shuffle the edges and create a new edge table.\n        random.shuffle(sliced_edges)\n        sliced_table = tskit.EdgeTable()\n        for e in sliced_edges:\n            sliced_table.append(e)\n\n        # Squash the edges and check against input table.\n        sliced_table.squash()\n        assert sliced_table == ts.tables.edges\n\n    def test_sim_single_coalescent_tree(self):\n        ts = msprime.simulate(20, random_seed=4, length=10)\n        assert ts.num_trees == 1\n        self.verify_slice_and_squash(ts)\n\n    def test_sim_big_coalescent_trees(self):\n        ts = msprime.simulate(20, recombination_rate=5, random_seed=4, length=10)\n        assert ts.num_trees > 2\n        
self.verify_slice_and_squash(ts)\n\n\ndef squash_edges(ts):\n    \"\"\"\n    Returns the edges in the tree sequence squashed.\n    \"\"\"\n    t = ts.tables.nodes.time\n    edges = list(ts.edges())\n    edges.sort(key=lambda e: (t[e.parent], e.parent, e.child, e.left))\n    if len(edges) == 0:\n        return []\n\n    squashed = []\n    last_e = edges[0]\n    for e in edges[1:]:\n        condition = (\n            e.parent != last_e.parent\n            or e.child != last_e.child\n            or e.left != last_e.right\n        )\n        if condition:\n            squashed.append(last_e)\n            last_e = e\n        last_e.right = e.right\n    squashed.append(last_e)\n    return squashed\n\n\ndef reduce_topology(ts):\n    \"\"\"\n    Returns a tree sequence with the minimal information required to represent\n    the tree topologies at its sites. Uses a left-to-right algorithm.\n    \"\"\"\n    tables = ts.dump_tables()\n    edge_map = {}\n\n    def add_edge(left, right, parent, child):\n        new_edge = tskit.Edge(left, right, parent, child)\n        if child not in edge_map:\n            edge_map[child] = new_edge\n        else:\n            edge = edge_map[child]\n            if edge.right == left and edge.parent == parent:\n                # Squash\n                edge.right = right\n            else:\n                tables.edges.append(edge)\n                edge_map[child] = new_edge\n\n    tables.edges.clear()\n\n    edge_buffer = []\n    first_site = True\n    for tree in ts.trees():\n        # print(tree.interval)\n        # print(tree.draw(format=\"unicode\"))\n        if tree.num_sites > 0:\n            sites = list(tree.sites())\n            if first_site:\n                x = 0\n                # print(\"First site\", sites)\n                first_site = False\n            else:\n                x = sites[0].position\n            # Flush the edge buffer.\n            for left, parent, child in edge_buffer:\n                add_edge(left, x, 
parent, child)\n            # Add edges for each node in the tree.\n            edge_buffer = []\n            for root in tree.roots:\n                for u in tree.nodes(root):\n                    if u != root:\n                        edge_buffer.append((x, tree.parent(u), u))\n    # Add the final edges.\n    for left, parent, child in edge_buffer:\n        add_edge(left, tables.sequence_length, parent, child)\n    # Flush the remaining edges to the table\n    for edge in edge_map.values():\n        tables.edges.append(edge)\n    tables.sort()\n    ts = tables.tree_sequence()\n    # Now simplify to remove redundant nodes.\n    return ts.simplify(map_nodes=True, filter_sites=False)\n\n\nclass TestReduceTopology:\n    \"\"\"\n    Tests to ensure that reduce topology in simplify is equivalent to the\n    reduce_topology function above.\n    \"\"\"\n\n    def verify(self, ts):\n        source_tables = ts.tables\n        X = source_tables.sites.position\n        position_count = {x: 0 for x in X}\n        position_count[0] = 0\n        position_count[ts.sequence_length] = 0\n        mts, node_map = reduce_topology(ts)\n        for edge in mts.edges():\n            assert edge.left in position_count\n            assert edge.right in position_count\n            position_count[edge.left] += 1\n            position_count[edge.right] += 1\n        if ts.num_sites == 0:\n            # We should have zero edges output.\n            assert mts.num_edges == 0\n        elif X[0] != 0:\n            # The first site (if it's not zero) should be mapped to zero so\n            # this never occurs in edges.\n            assert position_count[X[0]] == 0\n\n        minimised_trees = mts.trees()\n        minimised_tree = next(minimised_trees)\n        minimised_tree_sites = minimised_tree.sites()\n        for tree in ts.trees():\n            for site in tree.sites():\n                minimised_site = next(minimised_tree_sites, None)\n                if minimised_site is None:\n        
            minimised_tree = next(minimised_trees)\n                    minimised_tree_sites = minimised_tree.sites()\n                    minimised_site = next(minimised_tree_sites)\n                assert site.position == minimised_site.position\n                assert site.ancestral_state == minimised_site.ancestral_state\n                assert site.metadata == minimised_site.metadata\n                assert len(site.mutations) == len(minimised_site.mutations)\n\n                for mutation, minimised_mutation in zip(\n                    site.mutations, minimised_site.mutations\n                ):\n                    assert mutation.derived_state == minimised_mutation.derived_state\n                    assert mutation.metadata == minimised_mutation.metadata\n                    assert mutation.parent == minimised_mutation.parent\n                    assert node_map[mutation.node] == minimised_mutation.node\n            if tree.num_sites > 0:\n                mapped_dict = {\n                    node_map[u]: node_map[v] for u, v in tree.parent_dict.items()\n                }\n                assert mapped_dict == minimised_tree.parent_dict\n        assert np.array_equal(ts.genotype_matrix(), mts.genotype_matrix())\n\n        edges = list(mts.edges())\n        squashed = squash_edges(mts)\n        assert len(edges) == len(squashed)\n        assert edges == squashed\n\n        # Verify against simplify implementations.\n        s = tests.Simplifier(\n            ts, ts.samples(), reduce_to_site_topology=True, filter_sites=False\n        )\n        sts1, _ = s.simplify()\n        sts2 = ts.simplify(reduce_to_site_topology=True, filter_sites=False)\n        t1 = mts.tables\n        for sts in [sts2, sts2]:\n            t2 = sts.tables\n            assert t1.nodes == t2.nodes\n            assert t1.edges == t2.edges\n            assert t1.sites == t2.sites\n            assert t1.mutations == t2.mutations\n            assert t1.populations == t2.populations\n       
     assert t1.individuals == t2.individuals\n        return mts\n\n    def test_no_recombination_one_site(self):\n        ts = msprime.simulate(15, random_seed=1)\n        tables = ts.dump_tables()\n        tables.sites.add_row(position=0.25, ancestral_state=\"0\")\n        mts = self.verify(tables.tree_sequence())\n        assert mts.num_trees == 1\n\n    def test_simple_recombination_one_site(self):\n        ts = msprime.simulate(15, random_seed=1, recombination_rate=2)\n        tables = ts.dump_tables()\n        tables.sites.add_row(position=0.25, ancestral_state=\"0\")\n        mts = self.verify(tables.tree_sequence())\n        assert mts.num_trees == 1\n\n    def test_simple_recombination_fixed_sites(self):\n        ts = msprime.simulate(5, random_seed=1, recombination_rate=2)\n        tables = ts.dump_tables()\n        for x in [0.25, 0.5, 0.75]:\n            tables.sites.add_row(position=x, ancestral_state=\"0\")\n        self.verify(tables.tree_sequence())\n\n    def get_integer_edge_ts(self, n, m):\n        recombination_map = msprime.RecombinationMap.uniform_map(m, 1, num_loci=m)\n        ts = msprime.simulate(n, random_seed=1, recombination_map=recombination_map)\n        assert ts.num_trees > 1\n        for edge in ts.edges():\n            assert int(edge.left) == edge.left\n            assert int(edge.right) == edge.right\n        return ts\n\n    def test_integer_edges_one_site(self):\n        ts = self.get_integer_edge_ts(5, 10)\n        tables = ts.dump_tables()\n        tables.sites.add_row(position=1, ancestral_state=\"0\")\n        mts = self.verify(tables.tree_sequence())\n        assert mts.num_trees == 1\n\n    def test_integer_edges_all_sites(self):\n        ts = self.get_integer_edge_ts(5, 10)\n        tables = ts.dump_tables()\n        for x in range(10):\n            tables.sites.add_row(position=x, ancestral_state=\"0\")\n        mts = self.verify(tables.tree_sequence())\n        assert mts.num_trees == ts.num_trees\n\n    def 
test_simple_recombination_site_at_zero(self):\n        ts = msprime.simulate(5, random_seed=1, recombination_rate=2)\n        tables = ts.dump_tables()\n        tables.sites.add_row(position=0, ancestral_state=\"0\")\n        mts = self.verify(tables.tree_sequence())\n        assert mts.num_trees == 1\n\n    def test_simple_recombination(self):\n        ts = msprime.simulate(5, random_seed=1, recombination_rate=2, mutation_rate=2)\n        self.verify(ts)\n\n    def test_large_recombination(self):\n        ts = msprime.simulate(25, random_seed=12, recombination_rate=5, mutation_rate=15)\n        self.verify(ts)\n\n    def test_no_recombination(self):\n        ts = msprime.simulate(5, random_seed=1, mutation_rate=2)\n        self.verify(ts)\n\n    def test_no_mutation(self):\n        ts = msprime.simulate(5, random_seed=1)\n        self.verify(ts)\n\n    def test_zero_sites(self):\n        ts = msprime.simulate(5, random_seed=2)\n        assert ts.num_sites == 0\n        mts = ts.simplify(reduce_to_site_topology=True)\n        assert mts.num_trees == 1\n        assert mts.num_edges == 0\n\n    def test_branch_sites(self):\n        ts = msprime.simulate(15, random_seed=12, recombination_rate=2, length=10)\n        ts = tsutil.insert_branch_sites(ts)\n        self.verify(ts)\n\n    def test_jiggled_samples(self):\n        ts = msprime.simulate(8, random_seed=13, recombination_rate=2, length=10)\n        ts = tsutil.jiggle_samples(ts)\n        self.verify(ts)\n\n\ndef search_sorted(a, v):\n    \"\"\"\n    Implementation of searchsorted based on binary search with the same\n    semantics as numpy's searchsorted. 
Used as the basis of the C\n    implementation which we use in the simplify algorithm.\n    \"\"\"\n    upper = len(a)\n    if upper == 0:\n        return 0\n    lower = 0\n    while upper - lower > 1:\n        mid = (upper + lower) // 2\n        if v >= a[mid]:\n            lower = mid\n        else:\n            upper = mid\n    offset = 0\n    if a[lower] < v:\n        offset = 1\n    return lower + offset\n\n\nclass TestSearchSorted:\n    \"\"\"\n    Tests for the basic implementation of search_sorted.\n    \"\"\"\n\n    def verify(self, a):\n        a = np.array(a)\n        start, end = a[0], a[-1]\n        # Check random values.\n        np.random.seed(43)\n        for v in np.random.uniform(start, end, 10):\n            assert search_sorted(a, v) == np.searchsorted(a, v)\n        # Check equal values.\n        for v in a:\n            assert search_sorted(a, v) == np.searchsorted(a, v)\n        # Check values outside bounds.\n        for v in [start - 2, start - 1, end, end + 1, end + 2]:\n            assert search_sorted(a, v) == np.searchsorted(a, v)\n\n    def test_range(self):\n        for j in range(1, 20):\n            self.verify(range(j))\n\n    def test_negative_range(self):\n        for j in range(1, 20):\n            self.verify(-1 * np.arange(j)[::-1])\n\n    def test_random_unit_interval(self):\n        np.random.seed(143)\n        for size in range(1, 100):\n            a = np.random.random(size=size)\n            a.sort()\n            self.verify(a)\n\n    def test_random_interval(self):\n        np.random.seed(143)\n        for _ in range(10):\n            interval = np.random.random(2) * 10\n            interval.sort()\n            a = np.random.uniform(*interval, size=100)\n            a.sort()\n            self.verify(a)\n\n    def test_random_negative(self):\n        np.random.seed(143)\n        for _ in range(10):\n            interval = np.random.random(2) * 5\n            interval.sort()\n            a = -1 * 
np.random.uniform(*interval, size=100)\n            a.sort()\n            self.verify(a)\n\n    def test_edge_cases(self):\n        for v in [0, 1]:\n            assert search_sorted([], v) == np.searchsorted([], v)\n            assert search_sorted([1], v) == np.searchsorted([1], v)\n\n\nclass TestDeleteSites:\n    \"\"\"\n    Tests for the TreeSequence.delete_sites method\n    \"\"\"\n\n    def ts_with_4_sites(self):\n        ts = msprime.simulate(8, random_seed=3)\n        tables = ts.dump_tables()\n        tables.sites.set_columns(np.arange(0, 1, 0.25), *tskit.pack_strings([\"G\"] * 4))\n        tables.mutations.add_row(site=1, node=ts.first().parent(0), derived_state=\"C\")\n        tables.mutations.add_row(site=1, node=0, derived_state=\"T\", parent=0)\n        tables.mutations.add_row(site=2, node=1, derived_state=\"A\")\n        return tables.tree_sequence()\n\n    def test_remove_by_index(self):\n        ts = self.ts_with_4_sites().delete_sites([])\n        assert ts.num_sites == 4\n        assert ts.num_mutations == 3\n        ts = ts.delete_sites(2)\n        assert ts.num_sites == 3\n        assert ts.num_mutations == 2\n        ts = ts.delete_sites([1, 2])\n        assert ts.num_sites == 1\n        assert ts.num_mutations == 0\n\n    def test_remove_all(self):\n        ts = self.ts_with_4_sites().delete_sites(range(4))\n        assert ts.num_sites == 0\n        assert ts.num_mutations == 0\n        # should be OK to run on a siteless tree seq as no sites specified\n        ts.delete_sites([])\n\n    def test_remove_repeated_sites(self):\n        ts = self.ts_with_4_sites()\n        t1 = ts.delete_sites([0, 1], record_provenance=False)\n        t2 = ts.delete_sites([0, 0, 1], record_provenance=False)\n        t3 = ts.delete_sites([0, 0, 0, 1], record_provenance=False)\n        assert t1.tables == t2.tables\n        assert t1.tables == t3.tables\n\n    def test_remove_different_orders(self):\n        ts = self.ts_with_4_sites()\n        t1 = 
ts.delete_sites([0, 1, 3], record_provenance=False)\n        t2 = ts.delete_sites([0, 3, 1], record_provenance=False)\n        t3 = ts.delete_sites([3, 0, 1], record_provenance=False)\n        assert t1.tables == t2.tables\n        assert t1.tables == t3.tables\n\n    def test_remove_bad(self):\n        ts = self.ts_with_4_sites()\n        with pytest.raises(TypeError):\n            ts.delete_sites([\"1\"])\n        with pytest.raises(ValueError):\n            ts.delete_sites(4)\n        with pytest.raises(ValueError):\n            ts.delete_sites(-5)\n\n    def verify_removal(self, ts, remove_sites):\n        tables = ts.dump_tables()\n        tables.delete_sites(remove_sites)\n\n        # Make sure we've computed the mutation parents properly.\n        mutation_parent = tables.mutations.parent\n        tables.compute_mutation_parents()\n        assert np.array_equal(mutation_parent, tables.mutations.parent)\n\n        tsd = tables.tree_sequence()\n        assert tsd.num_sites == ts.num_sites - len(remove_sites)\n        source_sites = [site for site in ts.sites() if site.id not in remove_sites]\n        assert len(source_sites) == tsd.num_sites\n        for s1, s2 in zip(source_sites, tsd.sites()):\n            assert s1.position == s2.position\n            assert s1.ancestral_state == s2.ancestral_state\n            assert s1.metadata == s2.metadata\n            assert len(s1.mutations) == len(s2.mutations)\n            for m1, m2 in zip(s1.mutations, s2.mutations):\n                assert m1.node == m2.node\n                assert m1.derived_state == m2.derived_state\n                assert m1.metadata == m2.metadata\n\n        # Check we get the same genotype_matrix\n        G1 = ts.genotype_matrix()\n        G2 = tsd.genotype_matrix()\n        keep = np.ones(ts.num_sites, dtype=bool)\n        keep[remove_sites] = 0\n        assert np.array_equal(G1[keep], G2)\n\n    def test_simple_random_metadata(self):\n        ts = msprime.simulate(10, mutation_rate=10, 
random_seed=2)\n        ts = tsutil.add_random_metadata(ts)\n        assert ts.num_mutations > 5\n        self.verify_removal(ts, [1, 3])\n\n    def test_simple_mixed_length_states(self):\n        ts = msprime.simulate(10, random_seed=2, length=10)\n        tables = ts.dump_tables()\n        for j in range(10):\n            tables.sites.add_row(j, \"X\" * j)\n            tables.mutations.add_row(site=j, node=j, derived_state=\"X\" * (j + 1))\n        ts = tables.tree_sequence()\n        self.verify_removal(ts, [9])\n\n    def test_jukes_cantor_random_metadata(self):\n        ts = msprime.simulate(10, random_seed=2)\n        ts = tsutil.jukes_cantor(ts, 10, 1, seed=2)\n        ts = tsutil.add_random_metadata(ts)\n        assert ts.num_mutations > 10\n        self.verify_removal(ts, [])\n        self.verify_removal(ts, [0, 2, 4, 8])\n        self.verify_removal(ts, range(5))\n\n    def test_jukes_cantor_many_mutations(self):\n        ts = msprime.simulate(2, random_seed=2)\n        ts = tsutil.jukes_cantor(ts, 10, mu=10, seed=2)\n        assert ts.num_mutations > 100\n        self.verify_removal(ts, [1, 3, 5, 7])\n        self.verify_removal(ts, [1])\n        self.verify_removal(ts, [9])\n\n    def test_jukes_cantor_one_site(self):\n        ts = msprime.simulate(5, random_seed=2)\n        ts = tsutil.jukes_cantor(ts, 1, mu=10, seed=2)\n        assert ts.num_mutations > 10\n        self.verify_removal(ts, [])\n        self.verify_removal(ts, [0])\n\n\nclass TestKeepSingleInterval(unittest.TestCase):\n    \"\"\"\n    Tests for cutting up tree sequences along the genome.\n    \"\"\"\n\n    def test_slice_unchanged(self):\n        ts = msprime.simulate(5, random_seed=1, recombination_rate=2, mutation_rate=2)\n        tables = ts.dump_tables()\n        tables.edges.packset_metadata([b\"edge {i}\" for i in range(ts.num_edges)])\n        ts1 = tables.tree_sequence()\n        ts2 = ts1.keep_intervals([[0, 1]], simplify=False, record_provenance=False)\n        
ts1.tables.assert_equals(ts2.tables)\n\n    def test_slice_by_tree_positions(self):\n        ts = msprime.simulate(5, random_seed=1, recombination_rate=2, mutation_rate=2)\n        breakpoints = list(ts.breakpoints())\n\n        # Keep the last 3 trees (from 4th last breakpoint onwards)\n        ts_sliced = ts.keep_intervals([[breakpoints[-4], ts.sequence_length]])\n        assert ts_sliced.num_trees == 4\n        assert ts_sliced.num_edges < ts.num_edges\n        self.assertAlmostEqual(ts_sliced.sequence_length, 1.0)\n        last_3_mutations = 0\n        for tree_index in range(-3, 0):\n            last_3_mutations += ts.at_index(tree_index).num_mutations\n        assert ts_sliced.num_mutations == last_3_mutations\n\n        # Keep the first 3 trees\n        ts_sliced = ts.keep_intervals([[0, breakpoints[3]]])\n        assert ts_sliced.num_trees == 4\n        assert ts_sliced.num_edges < ts.num_edges\n        self.assertAlmostEqual(ts_sliced.sequence_length, 1)\n        first_3_mutations = 0\n        for tree_index in range(0, 3):\n            first_3_mutations += ts.at_index(tree_index).num_mutations\n        assert ts_sliced.num_mutations == first_3_mutations\n\n        # Slice out the middle\n        ts_sliced = ts.keep_intervals([[breakpoints[3], breakpoints[-4]]])\n        assert ts_sliced.num_trees == ts.num_trees - 4\n        assert ts_sliced.num_edges < ts.num_edges\n        self.assertAlmostEqual(ts_sliced.sequence_length, 1.0)\n        assert (\n            ts_sliced.num_mutations\n            == ts.num_mutations - first_3_mutations - last_3_mutations\n        )\n\n    def test_slice_by_position(self):\n        ts = msprime.simulate(5, random_seed=1, recombination_rate=2, mutation_rate=2)\n        ts_sliced = ts.keep_intervals([[0.4, 0.6]])\n        positions = ts.tables.sites.position\n        assert ts_sliced.num_sites == np.sum((positions >= 0.4) & (positions < 0.6))\n\n    def test_slice_unsimplified(self):\n        ts = msprime.simulate(5, 
random_seed=1, recombination_rate=2, mutation_rate=2)\n        ts_sliced = ts.keep_intervals([[0.4, 0.6]], simplify=True)\n        assert ts.num_nodes != ts_sliced.num_nodes\n        self.assertAlmostEqual(ts_sliced.sequence_length, 1.0)\n        ts_sliced = ts.keep_intervals([[0.4, 0.6]], simplify=False)\n        assert ts.num_nodes == ts_sliced.num_nodes\n        self.assertAlmostEqual(ts_sliced.sequence_length, 1.0)\n\n    def test_slice_coordinates(self):\n        ts = msprime.simulate(5, random_seed=1, recombination_rate=2, mutation_rate=2)\n        ts_sliced = ts.keep_intervals([[0.4, 0.6]])\n        self.assertAlmostEqual(ts_sliced.sequence_length, 1)\n        assert ts_sliced.num_trees != ts.num_trees\n        assert ts_sliced.at_index(0).total_branch_length == 0\n        assert ts_sliced.at(0).total_branch_length == 0\n        assert ts_sliced.at(0.399).total_branch_length == 0\n        assert ts_sliced.at(0.4).total_branch_length != 0\n        assert ts_sliced.at(0.5).total_branch_length != 0\n        assert ts_sliced.at(0.599).total_branch_length != 0\n        assert ts_sliced.at(0.6).total_branch_length == 0\n        assert ts_sliced.at(0.999).total_branch_length == 0\n        assert ts_sliced.at_index(-1).total_branch_length == 0\n\n    def test_slice_migrations(self):\n        pop_configs = [msprime.PopulationConfiguration(5) for _ in range(2)]\n        migration_matrix = [[0, 0.05], [0.05, 0]]\n        ts = msprime.simulate(\n            population_configurations=pop_configs,\n            migration_matrix=migration_matrix,\n            record_migrations=True,\n            recombination_rate=2,\n            random_seed=1,\n        )\n        tables = ts.dump_tables()\n        tables.migrations.packset_metadata(\n            [b\"migration {i}\" for i in range(ts.num_migrations)]\n        )\n        ts = tables.tree_sequence()\n\n        ts_sliced = ts.keep_intervals([[0, 1]], simplify=False)\n        assert ts.tables.migrations == 
ts_sliced.tables.migrations\n        ts_sliced = ts.keep_intervals([[0, 0.5]], simplify=False)\n        assert np.max(ts_sliced.tables.migrations.right) <= 0.5\n        assert ts.num_migrations > ts_sliced.num_migrations\n\n        ts_sliced = ts.keep_intervals([[0.5, 1]], simplify=False)\n        assert np.max(ts_sliced.tables.migrations.left) >= 0.5\n        assert ts.num_migrations > ts_sliced.num_migrations\n\n        ts_sliced = ts.keep_intervals([[0.4, 0.6]], simplify=False)\n        assert np.max(ts_sliced.tables.migrations.right) <= 0.6\n        assert np.max(ts_sliced.tables.migrations.left) >= 0.4\n        assert ts.num_migrations > ts_sliced.num_migrations\n\n\nclass TestKeepIntervals(TopologyTestCase):\n    \"\"\"\n    Tests for keep_intervals operation, where we slice out multiple disjoint\n    intervals concurrently.\n    \"\"\"\n\n    def example_intervals(self, tables):\n        L = tables.sequence_length\n        yield []\n        yield [(0, L)]\n        yield [(0, L / 2), (L / 2, L)]\n        yield [(0, 0.25 * L), (0.75 * L, L)]\n        yield [(0.25 * L, L)]\n        yield [(0.25 * L, 0.5 * L)]\n        yield [(0.25 * L, 0.5 * L), (0.75 * L, 0.8 * L)]\n\n    def do_keep_intervals(\n        self, tables, intervals, simplify=True, record_provenance=True\n    ):\n        t1 = tables.copy()\n        simple_keep_intervals(t1, intervals, simplify, record_provenance)\n        t2 = tables.copy()\n        t2.keep_intervals(intervals, simplify, record_provenance)\n        t1.assert_equals(t2, ignore_timestamps=True)\n        return t2\n\n    def test_migration_error(self):\n        # keep_intervals should fail if simplify=True (default)\n        pop_configs = [msprime.PopulationConfiguration(5) for _ in range(2)]\n        migration_matrix = [[0, 0.05], [0.05, 0]]\n        ts = msprime.simulate(\n            population_configurations=pop_configs,\n            migration_matrix=migration_matrix,\n            record_migrations=True,\n            
recombination_rate=2,\n            random_seed=1,\n        )\n        with pytest.raises(tskit.LibraryError):\n            ts.dump_tables().keep_intervals([[0, 1]])\n\n    def test_bad_intervals(self):\n        tables = tskit.TableCollection(10)\n        bad_intervals = [[[1, 1]], [[-1, 0]], [[0, 11]], [[0, 5], [4, 6]]]\n        for intervals in bad_intervals:\n            with pytest.raises(ValueError):\n                tables.keep_intervals(intervals)\n            with pytest.raises(ValueError):\n                tables.delete_intervals(intervals)\n\n    def test_one_interval(self):\n        ts = msprime.simulate(\n            10, random_seed=self.random_seed, recombination_rate=2, mutation_rate=2\n        )\n        tables = ts.dump_tables()\n        intervals = [(0.3, 0.7)]\n        for simplify in (True, False):\n            for rec_prov in (True, False):\n                self.do_keep_intervals(tables, intervals, simplify, rec_prov)\n\n    def test_two_intervals(self):\n        ts = msprime.simulate(\n            10, random_seed=self.random_seed, recombination_rate=2, mutation_rate=2\n        )\n        tables = ts.dump_tables()\n        intervals = [(0.1, 0.2), (0.8, 0.9)]\n        for simplify in (True, False):\n            for rec_prov in (True, False):\n                self.do_keep_intervals(tables, intervals, simplify, rec_prov)\n\n    def test_ten_intervals(self):\n        ts = msprime.simulate(\n            10, random_seed=self.random_seed, recombination_rate=2, mutation_rate=2\n        )\n        tables = ts.dump_tables()\n        intervals = [(x, x + 0.05) for x in np.arange(0.0, 1.0, 0.1)]\n        for simplify in (True, False):\n            for rec_prov in (True, False):\n                self.do_keep_intervals(tables, intervals, simplify, rec_prov)\n\n    def test_hundred_intervals(self):\n        ts = msprime.simulate(\n            10, random_seed=self.random_seed, recombination_rate=2, mutation_rate=2\n        )\n        tables = ts.dump_tables()\n 
       intervals = [(x, x + 0.005) for x in np.arange(0.0, 1.0, 0.01)]\n        for simplify in (True, False):\n            for rec_prov in (True, False):\n                self.do_keep_intervals(tables, intervals, simplify, rec_prov)\n\n    def test_regular_intervals(self):\n        ts = msprime.simulate(3, random_seed=1234, recombination_rate=2, mutation_rate=2)\n        tables = ts.dump_tables()\n        eps = 0.0125\n        for num_intervals in range(2, 10):\n            breaks = np.linspace(0, ts.sequence_length, num=num_intervals)\n            intervals = [(x, x + eps) for x in breaks[:-1]]\n            self.do_keep_intervals(tables, intervals)\n\n    def test_no_edges_sites(self):\n        tables = tskit.TableCollection(1.0)\n        tables.sites.add_row(0.1, \"A\")\n        tables.sites.add_row(0.2, \"T\")\n        for intervals in self.example_intervals(tables):\n            assert len(tables.sites) == 2\n            diced = self.do_keep_intervals(tables, intervals)\n            assert diced.sequence_length == 1\n            assert len(diced.edges) == 0\n            assert len(diced.sites) == 0\n\n    def verify(self, tables):\n        for intervals in self.example_intervals(tables):\n            for simplify in [True, False]:\n                self.do_keep_intervals(tables, intervals, simplify=simplify)\n\n    def test_empty_tables(self):\n        tables = tskit.TableCollection(1.0)\n        self.verify(tables)\n\n    def test_single_tree_jukes_cantor(self):\n        ts = msprime.simulate(6, random_seed=1, mutation_rate=1)\n        ts = tsutil.jukes_cantor(ts, 20, 1, seed=10)\n        self.verify(ts.tables)\n\n    def test_single_tree_multichar_mutations(self):\n        ts = msprime.simulate(6, random_seed=1, mutation_rate=1)\n        ts = tsutil.insert_multichar_mutations(ts)\n        self.verify(ts.tables)\n\n    def test_many_trees_infinite_sites(self):\n        ts = msprime.simulate(6, recombination_rate=2, mutation_rate=2, random_seed=1)\n        
assert ts.num_sites > 0\n        assert ts.num_trees > 2\n        self.verify(ts.tables)\n\n    def test_many_trees_sequence_length_infinite_sites(self):\n        for L in [0.5, 1.5, 3.3333]:\n            ts = msprime.simulate(\n                6, length=L, recombination_rate=2, mutation_rate=1, random_seed=1\n            )\n            self.verify(ts.tables)\n\n    def test_wright_fisher_unsimplified(self):\n        tables = wf.wf_sim(\n            4,\n            5,\n            seed=1,\n            deep_history=True,\n            initial_generation_samples=False,\n            num_loci=10,\n        )\n        tables.sort()\n        ts = msprime.mutate(tables.tree_sequence(), rate=0.05, random_seed=234)\n        assert ts.num_sites > 0\n        self.verify(ts.tables)\n\n    def test_wright_fisher_initial_generation(self):\n        tables = wf.wf_sim(\n            6, 5, seed=3, deep_history=True, initial_generation_samples=True, num_loci=2\n        )\n        tables.sort()\n        tables.simplify()\n        ts = msprime.mutate(tables.tree_sequence(), rate=0.08, random_seed=2)\n        assert ts.num_sites > 0\n        self.verify(ts.tables)\n\n    def test_wright_fisher_initial_generation_no_deep_history(self):\n        tables = wf.wf_sim(\n            7,\n            15,\n            seed=202,\n            deep_history=False,\n            initial_generation_samples=True,\n            num_loci=5,\n        )\n        tables.sort()\n        tables.simplify()\n        ts = msprime.mutate(tables.tree_sequence(), rate=0.2, random_seed=2)\n        assert ts.num_sites > 0\n        self.verify(ts.tables)\n\n    def test_wright_fisher_unsimplified_multiple_roots(self):\n        tables = wf.wf_sim(\n            8,\n            15,\n            seed=1,\n            deep_history=False,\n            initial_generation_samples=False,\n            num_loci=20,\n        )\n        tables.sort()\n        ts = msprime.mutate(tables.tree_sequence(), rate=0.006, random_seed=2)\n       
 assert ts.num_sites > 0\n        self.verify(ts.tables)\n\n    def test_wright_fisher_simplified(self):\n        tables = wf.wf_sim(\n            9,\n            10,\n            seed=1,\n            deep_history=True,\n            initial_generation_samples=False,\n            num_loci=5,\n        )\n        tables.sort()\n        ts = tables.tree_sequence().simplify()\n        ts = msprime.mutate(ts, rate=0.2, random_seed=1234)\n        assert ts.num_sites > 0\n        self.verify(ts.tables)\n\n\nclass TestKeepDeleteIntervalsExamples:\n    \"\"\"\n    Simple examples of keep/delete intervals at work.\n    \"\"\"\n\n    def test_tables_single_tree_keep_middle(self):\n        ts = msprime.simulate(10, random_seed=2)\n        t_keep = ts.dump_tables()\n        t_keep.keep_intervals([[0.25, 0.5]], record_provenance=False)\n        t_delete = ts.dump_tables()\n        t_delete.delete_intervals([[0, 0.25], [0.5, 1.0]], record_provenance=False)\n        assert t_keep == t_delete\n\n    def test_tables_single_tree_delete_middle(self):\n        ts = msprime.simulate(10, random_seed=2)\n        t_keep = ts.dump_tables()\n        t_keep.delete_intervals([[0.25, 0.5]], record_provenance=False)\n        t_delete = ts.dump_tables()\n        t_delete.keep_intervals([[0, 0.25], [0.5, 1.0]], record_provenance=False)\n        assert t_keep == t_delete\n\n    def test_ts_single_tree_keep_middle(self):\n        ts = msprime.simulate(10, random_seed=2)\n        ts_keep = ts.keep_intervals([[0.25, 0.5]], record_provenance=False)\n        ts_delete = ts.delete_intervals([[0, 0.25], [0.5, 1.0]], record_provenance=False)\n        assert ts_keep == ts_delete\n\n    def test_ts_single_tree_delete_middle(self):\n        ts = msprime.simulate(10, random_seed=2)\n        ts_keep = ts.delete_intervals([[0.25, 0.5]], record_provenance=False)\n        ts_delete = ts.keep_intervals([[0, 0.25], [0.5, 1.0]], record_provenance=False)\n        assert ts_keep == ts_delete\n\n    def 
test_ts_migrations(self):\n        pop_configs = [msprime.PopulationConfiguration(5) for _ in range(2)]\n        migration_matrix = [[0, 0.05], [0.05, 0]]\n        ts = msprime.simulate(\n            population_configurations=pop_configs,\n            migration_matrix=migration_matrix,\n            record_migrations=True,\n            recombination_rate=2,\n            random_seed=1,\n        )\n        ts_keep = ts.delete_intervals(\n            [[0.25, 0.5]], record_provenance=False, simplify=False\n        )\n        ts_delete = ts.keep_intervals(\n            [[0, 0.25], [0.5, 1.0]], record_provenance=False, simplify=False\n        )\n        assert ts_keep == ts_delete\n\n\nclass TestTrim(unittest.TestCase):\n    \"\"\"\n    Test the trimming functionality\n    \"\"\"\n\n    def add_mutations(self, ts, position, ancestral_state, derived_states, nodes):\n        \"\"\"\n        Create a site at the specified position and assign mutations to the specified\n        nodes (could be sequential mutations)\n        \"\"\"\n        tables = ts.dump_tables()\n        site = tables.sites.add_row(position, ancestral_state)\n        for state, node in zip(derived_states, nodes):\n            tables.mutations.add_row(site, node, state)\n        tables.sort()\n        tables.build_index()\n        tables.compute_mutation_parents()\n        return tables.tree_sequence()\n\n    def verify_sites(self, source_tree, trimmed_tree, position_offset):\n        source_sites = list(source_tree.sites())\n        trimmed_sites = list(trimmed_tree.sites())\n        assert len(source_sites) == len(trimmed_sites)\n        for source_site, trimmed_site in zip(source_sites, trimmed_sites):\n            self.assertAlmostEqual(\n                source_site.position, position_offset + trimmed_site.position\n            )\n            assert source_site.ancestral_state == trimmed_site.ancestral_state\n            assert source_site.metadata == trimmed_site.metadata\n            assert 
len(source_site.mutations) == len(trimmed_site.mutations)\n            for source_mut, trimmed_mut in zip(\n                source_site.mutations, trimmed_site.mutations\n            ):\n                assert source_mut.node == trimmed_mut.node\n                assert source_mut.derived_state == trimmed_mut.derived_state\n                assert source_mut.metadata == trimmed_mut.metadata\n                # mutation.parent id may have changed after deleting redundant mutations\n                if source_mut.parent == trimmed_mut.parent == tskit.NULL:\n                    pass\n                else:\n                    assert (\n                        source_tree.tree_sequence.mutation(source_mut.parent).node\n                        == trimmed_tree.tree_sequence.mutation(trimmed_mut.parent).node\n                    )\n\n    def verify_ltrim(self, source_ts, trimmed_ts):\n        deleted_span = source_ts.first().span\n        self.assertAlmostEqual(\n            source_ts.sequence_length, trimmed_ts.sequence_length + deleted_span\n        )\n        assert source_ts.num_trees == trimmed_ts.num_trees + 1\n        for j in range(trimmed_ts.num_trees):\n            source_tree = source_ts.at_index(j + 1)\n            trimmed_tree = trimmed_ts.at_index(j)\n            assert source_tree.parent_dict == trimmed_tree.parent_dict\n            self.assertAlmostEqual(source_tree.span, trimmed_tree.span)\n            self.assertAlmostEqual(\n                source_tree.interval.left, trimmed_tree.interval.left + deleted_span\n            )\n            self.verify_sites(source_tree, trimmed_tree, deleted_span)\n\n    def verify_rtrim(self, source_ts, trimmed_ts):\n        deleted_span = source_ts.last().span\n        self.assertAlmostEqual(\n            source_ts.sequence_length, trimmed_ts.sequence_length + deleted_span\n        )\n        assert source_ts.num_trees == trimmed_ts.num_trees + 1\n        for j in range(trimmed_ts.num_trees):\n            source_tree = 
source_ts.at_index(j)\n            trimmed_tree = trimmed_ts.at_index(j)\n            assert source_tree.parent_dict == trimmed_tree.parent_dict\n            assert source_tree.interval == trimmed_tree.interval\n            self.verify_sites(source_tree, trimmed_tree, 0)\n\n    def clear_left_mutate(self, ts, left, num_sites):\n        \"\"\"\n        Clear the edges from a tree sequence left of the specified coordinate\n        and add in num_sites regularly spaced sites into the cleared region.\n        \"\"\"\n        new_ts = ts.delete_intervals([[0.0, left]])\n        for j, x in enumerate(np.linspace(0, left, num_sites, endpoint=False)):\n            new_ts = self.add_mutations(new_ts, x, \"A\" * j, [\"T\"] * j, range(j + 1))\n        return new_ts\n\n    def clear_right_mutate(self, ts, right, num_sites):\n        \"\"\"\n        Clear the edges from a tree sequence right of the specified coordinate\n        and add in num_sites regularly spaced sites into the cleared region.\n        \"\"\"\n        new_ts = ts.delete_intervals([[right, ts.sequence_length]])\n        for j, x in enumerate(\n            np.linspace(right, ts.sequence_length, num_sites, endpoint=False)\n        ):\n            new_ts = self.add_mutations(new_ts, x, \"A\" * j, [\"T\"] * j, range(j + 1))\n        return new_ts\n\n    def clear_left_right_234(self, left, right):\n        \"\"\"\n        Clear edges to left and right and add 2 mutations at the same site into the left\n        cleared region, 3 at the same site into the untouched region, and 4 into the\n        right cleared region.\n        \"\"\"\n        assert 0.0 < left < right < 1.0\n        ts = msprime.simulate(10, recombination_rate=10, random_seed=2)\n        left_pos = np.mean([0.0, left])\n        left_root = ts.at(left_pos).root\n        mid_pos = np.mean([left, right])\n        mid_root = ts.at(mid_pos).root\n        right_pos = np.mean([right, ts.sequence_length])\n        right_root = ts.at(right_pos).root\n        
# Clear\n        ts = ts.keep_intervals([[left, right]], simplify=False)\n        ts = self.add_mutations(ts, left_pos, \"A\", [\"T\", \"C\"], [left_root, 0])\n        ts = self.add_mutations(ts, mid_pos, \"T\", [\"A\", \"C\", \"G\"], [mid_root, 0, 1])\n        ts = self.add_mutations(\n            ts, right_pos, \"X\", [\"T\", \"C\", \"G\", \"A\"], [right_root, 0, 1, 2]\n        )\n        assert np.min(ts.tables.edges.left) != 0\n        assert ts.num_mutations == 9\n        assert ts.num_sites == 3\n        return ts\n\n    def migration_sim(self):\n        pop_configs = [msprime.PopulationConfiguration(5) for _ in range(2)]\n        migration_matrix = [[0, 0.05], [0.05, 0]]\n        ts = msprime.simulate(\n            population_configurations=pop_configs,\n            migration_matrix=migration_matrix,\n            record_migrations=True,\n            recombination_rate=2,\n            random_seed=1,\n        )\n        return ts\n\n    def test_ltrim_single_tree(self):\n        ts = msprime.simulate(10, mutation_rate=12, random_seed=2)\n        ts = self.clear_left_mutate(ts, 0.5, 10)\n        self.verify_ltrim(ts, ts.ltrim())\n\n    def test_ltrim_single_tree_no_mutations(self):\n        ts = msprime.simulate(10, random_seed=2)\n        ts = self.clear_left_mutate(ts, 0.5, 0)\n        self.verify_ltrim(ts, ts.ltrim())\n\n    def test_ltrim_single_tree_tiny_left(self):\n        ts = msprime.simulate(10, mutation_rate=12, random_seed=2)\n        ts = self.clear_left_mutate(ts, 1e-200, 10)\n        self.verify_ltrim(ts, ts.ltrim())\n\n    def test_ltrim_many_trees(self):\n        ts = msprime.simulate(10, recombination_rate=10, mutation_rate=12, random_seed=2)\n        ts = self.clear_left_mutate(ts, 0.5, 10)\n        self.verify_ltrim(ts, ts.ltrim())\n\n    def test_ltrim_many_trees_left_min(self):\n        ts = msprime.simulate(10, recombination_rate=10, mutation_rate=12, random_seed=2)\n        ts = self.clear_left_mutate(ts, sys.float_info.min, 10)\n        
self.verify_ltrim(ts, ts.ltrim())\n\n    def test_ltrim_many_trees_left_epsilon(self):\n        ts = msprime.simulate(10, recombination_rate=10, mutation_rate=12, random_seed=2)\n        ts = self.clear_left_mutate(ts, sys.float_info.epsilon, 0)\n        self.verify_ltrim(ts, ts.ltrim())\n\n    def test_ltrim_empty(self):\n        ts = msprime.simulate(2, random_seed=2)\n        ts = ts.delete_intervals([[0, 1]])\n        with pytest.raises(ValueError):\n            ts.ltrim()\n\n    def test_ltrim_multiple_mutations(self):\n        ts = self.clear_left_right_234(0.1, 0.5)\n        trimmed_ts = ts.ltrim()\n        self.assertAlmostEqual(trimmed_ts.sequence_length, 0.9)\n        assert trimmed_ts.num_sites == 2\n        assert trimmed_ts.num_mutations == 7  # We should have deleted 2\n        assert np.min(trimmed_ts.tables.edges.left) == 0\n        self.verify_ltrim(ts, trimmed_ts)\n\n    def test_ltrim_migrations(self):\n        ts = self.migration_sim()\n        ts = ts.delete_intervals([[0, 0.1]], simplify=False)\n        trimmed_ts = ts.ltrim()\n        assert np.array_equal(\n            trimmed_ts.tables.migrations.left, ts.tables.migrations.left - 0.1\n        )\n        assert np.array_equal(\n            trimmed_ts.tables.migrations.right, ts.tables.migrations.right - 0.1\n        )\n\n    def test_rtrim_single_tree(self):\n        ts = msprime.simulate(10, mutation_rate=12, random_seed=2)\n        ts = self.clear_right_mutate(ts, 0.5, 10)\n        self.verify_rtrim(ts, ts.rtrim())\n\n    def test_rtrim_single_tree_no_mutations(self):\n        ts = msprime.simulate(10, random_seed=2)\n        ts = self.clear_right_mutate(ts, 0.5, 0)\n        self.verify_rtrim(ts, ts.rtrim())\n\n    def test_rtrim_single_tree_tiny_left(self):\n        ts = msprime.simulate(10, mutation_rate=12, random_seed=2)\n        ts = self.clear_right_mutate(ts, 1e-200, 10)\n        self.verify_rtrim(ts, ts.rtrim())\n\n    def test_rtrim_many_trees(self):\n        ts = 
msprime.simulate(10, recombination_rate=10, mutation_rate=12, random_seed=2)\n        ts = self.clear_right_mutate(ts, 0.5, 10)\n        self.verify_rtrim(ts, ts.rtrim())\n\n    def test_rtrim_many_trees_left_min(self):\n        ts = msprime.simulate(10, recombination_rate=10, mutation_rate=12, random_seed=2)\n        ts = self.clear_right_mutate(ts, sys.float_info.min, 10)\n        self.verify_rtrim(ts, ts.rtrim())\n\n    def test_rtrim_many_trees_left_epsilon(self):\n        ts = msprime.simulate(10, recombination_rate=10, mutation_rate=12, random_seed=2)\n        ts = self.clear_right_mutate(ts, sys.float_info.epsilon, 0)\n        self.verify_rtrim(ts, ts.rtrim())\n\n    def test_rtrim_empty(self):\n        ts = msprime.simulate(2, random_seed=2)\n        ts = ts.delete_intervals([[0, 1]])\n        with pytest.raises(ValueError):\n            ts.rtrim()\n\n    def test_rtrim_multiple_mutations(self):\n        ts = self.clear_left_right_234(0.1, 0.5)\n        trimmed_ts = ts.rtrim()\n        self.assertAlmostEqual(trimmed_ts.sequence_length, 0.5)\n        assert trimmed_ts.num_sites == 2\n        assert trimmed_ts.num_mutations == 5  # We should have deleted 4\n        assert np.max(trimmed_ts.tables.edges.right) == trimmed_ts.tables.sequence_length\n        self.verify_rtrim(ts, trimmed_ts)\n\n    def test_rtrim_migrations(self):\n        ts = self.migration_sim()\n        ts = ts.delete_intervals([[0.9, 1]], simplify=False)\n        trimmed_ts = ts.rtrim()\n        trimmed_rights = trimmed_ts.tables.migrations.right\n        assert np.max(trimmed_rights) == 0.9\n\n    def test_trim_multiple_mutations(self):\n        ts = self.clear_left_right_234(0.1, 0.5)\n        trimmed_ts = ts.trim()\n        self.assertAlmostEqual(trimmed_ts.sequence_length, 0.4)\n        assert trimmed_ts.num_mutations == 3\n        assert trimmed_ts.num_sites == 1\n        assert np.min(trimmed_ts.tables.edges.left) == 0\n        assert np.max(trimmed_ts.tables.edges.right) == 
trimmed_ts.tables.sequence_length\n\n    def test_trims_no_effect(self):\n        # Deleting from middle should have no effect on any trim function\n        ts = msprime.simulate(10, recombination_rate=2, mutation_rate=50, random_seed=2)\n        ts = ts.delete_intervals([[0.1, 0.5]])\n        trimmed_ts = ts.ltrim(record_provenance=False)\n        assert ts == trimmed_ts\n        trimmed_ts = ts.rtrim(record_provenance=False)\n        assert ts == trimmed_ts\n        trimmed_ts = ts.trim(record_provenance=False)\n        assert ts == trimmed_ts\n\n    def test_failure_with_migrations(self):\n        # All trim functions fail if migrations extend further than rightmost or\n        # leftmost edges\n        ts = msprime.simulate(10, recombination_rate=2, random_seed=2)\n        ts = ts.keep_intervals([[0.1, 0.5]])\n        tables = ts.dump_tables()\n        tables.migrations.add_row(0, 1, 0, 0, 0, 0)\n        ts = tables.tree_sequence()\n        with pytest.raises(ValueError):\n            ts.ltrim()\n        with pytest.raises(ValueError):\n            ts.rtrim()\n        with pytest.raises(ValueError):\n            ts.trim()\n\n    def test_reference_sequence(self):\n        # Test that we fail if there is a reference sequence\n        tables = tskit.TableCollection(3.0)\n        tables.reference_sequence.data = \"ABC\"\n        ts = tables.tree_sequence()\n        with pytest.raises(ValueError, match=\"reference sequence\"):\n            ts.ltrim()\n        with pytest.raises(ValueError, match=\"reference sequence\"):\n            ts.rtrim()\n        with pytest.raises(ValueError, match=\"reference sequence\"):\n            ts.trim()\n\n\nclass TestShift:\n    \"\"\"\n    Test the shift functionality\n    \"\"\"\n\n    @pytest.mark.parametrize(\"shift\", [-0.5, 0, 0.5])\n    def test_shift(self, shift):\n        ts = tskit.Tree.generate_comb(2, span=2).tree_sequence\n        tables = ts.dump_tables()\n        tables.delete_intervals([[0, 1]], simplify=False)\n    
    tables.sites.add_row(1.5, \"A\")\n        ts = tables.tree_sequence()\n        ts = ts.shift(shift)\n        assert ts.sequence_length == 2 + shift\n        assert np.min(ts.tables.edges.left) == 1 + shift\n        assert np.max(ts.tables.edges.right) == 2 + shift\n        assert np.all(ts.tables.sites.position == 1.5 + shift)\n        assert len(list(ts.trees())) == ts.num_trees\n\n    def test_sequence_length(self):\n        ts = tskit.Tree.generate_comb(2).tree_sequence\n        ts = ts.shift(1, sequence_length=3)\n        assert ts.sequence_length == 3\n        ts = ts.shift(-1, sequence_length=1)\n        assert ts.sequence_length == 1\n\n    def test_empty(self):\n        empty_ts = tskit.TableCollection(1.0).tree_sequence()\n        empty_ts = empty_ts.shift(1)\n        assert empty_ts.sequence_length == 2\n        empty_ts = empty_ts.shift(-1.5)\n        assert empty_ts.sequence_length == 0.5\n        assert empty_ts.num_nodes == 0\n\n    def test_migrations(self):\n        tables = tskit.Tree.generate_comb(2, span=2).tree_sequence.dump_tables()\n        tables.populations.add_row()\n        tables.migrations.add_row(0, 1, 0, 0, 0, 0)\n        ts = tables.tree_sequence().shift(10)\n        assert np.all(ts.tables.migrations.left == 10)\n        assert np.all(ts.tables.migrations.right == 11)\n\n    def test_provenance(self):\n        ts = tskit.Tree.generate_comb(2).tree_sequence\n        ts = ts.shift(1, record_provenance=False)\n        params = json.loads(ts.provenance(-1).record)[\"parameters\"]\n        assert params[\"command\"] != \"shift\"\n        ts = ts.shift(1, sequence_length=9)\n        params = json.loads(ts.provenance(-1).record)[\"parameters\"]\n        assert params[\"command\"] == \"shift\"\n        assert params[\"value\"] == 1\n        assert params[\"sequence_length\"] == 9\n\n    def test_too_negative(self):\n        ts = tskit.Tree.generate_comb(2).tree_sequence\n        with pytest.raises(tskit.LibraryError, 
match=\"TSK_ERR_BAD_SEQUENCE_LENGTH\"):\n            ts.shift(-1)\n\n    def test_bad_seq_len(self):\n        ts = tskit.Tree.generate_comb(2).tree_sequence\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_RIGHT_GREATER_SEQ_LENGTH\"):\n            ts.shift(1, sequence_length=1)\n\n    def test_reference_sequence(self):\n        # Test that we fail if there is a reference sequence\n        tables = tskit.TableCollection(3.0)\n        tables.reference_sequence.data = \"ABC\"\n        ts = tables.tree_sequence()\n        with pytest.raises(ValueError, match=\"reference sequence\"):\n            ts.shift(1)\n\n\nclass TestConcatenate:\n    def test_simple(self):\n        ts1 = tskit.Tree.generate_comb(5, span=2).tree_sequence\n        ts1 = msprime.sim_mutations(ts1, rate=1, random_seed=1)\n        ts2 = tskit.Tree.generate_balanced(5, arity=3, span=3).tree_sequence\n        ts2 = msprime.sim_mutations(ts2, rate=1, random_seed=2)\n        assert ts1.num_samples == ts2.num_samples\n        assert ts1.num_nodes != ts2.num_nodes\n        joint_ts = ts1.concatenate(ts2)\n        assert joint_ts.num_nodes == ts1.num_nodes + ts2.num_nodes - 5\n        assert joint_ts.sequence_length == ts1.sequence_length + ts2.sequence_length\n        assert joint_ts.num_samples == ts1.num_samples\n        assert joint_ts.num_sites == ts1.num_sites + ts2.num_sites\n        assert joint_ts.num_mutations == ts1.num_mutations + ts2.num_mutations\n        ts3 = joint_ts.delete_intervals([[2, 5]]).rtrim()\n        # Have to simplify here, to remove the redundant nodes\n        ts3.tables.assert_equals(ts1.tables, ignore_provenance=True)\n        ts4 = joint_ts.delete_intervals([[0, 2]]).ltrim()\n        ts4.tables.assert_equals(ts2.tables, ignore_provenance=True)\n\n    def test_metadata(self, ts_fixture):\n        tables = ts_fixture.dump_tables()\n        tables.reference_sequence.clear()\n        tables.migrations.clear()\n        ts = tables.tree_sequence()\n        num_sites 
= ts.num_sites\n        assert num_sites > 0\n        joint_ts = ts.concatenate(ts)\n        for s1, s2 in zip(range(num_sites), range(num_sites, num_sites * 2)):\n            site1 = joint_ts.site(s1)\n            site2 = joint_ts.site(s2)\n            assert site1.metadata == site2.metadata\n            assert site1.ancestral_state == site2.ancestral_state\n            assert len(site1.mutations) == len(site2.mutations)\n            for m1, m2 in zip(site1.mutations, site2.mutations):\n                assert m1.metadata == m2.metadata\n                assert m1.derived_state == m2.derived_state\n                assert m1.time == m2.time\n        ns_nodes = np.where(ts.tables.nodes.flags & tskit.NODE_IS_SAMPLE == 0)[0]\n        assert len(ns_nodes) > 0\n        for u in ns_nodes:\n            node1 = ts.node(u)\n            node2 = joint_ts.node(u)\n            assert node1.metadata == node2.metadata\n            assert node1.flags == node2.flags\n            assert node1.time == node2.time\n            ind1 = joint_ts.individual(node1.individual)\n            ind2 = joint_ts.individual(node2.individual)\n            assert ind1.metadata == ind2.metadata\n            assert ind1.flags == ind2.flags\n            assert np.all(ind1.location == ind2.location)\n\n    def test_multiple(self):\n        np.random.seed(42)\n        ts3 = [\n            tskit.Tree.generate_comb(5, span=2).tree_sequence,\n            tskit.Tree.generate_balanced(5, arity=3, span=3).tree_sequence,\n            tskit.Tree.generate_star(5, span=5).tree_sequence,\n        ]\n        for i in range(1, len(ts3)):\n            # shuffle the sample nodes so they don't have the same IDs\n            ts3[i] = ts3[i].subset(np.random.permutation(ts3[i].num_nodes))\n        assert not np.all(ts3[0].samples() == ts3[1].samples())\n        assert not np.all(ts3[0].samples() == ts3[2].samples())\n        assert not np.all(ts3[1].samples() == ts3[2].samples())\n        ts = ts3[0].concatenate(*ts3[1:])\n   
     assert ts.sequence_length == sum([t.sequence_length for t in ts3])\n        assert ts.num_nodes - ts.num_samples == sum(\n            [t.num_nodes - t.num_samples for t in ts3]\n        )\n        assert np.all(ts.samples() == ts3[0].samples())\n\n    def test_empty(self):\n        empty_ts = tskit.TableCollection(10).tree_sequence()\n        ts = empty_ts.concatenate(empty_ts, empty_ts, empty_ts)\n        assert ts.num_nodes == 0\n        assert ts.sequence_length == 40\n\n    def test_samples_at_end(self):\n        ts1 = tskit.Tree.generate_comb(5, span=2).tree_sequence\n        ts2 = tskit.Tree.generate_balanced(5, arity=3, span=3).tree_sequence\n        # reverse the node order\n        ts1 = ts1.subset(np.arange(ts1.num_nodes)[::-1])\n        assert ts1.num_samples == ts2.num_samples\n        assert np.all(ts1.samples() != ts2.samples())\n        joint_ts = ts1.concatenate(ts2)\n        assert joint_ts.num_samples == ts1.num_samples\n        assert np.all(joint_ts.samples() == ts1.samples())\n\n    def test_internal_samples(self):\n        tables = tskit.Tree.generate_comb(4, span=2).tree_sequence.dump_tables()\n        nodes_flags = tables.nodes.flags\n        nodes_flags[:] = tskit.NODE_IS_SAMPLE\n        nodes_flags[-1] = 0  # Only root is not a sample\n        tables.nodes.flags = nodes_flags\n        ts = tables.tree_sequence()\n        joint_ts = ts.concatenate(ts)\n        assert joint_ts.num_samples == ts.num_samples\n        assert joint_ts.num_nodes == ts.num_nodes + 1\n        assert joint_ts.sequence_length == ts.sequence_length * 2\n\n    def test_some_shared_samples(self):\n        tables = tskit.Tree.generate_comb(5).tree_sequence.dump_tables()\n        tables.nodes[5] = tables.nodes[5].replace(flags=tskit.NODE_IS_SAMPLE)\n        ts1 = tables.tree_sequence()\n        tables = tskit.Tree.generate_balanced(5).tree_sequence.dump_tables()\n        tables.nodes[5] = tables.nodes[5].replace(flags=tskit.NODE_IS_SAMPLE)\n        ts2 = 
tables.tree_sequence()\n        assert ts1.num_samples == ts2.num_samples\n        joint_ts = ts1.concatenate(ts2)\n        assert joint_ts.num_samples == ts1.num_samples\n        assert joint_ts.num_edges == ts1.num_edges + ts2.num_edges\n        for tree in joint_ts.trees():\n            assert tree.num_roots == 1\n\n    @pytest.mark.parametrize(\"simplify\", [True, False])\n    def test_wf_sim(self, simplify):\n        # Test that we can split & concat a wf_sim ts, which has internal samples\n        tables = wf.wf_sim(\n            6,\n            5,\n            seed=3,\n            deep_history=True,\n            initial_generation_samples=True,\n            num_loci=10,\n        )\n        tables.sort()\n        tables.simplify()\n        ts = msprime.mutate(tables.tree_sequence(), rate=0.05, random_seed=234)\n        assert ts.num_trees > 2\n        assert len(np.unique(ts.nodes_time[ts.samples()])) > 1\n        ts1 = ts.keep_intervals([[0, 4.5]], simplify=False).trim()\n        ts2 = ts.keep_intervals([[4.5, ts.sequence_length]], simplify=False).trim()\n        if simplify:\n            ts1 = ts1.simplify(filter_nodes=False)\n            ts2, node_map = ts2.simplify(map_nodes=True)\n            node_mapping = np.zeros_like(node_map, shape=ts2.num_nodes)\n            kept = node_map != tskit.NULL\n            node_mapping[node_map[kept]] = np.arange(len(node_map))[kept]\n        else:\n            node_mapping = np.arange(ts.num_nodes)\n        ts_new = ts1.concatenate(ts2, node_mappings=[node_mapping]).simplify()\n        ts_new.tables.assert_equals(ts.tables, ignore_provenance=True)\n\n    def test_provenance(self):\n        ts = tskit.Tree.generate_comb(2).tree_sequence\n        ts = ts.concatenate(ts, record_provenance=False)\n        params = json.loads(ts.provenance(-1).record)[\"parameters\"]\n        assert params[\"command\"] != \"concatenate\"\n\n        ts = ts.concatenate(ts)\n        params = 
json.loads(ts.provenance(-1).record)[\"parameters\"]\n        assert params[\"command\"] == \"concatenate\"\n\n    def test_unequal_samples(self):\n        ts1 = tskit.Tree.generate_comb(5, span=2).tree_sequence\n        ts2 = tskit.Tree.generate_balanced(4, arity=3, span=3).tree_sequence\n        with pytest.raises(ValueError, match=\"must have the same number of samples\"):\n            ts1.concatenate(ts2)\n\n    def test_duplicate_ts(self):\n        ts1 = tskit.Tree.generate_comb(3, span=4).tree_sequence\n        ts = ts1.keep_intervals([[0, 1]]).trim()  # a quarter of the original\n        nm = np.arange(ts.num_nodes)  # all nodes identical\n        ts2 = ts.concatenate(ts, ts, ts, node_mappings=[nm] * 3, add_populations=False)\n        ts2 = ts2.simplify()  # squash the edges\n        assert ts1.equals(ts2, ignore_provenance=True)\n\n    def test_node_mappings_bad_len(self):\n        ts = tskit.Tree.generate_comb(3, span=2).tree_sequence\n        nm = np.arange(ts.num_nodes)\n        with pytest.raises(ValueError, match=\"same number of node_mappings\"):\n            ts.concatenate(ts, ts, ts, node_mappings=[nm, nm])\n\n\nclass TestMissingData:\n    \"\"\"\n    Test various aspects of missing data functionality\n    \"\"\"\n\n    # TODO tests for missing data currently sparse: more tests should go here\n\n    def ts_missing_middle(self):\n        # Simple ts with sample 0 missing a middle section\n        ts = msprime.simulate(4, mutation_rate=1, recombination_rate=4, random_seed=2)\n        tables = ts.dump_tables()\n        tables.edges.clear()\n        # mark the middle as missing\n        for e in ts.tables.edges:\n            if e.child == 0:\n                if e.left == 0.0:\n                    missing_from = e.right\n                elif e.right == 1.0:\n                    missing_to = e.left\n                else:\n                    continue  # omit this edge => node is isolated\n            tables.edges.append(e)\n        # Check we have 
non-missing to L & R\n        assert 0.0 < missing_from < 1.0\n        assert 0.0 < missing_to < 1.0\n        return tables.tree_sequence(), missing_from, missing_to\n\n    def test_is_isolated(self):\n        ts, missing_from, missing_to = self.ts_missing_middle()\n        for tree in ts.trees():\n            if tree.interval.right > missing_from and tree.interval.left < missing_to:\n                assert tree.is_isolated(0)\n                assert not tree.is_isolated(1)\n            else:\n                assert not tree.is_isolated(0)\n                assert not tree.is_isolated(1)\n            # A non-sample node is isolated if not in the tree\n            tree_nodes = set(tree.nodes())\n            for nonsample_node in np.setdiff1d(np.arange(ts.num_nodes), ts.samples()):\n                if nonsample_node in tree_nodes:\n                    assert not tree.is_isolated(nonsample_node)\n                else:\n                    assert tree.is_isolated(nonsample_node)\n\n    def test_is_isolated_bad(self):\n        ts, missing_from, missing_to = self.ts_missing_middle()\n        for tree in ts.trees():\n            with pytest.raises(ValueError):\n                tree.is_isolated(tskit.NULL)\n            with pytest.raises(ValueError):\n                tree.is_isolated(ts.num_nodes + 1)\n            with pytest.raises(ValueError):\n                tree.is_isolated(-2)\n            with pytest.raises(TypeError):\n                tree.is_isolated(None)\n            with pytest.raises(TypeError):\n                tree.is_isolated(\"abc\")\n            with pytest.raises(TypeError):\n                tree.is_isolated(1.1)\n"
  },
  {
    "path": "python/tests/test_tree_positioning.py",
    "content": "# MIT License\n#\n# Copyright (c) 2023 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTests for tree iterator schemes. 
Mostly used to develop the incremental\niterator infrastructure.\n\"\"\"\n\nimport msprime\nimport numpy as np\nimport pytest\n\nimport tests\nimport tskit\nfrom tests import tsutil\nfrom tests.tsutil import get_example_tree_sequences\n\n\nclass StatefulTree:\n    \"\"\"\n    Just enough functionality to mimic the low-level tree implementation\n    for testing of forward/backward moving.\n    \"\"\"\n\n    def __init__(self, ts):\n        self.ts = ts\n        self.tree_pos = tsutil.TreeIndexes(ts)\n        self.parent = [-1 for _ in range(ts.num_nodes)]\n\n    def __str__(self):\n        s = f\"parent: {self.parent}\\nposition:\\n\"\n        for line in str(self.tree_pos).splitlines():\n            s += f\"\\t{line}\\n\"\n        return s\n\n    def assert_equal(self, other):\n        assert self.parent == other.parent\n        assert self.tree_pos.index == other.tree_pos.index\n        assert self.tree_pos.interval == other.tree_pos.interval\n\n    def next(self):  # NOQA: A003\n        valid = self.tree_pos.next()\n        if valid:\n            for j in range(self.tree_pos.out_range.start, self.tree_pos.out_range.stop):\n                e = self.tree_pos.out_range.order[j]\n                c = self.ts.edges_child[e]\n                self.parent[c] = -1\n            for j in range(self.tree_pos.in_range.start, self.tree_pos.in_range.stop):\n                e = self.tree_pos.in_range.order[j]\n                c = self.ts.edges_child[e]\n                p = self.ts.edges_parent[e]\n                self.parent[c] = p\n        return valid\n\n    def prev(self):\n        valid = self.tree_pos.prev()\n        if valid:\n            for j in range(\n                self.tree_pos.out_range.start, self.tree_pos.out_range.stop, -1\n            ):\n                e = self.tree_pos.out_range.order[j]\n                c = self.ts.edges_child[e]\n                self.parent[c] = -1\n            for j in range(\n                self.tree_pos.in_range.start, 
self.tree_pos.in_range.stop, -1\n            ):\n                e = self.tree_pos.in_range.order[j]\n                c = self.ts.edges_child[e]\n                p = self.ts.edges_parent[e]\n                self.parent[c] = p\n        return valid\n\n    def iter_forward(self, index):\n        while self.tree_pos.index != index:\n            self.next()\n\n    def seek_forward(self, index):\n        old_left, old_right = self.tree_pos.interval\n        self.tree_pos.seek_forward(index)\n        left, right = self.tree_pos.interval\n        for j in range(self.tree_pos.out_range.start, self.tree_pos.out_range.stop):\n            e = self.tree_pos.out_range.order[j]\n            e_left = self.ts.edges_left[e]\n            # We only need to remove an edge if it's in the current tree, which\n            # can only happen if the edge's left coord is < the current tree's\n            # right coordinate.\n            if e_left < old_right:\n                c = self.ts.edges_child[e]\n                assert self.parent[c] != -1\n                self.parent[c] = -1\n            assert e_left < left\n        for j in range(self.tree_pos.in_range.start, self.tree_pos.in_range.stop):\n            e = self.tree_pos.in_range.order[j]\n            if self.ts.edges_left[e] <= left < self.ts.edges_right[e]:\n                c = self.ts.edges_child[e]\n                p = self.ts.edges_parent[e]\n                self.parent[c] = p\n            else:\n                a = self.tree_pos.in_range.start\n                b = self.tree_pos.in_range.stop\n                # The first and last indexes in the range should always be valid\n                # for the tree.\n                assert a < j < b - 1\n\n    def seek_backward(self, index):\n        old_left, old_right = self.tree_pos.interval\n        self.tree_pos.seek_backward(index)\n        left, right = self.tree_pos.interval\n        for j in range(self.tree_pos.out_range.start, self.tree_pos.out_range.stop, -1):\n            e = 
self.tree_pos.out_range.order[j]\n            e_right = self.ts.edges_right[e]\n            # We only need to remove an edge if it's in the current tree, which\n            # can only happen if the edge's right coord is >= the current tree's\n            # right coordinate.\n            if e_right >= old_right:\n                c = self.ts.edges_child[e]\n                assert self.parent[c] != -1\n                self.parent[c] = -1\n            assert e_right > right\n        for j in range(self.tree_pos.in_range.start, self.tree_pos.in_range.stop, -1):\n            e = self.tree_pos.in_range.order[j]\n            if self.ts.edges_right[e] >= right > self.ts.edges_left[e]:\n                c = self.ts.edges_child[e]\n                p = self.ts.edges_parent[e]\n                self.parent[c] = p\n            else:\n                a = self.tree_pos.in_range.start\n                b = self.tree_pos.in_range.stop\n                # The first and last indexes in the range should always be valid\n                # for the tree.\n                assert a > j > b + 1\n\n    def iter_backward(self, index):\n        while self.tree_pos.index != index:\n            self.prev()\n\n\ndef check_iters_forward(ts):\n    alg_t_output = tsutil.algorithm_T(ts)\n    lib_tree = tskit.Tree(ts)\n    tree_pos = tsutil.TreeIndexes(ts)\n    sample_count = np.zeros(ts.num_nodes, dtype=int)\n    sample_count[ts.samples()] = 1\n    parent1 = [-1 for _ in range(ts.num_nodes)]\n    i = 0\n    lib_tree.next()\n    while tree_pos.next():\n        out_times = []\n        for j in range(tree_pos.out_range.start, tree_pos.out_range.stop):\n            e = tree_pos.out_range.order[j]\n            c = ts.edges_child[e]\n            p = ts.edges_parent[e]\n            out_times.append(ts.nodes_time[p])\n            parent1[c] = -1\n        in_times = []\n        for j in range(tree_pos.in_range.start, tree_pos.in_range.stop):\n            e = tree_pos.in_range.order[j]\n            c = 
ts.edges_child[e]\n            p = ts.edges_parent[e]\n            in_times.append(ts.nodes_time[p])\n            parent1[c] = p\n        # We must visit the edges in *increasing* time order on the way in,\n        # and *decreasing* order on the way out. Otherwise we get quadratic\n        # behaviour for algorithms that need to propagate changes up to the\n        # root.\n        assert out_times == sorted(out_times, reverse=True)\n        assert in_times == sorted(in_times)\n\n        interval, parent2 = next(alg_t_output)\n        assert list(interval) == list(tree_pos.interval)\n        assert parent1 == parent2\n\n        assert lib_tree.index == i\n        assert list(lib_tree.interval) == list(interval)\n        assert list(lib_tree.parent_array[:-1]) == parent1\n\n        lib_tree.next()\n        i += 1\n    assert i == ts.num_trees\n    assert lib_tree.index == -1\n    assert next(alg_t_output, None) is None\n\n\ndef check_iters_back(ts):\n    alg_t_output = [\n        (list(interval), list(parent)) for interval, parent in tsutil.algorithm_T(ts)\n    ]\n    i = len(alg_t_output) - 1\n\n    lib_tree = tskit.Tree(ts)\n    tree_pos = tsutil.TreeIndexes(ts)\n    parent1 = [-1 for _ in range(ts.num_nodes)]\n\n    lib_tree.last()\n\n    while tree_pos.prev():\n        # print(tree_pos.out_range)\n        out_times = []\n        for j in range(tree_pos.out_range.start, tree_pos.out_range.stop, -1):\n            e = tree_pos.out_range.order[j]\n            c = ts.edges_child[e]\n            p = ts.edges_parent[e]\n            out_times.append(ts.nodes_time[p])\n            parent1[c] = -1\n        in_times = []\n        for j in range(tree_pos.in_range.start, tree_pos.in_range.stop, -1):\n            e = tree_pos.in_range.order[j]\n            c = ts.edges_child[e]\n            p = ts.edges_parent[e]\n            in_times.append(ts.nodes_time[p])\n            parent1[c] = p\n\n        # We must visit the edges in *increasing* time order on the way in,\n        # 
and *decreasing* order on the way out. Otherwise we get quadratic\n        # behaviour for algorithms that need to propagate changes up to the\n        # root.\n        assert out_times == sorted(out_times, reverse=True)\n        assert in_times == sorted(in_times)\n\n        interval, parent2 = alg_t_output[i]\n        assert list(interval) == list(tree_pos.interval)\n        assert parent1 == parent2\n\n        assert lib_tree.index == i\n        assert list(lib_tree.interval) == list(interval)\n        assert list(lib_tree.parent_array[:-1]) == parent1\n\n        lib_tree.prev()\n        i -= 1\n\n    assert lib_tree.index == -1\n    assert i == -1\n\n\ndef check_forward_back_sweep(ts):\n    alg_t_output = [\n        (list(interval), list(parent)) for interval, parent in tsutil.algorithm_T(ts)\n    ]\n    for j in range(ts.num_trees - 1):\n        tree = StatefulTree(ts)\n        # Seek forward to j\n        k = 0\n        while k <= j:\n            tree.next()\n            interval, parent = alg_t_output[k]\n            assert tree.tree_pos.index == k\n            assert list(tree.tree_pos.interval) == interval\n            assert parent == tree.parent\n            k += 1\n        k = j\n        # And back to zero\n        while k >= 0:\n            interval, parent = alg_t_output[k]\n            assert tree.tree_pos.index == k\n            assert list(tree.tree_pos.interval) == interval\n            assert parent == tree.parent\n            tree.prev()\n            k -= 1\n\n\ndef check_seek_forward_out_range_is_empty(ts, index):\n    tree = StatefulTree(ts)\n    tree.seek_forward(index)\n    assert tree.tree_pos.out_range.start == tree.tree_pos.out_range.stop\n    tree.iter_backward(-1)\n    tree.seek_forward(index)\n    assert tree.tree_pos.out_range.start == tree.tree_pos.out_range.stop\n\n\ndef check_seek_backward_out_range_is_empty(ts, index):\n    tree = StatefulTree(ts)\n    tree.seek_backward(index)\n    assert tree.tree_pos.out_range.start == 
tree.tree_pos.out_range.stop\n    tree.iter_forward(-1)\n    tree.seek_backward(index)\n    assert tree.tree_pos.out_range.start == tree.tree_pos.out_range.stop\n\n\ndef check_seek_forward_from_null(ts, index):\n    tree1 = StatefulTree(ts)\n    tree1.seek_forward(index)\n    tree2 = StatefulTree(ts)\n    tree2.iter_forward(index)\n    tree1.assert_equal(tree2)\n\n\ndef check_seek_backward_from_null(ts, index):\n    tree1 = StatefulTree(ts)\n    tree1.seek_backward(index)\n    tree2 = StatefulTree(ts)\n    tree2.iter_backward(index)\n    tree1.assert_equal(tree2)\n\n\ndef check_seek_forward_from_first(ts, index):\n    tree1 = StatefulTree(ts)\n    tree1.next()\n    tree1.seek_forward(index)\n    tree2 = StatefulTree(ts)\n    tree2.iter_forward(index)\n    tree1.assert_equal(tree2)\n\n\ndef check_seek_backward_from_last(ts, index):\n    tree1 = StatefulTree(ts)\n    tree1.prev()\n    tree1.seek_backward(index)\n    tree2 = StatefulTree(ts)\n    tree2.iter_backward(index)\n    tree1.assert_equal(tree2)\n\n\nclass TestDirectionSwitching:\n    # 2.00┊       ┊   4   ┊   4   ┊   4   ┊\n    #     ┊       ┊ ┏━┻┓  ┊  ┏┻━┓ ┊  ┏┻━┓ ┊\n    # 1.00┊   3   ┊ ┃  3  ┊  3  ┃ ┊  3  ┃ ┊\n    #     ┊ ┏━╋━┓ ┊ ┃ ┏┻┓ ┊ ┏┻┓ ┃ ┊ ┏┻┓ ┃ ┊\n    # 0.00┊ 0 1 2 ┊ 0 1 2 ┊ 0 2 1 ┊ 0 1 2 ┊\n    #     0       1       2       3       4\n    # index   0       1       2       3\n    def ts(self):\n        return tsutil.all_trees_ts(3)\n\n    @pytest.mark.parametrize(\"index\", [0, 1, 2, 3])\n    def test_iter_backward_matches_iter_forward(self, index):\n        ts = self.ts()\n        tree1 = StatefulTree(ts)\n        tree1.iter_forward(index)\n        tree2 = StatefulTree(ts)\n        tree2.iter_backward(index)\n        tree1.assert_equal(tree2)\n\n    @pytest.mark.parametrize(\"index\", [1, 2, 3])\n    def test_prev_from_seek_forward(self, index):\n        tree1 = StatefulTree(self.ts())\n        tree1.seek_forward(index)\n        tree1.prev()\n        tree2 = StatefulTree(self.ts())\n        tree2.seek_forward(index - 1)\n        
tree1.assert_equal(tree2)\n\n    @pytest.mark.parametrize(\"index\", [1, 2, 3])\n    def test_seek_forward_from_prev(self, index):\n        tree1 = StatefulTree(self.ts())\n        tree1.iter_forward(index)\n        tree1.prev()\n        tree1.seek_forward(index)\n        tree2 = StatefulTree(self.ts())\n        tree2.iter_forward(index)\n        tree1.assert_equal(tree2)\n\n    @pytest.mark.parametrize(\"index\", [0, 1, 2, 3])\n    def test_seek_forward_from_null(self, index):\n        ts = self.ts()\n        check_seek_forward_from_null(ts, index)\n\n    def test_seek_forward_next_null(self):\n        tree1 = StatefulTree(self.ts())\n        tree1.seek_forward(3)\n        tree1.next()\n        assert tree1.tree_pos.index == -1\n        assert list(tree1.tree_pos.interval) == [0, 0]\n\n    @pytest.mark.parametrize(\"index\", [0, 1, 2])\n    def test_next_from_seek_backward(self, index):\n        tree1 = StatefulTree(self.ts())\n        tree1.seek_backward(index)\n        tree1.next()\n        tree2 = StatefulTree(self.ts())\n        tree2.seek_backward(index + 1)\n        tree1.assert_equal(tree2)\n\n    @pytest.mark.parametrize(\"index\", [0, 1, 2])\n    def test_seek_backward_from_next(self, index):\n        tree1 = StatefulTree(self.ts())\n        tree1.iter_backward(index)\n        tree1.next()\n        tree1.seek_backward(index)\n        tree2 = StatefulTree(self.ts())\n        tree2.iter_backward(index)\n        tree1.assert_equal(tree2)\n\n    @pytest.mark.parametrize(\"index\", [0, 1, 2, 3])\n    def test_seek_backward_from_null(self, index):\n        ts = self.ts()\n        check_seek_backward_from_null(ts, index)\n\n    def test_seek_backward_prev_null(self):\n        tree1 = StatefulTree(self.ts())\n        tree1.seek_backward(0)\n        tree1.prev()\n        assert tree1.tree_pos.index == -1\n        assert list(tree1.tree_pos.interval) == [0, 0]\n\n    @pytest.mark.parametrize(\"index\", [0, 1, 2, 3])\n    def 
test_seek_forward_out_range_is_empty(self, index):\n        ts = self.ts()\n        check_seek_forward_out_range_is_empty(ts, index)\n\n    @pytest.mark.parametrize(\"index\", [0, 1, 2, 3])\n    def test_seek_backward_out_range_is_empty(self, index):\n        ts = self.ts()\n        check_seek_backward_out_range_is_empty(ts, index)\n\n\nclass TestTreeIndexesStep:\n    def ts(self):\n        return tsutil.all_trees_ts(3)\n\n    @pytest.mark.parametrize(\"index\", [0, 1, 2])\n    def test_tree_position_step_forward(self, index):\n        ts = self.ts()\n        tree1_pos = tsutil.TreeIndexes(ts)\n        tree1_pos.seek_forward(index)\n        tree1_pos.step(direction=1)\n        tree2_pos = tsutil.TreeIndexes(ts)\n        tree2_pos.seek_forward(index + 1)\n        tree1_pos.assert_equal(tree2_pos)\n\n    @pytest.mark.parametrize(\"index\", [1, 2, 3])\n    def test_tree_position_step_backward(self, index):\n        ts = self.ts()\n        tree1_pos = tsutil.TreeIndexes(ts)\n        tree1_pos.seek_backward(index)\n        tree1_pos.step(direction=-1)\n        tree2_pos = tsutil.TreeIndexes(ts)\n        tree2_pos.seek_backward(index - 1)\n        tree1_pos.assert_equal(tree2_pos)\n\n    def test_tree_position_step_invalid_direction(self):\n        ts = self.ts()\n        # Test for unallowed direction\n        with pytest.raises(ValueError, match=\"Direction must be FORWARD\"):\n            tsutil.TreeIndexes(ts).step(direction=\"foo\")\n\n\nclass TestSeeking:\n    @tests.cached_example\n    def ts(self):\n        ts = tsutil.all_trees_ts(4)\n        assert ts.num_trees == 26\n        return ts\n\n    @pytest.mark.parametrize(\"index\", range(26))\n    def test_seek_forward_from_null(self, index):\n        ts = self.ts()\n        check_seek_forward_from_null(ts, index)\n\n    @pytest.mark.parametrize(\"index\", range(1, 26))\n    def test_seek_forward_from_first(self, index):\n        ts = self.ts()\n        check_seek_forward_from_first(ts, index)\n\n    
@pytest.mark.parametrize(\"index\", range(1, 26))\n    def test_seek_last_from_index(self, index):\n        ts = self.ts()\n        tree1 = StatefulTree(ts)\n        tree1.iter_forward(index)\n        tree1.seek_forward(ts.num_trees - 1)\n        tree2 = StatefulTree(ts)\n        tree2.prev()\n        tree1.assert_equal(tree2)\n\n    @pytest.mark.parametrize(\"index\", range(26))\n    def test_seek_backward_from_null(self, index):\n        ts = self.ts()\n        check_seek_backward_from_null(ts, index)\n\n    @pytest.mark.parametrize(\"index\", range(0, 25))\n    def test_seek_backward_from_last(self, index):\n        ts = self.ts()\n        check_seek_backward_from_last(ts, index)\n\n    @pytest.mark.parametrize(\"index\", range(0, 25))\n    def test_seek_first_from_index(self, index):\n        ts = self.ts()\n        tree1 = StatefulTree(ts)\n        tree1.iter_backward(index)\n        tree1.seek_backward(0)\n        tree2 = StatefulTree(ts)\n        tree2.next()\n        tree1.assert_equal(tree2)\n\n    @pytest.mark.parametrize(\"index\", range(26))\n    def test_seek_forward_out_range_is_empty(self, index):\n        ts = self.ts()\n        check_seek_forward_out_range_is_empty(ts, index)\n\n    @pytest.mark.parametrize(\"index\", range(26))\n    def test_seek_backward_out_range_is_empty(self, index):\n        ts = self.ts()\n        check_seek_backward_out_range_is_empty(ts, index)\n\n\nclass TestAllTreesTs:\n    @pytest.mark.parametrize(\"n\", [2, 3, 4])\n    def test_forward_full(self, n):\n        ts = tsutil.all_trees_ts(n)\n        check_iters_forward(ts)\n\n    @pytest.mark.parametrize(\"n\", [2, 3, 4])\n    def test_back_full(self, n):\n        ts = tsutil.all_trees_ts(n)\n        check_iters_back(ts)\n\n    @pytest.mark.parametrize(\"n\", [2, 3, 4])\n    def test_forward_back(self, n):\n        ts = tsutil.all_trees_ts(n)\n        check_forward_back_sweep(ts)\n\n\nclass TestManyTreesSimulationExample:\n    @tests.cached_example\n    def ts(self):\n     
   ts = msprime.sim_ancestry(\n            10, sequence_length=1000, recombination_rate=0.1, random_seed=1234\n        )\n        assert ts.num_trees > 250\n        return ts\n\n    @pytest.mark.parametrize(\"index\", [1, 5, 10, 50, 100])\n    def test_seek_forward_from_null(self, index):\n        ts = self.ts()\n        check_seek_forward_from_null(ts, index)\n\n    @pytest.mark.parametrize(\"num_trees\", [1, 5, 10, 50, 100])\n    def test_seek_forward_from_mid(self, num_trees):\n        ts = self.ts()\n        start_index = ts.num_trees // 2\n        dest_index = min(start_index + num_trees, ts.num_trees - 1)\n        tree1 = StatefulTree(ts)\n        tree1.iter_forward(start_index)\n        tree1.seek_forward(dest_index)\n        tree2 = StatefulTree(ts)\n        tree2.iter_forward(dest_index)\n        tree1.assert_equal(tree2)\n\n    @pytest.mark.parametrize(\"index\", [1, 5, 10, 50, 100])\n    def test_seek_backward_from_null(self, index):\n        ts = self.ts()\n        check_seek_backward_from_null(ts, index)\n\n    @pytest.mark.parametrize(\"num_trees\", [1, 5, 10, 50, 100])\n    def test_seek_backward_from_mid(self, num_trees):\n        ts = self.ts()\n        start_index = ts.num_trees // 2\n        dest_index = max(start_index - num_trees, 0)\n        tree1 = StatefulTree(ts)\n        tree1.iter_backward(start_index)\n        tree1.seek_backward(dest_index)\n        tree2 = StatefulTree(ts)\n        tree2.iter_backward(dest_index)\n\n    @pytest.mark.parametrize(\"index\", [1, 5, 10, 50, 100])\n    def test_seek_forward_out_range_is_empty(self, index):\n        ts = self.ts()\n        check_seek_forward_out_range_is_empty(ts, index)\n\n    @pytest.mark.parametrize(\"index\", [1, 5, 10, 50, 100])\n    def test_seek_backward_out_range_is_empty(self, index):\n        ts = self.ts()\n        check_seek_backward_out_range_is_empty(ts, index)\n\n    def test_forward_full(self):\n        check_iters_forward(self.ts())\n\n    def test_back_full(self):\n        
check_iters_back(self.ts())\n\n\nclass TestSuiteExamples:\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_forward_full(self, ts):\n        check_iters_forward(ts)\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_back_full(self, ts):\n        check_iters_back(ts)\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_seek_forward_from_null(self, ts):\n        index = ts.num_trees // 2\n        check_seek_forward_from_null(ts, index)\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_seek_forward_from_first(self, ts):\n        index = ts.num_trees - 1\n        check_seek_forward_from_first(ts, index)\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_seek_backward_from_null(self, ts):\n        index = ts.num_trees // 2\n        check_seek_backward_from_null(ts, index)\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_seek_backward_from_last(self, ts):\n        index = 0\n        check_seek_backward_from_last(ts, index)\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_seek_forward_out_range_is_empty(self, ts):\n        index = ts.num_trees // 2\n        check_seek_forward_out_range_is_empty(ts, index)\n\n    @pytest.mark.parametrize(\"ts\", get_example_tree_sequences())\n    def test_seek_backward_out_range_is_empty(self, ts):\n        index = ts.num_trees // 2\n        check_seek_backward_out_range_is_empty(ts, index)\n"
  },
  {
    "path": "python/tests/test_tree_stats.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2025 Tskit Developers\n# Copyright (C) 2016 University of Oxford\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTest cases for generalized statistic computation.\n\"\"\"\n\nimport collections\nimport contextlib\nimport copy\nimport functools\nimport io\nimport itertools\nimport math\nimport random\n\nimport msprime\nimport numpy as np\nimport numpy.testing as nt\nimport pytest\n\nimport tests.test_wright_fisher as wf\nimport tests.tsutil as tsutil\nimport tskit\nimport tskit.exceptions as exceptions\n\nnp.random.seed(5)\n\n# Notes for refactoring:\n#\n# Things we need to test here are:\n# First, for general_stat, AFS, relatedness_matrix, and a few others:\n# 1. branch mode, correctness\n# 2. site mode, correctness\n# 3. node mode, correctness\n# 4. sample sets: correctness\n# 5. indexes: correctness\n# 6. genome windowing: correctness\n# 7. time windowing: correctness\n# 8. 
dropping dimensions, output\n# 9. span normalise, correctness\n# And, more specifically:\n# 10. general_stat: correctly uses summary functions\n# 11. sample_count_stat: correctly uses summary functions\n# 12. each statistic: a single tree suffices, with edge cases\n#   a. agrees with naive version, polarised and not;\n#   b. agrees with python version, polarised and not;\n#   c. stat-specific options (eg centre)\n\n\ndef cached_np(func):\n    \"\"\"\n    Decorator to speed up functions that take numpy arrays as positional\n    arguments that get called a lot with the same arguments.\n\n    # See https://github.com/tskit-dev/tskit/issues/1856 for more info\n    \"\"\"\n    cache = {}\n\n    def f(*args):\n        nonlocal cache\n        key = tuple(x.tobytes() for x in args)\n        if key not in cache:\n            cache[key] = func(*args)\n        return cache[key]\n\n    return f\n\n\ndef subset_combos(*args, p=0.5, min_tests=3):\n    # We have too many tests, combinatorially; so we will run a random subset\n    # of them, using this function, below. If we don't set a seed, a different\n    # random set is run each time. 
Ensures that at least min_tests are run.\n    # Uncomment this line to run all tests (takes about an hour):\n    # p = 1.0\n    num_tests = 0\n    skipped_tests = []\n    # total_tests = 0\n    for x in itertools.product(*args):\n        # total_tests = total_tests + 1\n        if np.random.uniform() < p:\n            num_tests += 1\n            yield x\n        elif len(skipped_tests) < min_tests:\n            skipped_tests.append(x)\n        elif np.random.uniform() < 0.1:\n            skipped_tests[np.random.randint(min_tests)] = x\n    while num_tests < min_tests:\n        yield skipped_tests.pop()\n        num_tests += 1\n    # print(\"tests\", num_tests)\n    assert num_tests >= min_tests\n\n\ndef path_length(tr, x, y):\n    L = 0\n    if x >= 0 and y >= 0:\n        mrca = tr.mrca(x, y)\n    else:\n        mrca = -1\n    for u in x, y:\n        while u != mrca:\n            L += tr.branch_length(u)\n            u = tr.parent(u)\n    return L\n\n\n@contextlib.contextmanager\ndef suppress_division_by_zero_warning():\n    with np.errstate(invalid=\"ignore\", divide=\"ignore\"):\n        yield\n\n\n##############################\n# Branch general stat algorithms\n##############################\n\n\ndef windowed_tree_stat(ts, stat, windows, span_normalise=True):\n    shape = list(stat.shape)\n    shape[0] = len(windows) - 1\n    A = np.zeros(shape)\n\n    tree_breakpoints = np.array(list(ts.breakpoints()))\n    tree_index = 0\n    for j in range(len(windows) - 1):\n        w_left = windows[j]\n        w_right = windows[j + 1]\n        while True:\n            t_left = tree_breakpoints[tree_index]\n            t_right = tree_breakpoints[tree_index + 1]\n            left = max(t_left, w_left)\n            right = min(t_right, w_right)\n            weight = max(0.0, (right - left) / (t_right - t_left))\n            A[j] += stat[tree_index] * weight\n            assert left != right\n            if t_right <= w_right:\n                tree_index += 1\n                
# TODO This is inelegant - should include this in the case below\n                if t_right == w_right:\n                    break\n            else:\n                break\n    if span_normalise:\n        # re-normalize by window lengths\n        window_lengths = np.diff(windows)\n        for j in range(len(windows) - 1):\n            A[j] /= window_lengths[j]\n    return A\n\n\ndef naive_branch_general_stat(\n    ts, w, f, windows=None, polarised=False, span_normalise=True\n):\n    # NOTE: does not behave correctly for unpolarised stats\n    # with non-ancestral material.\n    if windows is None:\n        windows = [0.0, ts.sequence_length]\n    n, k = w.shape\n    # hack to determine m\n    m = len(f(w[0]))\n    total = np.sum(w, axis=0)\n\n    sigma = np.zeros((ts.num_trees, m))\n    for tree in ts.trees():\n        x = np.zeros((ts.num_nodes, k))\n        x[ts.samples()] = w\n        for u in tree.nodes(order=\"postorder\"):\n            for v in tree.children(u):\n                x[u] += x[v]\n        if polarised:\n            s = sum(tree.branch_length(u) * f(x[u]) for u in tree.nodes())\n        else:\n            s = sum(\n                tree.branch_length(u) * (f(x[u]) + f(total - x[u])) for u in tree.nodes()\n            )\n        sigma[tree.index] = s * tree.span\n    if isinstance(windows, str) and windows == \"trees\":\n        # need to average across the windows\n        if span_normalise:\n            for j, tree in enumerate(ts.trees()):\n                sigma[j] /= tree.span\n        return sigma\n    else:\n        return windowed_tree_stat(ts, sigma, windows, span_normalise=span_normalise)\n\n\ndef branch_general_stat(\n    ts, sample_weights, summary_func, windows=None, polarised=False, span_normalise=True\n):\n    \"\"\"\n    Efficient implementation of the algorithm used as the basis for the\n    underlying C version.\n    \"\"\"\n    n, state_dim = sample_weights.shape\n    windows = ts.parse_windows(windows)\n    num_windows = 
windows.shape[0] - 1\n\n    # Determine result_dim\n    result_dim = len(summary_func(sample_weights[0]))\n    result = np.zeros((num_windows, result_dim))\n    state = np.zeros((ts.num_nodes, state_dim))\n    state[ts.samples()] = sample_weights\n    total_weight = np.sum(sample_weights, axis=0)\n\n    time = ts.tables.nodes.time\n    parent = np.zeros(ts.num_nodes, dtype=np.int32) - 1\n    branch_length = np.zeros(ts.num_nodes)\n    # The value of summary_func(u) for every node.\n    summary = np.zeros((ts.num_nodes, result_dim))\n    # The result for the current tree *not* weighted by span.\n    running_sum = np.zeros(result_dim)\n\n    def polarised_summary(u):\n        s = summary_func(state[u])\n        if not polarised:\n            s += summary_func(total_weight - state[u])\n        return s\n\n    for u in range(ts.num_nodes):\n        summary[u] = polarised_summary(u)\n\n    window_index = 0\n    for (t_left, t_right), edges_out, edges_in in ts.edge_diffs():\n        for edge in edges_out:\n            u = edge.child\n            running_sum -= branch_length[u] * summary[u]\n            u = edge.parent\n            while u != -1:\n                running_sum -= branch_length[u] * summary[u]\n                state[u] -= state[edge.child]\n                summary[u] = polarised_summary(u)\n                running_sum += branch_length[u] * summary[u]\n                u = parent[u]\n            parent[edge.child] = -1\n            branch_length[edge.child] = 0\n\n        for edge in edges_in:\n            parent[edge.child] = edge.parent\n            branch_length[edge.child] = time[edge.parent] - time[edge.child]\n            u = edge.child\n            running_sum += branch_length[u] * summary[u]\n            u = edge.parent\n            while u != -1:\n                running_sum -= branch_length[u] * summary[u]\n                state[u] += state[edge.child]\n                summary[u] = polarised_summary(u)\n                running_sum += branch_length[u] 
* summary[u]\n                u = parent[u]\n\n        # Update the windows\n        assert window_index < num_windows\n        while windows[window_index] < t_right:\n            w_left = windows[window_index]\n            w_right = windows[window_index + 1]\n            left = max(t_left, w_left)\n            right = min(t_right, w_right)\n            span = right - left\n            assert span > 0\n            result[window_index] += running_sum * span\n            if w_right <= t_right:\n                window_index += 1\n            else:\n                # This interval crosses a tree boundary, so we update it again\n                # for the next tree\n                break\n\n    # print(\"window_index:\", window_index, windows.shape)\n    assert window_index == windows.shape[0] - 1\n    if span_normalise:\n        for j in range(num_windows):\n            result[j] /= windows[j + 1] - windows[j]\n    return result\n\n\n##############################\n# Site general stat algorithms\n##############################\n\n\ndef windowed_sitewise_stat(ts, sigma, windows, span_normalise=True):\n    M = sigma.shape[1]\n    A = np.zeros((len(windows) - 1, M))\n    window = 0\n    for site in ts.sites():\n        while windows[window + 1] <= site.position:\n            window += 1\n        assert windows[window] <= site.position < windows[window + 1]\n        A[window] += sigma[site.id]\n    if span_normalise:\n        diff = np.zeros((A.shape[0], 1))\n        diff[:, 0] = np.diff(windows).T\n        A /= diff\n    return A\n\n\ndef naive_site_general_stat(\n    ts, W, f, windows=None, polarised=False, span_normalise=True\n):\n    n, K = W.shape\n    # Hack to determine M\n    M = len(f(W[0]))\n    sigma = np.zeros((ts.num_sites, M))\n    for tree in ts.trees():\n        X = np.zeros((ts.num_nodes, K))\n        X[ts.samples()] = W\n        for u in tree.nodes(order=\"postorder\"):\n            for v in tree.children(u):\n                X[u] += X[v]\n        
for site in tree.sites():\n            state_map = collections.defaultdict(functools.partial(np.zeros, K))\n            state_map[site.ancestral_state] = sum(X[root] for root in tree.roots)\n            for mutation in site.mutations:\n                state_map[mutation.derived_state] += X[mutation.node]\n                if mutation.parent != tskit.NULL:\n                    parent = site.mutations[mutation.parent - site.mutations[0].id]\n                    state_map[parent.derived_state] -= X[mutation.node]\n                else:\n                    state_map[site.ancestral_state] -= X[mutation.node]\n            if polarised:\n                del state_map[site.ancestral_state]\n            sigma[site.id] += sum(map(f, state_map.values()))\n    return windowed_sitewise_stat(\n        ts, sigma, ts.parse_windows(windows), span_normalise=span_normalise\n    )\n\n\ndef site_general_stat(\n    ts, sample_weights, summary_func, windows=None, polarised=False, span_normalise=True\n):\n    \"\"\"\n    Problem: 'sites' is different than the other windowing options\n    because if we output by site we don't want to normalize by length of the window.\n    Solution: we pass an argument \"normalize\", to the windowing function.\n    \"\"\"\n    windows = ts.parse_windows(windows)\n    num_windows = windows.shape[0] - 1\n    n, state_dim = sample_weights.shape\n    # Determine result_dim\n    (result_dim,) = summary_func(sample_weights[0]).shape\n    result = np.zeros((num_windows, result_dim))\n    state = np.zeros((ts.num_nodes, state_dim))\n    state[ts.samples()] = sample_weights\n    total_weight = np.sum(sample_weights, axis=0)\n\n    site_index = 0\n    mutation_index = 0\n    window_index = 0\n    sites = ts.tables.sites\n    mutations = ts.tables.mutations\n    parent = np.zeros(ts.num_nodes, dtype=np.int32) - 1\n    for (left, right), edges_out, edges_in in ts.edge_diffs():\n        for edge in edges_out:\n            u = edge.parent\n            while u != -1:\n   
             state[u] -= state[edge.child]\n                u = parent[u]\n            parent[edge.child] = -1\n        for edge in edges_in:\n            parent[edge.child] = edge.parent\n            u = edge.parent\n            while u != -1:\n                state[u] += state[edge.child]\n                u = parent[u]\n        while site_index < len(sites) and sites.position[site_index] < right:\n            assert left <= sites.position[site_index]\n            ancestral_state = sites[site_index].ancestral_state\n            allele_state = collections.defaultdict(\n                functools.partial(np.zeros, state_dim)\n            )\n            allele_state[ancestral_state][:] = total_weight\n            while (\n                mutation_index < len(mutations)\n                and mutations[mutation_index].site == site_index\n            ):\n                mutation = mutations[mutation_index]\n                allele_state[mutation.derived_state] += state[mutation.node]\n                if mutation.parent != -1:\n                    parent_allele = mutations[mutation.parent].derived_state\n                    allele_state[parent_allele] -= state[mutation.node]\n                else:\n                    allele_state[ancestral_state] -= state[mutation.node]\n                mutation_index += 1\n            if polarised:\n                del allele_state[ancestral_state]\n\n            pos = sites.position[site_index]\n            while windows[window_index + 1] <= pos:\n                window_index += 1\n            assert windows[window_index] <= pos < windows[window_index + 1]\n            site_result = result[window_index]\n\n            for _allele, value in allele_state.items():\n                site_result += summary_func(value)\n            site_index += 1\n    if span_normalise:\n        for j in range(num_windows):\n            span = windows[j + 1] - windows[j]\n            result[j] /= span\n    return result\n\n\n##############################\n# 
Node general stat algorithms\n##############################\n\n\ndef naive_node_general_stat(\n    ts, W, f, windows=None, polarised=False, span_normalise=True\n):\n    windows = ts.parse_windows(windows)\n    n, K = W.shape\n    M = f(W[0]).shape[0]\n    total = np.sum(W, axis=0)\n    sigma = np.zeros((ts.num_trees, ts.num_nodes, M))\n    for tree in ts.trees():\n        X = np.zeros((ts.num_nodes, K))\n        X[ts.samples()] = W\n        for u in tree.nodes(order=\"postorder\"):\n            for v in tree.children(u):\n                X[u] += X[v]\n        s = np.zeros((ts.num_nodes, M))\n        for u in range(ts.num_nodes):\n            s[u] = f(X[u])\n            if not polarised:\n                s[u] += f(total - X[u])\n        sigma[tree.index] = s * tree.span\n    return windowed_tree_stat(ts, sigma, windows, span_normalise=span_normalise)\n\n\ndef node_general_stat(\n    ts, sample_weights, summary_func, windows=None, polarised=False, span_normalise=True\n):\n    \"\"\"\n    Efficient implementation of the algorithm used as the basis for the\n    underlying C version.\n    \"\"\"\n    n, state_dim = sample_weights.shape\n    windows = ts.parse_windows(windows)\n    num_windows = windows.shape[0] - 1\n    result_dim = summary_func(sample_weights[0]).shape[0]\n    result = np.zeros((num_windows, ts.num_nodes, result_dim))\n    state = np.zeros((ts.num_nodes, state_dim))\n    state[ts.samples()] = sample_weights\n    total_weight = np.sum(sample_weights, axis=0)\n\n    def node_summary(u):\n        s = summary_func(state[u])\n        if not polarised:\n            s += summary_func(total_weight - state[u])\n        return s\n\n    window_index = 0\n    parent = np.zeros(ts.num_nodes, dtype=np.int32) - 1\n    # contains summary_func(state[u]) for each node\n    current_values = np.zeros((ts.num_nodes, result_dim))\n    for u in range(ts.num_nodes):\n        current_values[u] = node_summary(u)\n    # contains the location of the last time we updated the 
output for a node.\n    last_update = np.zeros((ts.num_nodes, 1))\n    for (t_left, t_right), edges_out, edges_in in ts.edge_diffs():\n        for edge in edges_out:\n            u = edge.child\n            v = edge.parent\n            while v != -1:\n                result[window_index, v] += (t_left - last_update[v]) * current_values[v]\n                last_update[v] = t_left\n                state[v] -= state[u]\n                current_values[v] = node_summary(v)\n                v = parent[v]\n            parent[u] = -1\n\n        for edge in edges_in:\n            u = edge.child\n            v = edge.parent\n            parent[u] = v\n            while v != -1:\n                result[window_index, v] += (t_left - last_update[v]) * current_values[v]\n                last_update[v] = t_left\n                state[v] += state[u]\n                current_values[v] = node_summary(v)\n                v = parent[v]\n\n        # Update the windows\n        while window_index < num_windows and windows[window_index + 1] <= t_right:\n            w_right = windows[window_index + 1]\n            # Flush the contribution of all nodes to the current window.\n            for u in range(ts.num_nodes):\n                result[window_index, u] += (w_right - last_update[u]) * current_values[u]\n                last_update[u] = w_right\n            window_index += 1\n\n    assert window_index == windows.shape[0] - 1\n    if span_normalise:\n        for j in range(num_windows):\n            result[j] /= windows[j + 1] - windows[j]\n    return result\n\n\ndef general_stat(\n    ts,\n    sample_weights,\n    summary_func,\n    windows=None,\n    polarised=False,\n    mode=\"site\",\n    span_normalise=True,\n):\n    \"\"\"\n    General interface for algorithms above. 
Directly corresponds to the interface\n    for TreeSequence.general_stat.\n    \"\"\"\n    method_map = {\n        \"site\": site_general_stat,\n        \"node\": node_general_stat,\n        \"branch\": branch_general_stat,\n    }\n    return method_map[mode](\n        ts,\n        sample_weights,\n        summary_func,\n        windows=windows,\n        polarised=polarised,\n        span_normalise=span_normalise,\n    )\n\n\ndef upper_tri_to_matrix(x):\n    \"\"\"\n    Given x, a vector of entries of the upper triangle of a matrix\n    in row-major order, including the diagonal, return the corresponding matrix.\n    \"\"\"\n    # n^2 + n = 2 u => n = (-1 + sqrt(1 + 8*u))/2\n    n = int((np.sqrt(1 + 8 * len(x)) - 1) / 2.0)\n    out = np.ones((n, n))\n    k = 0\n    for i in range(n):\n        for j in range(i, n):\n            out[i, j] = out[j, i] = x[k]\n            k += 1\n    return out\n\n\n##################################\n# Test cases\n##################################\n\n\nclass StatsTestCase:\n    \"\"\"\n    Provides convenience functions.\n    \"\"\"\n\n    def assertListAlmostEqual(self, x, y):\n        assert len(x) == len(y)\n        for a, b in zip(x, y):\n            self.assertAlmostEqual(a, b)\n\n    def assertArrayEqual(self, x, y):\n        nt.assert_equal(x, y)\n\n    def assertArrayAlmostEqual(self, x, y, atol=1e-6, rtol=1e-7):\n        nt.assert_allclose(x, y, atol=atol, rtol=rtol)\n\n    def identity_f(self, ts):\n        return lambda x: x * (x < ts.num_samples)\n\n    def cumsum_f(self, ts):\n        return lambda x: np.cumsum(x) * (x < ts.num_samples)\n\n    def sum_f(self, ts, k=1):\n        return lambda x: np.array([sum(x) * (sum(x) < 2 * ts.num_samples)] * k)\n\n\nclass TopologyExamplesMixin:\n    \"\"\"\n    Defines a set of test cases on different example tree sequence topologies.\n    Derived classes need to define a 'verify' function which will perform the\n    actual tests.\n    \"\"\"\n\n    def 
test_single_tree_sequence_length(self):\n        ts = msprime.simulate(6, length=10, random_seed=1)\n        self.verify(ts)\n\n    def test_single_tree_multiple_roots(self):\n        ts = msprime.simulate(8, random_seed=1, end_time=0.5)\n        assert ts.first().num_roots > 1\n        self.verify(ts)\n\n    def test_many_trees(self, ts_4_recomb_fixture):\n        ts = ts_4_recomb_fixture\n        assert ts.num_trees > 2\n        self.verify(ts)\n\n    # @pytest.mark.skip(reason=\"Skipping short sequence length test\")\n    def test_short_sequence_length(self):\n        ts = msprime.simulate(6, length=0.5, recombination_rate=2, random_seed=1)\n        assert ts.num_trees > 2\n        self.verify(ts)\n\n    @pytest.mark.slow\n    def test_wright_fisher_slow(self, wf_fixture_slow):\n        _, ts = wf_fixture_slow\n        self.verify(ts)\n\n    def test_wright_fisher(self, wf_fixture):\n        _, ts = wf_fixture\n        self.verify(ts)\n\n    def test_empty_ts(self):\n        tables = tskit.TableCollection(1.0)\n        tables.nodes.add_row(1, 0)\n        tables.nodes.add_row(1, 0)\n        tables.nodes.add_row(1, 0)\n        tables.nodes.add_row(1, 0)\n        ts = tables.tree_sequence()\n        self.verify(ts)\n\n    def test_non_sample_ancestry(self):\n        tables = tskit.TableCollection(1.0)\n        tables.nodes.add_row(1, 0)\n        tables.nodes.add_row(1, 0)\n        tables.nodes.add_row(0, 1)\n        tables.nodes.add_row(0, 0)  # 3 is a leaf but not a sample.\n        # Make sure we have 4 samples for the tests.\n        tables.nodes.add_row(1, 1)\n        tables.nodes.add_row(1, 1)\n        tables.edges.add_row(0, 1, 2, 0)\n        tables.edges.add_row(0, 1, 2, 1)\n        tables.edges.add_row(0, 1, 4, 3)\n        ts = tables.tree_sequence()\n        self.verify(ts)\n\n\n# Fixtures for commonly used simulations in test_tree_stats.py\n# Naming convention: ts_{num_samples}_{features}_fixture\n# Features: mut (mutations), recomb (recombination), 
highmut/highrecomb (high rates)\n# here the 'scope=\"session\"' argument means that these will\n# be executed once per session, so that each time the fixture is used\n# it won't have to re-simulate (note that's actually once per job,\n# and so if xdist is using four cores it might run each sim four times)\n\n\n@pytest.fixture(scope=\"session\")\ndef ts_6_fixture():\n    \"\"\"Basic 6-sample tree sequence, no mutations or recombination.\"\"\"\n    return msprime.simulate(6, random_seed=1)\n\n\n@pytest.fixture(scope=\"session\")\ndef ts_10_recomb_fixture():\n    \"\"\"10-sample tree sequence with recombination (used 3+ times).\"\"\"\n    return msprime.simulate(10, recombination_rate=1, random_seed=2)\n\n\n@pytest.fixture(scope=\"session\")\ndef ts_10_mut_fixture():\n    \"\"\"10-sample tree sequence with mutations (used 10 times).\"\"\"\n    ts = msprime.simulate(10, mutation_rate=1, random_seed=1)\n    assert ts.num_mutations > 0\n    return ts\n\n\n@pytest.fixture(scope=\"session\")\ndef ts_10_mut_recomb_fixture():\n    \"\"\"10-sample tree sequence with mutations and recombination (used 5+ times).\"\"\"\n    return msprime.simulate(10, mutation_rate=1, recombination_rate=2, random_seed=1)\n\n\n@pytest.fixture(scope=\"session\")\ndef ts_4_recomb_fixture():\n    \"\"\"4-sample tree sequence with recombination (used 4+ times).\"\"\"\n    return msprime.simulate(4, recombination_rate=1, random_seed=2)\n\n\n@pytest.fixture(scope=\"session\")\ndef ts_12_highrecomb_fixture():\n    \"\"\"12-sample tree sequence with high recombination (used 4+ times).\"\"\"\n    return msprime.simulate(12, recombination_rate=3, random_seed=2)\n\n\n@pytest.fixture(scope=\"session\")\ndef ts_44_recomb_fixture():\n    \"\"\"44-sample tree sequence with recombination (used 2 times).\"\"\"\n    return msprime.simulate(44, recombination_rate=1, random_seed=2)\n\n\n@pytest.fixture(scope=\"session\")\ndef ts_ancestry_10_fixture():\n    \"\"\"Standard ancestry simulation for 10 samples.\"\"\"\n   
 return msprime.sim_ancestry(10, random_seed=1, sequence_length=10)\n\n\n@pytest.fixture(scope=\"session\")\ndef ts_6_length_factory_fixture():\n    \"\"\"Factory fixture for 6-sample tree sequences with variable length.\"\"\"\n\n    def _make_ts(length):\n        return msprime.simulate(\n            6, length=length, recombination_rate=2, mutation_rate=1, random_seed=1\n        )\n\n    return _make_ts\n\n\n# Wright-Fisher simulation fixtures\n@pytest.fixture(scope=\"session\")\ndef wf_fixture_sims():\n    \"\"\"Common Wright-Fisher simulations used across test classes.\"\"\"\n    # Pre-compute all common WF simulations\n    simulations = {}\n\n    # Used in TopologyExamplesMixin tests\n    tables = wf.wf_sim(\n        4, 5, seed=1, deep_history=True, initial_generation_samples=False, num_loci=5\n    )\n    tables.sort()\n    simulations[\"unsimplified\"] = tables.tree_sequence()\n\n    tables = wf.wf_sim(\n        6, 5, seed=3, deep_history=True, initial_generation_samples=True, num_loci=2\n    )\n    tables.sort()\n    tables.simplify()\n    simulations[\"initial_generation\"] = tables.tree_sequence()\n\n    tables = wf.wf_sim(\n        6, 15, seed=202, deep_history=False, initial_generation_samples=True, num_loci=5\n    )\n    tables.sort()\n    tables.simplify()\n    simulations[\"no_deep_history\"] = tables.tree_sequence()\n\n    tables = wf.wf_sim(\n        6, 5, seed=1, deep_history=False, initial_generation_samples=False, num_loci=4\n    )\n    tables.sort()\n    simulations[\"unsimplified_multi_roots\"] = tables.tree_sequence()\n\n    tables = wf.wf_sim(\n        5, 8, seed=1, deep_history=True, initial_generation_samples=False, num_loci=5\n    )\n    tables.sort()\n    simulations[\"simplified\"] = tables.tree_sequence().simplify()\n\n    tables = wf.wf_sim(\n        6, 8, seed=1, deep_history=False, initial_generation_samples=False, num_loci=3\n    )\n    tables.sort()\n    simulations[\"simplified_multi_roots\"] = tables.tree_sequence().simplify()\n\n 
   return simulations\n\n\n@pytest.fixture(\n    params=[\n        \"no_deep_history\",\n        \"unsimplified_multi_roots\",\n        \"simplified\",\n        \"simplified_multi_roots\",\n    ],\n    scope=\"session\",\n)\ndef wf_fixture(wf_fixture_sims, request):\n    \"\"\"\n    A collection of small Wright-Fisher simulations. The name is returned for\n    debugging purposes.\n    \"\"\"\n    name = request.param\n    ts = msprime.sim_mutations(wf_fixture_sims[name], rate=0.05, random_seed=1234)\n    assert ts.num_mutations > 0\n    return name, ts\n\n\n@pytest.fixture(params=[\"unsimplified\", \"initial_generation\"], scope=\"session\")\ndef wf_fixture_slow(wf_fixture_sims, request):\n    \"\"\"\n    A few more small Wright-Fisher simulations. Despite the name, in total\n    they take about the same time for tests together as wf_fixture.\n    \"\"\"\n    name = request.param\n    ts = msprime.sim_mutations(wf_fixture_sims[name], rate=0.05, random_seed=1234)\n    assert ts.num_mutations > 0\n    return name, ts\n\n\n@pytest.fixture(scope=\"session\")\ndef four_taxa_test_case():\n    #\n    # 1.0          7\n    # 0.7         / \\                                    6\n    #            /   \\                                  / \\\n    # 0.5       /     5              5                 /   5\n    #          /     / \\            / \\__             /   / \\\n    # 0.4     /     8   \\          8     4           /   8   \\\n    #        /     / \\   \\        / \\   / \\         /   / \\   \\\n    # 0.0   0     1   3   2      1   3 0   2       0   1   3   2\n    #          (0.0, 0.2),        (0.2, 0.8),       (0.8, 2.5)\n\n    nodes = io.StringIO(\n        \"\"\"\\\n    id      is_sample   time\n    0       1           0\n    1       1           0\n    2       1           0\n    3       1           0\n    4       0           0.4\n    5       0           0.5\n    6       0           0.7\n    7       0           1.0\n    8       0           0.4\n    \"\"\"\n    )\n    
edges = io.StringIO(\n        \"\"\"\\\n    left    right   parent  child\n    0.0     2.5     8       1,3\n    0.2     0.8     4       0,2\n    0.0     0.2     5       8,2\n    0.2     0.8     5       8,4\n    0.8     2.5     5       8,2\n    0.8     2.5     6       0,5\n    0.0     0.2     7       0,5\n    \"\"\"\n    )\n    sites = io.StringIO(\n        \"\"\"\\\n    id  position    ancestral_state\n    \"\"\"\n    )\n    mutations = io.StringIO(\n        \"\"\"\\\n    site    node    derived_state   parent\n    \"\"\"\n    )\n    ts = tskit.load_text(\n        nodes=nodes, edges=edges, sites=sites, mutations=mutations, strict=False\n    )\n    return ts\n\n\n@pytest.fixture(scope=\"session\")\ndef four_taxa_test_case_afs(four_taxa_test_case):\n    # Examples of the AFS computed by hand\n    ts = four_taxa_test_case\n    examples = []\n\n    params = {\n        \"sample_sets\": [[0, 1, 2, 3]],\n        \"windows\": [0, 0.2, 0.8, 2.5],\n        \"time_windows\": [0, 0.5, np.inf],\n        \"mode\": \"branch\",\n        \"polarised\": True,\n        \"span_normalise\": False,\n    }\n    full_afs = np.array(\n        [\n            np.transpose(u)\n            for u in [\n                [\n                    np.array(u) * 0.2\n                    for u in (  # window [0, 0.2)\n                        [0, 0],  # bin 0\n                        [0.4 + 0.4 + 0.5 + 0.5, 0.5],  # bin 1\n                        [0.1, 0],  # bin 2\n                        [0, 0.5],  # bin 3\n                        [0, 0],  # bin 4\n                    )\n                ],\n                [\n                    np.array(u) * (0.8 - 0.2)\n                    for u in (\n                        [0, 0],  # bin 0\n                        [0.4 + 0.4 + 0.4 + 0.4, 0],  # bin 1\n                        [0.1 + 0.1, 0],  # bin 2\n                        [0, 0],  # bin 3\n                        [0, 0],  # bin 4\n                    )\n                ],\n                [\n                    
np.array(u) * (2.5 - 0.8)\n                    for u in (\n                        [0, 0],  # bin 0\n                        [0.5 + 0.4 + 0.4 + 0.5, 0.2],  # bin 1\n                        [0.1, 0],  # bin 2\n                        [0, 0.2],  # bin 3\n                        [0, 0],  # bin 4\n                    )\n                ],\n            ]\n        ]\n    )\n    assert full_afs.shape == (3, 2, 5)\n    examples.append((params, full_afs))\n\n    # windows that don't fall at tree breaks\n    p = copy.deepcopy(params)\n    p[\"windows\"] = [0, 0.5, 2.5]\n    afs = np.array([full_afs[0] + full_afs[1] / 2, full_afs[1] / 2 + full_afs[2]])\n    assert afs.shape == (2, 2, 5)\n    examples.append((p, afs))\n\n    # no windows\n    p = copy.deepcopy(params)\n    p[\"windows\"] = None\n    afs = full_afs.sum(axis=0)\n    assert afs.shape == (2, 5)\n    examples.append((p, afs))\n\n    # no time windows\n    p = copy.deepcopy(params)\n    p[\"time_windows\"] = None\n    afs = full_afs.sum(axis=1)\n    assert afs.shape == (3, 5)\n    examples.append((p, afs))\n\n    # sub time windows\n    p = copy.deepcopy(params)\n    p[\"time_windows\"] = [0, 0.5]\n    afs = full_afs[:, (0,), :]\n    assert afs.shape == (3, 1, 5)\n    examples.append((p, afs))\n\n    return ts, examples\n\n\nclass MutatedTopologyExamplesMixin:\n    \"\"\"\n    Defines a set of test cases on different example tree sequence topologies.\n    Derived classes need to define a 'verify' function which will perform the\n    actual tests.\n    \"\"\"\n\n    def test_single_tree_no_sites(self, ts_6_fixture):\n        ts = ts_6_fixture\n        assert ts.num_sites == 0\n        self.verify(ts)\n\n    def test_ghost_allele(self):\n        tables = tskit.TableCollection(1)\n        tables.nodes.add_row(flags=1, time=0)\n        tables.nodes.add_row(flags=1, time=0)\n        tables.nodes.add_row(flags=0, time=1)\n        tables.edges.add_row(0, 1, 2, 0)\n        tables.edges.add_row(0, 1, 2, 1)\n        
tables.sites.add_row(position=0.5, ancestral_state=\"A\")\n        # Make sure there's 4 samples\n        tables.nodes.add_row(flags=1, time=0)\n        tables.nodes.add_row(flags=1, time=0)\n        # The ghost mutation that's never seen in the genotypes\n        tables.mutations.add_row(site=0, node=0, derived_state=\"T\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"G\", parent=0)\n        ts = tables.tree_sequence()\n        self.verify(ts)\n\n    def test_ghost_allele_all_ancestral(self):\n        tables = tskit.TableCollection(1)\n        tables.nodes.add_row(flags=1, time=0)\n        tables.nodes.add_row(flags=1, time=0)\n        tables.nodes.add_row(flags=0, time=1)\n        # Make sure there's 4 samples\n        tables.nodes.add_row(flags=1, time=0)\n        tables.nodes.add_row(flags=1, time=0)\n        tables.edges.add_row(0, 1, 2, 0)\n        tables.edges.add_row(0, 1, 2, 1)\n        tables.sites.add_row(position=0.5, ancestral_state=\"A\")\n        tables.mutations.add_row(site=0, node=0, derived_state=\"T\")\n        # Mutate back to the ancestral state so that all genotypes are zero\n        tables.mutations.add_row(site=0, node=0, derived_state=\"A\", parent=0)\n        ts = tables.tree_sequence()\n        self.verify(ts)\n\n    def test_non_sample_ancestry(self):\n        # 2.00┊       5   ┊\n        #     ┊    ┏━━┻━┓ ┊\n        # 1.00┊    4    ┃ ┊\n        #     ┊ ┏━┳┻┳━┓ ┃ ┊\n        # 0.00┊ 0 1 2 3 6 ┊\n        #    0.00        1.00\n        tables = tskit.TableCollection(1)\n        # Four sample nodes\n        for j in range(4):\n            tables.nodes.add_row(flags=1, time=0)\n            tables.edges.add_row(0, 1, 4, j)\n        # Their MRCA, 4, joins to older ancestor 5\n        tables.nodes.add_row(flags=0, time=1)\n        tables.nodes.add_row(flags=0, time=2)\n        tables.edges.add_row(0, 1, 5, 4)\n        # Which has non-sample leaf at time 0\n        tables.nodes.add_row(flags=0, time=0)\n        
tables.edges.add_row(0, 1, 5, 6)\n        # Two sites with mutations. One over the MRCA of the\n        # samples so it's fixed at 1 and one over the non sample\n        # leaf so that samples are fixed at zero.\n        tables.sites.add_row(position=0.25, ancestral_state=\"0\")\n        tables.sites.add_row(position=0.5, ancestral_state=\"0\")\n        tables.mutations.add_row(site=0, node=4, derived_state=\"1\")\n        tables.mutations.add_row(site=1, node=6, derived_state=\"1\")\n        ts = tables.tree_sequence()\n        self.verify(ts)\n\n    def test_single_tree_infinite_sites(self, ts_10_mut_fixture):\n        ts = ts_10_mut_fixture\n        assert ts.num_sites > 0\n        self.verify(ts)\n\n    def test_single_tree_sites_no_mutations(self, ts_6_fixture):\n        ts = ts_6_fixture\n        tables = ts.dump_tables()\n        tables.sites.add_row(0.1, \"a\")\n        tables.sites.add_row(0.2, \"aaa\")\n        self.verify(tables.tree_sequence())\n\n    @pytest.mark.slow\n    def test_single_tree_jukes_cantor(self, ts_10_mut_fixture):\n        ts = ts_10_mut_fixture\n        ts = tsutil.jukes_cantor(ts, 20, 1, seed=10)\n        self.verify(ts)\n\n    def test_single_tree_single_site_many_silent(self, ts_6_fixture):\n        ts = ts_6_fixture\n        ts = tsutil.jukes_cantor(ts, 1, 20, seed=10)\n        self.verify(ts)\n\n    def test_single_tree_multichar_mutations(self, ts_10_mut_fixture):\n        ts = ts_10_mut_fixture\n        ts = tsutil.insert_multichar_mutations(ts)\n        self.verify(ts)\n\n    def test_many_trees_infinite_sites(self, ts_10_mut_recomb_fixture):\n        ts = ts_10_mut_recomb_fixture\n        assert ts.num_sites > 0\n        assert ts.num_trees > 2\n        self.verify(ts)\n\n    @pytest.mark.slow\n    def test_many_trees_sequence_length_infinite_sites(\n        self, ts_6_length_factory_fixture\n    ):\n        for L in [0.5, 1.5, 3.3333]:\n            ts = ts_6_length_factory_fixture(L)\n            self.verify(ts)\n\n    def 
test_wright_fisher(self, wf_fixture):\n        _, ts = wf_fixture\n        assert ts.num_sites > 0\n        self.verify(ts)\n\n    def test_wright_fisher_slow(self, wf_fixture_slow):\n        _, ts = wf_fixture_slow\n        assert ts.num_sites > 0\n        self.verify(ts)\n\n    def test_empty_ts(self):\n        tables = tskit.TableCollection(1.0)\n        for _ in range(10):\n            tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)\n        ts = tables.tree_sequence()\n        self.verify(ts)\n\n\ndef example_sample_sets(ts, min_size=1):\n    \"\"\"\n    Generate a series of example sample sets from the specified tree sequence. The\n    number of sample sets returned in each example must be at least min_size.\n    \"\"\"\n    samples = ts.samples()\n    np.random.shuffle(samples)  # <-- no seed\n    splits = np.array_split(samples, min_size)\n    yield splits\n    yield [[s] for s in samples]\n    if min_size == 1:\n        yield [samples[:1]]\n    if ts.num_samples > 2 and min_size <= 2:\n        yield [samples[:2], samples[2:]]\n    if ts.num_samples > 4 and min_size <= 2:\n        yield [samples[:2], samples[2:4]]\n    if ts.num_samples > 7 and min_size <= 4:\n        yield [samples[:2], samples[2:4], samples[4:6], samples[6:]]\n    if ts.num_samples > 8 and min_size <= 4:\n        yield [samples[:2], samples[2:4], samples[4:6], samples[6:8]]\n\n\ndef example_sample_set_index_pairs(sample_sets):\n    assert len(sample_sets) >= 2\n    yield [(0, 1)]\n    yield [(1, 0), (0, 1)]\n    if len(sample_sets) > 2:\n        yield [(0, 1), (1, 2), (0, 2)]\n\n\ndef example_sample_set_index_triples(sample_sets):\n    assert len(sample_sets) >= 3\n    yield [(0, 1, 2)]\n    yield [(0, 2, 1), (2, 1, 0)]\n    if len(sample_sets) > 3:\n        yield [(3, 0, 1), (0, 2, 3), (1, 2, 3)]\n\n\ndef example_sample_set_index_quads(sample_sets):\n    assert len(sample_sets) >= 4\n    yield [(0, 1, 2, 3)]\n    yield [(0, 1, 2, 3), (3, 2, 1, 0)]\n    yield [(0, 1, 2, 3), (3, 2, 1, 0), (1, 
2, 3, 0)]\n\n\ndef example_windows(ts):\n    \"\"\"\n    Generate a series of example windows for the specified tree sequence.\n    \"\"\"\n    L = ts.sequence_length\n    yield \"sites\"\n    yield \"trees\"\n    yield [0, L]\n    yield ts.breakpoints(as_array=True)\n    yield np.linspace(0, L, num=10)\n\n\nclass WeightStatsMixin:\n    \"\"\"\n    Implements the verify method and dispatches it to verify_weighted_stat\n    for a representative set of weights and windows.\n    \"\"\"\n\n    def example_weights(self, ts, min_size=1):\n        \"\"\"\n        Generate a series of example weights from the specified tree sequence.\n        \"\"\"\n        np.random.seed(46)\n        # Reduced to 3 essential weight matrices for performance\n\n        # 1. Simple weights with variance (k=min_size)\n        k = min_size\n        W = np.ones((ts.num_samples, k))\n        # Ensure positive variance: different values for different samples\n        for i in range(ts.num_samples):\n            W[i, :] = 1.0 + i * 0.1\n        yield W\n\n        # 2. Exponential weights with k=min_size+1 (medium complexity)\n        k = min_size + 1\n        W = np.zeros((ts.num_samples, k))\n        for j in range(k):\n            W[:, j] = np.random.exponential(1, ts.num_samples)\n        yield W\n\n        # 3. 
Mixed weights with larger k (complex case)\n        # Only test larger k if samples allow it and keep it reasonable\n        k = min(min_size + 3, ts.num_samples)  # Reduced from +5 to +3\n        if k > min_size + 1:\n            W = np.zeros((ts.num_samples, k))\n            # First column: linear gradient for variance\n            W[:, 0] = np.linspace(0.5, 1.5, ts.num_samples)\n            # Remaining columns: exponential\n            for j in range(1, k):\n                W[:, j] = np.random.exponential(0.8, ts.num_samples)\n            yield W\n\n    def transform_weights(self, W):\n        \"\"\"\n        Specific methods will need to transform weights\n        before passing them to general_stat.\n        \"\"\"\n        return W\n\n    def verify(self, ts):\n        for W, windows in subset_combos(\n            self.example_weights(ts), example_windows(ts), p=0.1\n        ):\n            self.verify_weighted_stat(ts, W, windows=windows)\n\n    def verify_definition(self, ts, W, windows, summary_func, ts_method, definition):\n        # general_stat will need an extra column for p\n        gW = self.transform_weights(W)\n\n        def wrapped_summary_func(x):\n            with suppress_division_by_zero_warning():\n                return summary_func(x)\n\n        # Determine output_dim of the function\n        M = len(wrapped_summary_func(gW[0]))\n        for sn in [True, False]:\n            sigma1 = ts.general_stat(\n                gW, wrapped_summary_func, M, windows, mode=self.mode, span_normalise=sn\n            )\n            sigma2 = general_stat(\n                ts, gW, wrapped_summary_func, windows, mode=self.mode, span_normalise=sn\n            )\n            sigma3 = ts_method(W, windows=windows, mode=self.mode, span_normalise=sn)\n            sigma4 = definition(\n                ts, W, windows=windows, mode=self.mode, span_normalise=sn\n            )\n\n            assert sigma1.shape == sigma2.shape\n            assert sigma1.shape == 
sigma3.shape\n            assert sigma1.shape == sigma4.shape\n            self.assertArrayAlmostEqual(sigma1, sigma2)\n            self.assertArrayAlmostEqual(sigma1, sigma3)\n            self.assertArrayAlmostEqual(sigma1, sigma4)\n\n\nclass SampleSetStatsMixin:\n    \"\"\"\n    Implements the verify method and dispatches it to verify_sample_sets\n    for a representative set of sample sets and windows.\n    \"\"\"\n\n    def verify(self, ts):\n        for sample_sets, windows in subset_combos(\n            example_sample_sets(ts),\n            example_windows(ts),\n            p=0.2,\n        ):\n            self.verify_sample_sets(ts, sample_sets, windows=windows)\n\n    def verify_definition(\n        self, ts, sample_sets, windows, summary_func, ts_method, definition\n    ):\n        W = np.array([[u in A for A in sample_sets] for u in ts.samples()], dtype=float)\n\n        def wrapped_summary_func(x):\n            with suppress_division_by_zero_warning():\n                return summary_func(x)\n\n        for sn in [True, False]:\n            # Determine output_dim of the function\n            M = len(summary_func(W[0]))\n            sigma1 = ts.general_stat(\n                W, wrapped_summary_func, M, windows, mode=self.mode, span_normalise=sn\n            )\n            sigma2 = general_stat(\n                ts, W, wrapped_summary_func, windows, mode=self.mode, span_normalise=sn\n            )\n            sigma3 = ts_method(\n                sample_sets, windows=windows, mode=self.mode, span_normalise=sn\n            )\n            sigma4 = definition(\n                ts, sample_sets, windows=windows, mode=self.mode, span_normalise=sn\n            )\n\n            assert sigma1.shape == sigma2.shape\n            assert sigma1.shape == sigma3.shape\n            assert sigma1.shape == sigma4.shape\n            self.assertArrayAlmostEqual(sigma1, sigma2)\n            self.assertArrayAlmostEqual(sigma1, sigma3)\n            
self.assertArrayAlmostEqual(sigma1, sigma4)\n\n\nclass KWaySampleSetStatsMixin(SampleSetStatsMixin):\n    \"\"\"\n    Defines the verify_definition method, which compares the results from\n    several different ways of defining and computing the same statistic.\n    \"\"\"\n\n    def verify_definition(\n        self, ts, sample_sets, indexes, windows, summary_func, ts_method, definition\n    ):\n        def wrapped_summary_func(x):\n            with suppress_division_by_zero_warning():\n                return summary_func(x)\n\n        W = np.array([[u in A for A in sample_sets] for u in ts.samples()], dtype=float)\n        # Determine output_dim of the function\n        M = len(wrapped_summary_func(W[0]))\n        sigma1 = ts.general_stat(W, wrapped_summary_func, M, windows, mode=self.mode)\n        sigma2 = general_stat(ts, W, wrapped_summary_func, windows, mode=self.mode)\n        sigma3 = ts_method(sample_sets, indexes=indexes, windows=windows, mode=self.mode)\n        sigma4 = definition(\n            ts, sample_sets, indexes=indexes, windows=windows, mode=self.mode\n        )\n\n        assert sigma1.shape == sigma2.shape\n        assert sigma1.shape == sigma3.shape\n        assert sigma1.shape == sigma4.shape\n        self.assertArrayAlmostEqual(sigma1, sigma2)\n        self.assertArrayAlmostEqual(sigma1, sigma3)\n        self.assertArrayAlmostEqual(sigma1, sigma4)\n\n\nclass TwoWaySampleSetStatsMixin(KWaySampleSetStatsMixin):\n    \"\"\"\n    Implements the verify method and dispatches it to verify_sample_sets_indexes,\n    which gives a representative sample of sample set indexes.\n    \"\"\"\n\n    def verify(self, ts):\n        for sample_sets, windows in subset_combos(\n            example_sample_sets(ts, min_size=2),\n            example_windows(ts),\n            p=0.1,\n        ):\n            for indexes in example_sample_set_index_pairs(sample_sets):\n                self.verify_sample_sets_indexes(ts, sample_sets, indexes, windows)\n\n\nclass 
ThreeWaySampleSetStatsMixin(KWaySampleSetStatsMixin):\n    \"\"\"\n    Implements the verify method and dispatches it to verify_sample_sets_indexes,\n    which gives a representative sample of sample set indexes.\n    \"\"\"\n\n    def verify(self, ts):\n        for sample_sets, windows in subset_combos(\n            example_sample_sets(ts, min_size=3),\n            example_windows(ts),\n            p=0.1,\n        ):\n            for indexes in example_sample_set_index_triples(sample_sets):\n                self.verify_sample_sets_indexes(ts, sample_sets, indexes, windows)\n\n\nclass FourWaySampleSetStatsMixin(KWaySampleSetStatsMixin):\n    \"\"\"\n    Implements the verify method and dispatches it to verify_sample_sets_indexes,\n    which gives a representative sample of sample set indexes.\n    \"\"\"\n\n    def verify(self, ts):\n        for sample_sets, windows in subset_combos(\n            example_sample_sets(ts, min_size=4),\n            example_windows(ts),\n            p=0.1,\n        ):\n            for indexes in example_sample_set_index_quads(sample_sets):\n                self.verify_sample_sets_indexes(ts, sample_sets, indexes, windows)\n\n\n############################################\n# Diversity\n############################################\n\n\ndef site_diversity(ts, sample_sets, windows=None, span_normalise=True):\n    windows = ts.parse_windows(windows)\n    out = np.zeros((len(windows) - 1, len(sample_sets)))\n    samples = ts.samples()\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        haps = ts.genotype_matrix(isolated_as_missing=False).T\n        site_positions = [x.position for x in ts.sites()]\n        for i, X in enumerate(sample_sets):\n            S = 0\n            site_in_window = False\n            denom = np.float64(len(X) * (len(X) - 1))\n            for k in range(ts.num_sites):\n                if (site_positions[k] >= begin) and (site_positions[k] < end):\n                  
  site_in_window = True\n                    for x in X:\n                        for y in set(X) - {x}:\n                            x_index = np.where(samples == x)[0][0]\n                            y_index = np.where(samples == y)[0][0]\n                            if haps[x_index][k] != haps[y_index][k]:\n                                # x|y\n                                S += 1\n            if site_in_window:\n                with suppress_division_by_zero_warning():\n                    out[j][i] = S / denom\n                if span_normalise:\n                    out[j][i] /= end - begin\n    return out\n\n\ndef branch_diversity(ts, sample_sets, windows=None, span_normalise=True):\n    windows = ts.parse_windows(windows)\n    out = np.zeros((len(windows) - 1, len(sample_sets)))\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        for i, X in enumerate(sample_sets):\n            S = 0\n            denom = np.float64(len(X) * (len(X) - 1))\n            has_trees = False\n            for tr in ts.trees():\n                if tr.interval.right <= begin:\n                    continue\n                if tr.interval.left >= end:\n                    break\n                if tr.total_branch_length > 0:\n                    has_trees = True\n                SS = 0\n                for x in X:\n                    for y in set(X) - {x}:\n                        SS += path_length(tr, x, y)\n                S += SS * (min(end, tr.interval.right) - max(begin, tr.interval.left))\n            if has_trees:\n                with suppress_division_by_zero_warning():\n                    out[j][i] = S / denom\n                if span_normalise:\n                    out[j][i] /= end - begin\n    return out\n\n\ndef node_diversity(ts, sample_sets, windows=None, span_normalise=True):\n    windows = ts.parse_windows(windows)\n    K = len(sample_sets)\n    out = np.zeros((len(windows) - 1, ts.num_nodes, K))\n    for k in 
range(K):\n        X = sample_sets[k]\n        for j in range(len(windows) - 1):\n            begin = windows[j]\n            end = windows[j + 1]\n            tX = len(X)\n            denom = np.float64(len(X) * (len(X) - 1))\n            S = np.zeros(ts.num_nodes)\n            for tr in ts.trees(tracked_samples=X):\n                if tr.interval.right <= begin:\n                    continue\n                if tr.interval.left >= end:\n                    break\n                SS = np.zeros(ts.num_nodes)\n                for u in tr.nodes():\n                    # count number of pairwise paths going through u\n                    n = tr.num_tracked_samples(u)\n                    SS[u] += 2 * n * (tX - n)\n                S += SS * (min(end, tr.interval.right) - max(begin, tr.interval.left))\n            with suppress_division_by_zero_warning():\n                out[j, :, k] = S / denom\n            if span_normalise:\n                out[j, :, k] /= end - begin\n    return out\n\n\ndef diversity(ts, sample_sets, windows=None, mode=\"site\", span_normalise=True):\n    \"\"\"\n    Computes average pairwise diversity between two random choices from x\n    over the window specified.\n    \"\"\"\n    method_map = {\n        \"site\": site_diversity,\n        \"node\": node_diversity,\n        \"branch\": branch_diversity,\n    }\n    return method_map[mode](\n        ts, sample_sets, windows=windows, span_normalise=span_normalise\n    )\n\n\nclass TestDiversity(StatsTestCase, SampleSetStatsMixin):\n    # Derived classes define this to get a specific stats mode.\n    mode = None\n\n    def verify_sample_sets(self, ts, sample_sets, windows):\n        n = np.array([len(x) for x in sample_sets])\n\n        def f(x):\n            with np.errstate(invalid=\"ignore\", divide=\"ignore\"):\n                return x * (n - x) / (n * (n - 1))\n\n        self.verify_definition(ts, sample_sets, windows, f, ts.diversity, diversity)\n\n\nclass TestBranchDiversity(TestDiversity, 
TopologyExamplesMixin):\n    mode = \"branch\"\n\n\nclass TestNodeDiversity(TestDiversity, TopologyExamplesMixin):\n    mode = \"node\"\n\n\nclass TestSiteDiversity(TestDiversity, MutatedTopologyExamplesMixin):\n    mode = \"site\"\n\n\n############################################\n# Segregating sites\n############################################\n\n\ndef site_segregating_sites(ts, sample_sets, windows=None, span_normalise=True):\n    windows = ts.parse_windows(windows)\n    out = np.zeros((len(windows) - 1, len(sample_sets)))\n    samples = ts.samples()\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        haps = ts.genotype_matrix(isolated_as_missing=False)\n        site_positions = [x.position for x in ts.sites()]\n        for i, X in enumerate(sample_sets):\n            set_X = set(X)\n            X_index = np.where(np.fromiter((s in set_X for s in samples), dtype=bool))[0]\n            for k in range(ts.num_sites):\n                if (site_positions[k] >= begin) and (site_positions[k] < end):\n                    num_alleles = len(set(haps[k, X_index]))\n                    out[j][i] += num_alleles - 1\n            if span_normalise:\n                out[j][i] /= end - begin\n    return out\n\n\ndef branch_segregating_sites(ts, sample_sets, windows=None, span_normalise=True):\n    windows = ts.parse_windows(windows)\n    out = np.zeros((len(windows) - 1, len(sample_sets)))\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        for i, X in enumerate(sample_sets):\n            tX = len(X)\n            for tr in ts.trees(tracked_samples=X):\n                if tr.interval.right <= begin:\n                    continue\n                if tr.interval.left >= end:\n                    break\n                SS = 0\n                for u in tr.nodes():\n                    nX = tr.num_tracked_samples(u)\n                    if nX > 0 and nX < tX:\n                  
      SS += tr.branch_length(u)\n                out[j][i] += SS * (\n                    min(end, tr.interval.right) - max(begin, tr.interval.left)\n                )\n            if span_normalise:\n                out[j][i] /= end - begin\n    return out\n\n\ndef node_segregating_sites(ts, sample_sets, windows=None, span_normalise=True):\n    windows = ts.parse_windows(windows)\n    K = len(sample_sets)\n    out = np.zeros((len(windows) - 1, ts.num_nodes, K))\n    for k in range(K):\n        X = sample_sets[k]\n        for j in range(len(windows) - 1):\n            begin = windows[j]\n            end = windows[j + 1]\n            tX = len(X)\n            S = np.zeros(ts.num_nodes)\n            for tr in ts.trees(tracked_samples=X):\n                if tr.interval.right <= begin:\n                    continue\n                if tr.interval.left >= end:\n                    break\n                SS = np.zeros(ts.num_nodes)\n                for u in tr.nodes():\n                    nX = tr.num_tracked_samples(u)\n                    SS[u] = (nX > 0) and (nX < tX)\n                S += SS * (min(end, tr.interval.right) - max(begin, tr.interval.left))\n            out[j, :, k] = S\n            if span_normalise:\n                out[j, :, k] /= end - begin\n    return out\n\n\ndef segregating_sites(ts, sample_sets, windows=None, mode=\"site\", span_normalise=True):\n    \"\"\"\n    Computes the density of segregating sites over the window specified.\n    \"\"\"\n    method_map = {\n        \"site\": site_segregating_sites,\n        \"node\": node_segregating_sites,\n        \"branch\": branch_segregating_sites,\n    }\n    return method_map[mode](\n        ts, sample_sets, windows=windows, span_normalise=span_normalise\n    )\n\n\nclass TestSegregatingSites(StatsTestCase, SampleSetStatsMixin):\n    # Derived classes define this to get a specific stats mode.\n    mode = None\n\n    def verify_sample_sets(self, ts, sample_sets, windows):\n        n = np.array([len(x) 
for x in sample_sets])\n\n        # this works because sum_{i=1}^k (1-p_i) = k-1\n        def f(x):\n            return (x > 0) * (1 - x / n)\n\n        self.verify_definition(\n            ts, sample_sets, windows, f, ts.segregating_sites, segregating_sites\n        )\n\n\nclass TestBranchSegregatingSites(TestSegregatingSites, TopologyExamplesMixin):\n    mode = \"branch\"\n\n\nclass TestNodeSegregatingSites(TestSegregatingSites, TopologyExamplesMixin):\n    mode = \"node\"\n\n\nclass TestSiteSegregatingSites(TestSegregatingSites, MutatedTopologyExamplesMixin):\n    mode = \"site\"\n\n\nclass TestBranchSegregatingSitesProperties(StatsTestCase, TopologyExamplesMixin):\n    def verify(self, ts):\n        windows = ts.breakpoints(as_array=True)\n        # If we split by tree, this should always be equal to the total\n        # branch length. The definition of total_branch_length here is slightly\n        # tricky: it's the sum of all branch lengths that subtend between 0\n        # and n samples. 
This differs from the built-in total_branch_length\n        # function, which just sums that total branch length reachable from\n        # roots.\n        tbl_tree = [\n            sum(\n                tree.branch_length(u)\n                for u in tree.nodes()\n                if 0 < tree.num_samples(u) < ts.num_samples\n            )\n            for tree in ts.trees()\n        ]\n        # We must span_normalise, because these values are always weighted\n        # by the span, so we're effectively cancelling out this contribution\n        tbl = ts.segregating_sites([ts.samples()], windows=windows, mode=\"branch\")\n        tbl = tbl.reshape(tbl.shape[:-1])\n        self.assertArrayAlmostEqual(tbl_tree, tbl)\n\n\n############################################\n# Tajima's D\n############################################\n\n\ndef site_tajimas_d(ts, sample_sets, windows=None):\n    windows = ts.parse_windows(windows)\n    out = np.zeros((len(windows) - 1, len(sample_sets)))\n    samples = ts.samples()\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        haps = ts.genotype_matrix(isolated_as_missing=False)\n        site_positions = [x.position for x in ts.sites()]\n        n = np.array([len(X) for X in sample_sets])\n        for i, X in enumerate(sample_sets):\n            nn = n[i]\n            S = 0\n            T = 0\n            set_X = set(X)\n            X_index = np.where(np.fromiter((s in set_X for s in samples), dtype=bool))[0]\n            for k in range(ts.num_sites):\n                if (site_positions[k] >= begin) and (site_positions[k] < end):\n                    hX = haps[k, X_index]\n                    alleles = set(hX)\n                    num_alleles = len(alleles)\n                    n_alleles = [np.sum(hX == a) for a in alleles]\n                    S += num_alleles - 1\n                    for k in n_alleles:\n                        with suppress_division_by_zero_warning():\n                 
           T += k * (nn - k) / (nn * (nn - 1))\n            with suppress_division_by_zero_warning():\n                a1 = np.sum(1 / np.arange(1, nn))  # this is h in the main version\n                a2 = np.sum(1 / np.arange(1, nn) ** 2)  # this is g\n                b1 = (nn + 1) / (3 * (nn - 1))\n                b2 = 2 * (nn**2 + nn + 3) / (9 * nn * (nn - 1))\n                c1 = b1 - 1 / a1\n                c2 = b2 - (nn + 2) / (a1 * nn) + a2 / a1**2\n                e1 = c1 / a1  # this is a\n                e2 = c2 / (a1**2 + a2)  # this is b\n                out[j][i] = (T - S / a1) / np.sqrt(e1 * S + e2 * S * (S - 1))\n    return out\n\n\ndef tajimas_d(ts, sample_sets, windows=None, mode=\"site\", span_normalise=True):\n    method_map = {\"site\": site_tajimas_d}\n    return method_map[mode](\n        ts, sample_sets, windows=windows, span_normalise=span_normalise\n    )\n\n\nclass TestTajimasD(StatsTestCase, SampleSetStatsMixin):\n    # Derived classes define this to get a specific stats mode.\n    mode = None\n\n    def verify(self, ts):\n        # only check per-site\n        for sample_sets in example_sample_sets(ts, min_size=1):\n            self.verify_persite_tajimas_d(ts, sample_sets)\n\n    def get_windows(self, ts):\n        yield \"sites\"\n        yield [0, ts.sequence_length]\n        yield np.arange(0, 1.1, 0.1) * ts.sequence_length\n\n    def verify_persite_tajimas_d(self, ts, sample_sets):\n        for windows in self.get_windows(ts):\n            sigma1 = ts.Tajimas_D(sample_sets, windows=windows, mode=self.mode)\n            sigma2 = site_tajimas_d(ts, sample_sets, windows=windows)\n            assert sigma1.shape == sigma2.shape\n            # floating point error can lead in strange cases to\n            # +/-inf in our implementation here and nan in the ts version\n            f1 = np.isfinite(sigma1)\n            f2 = np.isfinite(sigma2)\n            assert np.all(f1 == f2)\n            sigma2[~f2] = np.nan\n            
self.assertArrayAlmostEqual(sigma1, sigma2)\n\n\nclass TestSiteTajimasD(TestTajimasD, MutatedTopologyExamplesMixin):\n    mode = \"site\"\n\n\n############################################\n# Y1\n############################################\n\n\ndef branch_Y1(ts, sample_sets, windows=None, span_normalise=True):\n    windows = ts.parse_windows(windows)\n    out = np.zeros((len(windows) - 1, len(sample_sets)))\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        for i, X in enumerate(sample_sets):\n            S = 0\n            denom = np.float64(len(X) * (len(X) - 1) * (len(X) - 2))\n            has_trees = False\n            for tr in ts.trees():\n                if tr.interval.right <= begin:\n                    continue\n                if tr.interval.left >= end:\n                    break\n                if tr.total_branch_length > 0:\n                    has_trees = True\n                this_length = min(end, tr.interval.right) - max(begin, tr.interval.left)\n                for x in X:\n                    for y in set(X) - {x}:\n                        for z in set(X) - {x, y}:\n                            xy_mrca = tr.mrca(x, y)\n                            xz_mrca = tr.mrca(x, z)\n                            yz_mrca = tr.mrca(y, z)\n                            if xy_mrca == xz_mrca:\n                                #   /\\\n                                #  / /\\\n                                # x y  z\n                                S += path_length(tr, x, yz_mrca) * this_length\n                            elif xy_mrca == yz_mrca:\n                                #   /\\\n                                #  / /\\\n                                # y x  z\n                                S += path_length(tr, x, xz_mrca) * this_length\n                            elif xz_mrca == yz_mrca:\n                                #   /\\\n                                #  / /\\\n                             
   # z x  y\n                                S += path_length(tr, x, xy_mrca) * this_length\n            if has_trees:\n                with suppress_division_by_zero_warning():\n                    out[j][i] = S / denom\n                if span_normalise:\n                    out[j][i] /= end - begin\n    return out\n\n\ndef site_Y1(ts, sample_sets, windows=None, span_normalise=True):\n    windows = ts.parse_windows(windows)\n    out = np.zeros((len(windows) - 1, len(sample_sets)))\n    samples = ts.samples()\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        haps = ts.genotype_matrix(isolated_as_missing=False).T\n        site_positions = [x.position for x in ts.sites()]\n        for i, X in enumerate(sample_sets):\n            S = 0\n            denom = np.float64(len(X) * (len(X) - 1) * (len(X) - 2))\n            site_in_window = False\n            for k in range(ts.num_sites):\n                if (site_positions[k] >= begin) and (site_positions[k] < end):\n                    site_in_window = True\n                    for x in X:\n                        x_index = np.where(samples == x)[0][0]\n                        for y in set(X) - {x}:\n                            y_index = np.where(samples == y)[0][0]\n                            for z in set(X) - {x, y}:\n                                z_index = np.where(samples == z)[0][0]\n                                condition = (\n                                    haps[x_index, k] != haps[y_index, k]\n                                    and haps[x_index, k] != haps[z_index, k]\n                                )\n                                if condition:\n                                    # x|yz\n                                    S += 1\n            if site_in_window:\n                with suppress_division_by_zero_warning():\n                    out[j][i] = S / denom\n                if span_normalise:\n                    out[j][i] /= end - begin\n  
  return out\n\n\ndef node_Y1(ts, sample_sets, windows=None, span_normalise=True):\n    windows = ts.parse_windows(windows)\n    K = len(sample_sets)\n    out = np.zeros((len(windows) - 1, ts.num_nodes, K))\n    for k in range(K):\n        X = sample_sets[k]\n        for j in range(len(windows) - 1):\n            begin = windows[j]\n            end = windows[j + 1]\n            tX = len(X)\n            denom = np.float64(tX * (tX - 1) * (tX - 2))\n            S = np.zeros(ts.num_nodes)\n            for tr in ts.trees(tracked_samples=X):\n                if tr.interval.right <= begin:\n                    continue\n                if tr.interval.left >= end:\n                    break\n                SS = np.zeros(ts.num_nodes)\n                for u in tr.nodes():\n                    # count number of paths above a but not b,c\n                    n = tr.num_tracked_samples(u)\n                    SS[u] += n * (tX - n) * (tX - n - 1) + (tX - n) * n * (n - 1)\n                S += SS * (min(end, tr.interval.right) - max(begin, tr.interval.left))\n            with suppress_division_by_zero_warning():\n                out[j, :, k] = S / denom\n            if span_normalise:\n                out[j, :, k] /= end - begin\n    return out\n\n\ndef Y1(ts, sample_sets, windows=None, mode=\"site\", span_normalise=True):\n    windows = ts.parse_windows(windows)\n    method_map = {\"site\": site_Y1, \"node\": node_Y1, \"branch\": branch_Y1}\n    return method_map[mode](\n        ts, sample_sets, windows=windows, span_normalise=span_normalise\n    )\n\n\nclass TestY1(StatsTestCase, SampleSetStatsMixin):\n    # Derived classes define this to get a specific stats mode.\n    mode = None\n\n    def verify_sample_sets(self, ts, sample_sets, windows):\n        n = np.array([len(x) for x in sample_sets])\n        denom = n * (n - 1) * (n - 2)\n\n        def f(x):\n            with np.errstate(invalid=\"ignore\", divide=\"ignore\"):\n                return x * (n - x) * (n - x - 1) / 
denom\n\n        self.verify_definition(ts, sample_sets, windows, f, ts.Y1, Y1)\n\n\nclass TestBranchY1(TestY1, TopologyExamplesMixin):\n    mode = \"branch\"\n\n\nclass TestNodeY1(TestY1, TopologyExamplesMixin):\n    mode = \"node\"\n\n\nclass TestSiteY1(TestY1, MutatedTopologyExamplesMixin):\n    mode = \"site\"\n\n\n############################################\n# Divergence\n############################################\n\n\ndef site_divergence(ts, sample_sets, indexes, windows=None, span_normalise=True):\n    out = np.zeros((len(windows) - 1, len(indexes)))\n    samples = ts.samples()\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        haps = ts.genotype_matrix(isolated_as_missing=False).T\n        site_positions = [x.position for x in ts.sites()]\n        for i, (ix, iy) in enumerate(indexes):\n            X = sample_sets[ix]\n            Y = sample_sets[iy]\n            denom = np.float64(len(X) * len(Y))\n            site_in_window = False\n            S = 0\n            for k in range(ts.num_sites):\n                if (site_positions[k] >= begin) and (site_positions[k] < end):\n                    site_in_window = True\n                    for x in X:\n                        x_index = np.where(samples == x)[0][0]\n                        for y in Y:\n                            y_index = np.where(samples == y)[0][0]\n                            if haps[x_index][k] != haps[y_index][k]:\n                                # x|y\n                                S += 1\n            if site_in_window:\n                with np.errstate(invalid=\"ignore\", divide=\"ignore\"):\n                    out[j][i] = S / denom\n                if span_normalise:\n                    out[j][i] /= end - begin\n    return out\n\n\ndef branch_divergence(ts, sample_sets, indexes, windows=None, span_normalise=True):\n    out = np.zeros((len(windows) - 1, len(indexes)))\n    for j in range(len(windows) - 1):\n        begin = 
windows[j]\n        end = windows[j + 1]\n        for i, (ix, iy) in enumerate(indexes):\n            X = sample_sets[ix]\n            Y = sample_sets[iy]\n            denom = np.float64(len(X) * len(Y))\n            has_trees = False\n            S = 0\n            for tr in ts.trees():\n                if tr.interval.right <= begin:\n                    continue\n                if tr.interval.left >= end:\n                    break\n                if tr.total_branch_length > 0:\n                    has_trees = True\n                SS = 0\n                for x in X:\n                    for y in Y:\n                        SS += path_length(tr, x, y)\n                S += SS * (min(end, tr.interval.right) - max(begin, tr.interval.left))\n            if has_trees:\n                with suppress_division_by_zero_warning():\n                    out[j][i] = S / denom\n                if span_normalise:\n                    out[j][i] /= end - begin\n    return out\n\n\ndef node_divergence(ts, sample_sets, indexes, windows=None, span_normalise=True):\n    out = np.zeros((len(windows) - 1, ts.num_nodes, len(indexes)))\n    for i, (ix, iy) in enumerate(indexes):\n        X = sample_sets[ix]\n        Y = sample_sets[iy]\n        tX = len(X)\n        tY = len(Y)\n        denom = np.float64(len(X) * len(Y))\n        for j in range(len(windows) - 1):\n            begin = windows[j]\n            end = windows[j + 1]\n            S = np.zeros(ts.num_nodes)\n            for t1, t2 in zip(ts.trees(tracked_samples=X), ts.trees(tracked_samples=Y)):\n                if t1.interval.right <= begin:\n                    continue\n                if t1.interval.left >= end:\n                    break\n                SS = np.zeros(ts.num_nodes)\n                for u in t1.nodes():\n                    # count number of pairwise paths going through u\n                    nX = t1.num_tracked_samples(u)\n                    nY = t2.num_tracked_samples(u)\n                    SS[u] += 
nX * (tY - nY) + (tX - nX) * nY\n                S += SS * (min(end, t1.interval.right) - max(begin, t1.interval.left))\n            with suppress_division_by_zero_warning():\n                out[j, :, i] = S / denom\n            if span_normalise:\n                out[j, :, i] /= end - begin\n    return out\n\n\ndef divergence(\n    ts, sample_sets, indexes=None, windows=None, mode=\"site\", span_normalise=True\n):\n    \"\"\"\n    Computes average pairwise divergence between two random choices from x\n    over the window specified.\n    \"\"\"\n    windows = ts.parse_windows(windows)\n    if indexes is None:\n        indexes = [(0, 1)]\n    method_map = {\n        \"site\": site_divergence,\n        \"node\": node_divergence,\n        \"branch\": branch_divergence,\n    }\n    return method_map[mode](\n        ts, sample_sets, indexes=indexes, windows=windows, span_normalise=span_normalise\n    )\n\n\nclass TestDivergence(StatsTestCase, TwoWaySampleSetStatsMixin):\n    # Derived classes define this to get a specific stats mode.\n    mode = None\n\n    def verify_sample_sets_indexes(self, ts, sample_sets, indexes, windows):\n        n = np.array([len(x) for x in sample_sets])\n\n        denom = np.array([n[i] * (n[j] - (i == j)) for i, j in indexes])\n\n        def f(x):\n            numer = np.array([(x[i] * (n[j] - x[j])) for i, j in indexes])\n            return numer / denom\n\n        self.verify_definition(\n            ts, sample_sets, indexes, windows, f, ts.divergence, divergence\n        )\n\n\nclass TestBranchDivergence(TestDivergence, TopologyExamplesMixin):\n    mode = \"branch\"\n\n\nclass TestNodeDivergence(TestDivergence, TopologyExamplesMixin):\n    mode = \"node\"\n\n\nclass TestSiteDivergence(TestDivergence, MutatedTopologyExamplesMixin):\n    mode = \"site\"\n\n\n############################################\n# Genetic relatedness\n############################################\n\n\ndef site_genetic_relatedness(\n    ts,\n    sample_sets,\n    
indexes,\n    windows=None,\n    span_normalise=True,\n    polarised=True,\n    proportion=True,\n    centre=True,\n):\n    if windows is None:\n        windows = [0.0, ts.sequence_length]\n    out = np.zeros((len(windows) - 1, len(indexes)))\n    all_samples = np.array(list({u for s in sample_sets for u in s}))\n    denom = np.ones(len(windows))\n    if proportion:\n        denom = ts.segregating_sites(\n            sample_sets=all_samples,\n            windows=windows,\n            mode=\"site\",\n            span_normalise=span_normalise,\n        )\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        for vv in zip(\n            *[\n                ts.variants(left=begin, right=end, samples=x, isolated_as_missing=False)\n                for x in sample_sets\n            ]\n        ):\n            ancestral_state = vv[0].site.ancestral_state\n            alleles = vv[0].alleles\n            ff = [v.frequencies() for v in vv]\n            for a in alleles:\n                mean_f = sum([f[a] for f in ff]) / len(ff)\n                for i, (ix, iy) in enumerate(indexes):\n                    fx = ff[ix][a]\n                    fy = ff[iy][a]\n                    if not (polarised and a == ancestral_state):\n                        if centre:\n                            out[j][i] += (fx - mean_f) * (fy - mean_f)\n                        else:\n                            out[j][i] += fx * fy\n        for i in range(len(indexes)):\n            with np.errstate(invalid=\"ignore\", divide=\"ignore\"):\n                out[j][i] /= denom[j]\n            if span_normalise:\n                out[j][i] /= end - begin\n    return out\n\n\ndef branch_genetic_relatedness(\n    ts,\n    sample_sets,\n    indexes,\n    windows=None,\n    span_normalise=True,\n    polarised=True,\n    proportion=True,\n    centre=True,\n):\n    if windows is None:\n        windows = [0.0, ts.sequence_length]\n    out = np.zeros((len(windows) - 
1, len(indexes)))\n    all_samples = np.array(list({u for s in sample_sets for u in s}))\n    denom = np.ones(len(windows))\n    if proportion:\n        denom = ts.segregating_sites(\n            sample_sets=all_samples,\n            windows=windows,\n            mode=\"branch\",\n            span_normalise=span_normalise,\n        )\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        for tr in ts.trees():\n            if tr.interval.right <= begin:\n                continue\n            if tr.interval.left >= end:\n                break\n            span = min(end, tr.interval.right) - max(begin, tr.interval.left)\n            # iterating over tr.nodes will miss nodes unreachable from samples\n            for v in range(ts.num_nodes):\n                area = tr.branch_length(v) * span\n                freqs = [\n                    sum([tr.is_descendant(u, v) for u in x]) / len(x)\n                    for x in sample_sets\n                ]\n                mean_freq = sum(freqs) / len(freqs)\n                for i, (ix, iy) in enumerate(indexes):\n                    fx = freqs[ix]\n                    fy = freqs[iy]\n                    if centre:\n                        out[j][i] += area * (fx - mean_freq) * (fy - mean_freq)\n                        if not polarised:\n                            out[j][i] += (\n                                area\n                                * (1 - fx - (1 - mean_freq))\n                                * (1 - fy - (1 - mean_freq))\n                            )\n                    else:\n                        out[j][i] += area * fx * fy\n                        if not polarised:\n                            out[j][i] += area * (1 - fx) * (1 - fy)\n        for i in range(len(indexes)):\n            with np.errstate(invalid=\"ignore\", divide=\"ignore\"):\n                out[j][i] /= denom[j]\n            if span_normalise:\n                out[j][i] /= end - begin\n 
   return out\n\n\ndef node_genetic_relatedness(\n    ts,\n    sample_sets,\n    indexes,\n    windows=None,\n    span_normalise=True,\n    proportion=True,\n    centre=True,\n    polarised=True,\n):\n    if windows is None:\n        windows = [0.0, ts.sequence_length]\n    out = np.zeros((len(windows) - 1, ts.num_nodes, len(indexes)))\n    all_samples = np.array(list({u for s in sample_sets for u in s}))\n    denom = np.ones((len(windows), ts.num_nodes))\n    if proportion:\n        denom = ts.segregating_sites(\n            sample_sets=all_samples,\n            windows=windows,\n            mode=\"node\",\n            span_normalise=span_normalise,\n        )\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        for tr in ts.trees():\n            if tr.interval.right <= begin:\n                continue\n            if tr.interval.left >= end:\n                break\n            span = min(end, tr.interval.right) - max(begin, tr.interval.left)\n            for v in range(ts.num_nodes):\n                freqs = [\n                    sum([tr.is_descendant(u, v) for u in x]) / len(x)\n                    for x in sample_sets\n                ]\n                mean_freq = sum(freqs) / len(freqs)\n                for i, (ix, iy) in enumerate(indexes):\n                    fx = freqs[ix]\n                    fy = freqs[iy]\n                    if centre:\n                        out[j][v][i] += span * (fx - mean_freq) * (fy - mean_freq)\n                        if not polarised:\n                            out[j][v][i] += (\n                                span\n                                * (1 - fx - (1 - mean_freq))\n                                * (1 - fy - (1 - mean_freq))\n                            )\n                    else:\n                        out[j][v][i] += span * fx * fy\n                        if not polarised:\n                            out[j][v][i] += span * (1 - fx) * (1 - fy)\n        
for i in range(len(indexes)):\n            for v in ts.nodes():\n                iV = v.id\n                with np.errstate(invalid=\"ignore\", divide=\"ignore\"):\n                    out[j, iV, i] /= denom[j, iV]\n                if span_normalise:\n                    out[j, iV, i] /= end - begin\n    return out\n\n\ndef genetic_relatedness(\n    ts,\n    sample_sets,\n    indexes=None,\n    windows=None,\n    mode=\"site\",\n    span_normalise=True,\n    proportion=True,\n    centre=True,\n    polarised=True,\n):\n    \"\"\"\n    Computes genetic relatedness between two random choices from x\n    over the window specified.\n    \"\"\"\n    windows = ts.parse_windows(windows)\n    if indexes is None:\n        indexes = [(0, 1)]\n    method_map = {\n        \"site\": site_genetic_relatedness,\n        \"node\": node_genetic_relatedness,\n        \"branch\": branch_genetic_relatedness,\n    }\n    return method_map[mode](\n        ts,\n        sample_sets,\n        indexes=indexes,\n        windows=windows,\n        span_normalise=span_normalise,\n        polarised=polarised,\n        proportion=proportion,\n        centre=centre,\n    )\n\n\nclass TestGeneticRelatedness(StatsTestCase, TwoWaySampleSetStatsMixin):\n    # Derived classes define this to get a specific stats mode.\n    mode = None\n\n    def verify_definition(\n        self,\n        ts,\n        sample_sets,\n        indexes,\n        windows,\n        summary_func,\n        ts_method,\n        definition,\n        proportion,\n        polarised=True,\n        centre=True,\n    ):\n        def wrapped_summary_func(x):\n            with suppress_division_by_zero_warning():\n                return summary_func(x)\n\n        W = np.array([[u in A for A in sample_sets] for u in ts.samples()], dtype=float)\n        # Determine output_dim of the function\n        M = len(wrapped_summary_func(W[0]))\n        denom = 1\n        if proportion:\n            all_samples = list({u for s in sample_sets for u in 
s})\n            denom = ts.segregating_sites(\n                sample_sets=[all_samples], windows=windows, mode=self.mode\n            )\n\n        with np.errstate(divide=\"ignore\", invalid=\"ignore\"):\n            sigma1 = (\n                ts.general_stat(\n                    W,\n                    wrapped_summary_func,\n                    M,\n                    windows,\n                    mode=self.mode,\n                    strict=centre,\n                    polarised=polarised,\n                )\n                / denom\n            )\n        sigma2 = ts_method(\n            sample_sets,\n            indexes=indexes,\n            windows=windows,\n            mode=self.mode,\n            proportion=proportion,\n            centre=centre,\n            polarised=polarised,\n        )\n        sigma3 = definition(\n            ts,\n            sample_sets,\n            indexes=indexes,\n            windows=windows,\n            mode=self.mode,\n            proportion=proportion,\n            centre=centre,\n            polarised=polarised,\n        )\n        assert sigma1.shape == sigma2.shape\n        assert sigma1.shape == sigma3.shape\n        self.assertArrayAlmostEqual(sigma1, sigma2)\n        self.assertArrayAlmostEqual(sigma1, sigma3)\n\n    def verify_sample_sets_indexes(self, ts, sample_sets, indexes, windows):\n        n = np.array([len(x) for x in sample_sets])\n\n        def f_noncentred(x):\n            p = x / n\n            return np.array([p[i] * p[j] for i, j in indexes])\n\n        def f_centred(x):\n            p = x / n\n            mp = np.mean(p)\n            return np.array([(p[i] - mp) * (p[j] - mp) for i, j in indexes])\n\n        for proportion in [True, False]:\n            self.verify_definition(\n                ts,\n                sample_sets,\n                indexes,\n                windows,\n                f_centred,\n                ts.genetic_relatedness,\n                genetic_relatedness,\n                
proportion,\n            )\n\n        for centre, polarised in [\n            (True, True),\n            (False, True),\n            (True, False),\n            (False, False),\n        ]:\n            f = f_centred if centre else f_noncentred\n            self.verify_definition(\n                ts,\n                sample_sets,\n                indexes,\n                windows,\n                f,\n                ts.genetic_relatedness,\n                genetic_relatedness,\n                proportion=False,\n                centre=centre,\n                polarised=polarised,\n            )\n\n    @pytest.mark.parametrize(\"proportion\", [None, True, False])\n    def test_shapes(self, proportion):\n        # exclude this test in the parent class\n        if self.mode is None:\n            return\n        ts = msprime.sim_ancestry(\n            8,\n            random_seed=1,\n            end_time=10,\n            sequence_length=10,\n            population_size=10,\n            recombination_rate=0.02,\n        )\n        ts = msprime.sim_mutations(ts, rate=0.01, random_seed=2)\n        x = ts.genetic_relatedness(\n            sample_sets=[[0, 1, 2], [3]],\n            indexes=None,\n            windows=None,\n            mode=self.mode,\n            proportion=proportion,\n        )\n        if self.mode == \"node\":\n            assert x.shape == (ts.num_nodes,)\n        else:\n            assert x.shape == ()\n        x = ts.genetic_relatedness(\n            sample_sets=[[0, 1, 2], [3]],\n            indexes=[(0, 1)],\n            windows=None,\n            mode=self.mode,\n            proportion=proportion,\n        )\n        if self.mode == \"node\":\n            assert x.shape == (ts.num_nodes, 1)\n        else:\n            assert x.shape == (1,)\n        x = ts.genetic_relatedness(\n            sample_sets=[[0, 1, 2], [3]],\n            indexes=[(0, 1)],\n            windows=[0, 10],\n            mode=self.mode,\n            proportion=proportion,\n    
    )\n        if self.mode == \"node\":\n            assert x.shape == (1, ts.num_nodes, 1)\n        else:\n            assert x.shape == (1, 1)\n        x = ts.genetic_relatedness(\n            sample_sets=[[0, 1, 2], [3]],\n            indexes=[(0, 1)],\n            windows=[0, 5, 10],\n            mode=self.mode,\n            proportion=proportion,\n        )\n        if self.mode == \"node\":\n            assert x.shape == (2, ts.num_nodes, 1)\n        else:\n            assert x.shape == (2, 1)\n        x = ts.genetic_relatedness(\n            sample_sets=[[0, 1, 2], [3]],\n            indexes=None,\n            windows=[0, 5, 10],\n            mode=self.mode,\n            proportion=proportion,\n        )\n        if self.mode == \"node\":\n            assert x.shape == (2, ts.num_nodes)\n        else:\n            assert x.shape == (2,)\n        x = ts.genetic_relatedness(\n            sample_sets=[[0, 1, 2], [3], [4, 5]],\n            indexes=[(0, 1), (1, 2)],\n            windows=[0, 5, 9, 10],\n            mode=self.mode,\n            proportion=proportion,\n        )\n        if self.mode == \"node\":\n            assert x.shape == (3, ts.num_nodes, 2)\n        else:\n            assert x.shape == (3, 2)\n        x = ts.genetic_relatedness(\n            sample_sets=[[0, 1, 2], [3], [4, 5]],\n            indexes=[(0, 1), (1, 2)],\n            windows=None,\n            mode=self.mode,\n            proportion=proportion,\n        )\n        if self.mode == \"node\":\n            assert x.shape == (ts.num_nodes, 2)\n        else:\n            assert x.shape == (2,)\n\n    def test_single_sample_set_self_comparison(self, ts_12_highrecomb_fixture):\n        if self.mode is None:\n            return\n        # Test for issue #3055 - self-comparisons with single sample set\n        ts = ts_12_highrecomb_fixture\n        # Single sample set with self-comparison\n        result = ts.genetic_relatedness([[0]], indexes=[(0, 0)], mode=self.mode)\n        
result_shape = (ts.num_nodes, 1) if self.mode == \"node\" else (1,)\n        assert result.shape == result_shape\n        # Should work for multiple samples in single set too\n        result = ts.genetic_relatedness([[0, 1, 2]], indexes=[(0, 0)], mode=self.mode)\n        assert result.shape == result_shape\n        # Test with multiple self-comparisons\n        result = ts.genetic_relatedness(\n            [[0, 1], [2, 3]], indexes=[(0, 0), (1, 1)], mode=self.mode\n        )\n        result_shape = (ts.num_nodes, 2) if self.mode == \"node\" else (2,)\n        assert result.shape == result_shape\n\n    def test_single_sample_set_invalid_indexes(self, ts_12_highrecomb_fixture):\n        if self.mode is None:\n            return\n        # Test that invalid indexes raise LibraryError with single sample set\n        ts = ts_12_highrecomb_fixture\n        # Index out of bounds (only have 1 sample set, but trying to access index 1)\n        with pytest.raises(\n            exceptions.LibraryError, match=\"TSK_ERR_BAD_SAMPLE_SET_INDEX\"\n        ):\n            ts.genetic_relatedness([[0]], indexes=[(0, 1)], mode=self.mode)\n        # Negative index\n        with pytest.raises(\n            exceptions.LibraryError, match=\"TSK_ERR_BAD_SAMPLE_SET_INDEX\"\n        ):\n            ts.genetic_relatedness([[0]], indexes=[(-1, 0)], mode=self.mode)\n        # Both indexes out of bounds\n        with pytest.raises(\n            exceptions.LibraryError, match=\"TSK_ERR_BAD_SAMPLE_SET_INDEX\"\n        ):\n            ts.genetic_relatedness([[0, 1]], indexes=[(2, 2)], mode=self.mode)\n\n\nclass TestBranchGeneticRelatedness(TestGeneticRelatedness, TopologyExamplesMixin):\n    mode = \"branch\"\n\n    @pytest.mark.parametrize(\"polarised\", [True, False])\n    def test_simple_tree_noncentred(self, polarised):\n        # 2.00┊   4   ┊\n        #     ┊ ┏━┻┓  ┊\n        # 1.00┊ ┃  3  ┊\n        #     ┊ ┃ ┏┻┓ ┊\n        # 0.00┊ 0 1 2 ┊\n        #     0       1\n        ts = 
tskit.Tree.generate_balanced(3).tree_sequence\n        indexes = [(0, 0), (0, 1), (1, 1), (1, 2), (2, 2)]\n        sample_sets = [[0], [1], [2]]\n        if polarised:\n            A = np.array(\n                [\n                    2,  # (0, 0)\n                    0,  # (0, 1)\n                    2,  # (1, 1)\n                    1,  # (1, 2),\n                    2,  # (2, 2)\n                ]\n            )\n        else:\n            A = np.array(\n                [\n                    (2 + 3),  # (0, 0)\n                    (0 + 1),  # (0, 1)\n                    (2 + 3),  # (1, 1)\n                    (1 + 2),  # (1, 2),\n                    (2 + 3),  # (2, 2)\n                ]\n            )\n        B = branch_genetic_relatedness(\n            ts,\n            sample_sets=sample_sets,\n            indexes=indexes,\n            polarised=polarised,\n            proportion=False,\n            centre=False,\n        ).squeeze()\n        C = ts.genetic_relatedness(\n            sample_sets=sample_sets,\n            indexes=indexes,\n            mode=\"branch\",\n            polarised=polarised,\n            proportion=False,\n            centre=False,\n        ).squeeze()\n        self.assertArrayAlmostEqual(A, B)\n        self.assertArrayAlmostEqual(A, C)\n\n\nclass TestNodeGeneticRelatedness(TestGeneticRelatedness, TopologyExamplesMixin):\n    mode = \"node\"\n\n\nclass TestSiteGeneticRelatedness(TestGeneticRelatedness, MutatedTopologyExamplesMixin):\n    mode = \"site\"\n\n    def test_match_K_c0(self):\n        # This test checks that ts.genetic_relatedness() matches K_c0\n        # from Speed & Balding (2014) https://www.nature.com/articles/nrg3821\n        ts = msprime.simulate(\n            10, mutation_rate=0.01, length=100, recombination_rate=0.01, random_seed=23\n        )\n        samples = [u for u in ts.samples()]\n        sample_sets = [[0, 1], [2, 3], [4, 5]]\n        all_samples = list({u for s in sample_sets for u in s})\n        
sample_ind = [samples.index(x) for x in all_samples]\n        indexes = [(0, 0), (0, 1), (0, 2), (1, 1), (1, 2), (2, 2)]\n        A = ts.genetic_relatedness(\n            sample_sets, indexes=indexes, mode=\"site\", span_normalise=False\n        )\n        # Genotype covariance as in Speed and Balding\n        G = ts.genotype_matrix().T\n        G = G[sample_ind]\n        G_centered = G - G.mean(axis=0)\n        B = np.zeros(len(indexes))\n        for i, (ix, iy) in enumerate(indexes):\n            x1 = sample_sets[ix][0]\n            x2 = sample_sets[ix][1]\n            y1 = sample_sets[iy][0]\n            y2 = sample_sets[iy][1]\n            B[i] = (\n                (G_centered[x1] + G_centered[x2])\n                @ (G_centered[y1] + G_centered[y2])\n                / ts.segregating_sites(sample_sets=all_samples, span_normalise=False)\n            )\n        self.assertArrayAlmostEqual(4 * A, B)\n\n\n############################################\n# Genetic relatedness weighted\n############################################\n\n\ndef genetic_relatedness_matrix(\n    ts, sample_sets, windows=None, mode=\"site\", polarised=True, centre=True\n):\n    n = len(sample_sets)\n    indexes = [\n        (n1, n2) for n1, n2 in itertools.combinations_with_replacement(range(n), 2)\n    ]\n    if windows is None:\n        if mode == \"node\":\n            n_nodes = ts.num_nodes\n            K = np.zeros((n_nodes, n, n))\n            out = ts.genetic_relatedness(\n                sample_sets,\n                indexes,\n                mode=mode,\n                proportion=False,\n                span_normalise=True,\n                polarised=polarised,\n                centre=centre,\n            )\n            for node in range(n_nodes):\n                this_K = np.zeros((n, n))\n                this_K[np.triu_indices(n)] = out[node, :]\n                this_K = this_K + np.triu(this_K, 1).transpose()\n                K[node, :, :] = this_K\n        else:\n            K = 
np.zeros((n, n))\n            K[np.triu_indices(n)] = ts.genetic_relatedness(\n                sample_sets,\n                indexes,\n                mode=mode,\n                proportion=False,\n                span_normalise=True,\n                centre=centre,\n                polarised=polarised,\n            )\n            K = K + np.triu(K, 1).transpose()\n    else:\n        windows = ts.parse_windows(windows)\n        n_windows = len(windows) - 1\n        out = ts.genetic_relatedness(\n            sample_sets,\n            indexes,\n            mode=mode,\n            windows=windows,\n            proportion=False,\n            span_normalise=True,\n            polarised=polarised,\n            centre=centre,\n        )\n        if mode == \"node\":\n            n_nodes = ts.num_nodes\n            K = np.zeros((n_windows, n_nodes, n, n))\n            for win in range(n_windows):\n                for node in range(n_nodes):\n                    K_this = np.zeros((n, n))\n                    K_this[np.triu_indices(n)] = out[win, node, :]\n                    K_this = K_this + np.triu(K_this, 1).transpose()\n                    K[win, node, :, :] = K_this\n        else:\n            K = np.zeros((n_windows, n, n))\n            for win in range(n_windows):\n                K_this = np.zeros((n, n))\n                K_this[np.triu_indices(n)] = out[win, :]\n                K_this = K_this + np.triu(K_this, 1).transpose()\n                K[win, :, :] = K_this\n    return K\n\n\ndef genetic_relatedness_weighted(\n    ts, W, indexes, windows=None, mode=\"site\", polarised=True, centre=True\n):\n    if centre:\n        W_mean = W.mean(axis=0)\n        W = W - W_mean\n    sample_sets = [[u] for u in ts.samples()]\n    K = genetic_relatedness_matrix(\n        ts, sample_sets, windows=windows, mode=mode, centre=centre, polarised=polarised\n    )\n    n_indexes = len(indexes)\n    n_nodes = ts.num_nodes\n    if windows is None:\n        if mode == \"node\":\n         
   out = np.zeros((n_nodes, n_indexes))\n        else:\n            out = np.zeros(n_indexes)\n    else:\n        windows = ts.parse_windows(windows)\n        n_windows = len(windows) - 1\n        if mode == \"node\":\n            out = np.zeros((n_windows, n_nodes, n_indexes))\n        else:\n            out = np.zeros((n_windows, n_indexes))\n    for pair in range(n_indexes):\n        i1 = indexes[pair][0]\n        i2 = indexes[pair][1]\n        if windows is None:\n            if mode == \"node\":\n                for node in range(n_nodes):\n                    this_K = K[node, :, :]\n                    out[node, pair] = W[:, i1] @ this_K @ W[:, i2]\n            else:\n                out[pair] = W[:, i1] @ K @ W[:, i2]\n        else:\n            for win in range(n_windows):\n                if mode == \"node\":\n                    for node in range(n_nodes):\n                        this_K = K[win, node, :, :]\n                        out[win, node, pair] = W[:, i1] @ this_K @ W[:, i2]\n                else:\n                    this_K = K[win, :, :]\n                    out[win, pair] = W[:, i1] @ this_K @ W[:, i2]\n    return out\n\n\ndef example_index_pairs(weights):\n    assert weights.shape[1] >= 2\n    yield [(0, 1)]\n    yield [(1, 0), (0, 1)]\n    if weights.shape[1] > 2:\n        yield [(0, 1), (1, 2), (0, 2)]\n\n\nclass TestGeneticRelatednessWeighted(StatsTestCase, WeightStatsMixin):\n    # Derived classes define this to get a specific stats mode.\n    mode = None\n\n    def verify_definition(\n        self,\n        ts,\n        W,\n        indexes,\n        windows,\n        summary_func,\n        ts_method,\n        definition,\n        polarised=True,\n        centre=True,\n    ):\n        # Determine output_dim of the function\n        M = len(indexes)\n\n        sigma1 = ts.general_stat(\n            W,\n            summary_func,\n            M,\n            windows,\n            mode=self.mode,\n            span_normalise=True,\n            
strict=centre,\n            polarised=polarised,\n        )\n        sigma2 = general_stat(\n            ts,\n            W,\n            summary_func,\n            windows,\n            mode=self.mode,\n            span_normalise=True,\n            polarised=polarised,\n        )\n\n        sigma3 = ts_method(\n            W,\n            indexes=indexes,\n            windows=windows,\n            mode=self.mode,\n            polarised=polarised,\n            centre=centre,\n        )\n        sigma4 = definition(\n            ts,\n            W,\n            indexes=indexes,\n            windows=windows,\n            mode=self.mode,\n            polarised=polarised,\n            centre=centre,\n        )\n        assert sigma1.shape == sigma2.shape\n        assert sigma1.shape == sigma3.shape\n        assert sigma1.shape == sigma4.shape\n        self.assertArrayAlmostEqual(sigma1, sigma2)\n        self.assertArrayAlmostEqual(sigma1, sigma3)\n        self.assertArrayAlmostEqual(sigma1, sigma4)\n\n    def verify(self, ts):\n        for W, windows in subset_combos(\n            self.example_weights(ts, min_size=2), example_windows(ts), p=0.1\n        ):\n            for indexes in example_index_pairs(W):\n                self.verify_weighted_stat(ts, W, indexes, windows)\n\n    def verify_weighted_stat(self, ts, W, indexes, windows):\n        n = W.shape[0]\n        K = W.shape[1]\n        WW = np.column_stack([W, np.ones(n) / n])\n        W_sum = WW.sum(axis=0)\n\n        def f_noncentred(x):\n            return np.array([x[i] * x[j] for i, j in indexes])\n\n        def f_centred(x):\n            pn = x[K]\n            return np.array(\n                [(x[i] - W_sum[i] * pn) * (x[j] - W_sum[j] * pn) for i, j in indexes]\n            )\n\n        for centre, polarised in [\n            (True, True),\n            (False, True),\n            (True, False),\n            (False, False),\n        ]:\n            f = f_centred if centre else f_noncentred\n            
self.verify_definition(\n                ts,\n                WW,\n                indexes,\n                windows,\n                f,\n                ts.genetic_relatedness_weighted,\n                genetic_relatedness_weighted,\n                centre=centre,\n                polarised=polarised,\n            )\n\n\nclass TestBranchGeneticRelatednessWeighted(\n    TestGeneticRelatednessWeighted, TopologyExamplesMixin\n):\n    mode = \"branch\"\n\n\nclass TestNodeGeneticRelatednessWeighted(\n    TestGeneticRelatednessWeighted, TopologyExamplesMixin\n):\n    mode = \"node\"\n\n\nclass TestSiteGeneticRelatednessWeighted(\n    TestGeneticRelatednessWeighted, MutatedTopologyExamplesMixin\n):\n    mode = \"site\"\n\n\n# NOTE: these classes don't follow the same (anti)-patterns as used elsewhere as they\n# were added several years afterwards.\n\n\nclass TestGeneticRelatednessWeightedSimpleExamples:\n    # Values verified against the simple implementations above\n    site_value = 22.24\n    branch_value = 29.44\n\n    def fixture(self):\n        ts = tskit.Tree.generate_balanced(5).tree_sequence\n        # Arbitrary weights that give non-zero results\n        W = np.zeros((ts.num_samples, 2))\n        W[0, :] = 1\n        W[1, :] = 2\n        return tsutil.insert_branch_sites(ts), W\n\n    def test_no_arguments_site(self):\n        ts, W = self.fixture()\n        X = ts.genetic_relatedness_weighted(W, mode=\"site\")\n        assert X.shape == tuple()\n        nt.assert_almost_equal(X, self.site_value)\n\n    def test_windows_site(self):\n        ts, W = self.fixture()\n        X = ts.genetic_relatedness_weighted(W, mode=\"site\", windows=[0, 1 - 1e-12, 1])\n        assert X.shape == (2,)\n        nt.assert_almost_equal(X[0], self.site_value)\n        nt.assert_almost_equal(X[1], 0)\n\n    def test_no_arguments_branch(self):\n        ts, W = self.fixture()\n        X = ts.genetic_relatedness_weighted(W, mode=\"branch\")\n        assert X.shape == tuple()\n        
nt.assert_almost_equal(X, self.branch_value)\n\n    def test_windows_branch(self):\n        ts, W = self.fixture()\n        X = ts.genetic_relatedness_weighted(W, mode=\"branch\", windows=[0, 0.5, 1])\n        assert X.shape == (2,)\n        nt.assert_almost_equal(X, self.branch_value)\n\n    def test_indexes_1D(self):\n        ts, W = self.fixture()\n        indexes = [0, 1]\n        X = ts.genetic_relatedness_weighted(W, indexes, mode=\"branch\")\n        assert X.shape == tuple()\n        nt.assert_almost_equal(X, self.branch_value)\n\n    def test_indexes_2D(self):\n        ts, W = self.fixture()\n        indexes = [[0, 1]]\n        X = ts.genetic_relatedness_weighted(W, indexes, mode=\"branch\")\n        assert X.shape == (1,)\n        nt.assert_almost_equal(X, self.branch_value)\n\n    def test_indexes_2D_windows(self):\n        ts, W = self.fixture()\n        indexes = [[0, 1], [0, 1]]\n        X = ts.genetic_relatedness_weighted(\n            W, indexes, windows=[0, 0.5, 1], mode=\"branch\"\n        )\n        assert X.shape == (2, 2)\n        nt.assert_almost_equal(X, self.branch_value)\n\n\nclass TestGeneticRelatednessWeightedErrors:\n    def ts(self):\n        return tskit.Tree.generate_balanced(3).tree_sequence\n\n    @pytest.mark.parametrize(\"W\", [[0], np.array([0]), np.zeros(100)])\n    def test_bad_weight_size(self, W):\n        with pytest.raises(ValueError, match=\"First trait dimension\"):\n            self.ts().genetic_relatedness_weighted(W)\n\n    @pytest.mark.parametrize(\"cols\", [1, 3])\n    def test_no_indexes_with_non_2_cols(self, cols):\n        ts = self.ts()\n        W = np.zeros((ts.num_samples, cols))\n        with pytest.raises(ValueError, match=\"Must specify indexes\"):\n            ts.genetic_relatedness_weighted(W)\n\n    @pytest.mark.parametrize(\"indexes\", [[], [[0]], [[0, 0, 0]], [[[0], [0], [0]]]])\n    def test_bad_index_shapes(self, indexes):\n        ts = self.ts()\n        W = np.zeros((ts.num_samples, 2))\n        
with pytest.raises(ValueError, match=\"Indexes must be convertable to a 2D\"):\n            ts.genetic_relatedness_weighted(W, indexes=indexes)\n\n\n############################################\n# Fst\n############################################\n\n\ndef single_site_Fst(ts, sample_sets, indexes):\n    \"\"\"\n    Compute single-site Fst, which between two groups with frequencies p and q is\n      1 - 2 * (p (1-p) + q(1-q)) / ( p(1-p) + q(1-q) + p(1-q) + q(1-p) )\n    or in the multiallelic case, replacing p(1-p) with the sum over alleles of p(1-p),\n    and adjusted for sampling without replacement.\n    \"\"\"\n    # TODO: what to do in this case?\n    if ts.num_sites == 0:\n        out = np.array([np.repeat(np.nan, len(indexes))])\n        return out\n    out = np.zeros((ts.num_sites, len(indexes)))\n    samples = ts.samples()\n    # TODO deal with missing data properly.\n    for j, v in enumerate(ts.variants(isolated_as_missing=False)):\n        for i, (ix, iy) in enumerate(indexes):\n            g = v.genotypes\n            X = sample_sets[ix]\n            Y = sample_sets[iy]\n            gX = [a for k, a in zip(samples, g) if k in X]\n            gY = [a for k, a in zip(samples, g) if k in Y]\n            nX = len(X)\n            nY = len(Y)\n            dX = dY = dXY = 0\n            for a in set(g):\n                fX = np.sum(gX == a)\n                fY = np.sum(gY == a)\n                with suppress_division_by_zero_warning():\n                    dX += fX * (nX - fX) / (nX * (nX - 1))\n                    dY += fY * (nY - fY) / (nY * (nY - 1))\n                    dXY += (fX * (nY - fY) + (nX - fX) * fY) / (2 * nX * nY)\n            with suppress_division_by_zero_warning():\n                out[j][i] = 1 - 2 * (dX + dY) / (dX + dY + 2 * dXY)\n    return out\n\n\nclass TestFst(StatsTestCase, TwoWaySampleSetStatsMixin):\n    # Derived classes define this to get a specific stats mode.\n    mode = None\n\n    def verify(self, ts):\n        # only check 
per-site\n        for sample_sets in example_sample_sets(ts, min_size=2):\n            for indexes in example_sample_set_index_pairs(sample_sets):\n                self.verify_persite_Fst(ts, sample_sets, indexes)\n\n    def verify_persite_Fst(self, ts, sample_sets, indexes):\n        sigma1 = ts.Fst(\n            sample_sets,\n            indexes=indexes,\n            windows=\"sites\",\n            mode=self.mode,\n            span_normalise=False,\n        )\n        sigma2 = single_site_Fst(ts, sample_sets, indexes)\n        assert sigma1.shape == sigma2.shape\n        self.assertArrayAlmostEqual(sigma1, sigma2)\n\n\nclass FstInterfaceMixin:\n    def test_interface(self):\n        ts = msprime.simulate(10, mutation_rate=0.0)\n        sample_sets = [[0, 1, 2], [6, 7], [4]]\n        with pytest.raises(ValueError):\n            ts.Fst(sample_sets, mode=self.mode)\n        with pytest.raises(ValueError):\n            ts.Fst(sample_sets, indexes=[(0, 1, 2), (3, 4, 5)], mode=self.mode)\n        with pytest.raises(tskit.LibraryError):\n            ts.Fst(sample_sets, indexes=[(0, 1), (0, 20)])\n        sigma1 = ts.Fst(sample_sets, indexes=[(0, 1)], mode=self.mode)\n        sigma2 = ts.Fst(sample_sets, indexes=[(0, 1), (0, 2), (1, 2)], mode=self.mode)\n        self.assertArrayAlmostEqual(sigma1[..., 0], sigma2[..., 0])\n\n\nclass TestSiteFst(TestFst, MutatedTopologyExamplesMixin, FstInterfaceMixin):\n    mode = \"site\"\n\n\n# Since Fst is defined using diversity and divergence, we don't seriously\n# test it for correctness for node and branch, and only test the interface.\n\n\nclass TestNodeFst(StatsTestCase, FstInterfaceMixin):\n    mode = \"node\"\n\n\nclass TestBranchFst(StatsTestCase, FstInterfaceMixin):\n    mode = \"node\"\n\n\n############################################\n# Y2\n############################################\n\n\ndef branch_Y2(ts, sample_sets, indexes, windows=None, span_normalise=True):\n    windows = ts.parse_windows(windows)\n    out = 
np.zeros((len(windows) - 1, len(indexes)))\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        for i, (ix, iy) in enumerate(indexes):\n            X = sample_sets[ix]\n            Y = sample_sets[iy]\n            denom = np.float64(len(X) * len(Y) * (len(Y) - 1))\n            has_trees = False\n            S = 0\n            for tr in ts.trees():\n                if tr.interval.right <= begin:\n                    continue\n                if tr.interval.left >= end:\n                    break\n                if tr.total_branch_length > 0:\n                    has_trees = True\n                this_length = min(end, tr.interval.right) - max(begin, tr.interval.left)\n                for x in X:\n                    for y in Y:\n                        for z in set(Y) - {y}:\n                            xy_mrca = tr.mrca(x, y)\n                            xz_mrca = tr.mrca(x, z)\n                            yz_mrca = tr.mrca(y, z)\n                            if xy_mrca == xz_mrca:\n                                #   /\\\n                                #  / /\\\n                                # x y  z\n                                S += path_length(tr, x, yz_mrca) * this_length\n                            elif xy_mrca == yz_mrca:\n                                #   /\\\n                                #  / /\\\n                                # y x  z\n                                S += path_length(tr, x, xz_mrca) * this_length\n                            elif xz_mrca == yz_mrca:\n                                #   /\\\n                                #  / /\\\n                                # z x  y\n                                S += path_length(tr, x, xy_mrca) * this_length\n            if has_trees:\n                with suppress_division_by_zero_warning():\n                    out[j][i] = S / denom\n                if span_normalise:\n                    out[j][i] /= end - begin\n    return 
out\n\n\ndef site_Y2(ts, sample_sets, indexes, windows=None, span_normalise=True):\n    windows = ts.parse_windows(windows)\n    samples = ts.samples()\n    out = np.zeros((len(windows) - 1, len(indexes)))\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        haps = ts.genotype_matrix(isolated_as_missing=False).T\n        site_positions = [x.position for x in ts.sites()]\n        for i, (ix, iy) in enumerate(indexes):\n            X = sample_sets[ix]\n            Y = sample_sets[iy]\n            denom = np.float64(len(X) * len(Y) * (len(Y) - 1))\n            S = 0\n            site_in_window = False\n            for k in range(ts.num_sites):\n                if (site_positions[k] >= begin) and (site_positions[k] < end):\n                    site_in_window = True\n                    for x in X:\n                        x_index = np.where(samples == x)[0][0]\n                        for y in Y:\n                            y_index = np.where(samples == y)[0][0]\n                            for z in set(Y) - {y}:\n                                z_index = np.where(samples == z)[0][0]\n                                condition = (\n                                    haps[x_index, k] != haps[y_index, k]\n                                    and haps[x_index, k] != haps[z_index, k]\n                                )\n                                if condition:\n                                    # x|yz\n                                    S += 1\n            if site_in_window:\n                with suppress_division_by_zero_warning():\n                    out[j][i] = S / denom\n                if span_normalise:\n                    out[j][i] /= end - begin\n    return out\n\n\ndef node_Y2(ts, sample_sets, indexes, windows=None, span_normalise=True):\n    out = np.zeros((len(windows) - 1, ts.num_nodes, len(indexes)))\n    for i, (ix, iy) in enumerate(indexes):\n        X = sample_sets[ix]\n        Y = 
sample_sets[iy]\n        tX = len(X)\n        tY = len(Y)\n        denom = np.float64(tX * tY * (tY - 1))\n        for j in range(len(windows) - 1):\n            begin = windows[j]\n            end = windows[j + 1]\n            S = np.zeros(ts.num_nodes)\n            for t1, t2 in zip(ts.trees(tracked_samples=X), ts.trees(tracked_samples=Y)):\n                if t1.interval.right <= begin:\n                    continue\n                if t1.interval.left >= end:\n                    break\n                SS = np.zeros(ts.num_nodes)\n                for u in t1.nodes():\n                    # count number of pairwise paths going through u\n                    nX = t1.num_tracked_samples(u)\n                    nY = t2.num_tracked_samples(u)\n                    SS[u] += nX * (tY - nY) * (tY - nY - 1) + (tX - nX) * nY * (nY - 1)\n                S += SS * (min(end, t1.interval.right) - max(begin, t1.interval.left))\n            with suppress_division_by_zero_warning():\n                out[j, :, i] = S / denom\n            if span_normalise:\n                out[j, :, i] /= end - begin\n    return out\n\n\ndef Y2(ts, sample_sets, indexes=None, windows=None, mode=\"site\", span_normalise=True):\n    windows = ts.parse_windows(windows)\n    if indexes is None:\n        indexes = [(0, 1)]\n    method_map = {\"site\": site_Y2, \"node\": node_Y2, \"branch\": branch_Y2}\n    return method_map[mode](\n        ts, sample_sets, indexes=indexes, windows=windows, span_normalise=span_normalise\n    )\n\n\nclass TestY2(StatsTestCase, TwoWaySampleSetStatsMixin):\n    # Derived classes define this to get a specific stats mode.\n    mode = None\n\n    def verify_sample_sets_indexes(self, ts, sample_sets, indexes, windows):\n        n = np.array([len(x) for x in sample_sets])\n\n        denom = np.array([n[i] * n[j] * (n[j] - 1) for i, j in indexes])\n\n        def f(x):\n            numer = np.array(\n                [(x[i] * (n[j] - x[j]) * (n[j] - x[j] - 1)) for i, j in 
indexes]\n            )\n            return numer / denom\n\n        self.verify_definition(ts, sample_sets, indexes, windows, f, ts.Y2, Y2)\n\n\nclass TestBranchY2(TestY2, TopologyExamplesMixin):\n    mode = \"branch\"\n\n\nclass TestNodeY2(TestY2, TopologyExamplesMixin):\n    mode = \"node\"\n\n\nclass TestSiteY2(TestY2, MutatedTopologyExamplesMixin):\n    mode = \"site\"\n\n\n############################################\n# Y3\n############################################\n\n\ndef branch_Y3(ts, sample_sets, indexes, windows=None, span_normalise=True):\n    windows = ts.parse_windows(windows)\n    out = np.zeros((len(windows) - 1, len(indexes)))\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        for i, (ix, iy, iz) in enumerate(indexes):\n            S = 0\n            X = sample_sets[ix]\n            Y = sample_sets[iy]\n            Z = sample_sets[iz]\n            denom = np.float64(len(X) * len(Y) * len(Z))\n            has_trees = False\n            for tr in ts.trees():\n                if tr.interval.right <= begin:\n                    continue\n                if tr.interval.left >= end:\n                    break\n                if tr.total_branch_length > 0:\n                    has_trees = True\n                this_length = min(end, tr.interval.right) - max(begin, tr.interval.left)\n                for x in X:\n                    for y in Y:\n                        for z in Z:\n                            xy_mrca = tr.mrca(x, y)\n                            xz_mrca = tr.mrca(x, z)\n                            yz_mrca = tr.mrca(y, z)\n                            if xy_mrca == xz_mrca:\n                                #   /\\\n                                #  / /\\\n                                # x y  z\n                                S += path_length(tr, x, yz_mrca) * this_length\n                            elif xy_mrca == yz_mrca:\n                                #   /\\\n                 
               #  / /\\\n                                # y x  z\n                                S += path_length(tr, x, xz_mrca) * this_length\n                            elif xz_mrca == yz_mrca:\n                                #   /\\\n                                #  / /\\\n                                # z x  y\n                                S += path_length(tr, x, xy_mrca) * this_length\n            if has_trees:\n                with suppress_division_by_zero_warning():\n                    out[j][i] = S / denom\n                if span_normalise:\n                    out[j][i] /= end - begin\n    return out\n\n\ndef site_Y3(ts, sample_sets, indexes, windows=None, span_normalise=True):\n    windows = ts.parse_windows(windows)\n    out = np.zeros((len(windows) - 1, len(indexes)))\n    haps = ts.genotype_matrix(isolated_as_missing=False).T\n    site_positions = ts.tables.sites.position\n    samples = ts.samples()\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        for i, (ix, iy, iz) in enumerate(indexes):\n            X = sample_sets[ix]\n            Y = sample_sets[iy]\n            Z = sample_sets[iz]\n            denom = np.float64(len(X) * len(Y) * len(Z))\n            S = 0\n            site_in_window = False\n            for k in range(ts.num_sites):\n                if (site_positions[k] >= begin) and (site_positions[k] < end):\n                    site_in_window = True\n                    for x in X:\n                        x_index = np.where(samples == x)[0][0]\n                        for y in Y:\n                            y_index = np.where(samples == y)[0][0]\n                            for z in Z:\n                                z_index = np.where(samples == z)[0][0]\n                                if (haps[x_index][k] != haps[y_index][k]) and (\n                                    haps[x_index][k] != haps[z_index][k]\n                                ):\n                         
           # x|yz\n                                    with suppress_division_by_zero_warning():\n                                        S += 1\n            if site_in_window:\n                with suppress_division_by_zero_warning():\n                    out[j][i] = S / denom\n                if span_normalise:\n                    out[j][i] /= end - begin\n    return out\n\n\ndef node_Y3(ts, sample_sets, indexes, windows=None, span_normalise=True):\n    out = np.zeros((len(windows) - 1, ts.num_nodes, len(indexes)))\n    for i, (ix, iy, iz) in enumerate(indexes):\n        X = sample_sets[ix]\n        Y = sample_sets[iy]\n        Z = sample_sets[iz]\n        tX = len(X)\n        tY = len(Y)\n        tZ = len(Z)\n        denom = np.float64(tX * tY * tZ)\n        for j in range(len(windows) - 1):\n            begin = windows[j]\n            end = windows[j + 1]\n            S = np.zeros(ts.num_nodes)\n            for t1, t2, t3 in zip(\n                ts.trees(tracked_samples=X),\n                ts.trees(tracked_samples=Y),\n                ts.trees(tracked_samples=Z),\n            ):\n                if t1.interval.right <= begin:\n                    continue\n                if t1.interval.left >= end:\n                    break\n                SS = np.zeros(ts.num_nodes)\n                for u in t1.nodes():\n                    # count number of pairwise paths going through u\n                    nX = t1.num_tracked_samples(u)\n                    nY = t2.num_tracked_samples(u)\n                    nZ = t3.num_tracked_samples(u)\n                    SS[u] += nX * (tY - nY) * (tZ - nZ) + (tX - nX) * nY * nZ\n                S += SS * (min(end, t1.interval.right) - max(begin, t1.interval.left))\n            with suppress_division_by_zero_warning():\n                out[j, :, i] = S / denom\n            if span_normalise:\n                out[j, :, i] /= end - begin\n    return out\n\n\ndef Y3(ts, sample_sets, indexes=None, windows=None, mode=\"site\", 
span_normalise=True):\n    windows = ts.parse_windows(windows)\n    if indexes is None:\n        indexes = [(0, 1, 2)]\n    method_map = {\"site\": site_Y3, \"node\": node_Y3, \"branch\": branch_Y3}\n    return method_map[mode](\n        ts, sample_sets, indexes=indexes, windows=windows, span_normalise=span_normalise\n    )\n\n\nclass TestY3(StatsTestCase, ThreeWaySampleSetStatsMixin):\n    # Derived classes define this to get a specific stats mode.\n    mode = None\n\n    def verify_sample_sets_indexes(self, ts, sample_sets, indexes, windows):\n        n = np.array([len(x) for x in sample_sets])\n        denom = np.array([n[i] * n[j] * n[k] for i, j, k in indexes])\n\n        def f(x):\n            numer = np.array(\n                [x[i] * (n[j] - x[j]) * (n[k] - x[k]) for i, j, k in indexes]\n            )\n            return numer / denom\n\n        self.verify_definition(ts, sample_sets, indexes, windows, f, ts.Y3, Y3)\n\n\nclass TestBranchY3(TestY3, TopologyExamplesMixin):\n    mode = \"branch\"\n\n\nclass TestNodeY3(TestY3, TopologyExamplesMixin):\n    mode = \"node\"\n\n\nclass TestSiteY3(TestY3, MutatedTopologyExamplesMixin):\n    mode = \"site\"\n\n\n############################################\n# f2\n############################################\n\n\ndef branch_f2(ts, sample_sets, indexes, windows=None, span_normalise=True):\n    # this is f4(A,B;A,B) but drawing distinct samples from A and B\n    windows = ts.parse_windows(windows)\n    out = np.zeros((len(windows) - 1, len(indexes)))\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        for i, (ia, ib) in enumerate(indexes):\n            A = sample_sets[ia]\n            B = sample_sets[ib]\n            denom = np.float64(len(A) * (len(A) - 1) * len(B) * (len(B) - 1))\n            has_trees = False\n            S = 0\n            for tr in ts.trees():\n                if tr.interval.right <= begin:\n                    continue\n                if 
tr.interval.left >= end:\n                    break\n                if tr.total_branch_length > 0:\n                    has_trees = True\n                this_length = min(end, tr.interval.right) - max(begin, tr.interval.left)\n                SS = 0\n                for a in A:\n                    for b in B:\n                        for c in set(A) - {a}:\n                            for d in set(B) - {b}:\n                                with suppress_division_by_zero_warning():\n                                    SS += path_length(tr, tr.mrca(a, c), tr.mrca(b, d))\n                                    SS -= path_length(tr, tr.mrca(a, d), tr.mrca(b, c))\n                S += SS * this_length\n            if has_trees:\n                with suppress_division_by_zero_warning():\n                    out[j][i] = S / denom\n                if span_normalise:\n                    out[j][i] /= end - begin\n    return out\n\n\ndef site_f2(ts, sample_sets, indexes, windows=None, span_normalise=True):\n    windows = ts.parse_windows(windows)\n    out = np.zeros((len(windows) - 1, len(indexes)))\n    samples = ts.samples()\n    haps = ts.genotype_matrix(isolated_as_missing=False).T\n    site_positions = ts.tables.sites.position\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        for i, (iA, iB) in enumerate(indexes):\n            A = sample_sets[iA]\n            B = sample_sets[iB]\n            denom = np.float64(len(A) * (len(A) - 1) * len(B) * (len(B) - 1))\n            S = 0\n            site_in_window = False\n            for k in range(ts.num_sites):\n                if (site_positions[k] >= begin) and (site_positions[k] < end):\n                    site_in_window = True\n                    for a in A:\n                        a_index = np.where(samples == a)[0][0]\n                        for b in B:\n                            b_index = np.where(samples == b)[0][0]\n                            for c in set(A) 
- {a}:\n                                c_index = np.where(samples == c)[0][0]\n                                for d in set(B) - {b}:\n                                    d_index = np.where(samples == d)[0][0]\n                                    if (\n                                        (haps[a_index][k] == haps[c_index][k])\n                                        and (haps[a_index][k] != haps[d_index][k])\n                                        and (haps[a_index][k] != haps[b_index][k])\n                                    ):\n                                        # ac|bd\n                                        S += 1\n                                    elif (\n                                        (haps[a_index][k] == haps[d_index][k])\n                                        and (haps[a_index][k] != haps[c_index][k])\n                                        and (haps[a_index][k] != haps[b_index][k])\n                                    ):\n                                        # ad|bc\n                                        S -= 1\n            if site_in_window:\n                with np.errstate(invalid=\"ignore\", divide=\"ignore\"):\n                    out[j][i] = S / denom\n                if span_normalise:\n                    out[j][i] /= end - begin\n    return out\n\n\ndef node_f2(ts, sample_sets, indexes, windows=None, span_normalise=True):\n    out = np.zeros((len(windows) - 1, ts.num_nodes, len(indexes)))\n    for i, (ia, ib) in enumerate(indexes):\n        A = sample_sets[ia]\n        B = sample_sets[ib]\n        tA = len(A)\n        tB = len(B)\n        denom = np.float64(tA * (tA - 1) * tB * (tB - 1))\n        for j in range(len(windows) - 1):\n            begin = windows[j]\n            end = windows[j + 1]\n            S = np.zeros(ts.num_nodes)\n            for t1, t2 in zip(ts.trees(tracked_samples=A), ts.trees(tracked_samples=B)):\n                if t1.interval.right <= begin:\n                    continue\n                
if t1.interval.left >= end:\n                    break\n                SS = np.zeros(ts.num_nodes)\n                for u in t1.nodes():\n                    # count number of pairwise paths going through u\n                    nA = t1.num_tracked_samples(u)\n                    nB = t2.num_tracked_samples(u)\n                    # xy|uv - xv|uy with x,y in A, u, v in B\n                    SS[u] += nA * (nA - 1) * (tB - nB) * (tB - nB - 1) + (tA - nA) * (\n                        tA - nA - 1\n                    ) * nB * (nB - 1)\n                    SS[u] -= 2 * nA * nB * (tA - nA) * (tB - nB)\n                S += SS * (min(end, t1.interval.right) - max(begin, t1.interval.left))\n            with suppress_division_by_zero_warning():\n                out[j, :, i] = S / denom\n            if span_normalise:\n                out[j, :, i] /= end - begin\n    return out\n\n\ndef f2(ts, sample_sets, indexes=None, windows=None, mode=\"site\", span_normalise=True):\n    \"\"\"\n    Patterson's f2 statistic definitions.\n    \"\"\"\n    windows = ts.parse_windows(windows)\n    if indexes is None:\n        indexes = [(0, 1)]\n    method_map = {\"site\": site_f2, \"node\": node_f2, \"branch\": branch_f2}\n    return method_map[mode](\n        ts, sample_sets, indexes=indexes, windows=windows, span_normalise=span_normalise\n    )\n\n\nclass Testf2(StatsTestCase, TwoWaySampleSetStatsMixin):\n    # Derived classes define this to get a specific stats mode.\n    mode = None\n\n    def verify_sample_sets_indexes(self, ts, sample_sets, indexes, windows):\n        n = np.array([len(x) for x in sample_sets])\n\n        denom = np.array([n[i] * (n[i] - 1) * n[j] * (n[j] - 1) for i, j in indexes])\n\n        def f(x):\n            numer = np.array(\n                [\n                    x[i] * (x[i] - 1) * (n[j] - x[j]) * (n[j] - x[j] - 1)\n                    - x[i] * (n[i] - x[i]) * (n[j] - x[j]) * x[j]\n                    for i, j in indexes\n                ]\n            )\n  
          return numer / denom\n\n        self.verify_definition(ts, sample_sets, indexes, windows, f, ts.f2, f2)\n\n\nclass TestBranchf2(Testf2, TopologyExamplesMixin):\n    mode = \"branch\"\n\n\nclass TestNodef2(Testf2, TopologyExamplesMixin):\n    mode = \"node\"\n\n\nclass TestSitef2(Testf2, MutatedTopologyExamplesMixin):\n    mode = \"site\"\n\n\n############################################\n# f3\n############################################\n\n\ndef branch_f3(ts, sample_sets, indexes, windows=None, span_normalise=True):\n    # this is f4(A,B;A,C) but drawing distinct samples from A\n    windows = ts.parse_windows(windows)\n    out = np.zeros((len(windows) - 1, len(indexes)))\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        for i, (ia, ib, ic) in enumerate(indexes):\n            A = sample_sets[ia]\n            B = sample_sets[ib]\n            C = sample_sets[ic]\n            denom = np.float64(len(A) * (len(A) - 1) * len(B) * len(C))\n            has_trees = False\n            S = 0\n            for tr in ts.trees():\n                if tr.interval.right <= begin:\n                    continue\n                if tr.interval.left >= end:\n                    break\n                if tr.total_branch_length > 0:\n                    has_trees = True\n                this_length = min(end, tr.interval.right) - max(begin, tr.interval.left)\n                SS = 0\n                for a in A:\n                    for b in B:\n                        for c in set(A) - {a}:\n                            for d in C:\n                                SS += path_length(tr, tr.mrca(a, c), tr.mrca(b, d))\n                                SS -= path_length(tr, tr.mrca(a, d), tr.mrca(b, c))\n                S += SS * this_length\n            if has_trees:\n                with suppress_division_by_zero_warning():\n                    out[j][i] = S / denom\n                if span_normalise:\n                    
out[j][i] /= end - begin\n    return out\n\n\ndef site_f3(ts, sample_sets, indexes, windows=None, span_normalise=True):\n    windows = ts.parse_windows(windows)\n    out = np.zeros((len(windows) - 1, len(indexes)))\n    samples = ts.samples()\n    haps = ts.genotype_matrix(isolated_as_missing=False).T\n    site_positions = ts.tables.sites.position\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        for i, (iA, iB, iC) in enumerate(indexes):\n            A = sample_sets[iA]\n            B = sample_sets[iB]\n            C = sample_sets[iC]\n            denom = np.float64(len(A) * (len(A) - 1) * len(B) * len(C))\n            S = 0\n            site_in_window = False\n            for k in range(ts.num_sites):\n                if (site_positions[k] >= begin) and (site_positions[k] < end):\n                    site_in_window = True\n                    for a in A:\n                        a_index = np.where(samples == a)[0][0]\n                        for b in B:\n                            b_index = np.where(samples == b)[0][0]\n                            for c in set(A) - {a}:\n                                c_index = np.where(samples == c)[0][0]\n                                for d in C:\n                                    d_index = np.where(samples == d)[0][0]\n                                    if (\n                                        (haps[a_index][k] == haps[c_index][k])\n                                        and (haps[a_index][k] != haps[d_index][k])\n                                        and (haps[a_index][k] != haps[b_index][k])\n                                    ):\n                                        # ac|bd\n                                        S += 1\n                                    elif (\n                                        (haps[a_index][k] == haps[d_index][k])\n                                        and (haps[a_index][k] != haps[c_index][k])\n                         
               and (haps[a_index][k] != haps[b_index][k])\n                                    ):\n                                        # ad|bc\n                                        S -= 1\n            if site_in_window:\n                with np.errstate(invalid=\"ignore\", divide=\"ignore\"):\n                    out[j][i] = S / denom\n                if span_normalise:\n                    out[j][i] /= end - begin\n    return out\n\n\ndef node_f3(ts, sample_sets, indexes, windows=None, span_normalise=True):\n    out = np.zeros((len(windows) - 1, ts.num_nodes, len(indexes)))\n    for i, (iA, iB, iC) in enumerate(indexes):\n        A = sample_sets[iA]\n        B = sample_sets[iB]\n        C = sample_sets[iC]\n        tA = len(A)\n        tB = len(B)\n        tC = len(C)\n        denom = np.float64(tA * (tA - 1) * tB * tC)\n        for j in range(len(windows) - 1):\n            begin = windows[j]\n            end = windows[j + 1]\n            S = np.zeros(ts.num_nodes)\n            for t1, t2, t3 in zip(\n                ts.trees(tracked_samples=A),\n                ts.trees(tracked_samples=B),\n                ts.trees(tracked_samples=C),\n            ):\n                if t1.interval.right <= begin:\n                    continue\n                if t1.interval.left >= end:\n                    break\n                SS = np.zeros(ts.num_nodes)\n                for u in t1.nodes():\n                    # count number of pairwise paths going through u\n                    nA = t1.num_tracked_samples(u)\n                    nB = t2.num_tracked_samples(u)\n                    nC = t3.num_tracked_samples(u)\n                    # xy|uv - xv|uy with x,y in A, u in B and v in C\n                    SS[u] += (\n                        nA * (nA - 1) * (tB - nB) * (tC - nC)\n                        + (tA - nA) * (tA - nA - 1) * nB * nC\n                    )\n                    SS[u] -= (\n                        nA * nC * (tA - nA) * (tB - nB) + (tA - nA) * (tC - 
nC) * nA * nB\n                    )\n                S += SS * (min(end, t1.interval.right) - max(begin, t1.interval.left))\n            with suppress_division_by_zero_warning():\n                out[j, :, i] = S / denom\n            if span_normalise:\n                out[j, :, i] /= end - begin\n    return out\n\n\ndef f3(ts, sample_sets, indexes=None, windows=None, mode=\"site\", span_normalise=True):\n    \"\"\"\n    Patterson's f3 statistic definitions.\n    \"\"\"\n    windows = ts.parse_windows(windows)\n    if indexes is None:\n        indexes = [(0, 1, 2)]\n    method_map = {\"site\": site_f3, \"node\": node_f3, \"branch\": branch_f3}\n    return method_map[mode](\n        ts, sample_sets, indexes=indexes, windows=windows, span_normalise=span_normalise\n    )\n\n\nclass Testf3(StatsTestCase, ThreeWaySampleSetStatsMixin):\n    # Derived classes define this to get a specific stats mode.\n    mode = None\n\n    def verify_sample_sets_indexes(self, ts, sample_sets, indexes, windows):\n        n = np.array([len(x) for x in sample_sets])\n        denom = np.array([n[i] * (n[i] - 1) * n[j] * n[k] for i, j, k in indexes])\n\n        def f(x):\n            numer = np.array(\n                [\n                    x[i] * (x[i] - 1) * (n[j] - x[j]) * (n[k] - x[k])\n                    - x[i] * (n[i] - x[i]) * (n[j] - x[j]) * x[k]\n                    for i, j, k in indexes\n                ]\n            )\n            return numer / denom\n\n        self.verify_definition(ts, sample_sets, indexes, windows, f, ts.f3, f3)\n\n\nclass TestBranchf3(Testf3, TopologyExamplesMixin):\n    mode = \"branch\"\n\n\nclass TestNodef3(Testf3, TopologyExamplesMixin):\n    mode = \"node\"\n\n\nclass TestSitef3(Testf3, MutatedTopologyExamplesMixin):\n    mode = \"site\"\n\n\n############################################\n# f4\n############################################\n\n\ndef branch_f4(ts, sample_sets, indexes, windows=None, span_normalise=True):\n    windows = 
ts.parse_windows(windows)\n    out = np.zeros((len(windows) - 1, len(indexes)))\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        for i, (iA, iB, iC, iD) in enumerate(indexes):\n            A = sample_sets[iA]\n            B = sample_sets[iB]\n            C = sample_sets[iC]\n            D = sample_sets[iD]\n            denom = np.float64(len(A) * len(B) * len(C) * len(D))\n            has_trees = False\n            S = 0\n            for tr in ts.trees():\n                if tr.interval.right <= begin:\n                    continue\n                if tr.interval.left >= end:\n                    break\n                if tr.total_branch_length > 0:\n                    has_trees = True\n                this_length = min(end, tr.interval.right) - max(begin, tr.interval.left)\n                SS = 0\n                for a in A:\n                    for b in B:\n                        for c in C:\n                            for d in D:\n                                with suppress_division_by_zero_warning():\n                                    SS += path_length(tr, tr.mrca(a, c), tr.mrca(b, d))\n                                    SS -= path_length(tr, tr.mrca(a, d), tr.mrca(b, c))\n                S += SS * this_length\n            if has_trees:\n                with suppress_division_by_zero_warning():\n                    out[j][i] = S / denom\n                if span_normalise:\n                    out[j][i] /= end - begin\n    return out\n\n\ndef site_f4(ts, sample_sets, indexes, windows=None, span_normalise=True):\n    windows = ts.parse_windows(windows)\n    samples = ts.samples()\n    haps = ts.genotype_matrix(isolated_as_missing=False).T\n    site_positions = ts.tables.sites.position\n    out = np.zeros((len(windows) - 1, len(indexes)))\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        for i, (iA, iB, iC, iD) in enumerate(indexes):\n           
 A = sample_sets[iA]\n            B = sample_sets[iB]\n            C = sample_sets[iC]\n            D = sample_sets[iD]\n            denom = np.float64(len(A) * len(B) * len(C) * len(D))\n            S = 0\n            site_in_window = False\n            for k in range(ts.num_sites):\n                if (site_positions[k] >= begin) and (site_positions[k] < end):\n                    site_in_window = True\n                    for a in A:\n                        a_index = np.where(samples == a)[0][0]\n                        for b in B:\n                            b_index = np.where(samples == b)[0][0]\n                            for c in C:\n                                c_index = np.where(samples == c)[0][0]\n                                for d in D:\n                                    d_index = np.where(samples == d)[0][0]\n                                    if (\n                                        (haps[a_index][k] == haps[c_index][k])\n                                        and (haps[a_index][k] != haps[d_index][k])\n                                        and (haps[a_index][k] != haps[b_index][k])\n                                    ):\n                                        # ac|bd\n                                        S += 1\n                                    elif (\n                                        (haps[a_index][k] == haps[d_index][k])\n                                        and (haps[a_index][k] != haps[c_index][k])\n                                        and (haps[a_index][k] != haps[b_index][k])\n                                    ):\n                                        # ad|bc\n                                        S -= 1\n            if site_in_window:\n                with np.errstate(invalid=\"ignore\", divide=\"ignore\"):\n                    out[j][i] = S / denom\n                if span_normalise:\n                    out[j][i] /= end - begin\n    return out\n\n\ndef node_f4(ts, sample_sets, indexes, 
windows=None, span_normalise=True):\n    windows = ts.parse_windows(windows)\n    out = np.zeros((len(windows) - 1, ts.num_nodes, len(indexes)))\n    for i, (iA, iB, iC, iD) in enumerate(indexes):\n        A = sample_sets[iA]\n        B = sample_sets[iB]\n        C = sample_sets[iC]\n        D = sample_sets[iD]\n        tA = len(A)\n        tB = len(B)\n        tC = len(C)\n        tD = len(D)\n        denom = np.float64(tA * tB * tC * tD)\n        for j in range(len(windows) - 1):\n            begin = windows[j]\n            end = windows[j + 1]\n            S = np.zeros(ts.num_nodes)\n            for t1, t2, t3, t4 in zip(\n                ts.trees(tracked_samples=A),\n                ts.trees(tracked_samples=B),\n                ts.trees(tracked_samples=C),\n                ts.trees(tracked_samples=D),\n            ):\n                if t1.interval.right <= begin:\n                    continue\n                if t1.interval.left >= end:\n                    break\n                SS = np.zeros(ts.num_nodes)\n                for u in t1.nodes():\n                    # count number of pairwise paths going through u\n                    nA = t1.num_tracked_samples(u)\n                    nB = t2.num_tracked_samples(u)\n                    nC = t3.num_tracked_samples(u)\n                    nD = t4.num_tracked_samples(u)\n                    # ac|bd - ad|bc\n                    SS[u] += (\n                        nA * nC * (tB - nB) * (tD - nD) + (tA - nA) * (tC - nC) * nB * nD\n                    )\n                    SS[u] -= (\n                        nA * nD * (tB - nB) * (tC - nC) + (tA - nA) * (tD - nD) * nB * nC\n                    )\n                S += SS * (min(end, t1.interval.right) - max(begin, t1.interval.left))\n            with suppress_division_by_zero_warning():\n                out[j, :, i] = S / denom\n            if span_normalise:\n                out[j, :, i] /= end - begin\n    return out\n\n\ndef f4(ts, sample_sets, indexes=None, 
windows=None, mode=\"site\", span_normalise=True):\n    \"\"\"\n    Patterson's f4 statistic definitions.\n    \"\"\"\n    if indexes is None:\n        indexes = [(0, 1, 2, 3)]\n    method_map = {\"site\": site_f4, \"node\": node_f4, \"branch\": branch_f4}\n    return method_map[mode](\n        ts, sample_sets, indexes=indexes, windows=windows, span_normalise=span_normalise\n    )\n\n\nclass Testf4(StatsTestCase, FourWaySampleSetStatsMixin):\n    # Derived classes define this to get a specific stats mode.\n    mode = None\n\n    def verify_sample_sets_indexes(self, ts, sample_sets, indexes, windows):\n        n = np.array([len(x) for x in sample_sets])\n        denom = np.array([n[i] * n[j] * n[k] * n[l] for i, j, k, l in indexes])\n\n        def f(x):\n            numer = np.array(\n                [\n                    x[i] * x[k] * (n[j] - x[j]) * (n[l] - x[l])\n                    - x[i] * x[l] * (n[j] - x[j]) * (n[k] - x[k])\n                    for i, j, k, l in indexes\n                ]\n            )\n            return numer / denom\n\n        self.verify_definition(ts, sample_sets, indexes, windows, f, ts.f4, f4)\n\n\nclass TestBranchf4(Testf4, TopologyExamplesMixin):\n    mode = \"branch\"\n\n\nclass TestNodef4(Testf4, TopologyExamplesMixin):\n    mode = \"node\"\n\n\nclass TestSitef4(Testf4, MutatedTopologyExamplesMixin):\n    mode = \"site\"\n\n\n############################################\n# Allele frequency spectrum\n############################################\n\n\ndef fold(x, dims):\n    \"\"\"\n    Folds the specified coordinates.\n    \"\"\"\n    x = np.array(x, dtype=int)\n    dims = np.array(dims, dtype=int)\n    k = len(dims)\n    n = np.sum(dims - 1) / 2\n    s = np.sum(x)\n    while s == n and k > 0:\n        k -= 1\n        assert k >= 0\n        n -= (dims[k] - 1) / 2\n        s -= x[k]\n    if s > n:\n        x = dims - 1 - x\n    assert np.all(x >= 0)\n    return tuple(x)\n\n\ndef foldit(A):\n    B = np.zeros(A.shape)\n    dims = 
A.shape\n    inds = [range(k) for k in dims]\n    for ij in itertools.product(*inds):\n        nij = fold(ij, dims)\n        B[nij] += A[ij]\n    return B\n\n\ndef fold_windowed(X):\n    Y = np.zeros(X.shape)\n    for k in range(X.shape[0]):\n        Y[k] = foldit(X[k])\n    return Y\n\n\nclass TestFold:\n    \"\"\"\n    Tests for the fold operation used in the AFS.\n    \"\"\"\n\n    def test_examples(self):\n        A = np.arange(12)\n        Af = np.array([11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])\n\n        assert np.all(foldit(A) == Af)\n\n        B = A.copy().reshape(3, 4)\n        Bf = np.array(\n            [[11.0, 11.0, 11.0, 0.0], [11.0, 11.0, 0.0, 0.0], [11.0, 0.0, 0.0, 0.0]]\n        )\n        assert np.all(foldit(B) == Bf)\n\n        C = A.copy().reshape(3, 2, 2)\n        Cf = np.array(\n            [\n                [[11.0, 11.0], [11.0, 11.0]],\n                [[11.0, 11.0], [0.0, 0.0]],\n                [[0.0, 0.0], [0.0, 0.0]],\n            ]\n        )\n        assert np.all(foldit(C) == Cf)\n\n        D = np.arange(9).reshape((3, 3))\n        Df = np.array([[8.0, 8.0, 8.0], [8.0, 4.0, 0.0], [0.0, 0.0, 0.0]])\n        assert np.all(foldit(D) == Df)\n\n        E = np.arange(9)\n        Ef = np.array([8.0, 8.0, 8.0, 8.0, 4.0, 0.0, 0.0, 0.0, 0.0])\n        assert np.all(foldit(E) == Ef)\n\n    def test_branch_folded(self, ts_ancestry_10_fixture):\n        ts = ts_ancestry_10_fixture\n        folded = ts.allele_frequency_spectrum(\n            windows=[0, 5, 8, 9, 10], mode=\"branch\", polarised=False\n        )\n        unfolded = ts.allele_frequency_spectrum(\n            windows=[0, 5, 8, 9, 10], mode=\"branch\", polarised=True\n        )\n        assert np.allclose(fold_windowed(unfolded), folded)\n\n    def test_site_folded(self, ts_ancestry_10_fixture):\n        ts = ts_ancestry_10_fixture\n        ts = msprime.sim_mutations(ts, rate=1, random_seed=1, discrete_genome=False)\n        for s in ts.sites():\n            
assert len(s.mutations) == 1\n        folded = ts.allele_frequency_spectrum(\n            windows=[0, 5, 8, 9, 10], mode=\"site\", polarised=False, span_normalise=False\n        )\n        unfolded = ts.allele_frequency_spectrum(\n            windows=[0, 5, 8, 9, 10], mode=\"site\", polarised=True, span_normalise=False\n        )\n        assert np.allclose(fold_windowed(unfolded), folded)\n\n\ndef naive_site_allele_frequency_spectrum(\n    ts,\n    sample_sets,\n    windows=None,\n    time_windows=None,\n    polarised=False,\n    span_normalise=True,\n):\n    \"\"\"\n    The joint allele frequency spectrum for sites.\n    \"\"\"\n    windows = ts.parse_windows(windows)\n    num_windows = len(windows) - 1\n    out_dim = [1 + len(sample_set) for sample_set in sample_sets]\n    out = np.zeros([num_windows] + out_dim)\n    G = ts.genotype_matrix(isolated_as_missing=False)\n    samples = ts.samples()\n    # Indexes of the samples within the sample sets into the samples array.\n    sample_set_indexes = [\n        np.array([np.where(x == samples)[0][0] for x in sample_set])\n        for sample_set in sample_sets\n    ]\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        for site in ts.sites():\n            S = np.zeros(out_dim)\n            if begin <= site.position < end:\n                g = G[site.id]\n                alleles = np.unique(g)\n\n                # Any site monomorphic across all samples does not contribute\n                if len(alleles) == 1:\n                    continue\n\n                # For each allele, count the number present in each sample set.\n                count = {\n                    allele: np.zeros(len(sample_sets), dtype=int) for allele in alleles\n                }\n                for k, sample_set in enumerate(sample_set_indexes):\n                    allele_counts = zip(*np.unique(g[sample_set], return_counts=True))\n                    for allele, c in allele_counts:\n       
                 count[allele][k] = c\n                increment = 0.5\n                if polarised:\n                    increment = 1\n                    # Remove the contribution of the ancestral state\n                    if 0 in count:\n                        del count[0]\n                for allele_count in count.values():\n                    x = tuple(allele_count)\n                    if not polarised:\n                        x = fold(x, out_dim)\n                    S[x] += increment\n            if span_normalise:\n                S /= end - begin\n            out[j, :] += S\n    return out\n\n\ndef naive_branch_allele_frequency_spectrum(\n    ts,\n    sample_sets,\n    windows=None,\n    time_windows=None,\n    polarised=False,\n    span_normalise=True,\n):\n    \"\"\"\n    The joint allele frequency spectrum for branches.\n    \"\"\"\n    drop_windows = windows is None\n    if windows is None:\n        windows = [0.0, ts.sequence_length]\n    else:\n        if windows[0] != 0:\n            windows = [0] + windows\n    drop_time_windows = time_windows is None\n    if time_windows is None:\n        time_windows = [0.0, np.inf]\n    windows = ts.parse_windows(windows)\n    num_windows = len(windows) - 1\n    num_time_windows = len(time_windows) - 1\n    out_dim = [1 + len(sample_set) for sample_set in sample_sets]\n    out = np.zeros([num_windows] + [num_time_windows] + out_dim)\n    for j in range(num_windows):\n        begin = windows[j]\n        end = windows[j + 1]\n        for k, upper_time in enumerate(time_windows[1:]):\n            S = np.zeros(out_dim)\n            if np.isfinite(upper_time):\n                decap_ts = ts.decapitate(upper_time)\n            else:\n                decap_ts = ts\n            assert np.all(list(ts.samples()) == list(decap_ts.samples()))\n            trees = [\n                next(decap_ts.trees(tracked_samples=sample_set))\n                for sample_set in sample_sets\n            ]\n            t = 
trees[0]\n            while True:\n                tr_len = min(end, t.interval.right) - max(begin, t.interval.left)\n                if tr_len > 0:\n                    for node in t.nodes():\n                        if 0 < t.num_samples(node) < decap_ts.num_samples:\n                            x = [tree.num_tracked_samples(node) for tree in trees]\n                            if not polarised:\n                                x = fold(x, out_dim)\n                            # Note x must be a tuple for indexing to work\n                            S[tuple(x)] += t.branch_length(node) * tr_len\n\n                # Advance the trees\n                more = [tree.next() for tree in trees]\n                assert len(set(more)) == 1\n                if not more[0]:\n                    break\n            if span_normalise:\n                S /= end - begin\n            out[j, k, :] = S - sum(out[j, 0:k, :])\n    print(out.shape)\n    if drop_time_windows:\n        assert out.ndim == 2 + len(out_dim)\n        out = out[:, 0]\n    elif drop_windows:\n        assert out.shape[0] == 1\n        out = out[0]\n    return out\n\n\ndef naive_allele_frequency_spectrum(\n    ts,\n    sample_sets,\n    windows=None,\n    time_windows=None,\n    polarised=False,\n    mode=\"site\",\n    span_normalise=True,\n):\n    \"\"\"\n    Naive definition of the generalised site frequency spectrum.\n    \"\"\"\n    method_map = {\n        \"site\": naive_site_allele_frequency_spectrum,\n        \"branch\": naive_branch_allele_frequency_spectrum,\n    }\n    return method_map[mode](\n        ts,\n        sample_sets,\n        windows=windows,\n        time_windows=time_windows,\n        polarised=polarised,\n        span_normalise=span_normalise,\n    )\n\n\ndef branch_allele_frequency_spectrum(\n    ts, sample_sets, windows, time_windows=None, polarised=False, span_normalise=True\n):\n    \"\"\"\n    Efficient implementation of the algorithm used as the basis for the\n    underlying C 
version.\n    \"\"\"\n    num_sample_sets = len(sample_sets)\n    drop_windows = windows is None\n    windows = ts.parse_windows(windows)\n    drop_time_windows = time_windows is None\n    if time_windows is None:\n        time_windows = [0.0, np.inf]\n    num_windows = windows.shape[0] - 1\n    num_time_windows = len(time_windows) - 1\n    out_dim = [1 + len(sample_set) for sample_set in sample_sets]\n    time = ts.tables.nodes.time\n\n    result = np.zeros([num_windows] + [num_time_windows] + out_dim)\n    # Number of nodes in sample_set j ancestral to each node u.\n    count = np.zeros((ts.num_nodes, num_sample_sets + 1), dtype=np.uint32)\n    for j in range(num_sample_sets):\n        count[sample_sets[j], j] = 1\n    # The last column counts across all samples\n    count[ts.samples(), -1] = 1\n    # contains the location of the last time we updated the output for a node.\n    last_update = np.zeros(ts.num_nodes)\n    window_index = 0\n    parent = np.zeros(ts.num_nodes, dtype=np.int32) - 1\n    # branch_length = np.zeros(ts.num_nodes)\n    tree_index = 0\n\n    def update_result(window_index, u, right):\n        if parent[u] != -1:\n            t_v = time[parent[u]]\n            if 0 < count[u, -1] < ts.num_samples:\n                time_window_index = 0\n                while (\n                    time_window_index < num_time_windows\n                    and time_windows[time_window_index] < t_v\n                ):\n                    assert parent[u] != -1\n                    tw_branch_length = max(\n                        0.0,\n                        min(time_windows[time_window_index + 1], t_v)\n                        - max(time_windows[time_window_index], time[u]),\n                    )\n                    x = (right - last_update[u]) * tw_branch_length\n                    c = count[u, :num_sample_sets]\n                    if not polarised:\n                        c = fold(c, out_dim)\n                    index = tuple([window_index] + 
[time_window_index] + list(c))\n                    result[index] += x\n                    time_window_index += 1\n        last_update[u] = right\n\n    for (t_left, t_right), edges_out, edges_in in ts.edge_diffs():\n        for edge in edges_out:\n            u = edge.child\n            v = edge.parent\n            update_result(window_index, u, t_left)\n            while v != -1:\n                update_result(window_index, v, t_left)\n                count[v] -= count[u]\n                v = parent[v]\n            parent[u] = -1\n\n        for edge in edges_in:\n            u = edge.child\n            v = edge.parent\n            while v != -1:\n                update_result(window_index, v, t_left)\n                count[v] += count[u]\n                v = parent[v]\n            parent[u] = edge.parent\n\n        # Update the windows\n        while window_index < num_windows and windows[window_index + 1] <= t_right:\n            w_right = windows[window_index + 1]\n            # This seems like a bad idea as we incur a O(N) cost for each window,\n            # where N is the number of nodes.  It might be hard to do much better\n            # though, since we can't help but incur O(|sample_set|) cost at each window\n            # which we'll assume is O(n), and for large n, N isn't much larger than n.\n            # For K > 1 dimensions, the cost of the scan through the nodes is much\n            # less than the O(n^K) required to copy (if n is large and K is small).\n            # We could keep track of the roots and do a tree traversal, bringing this\n            # down to O(n), but this adds a lot of complexity and memory and I'm\n            # fairly confident would be slower overall. 
We could keep a set of\n            # non-zero branches, but this would add a O(log n) cost to each edge\n            # insertion and removal and a lot of complexity to the C implementation.\n            for u in range(ts.num_nodes):\n                update_result(window_index, u, w_right)\n            window_index += 1\n        tree_index += 1\n\n    assert window_index == windows.shape[0] - 1\n    if span_normalise:\n        for j in range(num_windows):\n            result[j] /= windows[j + 1] - windows[j]\n\n    if drop_time_windows:\n        assert result.ndim == 2 + len(out_dim)\n        assert result.shape[1] == 1\n        result = result[:, 0]\n    elif drop_windows:\n        assert result.shape[0] == 1\n        result = result[0]\n    return result\n\n\ndef site_allele_frequency_spectrum(\n    ts, sample_sets, windows, time_windows=None, polarised=False, span_normalise=True\n):\n    \"\"\"\n    Efficient implementation of the algorithm used as the basis for the\n    underlying C version.\n    \"\"\"\n    windows = ts.parse_windows(windows)\n    assert time_windows is None\n    num_windows = windows.shape[0] - 1\n    out_dim = [1 + len(sample_set) for sample_set in sample_sets]\n\n    result = np.zeros([num_windows] + out_dim)\n    # Add an extra sample set to count across all samples\n    sample_sets = list(sample_sets) + [ts.samples()]\n    # Number of nodes in sample_set j ancestral to each node u.\n    count = np.zeros((ts.num_nodes, len(sample_sets)), dtype=np.uint32)\n    for j in range(len(sample_sets)):\n        count[sample_sets[j], j] = 1\n\n    site_index = 0\n    mutation_index = 0\n    window_index = 0\n    sites = ts.tables.sites\n    mutations = ts.tables.mutations\n    parent = np.zeros(ts.num_nodes, dtype=np.int32) - 1\n    for (t_left, t_right), edges_out, edges_in in ts.edge_diffs():\n        for edge in edges_out:\n            u = edge.child\n            v = edge.parent\n            while v != -1:\n                count[v] -= count[u]\n   
             v = parent[v]\n            parent[u] = -1\n\n        for edge in edges_in:\n            u = edge.child\n            v = edge.parent\n            parent[u] = v\n            while v != -1:\n                count[v] += count[u]\n                v = parent[v]\n\n        while site_index < len(sites) and sites.position[site_index] < t_right:\n            assert t_left <= sites.position[site_index]\n            ancestral_state = sites[site_index].ancestral_state\n            allele_count = collections.defaultdict(\n                functools.partial(np.zeros, len(sample_sets), dtype=int)\n            )\n            allele_count[ancestral_state][:] = [\n                len(sample_set) for sample_set in sample_sets\n            ]\n            while (\n                mutation_index < len(mutations)\n                and mutations[mutation_index].site == site_index\n            ):\n                mutation = mutations[mutation_index]\n                allele_count[mutation.derived_state] += count[mutation.node]\n                if mutation.parent != -1:\n                    parent_allele = mutations[mutation.parent].derived_state\n                    allele_count[parent_allele] -= count[mutation.node]\n                else:\n                    allele_count[ancestral_state] -= count[mutation.node]\n                mutation_index += 1\n\n            pos = sites.position[site_index]\n            while windows[window_index + 1] <= pos:\n                window_index += 1\n            assert windows[window_index] <= pos < windows[window_index + 1]\n            site_result = result[window_index]\n\n            for allele, c in dict(allele_count).items():\n                # Any allele monomorphic across all samples does not\n                # contribute to the AFS\n                if 0 == c[-1] or c[-1] == ts.num_samples:\n                    del allele_count[allele]\n            if polarised and ancestral_state in allele_count:\n                del 
allele_count[ancestral_state]\n\n            increment = 1 if polarised else 0.5\n            for _allele, c in allele_count.items():\n                x = tuple(c[:-1])\n                if not polarised:\n                    x = fold(x, out_dim)\n                site_result[x] += increment\n            site_index += 1\n\n    if span_normalise:\n        for j in range(num_windows):\n            span = windows[j + 1] - windows[j]\n            result[j] /= span\n    return result\n\n\ndef allele_frequency_spectrum(\n    ts,\n    sample_sets,\n    windows=None,\n    time_windows=None,\n    polarised=False,\n    mode=\"site\",\n    span_normalise=True,\n):\n    \"\"\"\n    Generalised site frequency spectrum.\n    \"\"\"\n    method_map = {\n        \"site\": site_allele_frequency_spectrum,\n        \"branch\": branch_allele_frequency_spectrum,\n    }\n    return method_map[mode](\n        ts,\n        sample_sets,\n        windows=windows,\n        time_windows=time_windows,\n        polarised=polarised,\n        span_normalise=span_normalise,\n    )\n\n\nclass TestAlleleFrequencySpectrum(StatsTestCase, SampleSetStatsMixin):\n    # Derived classes define this to get a specific stats mode.\n    mode = None\n\n    def verify_unwrapped_sample_set(self, ts, sample_set):\n        # check that if we pass sample sets in like [0,1,2,3]\n        # this is equivalent to [[0,1,2,3]]\n        assert len(sample_set) == 1\n        afs1 = ts.allele_frequency_spectrum(sample_set[0])\n        afs2 = ts.allele_frequency_spectrum(sample_set)\n        self.assertArrayEqual(afs1, afs2)\n\n    def verify_single_sample_set(self, ts):\n        L = ts.sequence_length\n        samples = ts.samples()\n        self.verify_unwrapped_sample_set(ts, [samples[:3]])\n        a1 = ts.allele_frequency_spectrum(mode=self.mode)\n        a2 = ts.allele_frequency_spectrum([samples], mode=self.mode)\n        self.assertArrayEqual(a1, a2)\n        for windows in [None, (0, L), (0, L / 2, L)]:\n            a1 
= ts.allele_frequency_spectrum(mode=self.mode, windows=windows)\n            a2 = ts.allele_frequency_spectrum([samples], mode=self.mode, windows=windows)\n            self.assertArrayEqual(a1, a2)\n        for polarised in [True, False]:\n            a1 = ts.allele_frequency_spectrum(mode=self.mode, polarised=polarised)\n            a2 = ts.allele_frequency_spectrum(\n                [samples], mode=self.mode, polarised=polarised\n            )\n            self.assertArrayEqual(a1, a2)\n        for span_normalise in [True, False]:\n            a1 = ts.allele_frequency_spectrum(\n                mode=self.mode, span_normalise=span_normalise\n            )\n            a2 = ts.allele_frequency_spectrum(\n                [samples], mode=self.mode, span_normalise=span_normalise\n            )\n            self.assertArrayEqual(a1, a2)\n\n    def verify_sample_sets(self, ts, sample_sets, windows, time_windows=None):\n        # print(ts.genotype_matrix())\n        # print(ts.draw_text())\n        # print(\"sample_sets = \", sample_sets)\n        windows = ts.parse_windows(windows)\n        for span_normalise, polarised in itertools.product([True, False], [True, False]):\n            try:\n                _ = [len(x) for x in sample_sets]\n            except TypeError:\n                S = [sample_sets]\n            else:\n                S = sample_sets\n            sfs1 = naive_allele_frequency_spectrum(\n                ts,\n                S,\n                windows,\n                time_windows,\n                mode=self.mode,\n                polarised=polarised,\n                span_normalise=span_normalise,\n            )\n            sfs2 = allele_frequency_spectrum(\n                ts,\n                S,\n                windows,\n                time_windows,\n                mode=self.mode,\n                polarised=polarised,\n                span_normalise=span_normalise,\n            )\n            sfs3 = ts.allele_frequency_spectrum(\n             
   sample_sets,\n                windows,\n                time_windows,\n                mode=self.mode,\n                polarised=polarised,\n                span_normalise=span_normalise,\n            )\n            assert sfs1.shape[0] == len(windows) - 1\n            has_tw = time_windows is not None\n            assert len(sfs1.shape) == len(S) + 1 + has_tw\n            for j, sample_set in enumerate(S):\n                n = 1 + len(sample_set)\n                assert sfs1.shape[j + 1 + has_tw] == n\n\n            assert sfs1.shape == sfs2.shape\n            assert sfs1.shape == sfs3.shape\n            if not np.allclose(sfs1, sfs3):\n                print()\n                print(\"sample sets\", sample_sets)\n                print(\"simple\", sfs1)\n                print(\"effic \", sfs2)\n                print(\"ts    \", sfs3)\n            self.assertArrayAlmostEqual(sfs1, sfs2)\n            self.assertArrayAlmostEqual(sfs1, sfs3)\n            assert np.all(sfs3 >= 0)\n\n\nclass TestBranchAlleleFrequencySpectrum(\n    TestAlleleFrequencySpectrum, TopologyExamplesMixin\n):\n    mode = \"branch\"\n\n    def generate_params(self, ts):\n        s = list(ts.samples())\n        S = [s[:2], s[2:]]\n        yield (s, None, None)\n        yield ([s], None, None)\n        yield ([s[:2], s[2:]], None, None)\n        yield (S, [0, ts.sequence_length], None)\n        yield (S, None, [0, math.inf])\n        yield (S, None, [0, 10, 100])\n        yield (S, [0, ts.sequence_length / 4, ts.sequence_length], [0, 10, math.inf])\n\n    def verify(self, ts):\n        for s, w, t in self.generate_params(ts):\n            self.verify_sample_sets(ts, s, windows=w, time_windows=t)\n\n    def test_simple_example(self):\n        ts = msprime.simulate(6, recombination_rate=0.1, random_seed=1)\n        self.verify_single_sample_set(ts)\n\n        self.verify_sample_sets(ts, [range(6)], [0, 1])\n        self.verify_sample_sets(ts, [[0, 1]], [0, 1])\n        self.verify_sample_sets(ts, 
[[0, 1], [2, 3]], [0, 1])\n        self.verify_sample_sets(ts, [[0, 1, 2, 3, 4, 5]], [0, 1])\n        self.verify_sample_sets(ts, [[0, 1, 2], [3, 4, 5]], [0, 1])\n        self.verify_sample_sets(ts, [[0, 1], [2, 3], [4, 5]], [0, 1])\n\n\nclass TestSiteAlleleFrequencySpectrum(\n    TestAlleleFrequencySpectrum, MutatedTopologyExamplesMixin\n):\n    mode = \"site\"\n\n    def test_simple_example(self):\n        ts = msprime.simulate(6, mutation_rate=0.2, random_seed=1)\n        self.verify_single_sample_set(ts)\n\n        self.verify_sample_sets(ts, [[0]], [0, 1])\n        self.verify_sample_sets(ts, [[0, 1, 2, 3, 4, 5]], [0, 1])\n        self.verify_sample_sets(ts, [[0, 1, 2], [3, 4, 5]], [0, 1])\n        self.verify_sample_sets(ts, [[0, 1], [2, 3], [4, 5]], [0, 1])\n\n\nclass TestBranchAlleleFrequencySpectrumProperties(StatsTestCase, TopologyExamplesMixin):\n    def verify(self, ts):\n        # If we split by tree, the sum of the AFS should be equal to the\n        # tree total branch length in each window\n        windows = ts.breakpoints(as_array=True)\n        S = ts.samples()\n        examples = [\n            [S],\n            [S[:1]],\n            [S[:-1]],\n            [S[:1], S[1:]],\n            [S[:1], S[:-1]],\n        ]\n        if len(S) > 2:\n            examples += [[S[:1], S[2:], S[:3]]]\n        # This is the same definition that we use for segregating_sites\n        tbl = [\n            sum(\n                tree.branch_length(u)\n                for u in tree.nodes()\n                if 0 < tree.num_samples(u) < ts.num_samples\n            )\n            for tree in ts.trees()\n        ]\n        for polarised in [True, False]:\n            for sample_sets in examples:\n                afs = ts.allele_frequency_spectrum(\n                    sample_sets,\n                    windows=windows,\n                    mode=\"branch\",\n                    polarised=polarised,\n                    span_normalise=True,\n                )\n                
afs_sum = [np.sum(window) for window in afs]\n                self.assertArrayAlmostEqual(afs_sum, tbl)\n\n\n############################################\n# End of specific stats tests.\n############################################\n\n\nclass TestWindowedTreeStat(StatsTestCase):\n    \"\"\"\n    Tests that the treewise windowing function defined here has the correct\n    behaviour.\n    \"\"\"\n\n    # TODO add more tests here covering the various windowing possibilities.\n    def get_tree_sequence(self):\n        ts = msprime.simulate(10, recombination_rate=2, random_seed=1)\n        assert ts.num_trees > 3\n        return ts\n\n    def test_all_trees(self):\n        ts = self.get_tree_sequence()\n        A1 = np.ones((ts.num_trees, 1))\n        windows = np.array(list(ts.breakpoints()))\n        A2 = windowed_tree_stat(ts, A1, windows)\n        # print(\"breakpoints = \", windows)\n        # print(A2)\n        assert A1.shape == A2.shape\n        # JK: I don't understand what we're computing here, this normalisation\n        # seems pretty weird.\n        # for tree in ts.trees():\n        #     self.assertAlmostEqual(A2[tree.index, 0], tree.span / ts.sequence_length)\n\n    def test_single_interval(self):\n        ts = self.get_tree_sequence()\n        A1 = np.ones((ts.num_trees, 1))\n        windows = np.array([0, ts.sequence_length])\n        A2 = windowed_tree_stat(ts, A1, windows)\n        assert A2.shape == (1, 1)\n        # TODO: Test output\n\n\nclass TestSampleSets(StatsTestCase):\n    \"\"\"\n    Tests that passing sample sets in various ways gets interpreted correctly.\n    \"\"\"\n\n    def get_example_ts(self, ts):\n        assert ts.num_mutations > 0\n        return ts\n\n    def test_duplicate_samples(self, ts_10_mut_recomb_fixture):\n        ts = self.get_example_ts(ts_10_mut_recomb_fixture)\n        for bad_set in [[1, 1], [1, 2, 1], list(range(10)) + [9]]:\n            with pytest.raises(exceptions.LibraryError):\n                
ts.diversity([bad_set])\n            with pytest.raises(exceptions.LibraryError):\n                ts.divergence([[0, 1], bad_set])\n            with pytest.raises(ValueError):\n                ts.sample_count_stat([bad_set], self.identity_f(ts), 1)\n\n    def test_empty_sample_set(self, ts_10_mut_recomb_fixture):\n        ts = self.get_example_ts(ts_10_mut_recomb_fixture)\n        with pytest.raises(ValueError):\n            ts.diversity([[]])\n        for bad_sample_sets in [[[], []], [[1], []], [[1, 2], [1], []]]:\n            with pytest.raises(ValueError):\n                ts.diversity(bad_sample_sets)\n            with pytest.raises(ValueError):\n                ts.divergence(bad_sample_sets)\n            with pytest.raises(ValueError):\n                ts.sample_count_stat(bad_sample_sets, self.identity_f(ts), 1)\n\n    def test_non_samples(self, ts_10_mut_recomb_fixture):\n        ts = self.get_example_ts(ts_10_mut_recomb_fixture)\n        with pytest.raises(exceptions.LibraryError):\n            ts.diversity([[ts.num_samples]])\n\n        with pytest.raises(exceptions.LibraryError):\n            ts.divergence([[ts.num_samples], [1, 2]])\n\n        with pytest.raises(ValueError):\n            ts.sample_count_stat([[ts.num_samples]], self.identity_f(ts), 1)\n\n    def test_span_normalise(self, ts_10_mut_recomb_fixture):\n        np.random.seed(92)\n        ts = self.get_example_ts(ts_10_mut_recomb_fixture)\n        sample_sets = [[0, 1], [2, 3, 4], [5, 6]]\n        windows = ts.sequence_length * np.random.uniform(size=10)\n        windows.sort()\n        windows[0] = 0.0\n        windows[-1] = ts.sequence_length\n        n = np.array([len(u) for u in sample_sets])\n\n        def f(x):\n            return x * (x < n)\n\n        # Determine output_dim of the function\n        for mode in (\"site\", \"branch\", \"node\"):\n            sigma1 = ts.sample_count_stat(sample_sets, f, 3, windows=windows, mode=mode)\n            sigma2 = ts.sample_count_stat(\n       
         sample_sets, f, 3, windows=windows, mode=mode, span_normalise=True\n            )\n            sigma3 = ts.sample_count_stat(\n                sample_sets, f, 3, windows=windows, mode=mode, span_normalise=False\n            )\n            denom = np.diff(windows)[:, np.newaxis]\n            if mode == \"node\":\n                denom = np.diff(windows)[:, np.newaxis, np.newaxis]\n\n            assert sigma1.shape == sigma2.shape\n            assert sigma1.shape == sigma3.shape\n            self.assertArrayAlmostEqual(sigma1, sigma2)\n            self.assertArrayAlmostEqual(sigma1, sigma3 / denom)\n\n\nclass TestSampleSetIndexes(StatsTestCase):\n    \"\"\"\n    Tests that we get the correct behaviour from the indexes argument to\n    k-way stats functions.\n    \"\"\"\n\n    def get_example_ts(self, ts_10_mut_fixture):\n        ts = ts_10_mut_fixture\n        assert ts.num_mutations > 0\n        return ts\n\n    def test_2_way_default(self, ts_10_mut_fixture):\n        ts = self.get_example_ts(ts_10_mut_fixture)\n        sample_sets = np.array_split(ts.samples(), 2)\n        S1 = ts.divergence(sample_sets)\n        S2 = divergence(ts, sample_sets)[0, 0]\n        S3 = ts.divergence(sample_sets, [0, 1])\n        assert S1.shape == S2.shape\n        self.assertArrayAlmostEqual(S1, S2)\n        self.assertArrayAlmostEqual(S1, S3)\n        sample_sets = np.array_split(ts.samples(), 3)\n        with pytest.raises(ValueError):\n            _ = ts.divergence(sample_sets)\n        with pytest.raises(ValueError):\n            _ = ts.divergence([sample_sets[0]])\n\n    def test_3_way_default(self, ts_10_mut_fixture):\n        ts = self.get_example_ts(ts_10_mut_fixture)\n        sample_sets = np.array_split(ts.samples(), 3)\n        S1 = ts.f3(sample_sets)\n        S2 = f3(ts, sample_sets)[0, 0]\n        S3 = ts.f3(sample_sets, [0, 1, 2])\n        assert S1.shape == S2.shape\n        self.assertArrayAlmostEqual(S1, S2)\n        self.assertArrayAlmostEqual(S1, S3)\n     
   sample_sets = np.array_split(ts.samples(), 4)\n        with pytest.raises(ValueError):\n            _ = ts.f3(sample_sets)\n\n    def test_4_way_default(self, ts_10_mut_fixture):\n        ts = self.get_example_ts(ts_10_mut_fixture)\n        sample_sets = np.array_split(ts.samples(), 4)\n        S1 = ts.f4(sample_sets)\n        S2 = f4(ts, sample_sets)\n        S3 = ts.f4(sample_sets, [0, 1, 2, 3])\n        assert S1.shape == S3.shape\n        self.assertArrayAlmostEqual(S1, S2)\n        self.assertArrayAlmostEqual(S1, S3)\n        sample_sets = np.array_split(ts.samples(), 5)\n        with pytest.raises(ValueError):\n            _ = ts.f4(sample_sets)\n\n    def test_2_way_combinations(self, ts_10_mut_fixture):\n        ts = self.get_example_ts(ts_10_mut_fixture)\n        sample_sets = np.array_split(ts.samples(), 4)\n        pairs = list(itertools.combinations(range(4), 2))\n        for k in range(1, len(pairs)):\n            S1 = ts.divergence(sample_sets, pairs[:k])\n            S2 = divergence(ts, sample_sets, pairs[:k])[0]\n            assert S1.shape[-1] == k\n            assert S1.shape == S2.shape\n            self.assertArrayAlmostEqual(S1, S2)\n\n    def test_3_way_combinations(self, ts_10_mut_fixture):\n        ts = self.get_example_ts(ts_10_mut_fixture)\n        sample_sets = np.array_split(ts.samples(), 5)\n        triples = list(itertools.combinations(range(5), 3))\n        for k in range(1, len(triples)):\n            S1 = ts.Y3(sample_sets, triples[:k])\n            S2 = Y3(ts, sample_sets, triples[:k])[0]\n            assert S1.shape[-1] == k\n            assert S1.shape == S2.shape\n            self.assertArrayAlmostEqual(S1, S2)\n\n    def test_4_way_combinations(self, ts_10_mut_fixture):\n        ts = self.get_example_ts(ts_10_mut_fixture)\n        sample_sets = np.array_split(ts.samples(), 5)\n        quads = list(itertools.combinations(range(5), 4))\n        for k in range(1, len(quads)):\n            S1 = ts.f4(sample_sets, quads[:k], 
windows=[0, ts.sequence_length])\n            S2 = f4(ts, sample_sets, quads[:k])\n            assert S1.shape[-1] == k\n            assert S1.shape == S2.shape\n            self.assertArrayAlmostEqual(S1, S2)\n\n    def test_errors(self, ts_10_mut_fixture):\n        ts = self.get_example_ts(ts_10_mut_fixture)\n        sample_sets = np.array_split(ts.samples(), 2)\n        with pytest.raises(ValueError):\n            ts.divergence(sample_sets, indexes=[])\n        with pytest.raises(ValueError):\n            ts.divergence(sample_sets, indexes=[(1, 1, 1)])\n        with pytest.raises(exceptions.LibraryError):\n            ts.divergence(sample_sets, indexes=[(1, 2)])\n\n\nclass TestGeneralStatInterface(StatsTestCase):\n    \"\"\"\n    Tests for the basic interface for general_stats.\n    \"\"\"\n\n    def get_tree_sequence(self):\n        ts = msprime.simulate(10, recombination_rate=2, mutation_rate=2, random_seed=1)\n        return ts\n\n    def test_function_cannot_update_state(self):\n        ts = self.get_tree_sequence()\n\n        def f(x):\n            out = x.copy()\n            x[:] = 0.0\n            return out\n\n        def g(x):\n            return x\n\n        x = ts.sample_count_stat(\n            [ts.samples()],\n            f,\n            output_dim=1,\n            strict=False,\n            mode=\"node\",\n            span_normalise=False,\n        )\n        y = ts.sample_count_stat(\n            [ts.samples()],\n            g,\n            output_dim=1,\n            strict=False,\n            mode=\"node\",\n            span_normalise=False,\n        )\n        self.assertArrayEqual(x, y)\n\n    def test_default_mode(self, ts_10_recomb_fixture):\n        ts = ts_10_recomb_fixture\n        W = np.ones((ts.num_samples, 2))\n        sigma1 = ts.general_stat(W, self.identity_f(ts), W.shape[1])\n        sigma2 = ts.general_stat(W, self.identity_f(ts), W.shape[1], mode=\"site\")\n        self.assertArrayEqual(sigma1, sigma2)\n\n    def 
test_bad_mode(self, ts_10_recomb_fixture):\n        ts = ts_10_recomb_fixture\n        W = np.ones((ts.num_samples, 2))\n        for bad_mode in [\"\", \"MODE\", \"x\" * 8192]:\n            with pytest.raises(ValueError):\n                ts.general_stat(W, self.identity_f(ts), W.shape[1], mode=bad_mode)\n\n    def test_bad_window_strings(self):\n        ts = self.get_tree_sequence()\n        with pytest.raises(ValueError):\n            ts.diversity([ts.samples()], mode=\"site\", windows=\"abc\")\n        with pytest.raises(ValueError):\n            ts.diversity([ts.samples()], mode=\"site\", windows=\"\")\n        with pytest.raises(ValueError):\n            ts.diversity([ts.samples()], mode=\"tree\", windows=\"abc\")\n\n    def test_bad_summary_function(self):\n        ts = self.get_tree_sequence()\n        W = np.ones((ts.num_samples, 3))\n        with pytest.raises(ValueError):\n            ts.general_stat(W, lambda x: x, 3, windows=\"sites\")\n        with pytest.raises(ValueError):\n            ts.general_stat(W, lambda x: np.array([1.0]), 1, windows=\"sites\")\n\n    def test_nonnumpy_summary_function(self):\n        ts = self.get_tree_sequence()\n        W = np.ones((ts.num_samples, 3))\n        sigma1 = ts.general_stat(W, lambda x: [0.0], 1)\n        sigma2 = ts.general_stat(W, lambda x: np.array([0.0]), 1)\n        self.assertArrayEqual(sigma1, sigma2)\n\n\nclass TestGeneralBranchStats(StatsTestCase):\n    \"\"\"\n    Tests for general branch stats (using functions and arbitrary weights)\n    \"\"\"\n\n    def compare_general_stat(self, ts, W, f, windows=None, polarised=False):\n        # Determine output_dim of the function\n        M = len(f(W[0]))\n        sigma1 = naive_branch_general_stat(ts, W, f, windows, polarised=polarised)\n        sigma2 = ts.general_stat(W, f, M, windows, polarised=polarised, mode=\"branch\")\n        sigma3 = branch_general_stat(ts, W, f, windows, polarised=polarised)\n        assert sigma1.shape == sigma2.shape\n        
assert sigma1.shape == sigma3.shape\n        self.assertArrayAlmostEqual(sigma1, sigma2)\n        self.assertArrayAlmostEqual(sigma1, sigma3)\n        return sigma1\n\n    def test_simple_identity_f_w_zeros(self, ts_12_highrecomb_fixture):\n        ts = ts_12_highrecomb_fixture\n        W = np.zeros((ts.num_samples, 3))\n        for polarised in [True, False]:\n            sigma = self.compare_general_stat(\n                ts, W, self.identity_f(ts), windows=\"trees\", polarised=polarised\n            )\n            assert sigma.shape == (ts.num_trees, W.shape[1])\n            assert np.all(sigma == 0)\n\n    def test_simple_identity_f_w_ones(self, ts_10_recomb_fixture):\n        ts = ts_10_recomb_fixture\n        W = np.ones((ts.num_samples, 2))\n        sigma = self.compare_general_stat(\n            ts, W, self.identity_f(ts), windows=\"trees\", polarised=True\n        )\n        assert sigma.shape == (ts.num_trees, W.shape[1])\n        # A W of 1 for every node and identity f counts the samples in the subtree\n        # if polarised is True.\n        for tree in ts.trees():\n            s = sum(tree.num_samples(u) * tree.branch_length(u) for u in tree.nodes())\n            assert np.allclose(sigma[tree.index], s)\n\n    def test_simple_cumsum_f_w_ones(self):\n        ts = msprime.simulate(13, recombination_rate=1, random_seed=2)\n        W = np.ones((ts.num_samples, 8))\n        for polarised in [True, False]:\n            sigma = self.compare_general_stat(\n                ts, W, self.cumsum_f(ts), windows=\"trees\", polarised=polarised\n            )\n            assert sigma.shape == (ts.num_trees, W.shape[1])\n\n    def test_simple_cumsum_f_w_ones_many_windows(self):\n        ts = msprime.simulate(15, recombination_rate=3, random_seed=3)\n        assert ts.num_trees > 3\n        windows = np.linspace(0, ts.sequence_length, num=ts.num_trees * 10)\n        W = np.ones((ts.num_samples, 3))\n        sigma = self.compare_general_stat(ts, W, self.cumsum_f(ts), 
windows=windows)\n        assert sigma.shape == (windows.shape[0] - 1, W.shape[1])\n\n    def test_windows_equal_to_ts_breakpoints(self):\n        ts = msprime.simulate(14, recombination_rate=1, random_seed=2)\n        W = np.ones((ts.num_samples, 1))\n        for polarised in [True, False]:\n            sigma_no_windows = self.compare_general_stat(\n                ts, W, self.cumsum_f(ts), windows=\"trees\", polarised=polarised\n            )\n            assert sigma_no_windows.shape == (ts.num_trees, W.shape[1])\n            sigma_windows = self.compare_general_stat(\n                ts,\n                W,\n                self.cumsum_f(ts),\n                windows=ts.breakpoints(as_array=True),\n                polarised=polarised,\n            )\n            assert sigma_windows.shape == sigma_no_windows.shape\n            assert np.allclose(sigma_windows, sigma_no_windows)\n\n    def test_single_tree_windows(self):\n        ts = msprime.simulate(15, random_seed=2, length=100)\n        W = np.ones((ts.num_samples, 2))\n        f = self.sum_f(ts)\n        # for num_windows in range(1, 10):\n        for num_windows in [2]:\n            windows = np.linspace(0, ts.sequence_length, num=num_windows + 1)\n            sigma = self.compare_general_stat(ts, W, f, windows)\n            assert sigma.shape == (num_windows, 1)\n\n    def test_simple_identity_f_w_zeros_windows(self, ts_12_highrecomb_fixture):\n        ts = ts_12_highrecomb_fixture\n        W = np.zeros((ts.num_samples, 3))\n        f = self.identity_f(ts)\n        windows = np.linspace(0, ts.sequence_length, num=11)\n        for polarised in [True, False]:\n            sigma = self.compare_general_stat(ts, W, f, windows, polarised=polarised)\n            assert sigma.shape == (10, W.shape[1])\n            assert np.all(sigma == 0)\n\n    def test_nonstrict_nonancestral_material(self):\n        # 0 is a sample, 1 is not\n        #\n        # 2.00┊  2  ┊ 2 ┊  2  ┊\n        #     ┊ ┏┻┓ ┊ ┃ ┊ ┏┻┓ 
┊\n        # 1.00┊ ┃ 1 ┊ 1 ┊ ┃ 1 ┊\n        #     ┊ ┃   ┊ ┃ ┊ ┃   ┊\n        # 0.00┊ 0   ┊ 0 ┊ 0   ┊\n        #     0     1   2     3\n\n        tables = tskit.TableCollection(sequence_length=3)\n\n        node_times = [0, 1, 2]\n        samples = [0]\n        for n, t in enumerate(node_times):\n            tables.nodes.add_row(\n                time=t, flags=tskit.NODE_IS_SAMPLE if n in samples else 0\n            )\n\n        # p, c, l, r\n        edges = [\n            (1, 0, 1, 2),\n            (2, 0, 0, 1),\n            (2, 0, 2, 3),\n            (2, 1, 0, 3),\n        ]\n        for p, c, l, r in edges:\n            tables.edges.add_row(parent=p, child=c, left=l, right=r)\n\n        # this makes it so 'site' mode counts branches\n        for x in range(int(tables.sequence_length)):\n            for n in range(tables.nodes.num_rows - 1):\n                offset = n / tables.nodes.num_rows\n                s = tables.sites.add_row(position=x + offset, ancestral_state=\"0\")\n                tables.mutations.add_row(site=s, node=n, derived_state=\"1\")\n\n        ts = tables.tree_sequence()\n\n        def f(x):\n            return x\n\n        for polarised, mode, answer in [\n            (True, \"branch\", 6),\n            (True, \"site\", 4),\n            (False, \"branch\", 8),\n            (False, \"site\", 6),\n        ]:\n            (stat,) = ts.sample_count_stat(\n                [[0]],\n                f,\n                1,\n                strict=False,\n                span_normalise=False,\n                polarised=polarised,\n                mode=mode,\n            )\n            assert stat == answer\n\n\nclass TestGeneralSiteStats(StatsTestCase):\n    \"\"\"\n    Tests for general site stats (using functions and arbitrary weights)\n    \"\"\"\n\n    def compare_general_stat(self, ts, W, f, windows=None, polarised=False):\n        # Determine output_dim of the function\n        M = len(f(W[0]))\n        sigma1 = naive_site_general_stat(ts, W, f, 
windows, polarised=polarised)\n        sigma2 = ts.general_stat(W, f, M, windows, polarised=polarised, mode=\"site\")\n        sigma3 = site_general_stat(ts, W, f, windows, polarised=polarised)\n        assert sigma1.shape == sigma2.shape\n        assert sigma1.shape == sigma3.shape\n        self.assertArrayAlmostEqual(sigma1, sigma2)\n        self.assertArrayAlmostEqual(sigma1, sigma3)\n        return sigma1\n\n    def test_identity_f_W_0_multiple_alleles(self):\n        ts = msprime.simulate(20, recombination_rate=0, random_seed=2)\n        ts = tsutil.jukes_cantor(ts, 20, 1, seed=10)\n        W = np.zeros((ts.num_samples, 3))\n        for polarised in [True, False]:\n            sigma = self.compare_general_stat(\n                ts, W, self.identity_f(ts), windows=\"sites\", polarised=polarised\n            )\n            assert sigma.shape == (ts.num_sites, W.shape[1])\n            assert np.all(sigma == 0)\n\n    def test_identity_f_W_0_multiple_alleles_windows(self):\n        ts = msprime.simulate(34, recombination_rate=0, random_seed=2)\n        ts = tsutil.jukes_cantor(ts, 20, 1, seed=10)\n        W = np.zeros((ts.num_samples, 3))\n        windows = np.linspace(0, 1, num=11)\n        for polarised in [True, False]:\n            sigma = self.compare_general_stat(\n                ts, W, self.identity_f(ts), windows=windows, polarised=polarised\n            )\n            assert sigma.shape == (windows.shape[0] - 1, W.shape[1])\n            assert np.all(sigma == 0)\n\n    def test_cumsum_f_W_1_multiple_alleles(self):\n        ts = msprime.simulate(3, recombination_rate=2, random_seed=2)\n        ts = tsutil.jukes_cantor(ts, 20, 1, seed=10)\n        W = np.ones((ts.num_samples, 3))\n        for polarised in [True, False]:\n            sigma = self.compare_general_stat(\n                ts, W, self.cumsum_f(ts), windows=\"sites\", polarised=polarised\n            )\n            assert sigma.shape == (ts.num_sites, W.shape[1])\n\n    def 
test_cumsum_f_W_1_two_alleles(self):\n        ts = msprime.simulate(33, recombination_rate=1, mutation_rate=2, random_seed=1)\n        W = np.ones((ts.num_samples, 5))\n        for polarised in [True, False]:\n            sigma = self.compare_general_stat(\n                ts, W, self.cumsum_f(ts), windows=\"sites\", polarised=polarised\n            )\n            assert sigma.shape == (ts.num_sites, W.shape[1])\n\n\nclass TestGeneralNodeStats(StatsTestCase):\n    \"\"\"\n    Tests for general node stats (using functions and arbitrary weights)\n    \"\"\"\n\n    def compare_general_stat(self, ts, W, f, windows=None, polarised=False):\n        # Determine output_dim of the function\n        M = len(f(W[0]))\n        sigma1 = naive_node_general_stat(ts, W, f, windows, polarised=polarised)\n        sigma2 = ts.general_stat(W, f, M, windows, polarised=polarised, mode=\"node\")\n        sigma3 = node_general_stat(ts, W, f, windows, polarised=polarised)\n        assert sigma1.shape == sigma2.shape\n        assert sigma1.shape == sigma3.shape\n        self.assertArrayAlmostEqual(sigma1, sigma2)\n        self.assertArrayAlmostEqual(sigma1, sigma3)\n        return sigma1\n\n    def test_simple_sum_f_w_zeros(self, ts_12_highrecomb_fixture):\n        ts = ts_12_highrecomb_fixture\n        W = np.zeros((ts.num_samples, 3))\n        for polarised in [True, False]:\n            sigma = self.compare_general_stat(\n                ts, W, self.identity_f(ts), windows=\"trees\", polarised=polarised\n            )\n            assert sigma.shape == (ts.num_trees, ts.num_nodes, 3)\n            assert np.all(sigma == 0)\n\n    def test_simple_sum_f_w_ones(self, ts_44_recomb_fixture):\n        ts = ts_44_recomb_fixture\n        W = np.ones((ts.num_samples, 2))\n        f = self.sum_f(ts)\n        sigma = self.compare_general_stat(ts, W, f, windows=\"trees\", polarised=True)\n        assert sigma.shape == (ts.num_trees, ts.num_nodes, 1)\n        # Drop the last dimension\n        sigma = 
sigma.reshape((ts.num_trees, ts.num_nodes))\n        # A W of 1 for every node and f(x)=sum(x) counts the samples in the subtree\n        # times 2 if polarised is True.\n        for tree in ts.trees():\n            s = np.array(\n                [\n                    tree.num_samples(u) if tree.num_samples(u) < ts.num_samples else 0\n                    for u in range(ts.num_nodes)\n                ]\n            )\n            self.assertArrayAlmostEqual(sigma[tree.index], 2 * s)\n\n    def test_simple_sum_f_w_ones_notstrict(self, ts_44_recomb_fixture):\n        ts = ts_44_recomb_fixture\n        W = np.ones((ts.num_samples, 2))\n        sigma = ts.general_stat(\n            W,\n            lambda x: np.array([np.sum(x)]),\n            1,\n            windows=\"trees\",\n            polarised=True,\n            mode=\"node\",\n            strict=False,\n        )\n        assert sigma.shape == (ts.num_trees, ts.num_nodes, 1)\n        # Drop the last dimension\n        sigma = sigma.reshape((ts.num_trees, ts.num_nodes))\n        # A W of 1 for every node and f(x)=sum(x) counts the samples in the subtree\n        # times 2 if polarised is True.\n        for tree in ts.trees():\n            s = np.array([tree.num_samples(u) for u in range(ts.num_nodes)])\n            self.assertArrayAlmostEqual(sigma[tree.index], 2 * s)\n\n    def test_small_tree_windows_polarised(self):\n        ts = msprime.simulate(4, recombination_rate=0.5, random_seed=2)\n        assert ts.num_trees > 1\n        W = np.ones((ts.num_samples, 1))\n        sigma = self.compare_general_stat(\n            ts,\n            W,\n            self.cumsum_f(ts),\n            windows=ts.breakpoints(as_array=True),\n            polarised=True,\n        )\n        assert sigma.shape == (ts.num_trees, ts.num_nodes, 1)\n\n    def test_one_window_polarised(self, ts_4_recomb_fixture):\n        ts = ts_4_recomb_fixture\n        W = np.ones((ts.num_samples, 1))\n        sigma = self.compare_general_stat(\n        
    ts, W, self.cumsum_f(ts), windows=[0, ts.sequence_length], polarised=True\n        )\n        assert sigma.shape == (1, ts.num_nodes, W.shape[1])\n\n    def test_one_window_unpolarised(self, ts_4_recomb_fixture):\n        ts = ts_4_recomb_fixture\n        W = np.ones((ts.num_samples, 2))\n        sigma = self.compare_general_stat(\n            ts, W, self.cumsum_f(ts), windows=[0, ts.sequence_length], polarised=False\n        )\n        assert sigma.shape == (1, ts.num_nodes, 2)\n\n    def test_many_windows(self):\n        ts = msprime.simulate(24, recombination_rate=3, random_seed=2)\n        W = np.ones((ts.num_samples, 3))\n        for k in [1, ts.num_trees // 2, ts.num_trees, ts.num_trees * 2]:\n            windows = np.linspace(0, 1, num=k + 1)\n            for polarised in [True]:\n                sigma = self.compare_general_stat(\n                    ts, W, self.cumsum_f(ts), windows=windows, polarised=polarised\n                )\n            assert sigma.shape == (k, ts.num_nodes, 3)\n\n    def test_one_tree(self):\n        ts = msprime.simulate(10, random_seed=3)\n        W = np.ones((ts.num_samples, 2))\n        f = self.sum_f(ts, k=2)\n        sigma = self.compare_general_stat(ts, W, f, windows=[0, 1], polarised=True)\n        assert sigma.shape == (1, ts.num_nodes, 2)\n        # A W of 1 for every node and f(x)=sum(x) counts the samples in the subtree\n        # times 2 if polarised is True.\n        tree = ts.first()\n        s = np.array(\n            [\n                tree.num_samples(u) if tree.num_samples(u) < ts.num_samples else 0\n                for u in range(ts.num_nodes)\n            ]\n        )\n        self.assertArrayAlmostEqual(sigma[tree.index, :, 0], 2 * s)\n        self.assertArrayAlmostEqual(sigma[tree.index, :, 1], 2 * s)\n\n\n##############################\n# Trait covariance\n##############################\n\n\n@cached_np\ndef covsq(x, y):\n    cov = np.dot(x - np.mean(x), y - np.mean(y)) / (len(x) - 1)\n    return cov * 
cov\n\n\n@cached_np\ndef corsq(x, y):\n    vx = covsq(x, x)\n    vy = covsq(y, y)\n    # sqrt is because vx and vy are *squared* variances\n    return covsq(x, y) / np.sqrt(vx * vy)\n\n\ndef site_trait_covariance(ts, W, windows=None, span_normalise=True):\n    \"\"\"\n    For each site, computes the covariance between the columns of W and the genotypes.\n    \"\"\"\n    windows = ts.parse_windows(windows)\n    n, K = W.shape\n    assert n == ts.num_samples\n    out = np.zeros((len(windows) - 1, K))\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        haps = ts.genotype_matrix(isolated_as_missing=False)\n        site_positions = [x.position for x in ts.sites()]\n        for i in range(K):\n            w = W[:, i].copy()\n            w -= np.mean(w)\n            S = 0\n            site_in_window = False\n            for k in range(ts.num_sites):\n                if (site_positions[k] >= begin) and (site_positions[k] < end):\n                    site_in_window = True\n                    hX = haps[k]\n                    alleles = set(hX)\n                    for a in alleles:\n                        S += covsq(w, hX == a) / 2\n            if site_in_window:\n                out[j, i] = S\n                if span_normalise:\n                    out[j, i] /= end - begin\n    return out\n\n\ndef branch_trait_covariance(ts, W, windows=None, span_normalise=True):\n    \"\"\"\n    For each branch, computes the covariance between the columns of W and the split\n    induced by the branch, multiplied by the length of the branch.\n    \"\"\"\n    windows = ts.parse_windows(windows)\n    n, K = W.shape\n    assert n == ts.num_samples\n    out = np.zeros((len(windows) - 1, K))\n    samples = ts.samples()\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        for i in range(K):\n            w = W[:, i].copy()\n            w -= np.mean(w)\n            S = 0\n            
has_trees = False\n            for tr in ts.trees():\n                if tr.interval.right <= begin:\n                    continue\n                if tr.interval.left >= end:\n                    break\n                if tr.total_branch_length > 0:\n                    has_trees = True\n                SS = 0\n                for u in range(ts.num_nodes):\n                    tree_samples = set(tr.samples(u))\n                    below = np.fromiter((s in tree_samples for s in samples), dtype=bool)\n                    branch_length = tr.branch_length(u)\n                    SS += covsq(w, below) * branch_length\n                S += SS * (min(end, tr.interval.right) - max(begin, tr.interval.left))\n            if has_trees:\n                out[j, i] = S\n                if span_normalise:\n                    out[j, i] /= end - begin\n    return out\n\n\ndef node_trait_covariance(ts, W, windows=None, span_normalise=True):\n    \"\"\"\n    For each node, computes the covariance between the columns of W and the split\n    induced by above/below the node.\n    \"\"\"\n    windows = ts.parse_windows(windows)\n    n, K = W.shape\n    assert n == ts.num_samples\n    out = np.zeros((len(windows) - 1, ts.num_nodes, K))\n    samples = ts.samples()\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        for i in range(K):\n            w = W[:, i].copy()\n            w -= np.mean(w)\n            S = np.zeros(ts.num_nodes)\n            for tr in ts.trees():\n                if tr.interval.right <= begin:\n                    continue\n                if tr.interval.left >= end:\n                    break\n                SS = np.zeros(ts.num_nodes)\n                for u in range(ts.num_nodes):\n                    tree_samples = set(tr.samples(u))\n                    below = np.fromiter((s in tree_samples for s in samples), dtype=bool)\n                    SS[u] += covsq(w, below)\n                S += SS * (min(end, 
tr.interval.right) - max(begin, tr.interval.left))\n            out[j, :, i] = S\n            if span_normalise:\n                out[j, :, i] /= end - begin\n    return out\n\n\ndef trait_covariance(ts, W, windows=None, mode=\"site\", span_normalise=True):\n    method_map = {\n        \"site\": site_trait_covariance,\n        \"node\": node_trait_covariance,\n        \"branch\": branch_trait_covariance,\n    }\n    return method_map[mode](ts, W, windows=windows, span_normalise=span_normalise)\n\n\nclass TestTraitCovariance(StatsTestCase, WeightStatsMixin):\n    # Derived classes define this to get a specific stats mode.\n    mode = None\n\n    def get_example_ts(self, ts_10_mut_recomb_fixture):\n        ts = ts_10_mut_recomb_fixture\n        assert ts.num_mutations > 0\n        return ts\n\n    def transform_weights(self, W):\n        \"\"\"\n        Need centered weights to compare to general stats.\n        \"\"\"\n        W -= np.mean(W, axis=0)\n        return W\n\n    def verify_weighted_stat(self, ts, W, windows):\n        n = W.shape[0]\n\n        def f(x):\n            return (x**2) / (2 * (n - 1) * (n - 1))\n\n        self.verify_definition(ts, W, windows, f, ts.trait_covariance, trait_covariance)\n\n    def verify_interface(self, ts, ts_method):\n        W = np.array([np.arange(ts.num_samples)]).T\n        sigma1 = ts_method(W, mode=self.mode)\n        sigma2 = ts_method(W, windows=None, mode=self.mode)\n        sigma3 = ts_method(W, windows=[0.0, ts.sequence_length], mode=self.mode)\n        assert sigma1.shape == sigma2.shape\n        self.assertArrayAlmostEqual(sigma1, sigma2)\n        self.assertArrayAlmostEqual(sigma1, sigma3[0])\n\n    def verify_centering(self, ts, method, ts_method):\n        # Since weights are mean-centered, adding a constant shouldn't change anything.\n        # ts is already passed as parameter, no need to call get_example_ts()\n        for W, windows in subset_combos(\n            self.example_weights(ts), 
example_windows(ts), p=0.1\n        ):\n            shift = np.arange(1, W.shape[1] + 1)\n            sigma1 = ts_method(W, windows=windows, mode=self.mode)\n            sigma2 = ts_method(W + shift, windows=windows, mode=self.mode)\n            sigma3 = method(ts, W, windows=windows, mode=self.mode)\n            sigma4 = method(ts, W + shift, windows=windows, mode=self.mode)\n            assert sigma1.shape == sigma2.shape\n            assert sigma1.shape == sigma3.shape\n            assert sigma1.shape == sigma4.shape\n            self.assertArrayAlmostEqual(sigma1, sigma2)\n            self.assertArrayAlmostEqual(sigma1, sigma3)\n            self.assertArrayAlmostEqual(sigma1, sigma4)\n\n\nclass TraitCovarianceMixin:\n    def test_interface(self, ts_10_mut_recomb_fixture):\n        ts = self.get_example_ts(ts_10_mut_recomb_fixture)\n        self.verify_interface(ts, ts.trait_covariance)\n\n    def test_normalisation(self, ts_10_mut_recomb_fixture):\n        ts = self.get_example_ts(ts_10_mut_recomb_fixture)\n        self.verify_centering(ts, trait_covariance, ts.trait_covariance)\n\n    def test_errors(self, ts_10_mut_recomb_fixture):\n        ts = self.get_example_ts(ts_10_mut_recomb_fixture)\n        W = np.ones((ts.num_samples, 2))\n        # W must have the right number of rows\n        with pytest.raises(ValueError):\n            ts.trait_correlation(W[1:, :])\n\n\n@pytest.mark.slow\nclass TestBranchTraitCovariance(\n    TestTraitCovariance, TopologyExamplesMixin, TraitCovarianceMixin\n):\n    mode = \"branch\"\n\n\n@pytest.mark.slow\nclass TestNodeTraitCovariance(\n    TestTraitCovariance, TopologyExamplesMixin, TraitCovarianceMixin\n):\n    mode = \"node\"\n\n\nclass TestSiteTraitCovariance(\n    TestTraitCovariance, MutatedTopologyExamplesMixin, TraitCovarianceMixin\n):\n    mode = \"site\"\n\n\n##############################\n# Trait correlation\n##############################\n\n\ndef site_trait_correlation(ts, W, windows=None, span_normalise=True):\n  
  \"\"\"\n    For each site, computes the correlation between the columns of W and the genotypes.\n    \"\"\"\n    windows = ts.parse_windows(windows)\n    n, K = W.shape\n    assert n == ts.num_samples\n    out = np.zeros((len(windows) - 1, K))\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        haps = ts.genotype_matrix(isolated_as_missing=False)\n        site_positions = [x.position for x in ts.sites()]\n        for i in range(K):\n            w = W[:, i].copy()\n            w -= np.mean(w)\n            w /= np.std(w) * np.sqrt(len(w) / (len(w) - 1))\n            S = 0\n            site_in_window = False\n            for k in range(ts.num_sites):\n                if (site_positions[k] >= begin) and (site_positions[k] < end):\n                    site_in_window = True\n                    hX = haps[k]\n                    alleles = set(hX)\n                    for a in alleles:\n                        p = np.mean(hX == a)\n                        if p > 0 and p < 1:\n                            # S += sum(w[hX == a])**2 / (2 * (p * (1 - p)))\n                            S += corsq(w, hX == a) / 2\n            if site_in_window:\n                out[j, i] = S\n                if span_normalise:\n                    out[j, i] /= end - begin\n    return out\n\n\ndef branch_trait_correlation(ts, W, windows=None, span_normalise=True):\n    \"\"\"\n    For each branch, computes the correlation between the columns of W and the split\n    induced by the branch, multiplied by the length of the branch.\n    \"\"\"\n    windows = ts.parse_windows(windows)\n    n, K = W.shape\n    assert n == ts.num_samples\n    out = np.zeros((len(windows) - 1, K))\n    samples = ts.samples()\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        for i in range(K):\n            w = W[:, i].copy()\n            w -= np.mean(w)\n            w /= np.std(w) * np.sqrt(len(w) / (len(w) - 1))\n 
           S = 0\n            has_trees = False\n            for tr in ts.trees():\n                if tr.interval.right <= begin:\n                    continue\n                if tr.interval.left >= end:\n                    break\n                if tr.total_branch_length > 0:\n                    has_trees = True\n                SS = 0\n                for u in range(ts.num_nodes):\n                    tree_samples = set(tr.samples(u))\n                    below = np.fromiter((s in tree_samples for s in samples), dtype=bool)\n                    p = np.mean(below)\n                    if p > 0 and p < 1:\n                        branch_length = tr.branch_length(u)\n                        # SS += ((sum(w[below])**2 +\n                        #         sum(w[np.logical_not(below)])**2) * branch_length\n                        #        / (2 * (p * (1 - p))))\n                        SS += corsq(w, below) * branch_length\n                S += SS * (min(end, tr.interval.right) - max(begin, tr.interval.left))\n            if has_trees:\n                out[j, i] = S\n                if span_normalise:\n                    out[j, i] /= end - begin\n    return out\n\n\ndef node_trait_correlation(ts, W, windows=None, span_normalise=True):\n    \"\"\"\n    For each node, computes the correlation between the columns of W and the split\n    induced by above/below the node.\n    \"\"\"\n    windows = ts.parse_windows(windows)\n    n, K = W.shape\n    assert n == ts.num_samples\n    out = np.zeros((len(windows) - 1, ts.num_nodes, K))\n    samples = ts.samples()\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        for i in range(K):\n            w = W[:, i].copy()\n            w -= np.mean(w)\n            w /= np.std(w) * np.sqrt(len(w) / (len(w) - 1))\n            S = np.zeros(ts.num_nodes)\n            for tr in ts.trees():\n                if tr.interval.right <= begin:\n                    continue\n                if 
tr.interval.left >= end:\n                    break\n                SS = np.zeros(ts.num_nodes)\n                for u in range(ts.num_nodes):\n                    tree_samples = set(tr.samples(u))\n                    below = np.fromiter((s in tree_samples for s in samples), dtype=bool)\n                    p = np.mean(below)\n                    if p > 0 and p < 1:\n                        # SS[u] += sum(w[below])**2 / 2\n                        # SS[u] += sum(w[np.logical_not(below)])**2 / 2\n                        # SS[u] /= (p * (1 - p))\n                        SS[u] += corsq(w, below)\n                S += SS * (min(end, tr.interval.right) - max(begin, tr.interval.left))\n            out[j, :, i] = S\n            if span_normalise:\n                out[j, :, i] /= end - begin\n    return out\n\n\ndef trait_correlation(ts, W, windows=None, mode=\"site\", span_normalise=True):\n    method_map = {\n        \"site\": site_trait_correlation,\n        \"node\": node_trait_correlation,\n        \"branch\": branch_trait_correlation,\n    }\n    return method_map[mode](ts, W, windows=windows, span_normalise=span_normalise)\n\n\nclass TestTraitCorrelation(TestTraitCovariance):\n    # Derived classes define this to get a specific stats mode.\n    mode = None\n\n    def transform_weights(self, W):\n        \"\"\"\n        Need standardised weights to compare to general stats,\n        and also an extra column to compute allele frequencies.\n        \"\"\"\n        W -= np.mean(W, axis=0)\n        n = W.shape[0]\n        with suppress_division_by_zero_warning():\n            W /= np.std(W, axis=0) * np.sqrt(n / (n - 1))\n        return np.column_stack((W, np.ones(W.shape[0]) / W.shape[0]))\n\n    def verify_weighted_stat(self, ts, W, windows):\n        n = W.shape[0]\n\n        def f(x):\n            p = x[-1]\n            if p > 0 and p < 1:\n                return (x[:-1] ** 2) / (2 * (p * (1 - p)) * n * (n - 1))\n            else:\n                return x[:-1] * 
0.0\n\n        self.verify_definition(\n            ts, W, windows, f, ts.trait_correlation, trait_correlation\n        )\n\n    def test_errors(self, ts_10_mut_recomb_fixture):\n        ts = self.get_example_ts(ts_10_mut_recomb_fixture)\n        # columns of W must have positive SD\n        W = np.ones((ts.num_samples, 2))\n        with pytest.raises(ValueError):\n            ts.trait_correlation(W)\n        # W must have the right number of rows\n        with pytest.raises(ValueError):\n            ts.trait_correlation(W[1:, :])\n\n    def verify_standardising(self, ts, method, ts_method):\n        \"\"\"\n        Since weights are standardised, multiplying by a constant shouldn't\n        change anything.\n        \"\"\"\n        for W, windows in subset_combos(\n            self.example_weights(ts), example_windows(ts), p=0.1\n        ):\n            scale = np.arange(1, W.shape[1] + 1)\n            sigma1 = ts_method(W, windows=windows, mode=self.mode)\n            sigma2 = ts_method(W * scale, windows=windows, mode=self.mode)\n            sigma3 = method(ts, W, windows=windows, mode=self.mode)\n            sigma4 = method(ts, W * scale, windows=windows, mode=self.mode)\n            assert sigma1.shape == sigma2.shape\n            self.assertArrayAlmostEqual(sigma1, sigma2)\n            self.assertArrayAlmostEqual(sigma1, sigma3)\n            self.assertArrayAlmostEqual(sigma1, sigma4)\n\n\nclass TraitCorrelationMixin:\n    def test_interface(self, ts_10_mut_recomb_fixture):\n        ts = self.get_example_ts(ts_10_mut_recomb_fixture)\n        self.verify_interface(ts, ts.trait_correlation)\n\n    def test_normalisation(self, ts_10_mut_recomb_fixture):\n        ts = self.get_example_ts(ts_10_mut_recomb_fixture)\n        self.verify_centering(ts, trait_correlation, ts.trait_correlation)\n        self.verify_standardising(ts, trait_correlation, ts.trait_correlation)\n\n\n@pytest.mark.slow\nclass TestBranchTraitCorrelation(\n    TestTraitCorrelation, 
TopologyExamplesMixin, TraitCorrelationMixin\n):\n    mode = \"branch\"\n\n\n@pytest.mark.slow\nclass TestNodeTraitCorrelation(\n    TestTraitCorrelation, TopologyExamplesMixin, TraitCorrelationMixin\n):\n    mode = \"node\"\n\n\nclass TestSiteTraitCorrelation(\n    TestTraitCorrelation, MutatedTopologyExamplesMixin, TraitCorrelationMixin\n):\n    mode = \"site\"\n\n\n##############################\n# Trait linear_model\n##############################\n\n# Quick hack to speed up the tests a bit. We're running this linear_model\n# function a gazillion times with similar arguments, so it's worth\n# caching the results. We could use functools.lru_cache, but the arguments\n# are numpy arrays and so we have to do something different. This\n# reduces runtime from ~40 seconds to ~7 seconds on the\n# TestNodeTraitLinearModel.\n# See https://github.com/tskit-dev/tskit/issues/1856 for more info\n_lm_cache = {}\n\n\n@cached_np\ndef linear_model(y, x, z):\n    key = (y.tobytes(), x.tobytes(), z.tobytes())\n    if key not in _lm_cache:\n        _lm_cache[key] = _linear_model(y, x, z)\n    return _lm_cache[key]\n\n\ndef _linear_model(y, x, z):\n    \"\"\"\n    Returns the squared coefficient of x in the least-squares linear model\n    :   y ~ x + z\n    where x and y are vectors and z is a matrix.\n    Note that if z is None then the output is\n      cor(x, y) * sd(y) / sd(x) = cov(x, y) / (sd(x) ** 2) .\n    \"\"\"\n    # add the constant vector to z\n    if z is None:\n        z = np.ones((len(x), 1))\n    else:\n        xz = np.column_stack([z, np.ones((len(x), 1))])\n        if np.linalg.matrix_rank(xz) == xz.shape[1]:\n            z = xz\n    xz = np.column_stack([x, z])\n    # check if y is sufficiently independent of the subspace spanned by xz\n    Pz = np.matmul(z, np.linalg.pinv(z))\n    Py = np.matmul(Pz, y)\n    denom = np.sum((y - Py) ** 2)\n    if np.linalg.matrix_rank(xz) < xz.shape[1] or denom < 1e-8:\n        return 0.0\n    else:\n        coefs, _, _, _ = 
np.linalg.lstsq(xz, y, rcond=None)\n        return coefs[0] * coefs[0]\n\n\ndef site_trait_linear_model(ts, W, Z, windows=None, span_normalise=True):\n    \"\"\"\n    For each site, and for each trait w (column of W), computes the coefficient\n    of site in the linear model:\n      w ~ site + Z\n    \"\"\"\n    windows = ts.parse_windows(windows)\n    n, K = W.shape\n    assert n == ts.num_samples\n    out = np.zeros((len(windows) - 1, K))\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        haps = ts.genotype_matrix(isolated_as_missing=False)\n        site_positions = [x.position for x in ts.sites()]\n        for i in range(K):\n            w = W[:, i]\n            S = 0\n            site_in_window = False\n            for k in range(ts.num_sites):\n                if (site_positions[k] >= begin) and (site_positions[k] < end):\n                    site_in_window = True\n                    hX = haps[k]\n                    alleles = set(hX)\n                    for a in alleles:\n                        p = np.mean(hX == a)\n                        if p > 0 and p < 1:\n                            S += linear_model(w, hX == a, Z) / 2\n            if site_in_window:\n                out[j, i] = S\n                if span_normalise:\n                    out[j, i] /= end - begin\n    return out\n\n\ndef branch_trait_linear_model(ts, W, Z, windows=None, span_normalise=True):\n    \"\"\"\n    For each branch, fits the linear_model of each column of W onto the split\n    induced by the branch and the covariates Z, multiplied by the length of the branch,\n    returning the squared coefficient of the column of W.\n    \"\"\"\n    windows = ts.parse_windows(windows)\n    n, K = W.shape\n    assert n == ts.num_samples\n    out = np.zeros((len(windows) - 1, K))\n    samples = ts.samples()\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        for i in range(K):\n           
 w = W[:, i]\n            S = 0\n            has_trees = False\n            for tr in ts.trees():\n                if tr.interval.right <= begin:\n                    continue\n                if tr.interval.left >= end:\n                    break\n                if tr.total_branch_length > 0:\n                    has_trees = True\n                SS = 0\n                for u in range(ts.num_nodes):\n                    tree_samples = set(tr.samples(u))\n                    below = np.fromiter((s in tree_samples for s in samples), dtype=bool)\n                    branch_length = tr.branch_length(u)\n                    SS += linear_model(w, below, Z) * branch_length\n                S += SS * (min(end, tr.interval.right) - max(begin, tr.interval.left))\n            if has_trees:\n                out[j, i] = S\n                if span_normalise:\n                    out[j, i] /= end - begin\n    return out\n\n\ndef node_trait_linear_model(ts, W, Z, windows=None, span_normalise=True):\n    \"\"\"\n    For each node, fits the linear model of each columns of W on the split\n    induced by above/below the node and the covariates Z, returning the squared\n    coefficient of the column of W.\n    \"\"\"\n    windows = ts.parse_windows(windows)\n    n, K = W.shape\n    assert n == ts.num_samples\n    out = np.zeros((len(windows) - 1, ts.num_nodes, K))\n    samples = ts.samples()\n    for j in range(len(windows) - 1):\n        begin = windows[j]\n        end = windows[j + 1]\n        for i in range(K):\n            w = W[:, i]\n            S = np.zeros(ts.num_nodes)\n            for tr in ts.trees():\n                if tr.interval.right <= begin:\n                    continue\n                if tr.interval.left >= end:\n                    break\n                SS = np.zeros(ts.num_nodes)\n                for u in range(ts.num_nodes):\n                    tree_samples = set(tr.samples(u))\n                    below = np.fromiter((s in tree_samples for s in samples), 
dtype=bool)\n                    SS[u] += linear_model(w, below, Z)\n                S += SS * (min(end, tr.interval.right) - max(begin, tr.interval.left))\n            out[j, :, i] = S\n            if span_normalise:\n                out[j, :, i] /= end - begin\n    return out\n\n\ndef trait_linear_model(ts, W, Z, windows=None, mode=\"site\", span_normalise=True):\n    method_map = {\n        \"site\": site_trait_linear_model,\n        \"node\": node_trait_linear_model,\n        \"branch\": branch_trait_linear_model,\n    }\n    return method_map[mode](ts, W, Z, windows=windows, span_normalise=span_normalise)\n\n\nclass TestTraitLinearModel(StatsTestCase, WeightStatsMixin):\n    # Derived classes define this to get a specific stats mode.\n    mode = None\n\n    def get_example_ts(self, ts_10_mut_recomb_fixture):\n        ts = ts_10_mut_recomb_fixture\n        assert ts.num_mutations > 0\n        return ts\n\n    def example_covariates(self, ts, k_values=None):\n        if k_values is None:\n            k_values = [2]  # Default to [2] to maintain current optimization\n\n        np.random.seed(999)\n        N = ts.num_samples\n\n        for k in k_values:\n            k = min(k, ts.num_samples)\n\n            # Uniform covariates\n            Z = np.ones((N, k))\n            Z[1, :] = np.arange(k, 2 * k)\n            yield Z\n\n            # Include one normal case for test coverage\n            if N >= 6:  # Only for larger samples to reduce computations\n                Z_normal = np.ones((N, k))\n                for j in range(k):\n                    Z_normal[:, j] = np.random.normal(0, 1, N)\n                yield Z_normal\n\n    def transform_weights(self, W, Z):\n        n = W.shape[0]\n        return np.column_stack([W, Z, np.ones((n, 1))])\n\n    def transform_covariates(self, Z):\n        tZ = np.column_stack([Z, np.ones((Z.shape[0], 1))])\n        if np.linalg.matrix_rank(tZ) == tZ.shape[1]:\n            Z = tZ\n        assert np.linalg.matrix_rank(Z) == 
Z.shape[1]\n        K = np.linalg.cholesky(np.matmul(Z.T, Z)).T\n        Z = np.matmul(Z, np.linalg.inv(K))\n        return Z\n\n    def verify(self, ts):\n        for W, Z, windows in subset_combos(\n            self.example_weights(ts),\n            self.example_covariates(ts, k_values=[2]),\n            example_windows(ts),\n            p=0.02,  # Reduced from 0.04 for performance\n        ):\n            self.verify_trait_linear_model(ts, W, Z, windows=windows)\n\n    def verify_trait_linear_model(self, ts, W, Z, windows):\n        n, result_dim = W.shape\n        tZ = self.transform_covariates(Z)\n        n, k = tZ.shape\n        V = np.matmul(W.T, tZ)\n\n        def f(x):\n            m = x[-1]\n            a = np.zeros(result_dim)\n            for i in range(result_dim):\n                # print(\"i=\", i, \"result_dim=\", result_dim, \"m=\", m, \"x=\", x)\n                # print(\"V=\", V)\n                if m > 0 and m < ts.num_samples:\n                    v = V[i, :]\n                    a[i] = x[i]\n                    denom = m\n                    for j in range(k):\n                        xx = x[result_dim + j]\n                        a[i] -= xx * v[j]\n                        denom -= xx * xx\n                    if abs(denom) < 1e-8:\n                        a[i] = 0.0\n                    else:\n                        a[i] /= denom\n                else:\n                    a[i] = 0.0\n            # print(\"out\", a*a/2)\n            return a * a / 2\n\n        # general_stat will need Z added, and an extra column for m\n        gW = self.transform_weights(W, tZ)\n\n        def wrapped_summary_func(x):\n            with suppress_division_by_zero_warning():\n                return f(x)\n\n        # Determine output_dim of the function\n        M = len(wrapped_summary_func(gW[0]))\n        for sn in [True, False]:\n            sigma1 = ts.general_stat(\n                gW, wrapped_summary_func, M, windows, mode=self.mode, span_normalise=sn\n   
         )\n            sigma2 = general_stat(\n                ts, gW, wrapped_summary_func, windows, mode=self.mode, span_normalise=sn\n            )\n            sigma3 = ts.trait_linear_model(\n                W, Z, windows=windows, mode=self.mode, span_normalise=sn\n            )\n            sigma4 = trait_linear_model(\n                ts, W, Z, windows=windows, mode=self.mode, span_normalise=sn\n            )\n\n            assert sigma1.shape == sigma2.shape\n            assert sigma1.shape == sigma3.shape\n            assert sigma1.shape == sigma4.shape\n            self.assertArrayAlmostEqual(sigma1, sigma2)\n            self.assertArrayAlmostEqual(sigma1, sigma3)\n            self.assertArrayAlmostEqual(sigma1, sigma4)\n\n\nclass TraitLinearModelMixin:\n    def test_interface(self, ts_10_mut_recomb_fixture):\n        ts = self.get_example_ts(ts_10_mut_recomb_fixture)\n        W = np.array([np.arange(ts.num_samples)]).T\n        Z = np.ones((ts.num_samples, 1))\n        sigma1 = ts.trait_linear_model(W, Z=Z, mode=self.mode)\n        sigma2 = ts.trait_linear_model(W, Z=Z, windows=None, mode=self.mode)\n        sigma3 = ts.trait_linear_model(\n            W, Z=Z, windows=[0.0, ts.sequence_length], mode=self.mode\n        )\n        sigma4 = ts.trait_linear_model(\n            W, Z=None, windows=[0.0, ts.sequence_length], mode=self.mode\n        )\n        assert sigma1.shape == sigma2.shape\n        assert sigma3.shape[0] == 1\n        assert sigma1.shape == sigma3.shape[1:]\n        assert sigma1.shape == sigma4.shape[1:]\n        self.assertArrayAlmostEqual(sigma1, sigma2)\n        self.assertArrayAlmostEqual(sigma1, sigma3[0])\n        self.assertArrayAlmostEqual(sigma1, sigma4[0])\n\n    def test_errors(self, ts_10_mut_recomb_fixture):\n        ts = self.get_example_ts(ts_10_mut_recomb_fixture)\n        W = np.array([np.arange(ts.num_samples)]).T\n        Z = np.ones((ts.num_samples, 1))\n        # singular covariates\n        with 
pytest.raises(ValueError):\n            ts.trait_linear_model(\n                W,\n                np.ones((ts.num_samples, 2)),\n                mode=self.mode,\n            )\n        # wrong dimensions of W\n        with pytest.raises(ValueError):\n            ts.trait_linear_model(W[1:, :], Z, mode=self.mode)\n        # wrong dimensions of Z\n        with pytest.raises(ValueError):\n            ts.trait_linear_model(W, Z[1:, :], mode=self.mode)\n\n    def test_deprecation(self, ts_10_mut_recomb_fixture):\n        ts = self.get_example_ts(ts_10_mut_recomb_fixture)\n        W = np.array([np.arange(ts.num_samples)]).T\n        Z = np.ones((ts.num_samples, 1))\n        with pytest.warns(FutureWarning):\n            ts.trait_regression(W, Z=Z, mode=self.mode)\n\n\n@pytest.mark.slow\nclass TestBranchTraitLinearModel(\n    TestTraitLinearModel, TopologyExamplesMixin, TraitLinearModelMixin\n):\n    mode = \"branch\"\n\n\n@pytest.mark.slow\nclass TestNodeTraitLinearModel(\n    TestTraitLinearModel, TopologyExamplesMixin, TraitLinearModelMixin\n):\n    mode = \"node\"\n\n\nclass TestSiteTraitLinearModel(\n    TestTraitLinearModel, MutatedTopologyExamplesMixin, TraitLinearModelMixin\n):\n    mode = \"site\"\n\n\n##############################\n# Sample set statistics\n##############################\n\n\n@pytest.mark.skip(reason=\"Broken - need to port tests\")\nclass SampleSetStatTestCase(StatsTestCase):\n    \"\"\"\n    Provides checks for testing of sample set-based statistics.  
Actual testing\n    is done by derived classes, which should have attributes `stat_type` and `rng`.\n    This works by using parallel structure between different statistic \"modes\",\n    in tree sequence methods (with stat_type=X) and python stat calculators as\n    implemented here (with StatCalculator.X).\n    \"\"\"\n\n    random_seed = 123456\n\n    def compare_sfs(self, ts, tree_fn, sample_sets, tsc_fn):\n        for sample_set in sample_sets:\n            windows = [\n                k * ts.sequence_length / 20\n                for k in [0] + sorted(self.rng.sample(range(1, 20), 4)) + [20]\n            ]\n            win_args = [\n                {\"begin\": windows[i], \"end\": windows[i + 1]}\n                for i in range(len(windows) - 1)\n            ]\n            tree_vals = [tree_fn(sample_set, **b) for b in win_args]\n\n            tsc_vals = tsc_fn(sample_set, windows)\n            assert len(tsc_vals) == len(windows) - 1\n            for i in range(len(windows) - 1):\n                self.assertListAlmostEqual(tsc_vals[i], tree_vals[i])\n\n    def check_sfs_interface(self, ts):\n        samples = ts.samples()\n\n        # empty sample sets will raise an error\n        with pytest.raises(ValueError):\n            ts.site_frequency_spectrum([], self.stat_type)\n        # sample_sets must be lists without repeated elements\n        with pytest.raises(ValueError):\n            ts.site_frequency_spectrum(\n                [samples[2], samples[2]],\n                self.stat_type,\n            )\n        # and must all be samples\n        with pytest.raises(ValueError):\n            ts.site_frequency_spectrum(\n                [samples[0], max(samples) + 1],\n                self.stat_type,\n            )\n        # windows must start at 0.0, be increasing, and extend to the end\n        with pytest.raises(ValueError):\n            ts.site_frequency_spectrum(\n                samples[0:2],\n                [0.1, ts.sequence_length],\n                
self.stat_type,\n            )\n        with pytest.raises(ValueError):\n            ts.site_frequency_spectrum(\n                samples[0:2],\n                [0.0, 0.8 * ts.sequence_length],\n                self.stat_type,\n            )\n        with pytest.raises(ValueError):\n            ts.site_frequency_spectrum(\n                samples[0:2],\n                [\n                    0.0,\n                    0.8 * ts.sequence_length,\n                    0.4 * ts.sequence_length,\n                    ts.sequence_length,\n                ],\n                self.stat_type,\n            )\n\n    def check_sfs(self, ts):\n        # check site frequency spectrum\n        self.check_sfs_interface(ts)\n        A = [\n            self.rng.sample(list(ts.samples()), 2),\n            self.rng.sample(list(ts.samples()), 4),\n            self.rng.sample(list(ts.samples()), 8),\n            self.rng.sample(list(ts.samples()), 10),\n            self.rng.sample(list(ts.samples()), 12),\n        ]\n        py_tsc = self.py_stat_class(ts)\n\n        self.compare_sfs(\n            ts, py_tsc.site_frequency_spectrum, A, ts.site_frequency_spectrum\n        )\n\n\nclass BranchSampleSetStatsTestCase(SampleSetStatTestCase):\n    \"\"\"\n    Tests of branch statistic computation with sample sets,\n    mostly running the checks in SampleSetStatTestCase.\n    \"\"\"\n\n    def setUp(self):\n        self.rng = random.Random(self.random_seed)\n        self.stat_type = \"branch\"\n\n    def get_ts(self):\n        for N in [12, 15, 20]:\n            yield msprime.simulate(\n                N, random_seed=self.random_seed, recombination_rate=10\n            )\n\n    @pytest.mark.skip(reason=\"Skipping SFS.\")\n    def test_sfs_interface(self):\n        ts = msprime.simulate(10)\n        tsc = tskit.BranchStatCalculator(ts)\n\n        # Duplicated samples raise an error\n        with pytest.raises(ValueError):\n            tsc.site_frequency_spectrum([1, 1])\n        with 
pytest.raises(ValueError):\n            tsc.site_frequency_spectrum([])\n        with pytest.raises(ValueError):\n            tsc.site_frequency_spectrum([0, 11])\n        # Check for bad windows\n        for bad_start in [-1, 1, 1e-7]:\n            with pytest.raises(ValueError):\n                tsc.site_frequency_spectrum(\n                    [1, 2],\n                    [bad_start, ts.sequence_length],\n                )\n        for bad_end in [0, ts.sequence_length - 1, ts.sequence_length + 1]:\n            with pytest.raises(ValueError):\n                tsc.site_frequency_spectrum([1, 2], [0, bad_end])\n        # Windows must be increasing.\n        with pytest.raises(ValueError):\n            tsc.site_frequency_spectrum([1, 2], [0, 1, 1])\n\n    @pytest.mark.skip(reason=\"No SFS.\")\n    def test_branch_sfs(self):\n        for ts in self.get_ts():\n            self.check_sfs(ts)\n\n\nclass SpecificTreesTestCase(StatsTestCase):\n    \"\"\"\n    Some particular cases, that are easy to see and debug.\n    \"\"\"\n\n    seed = 21\n\n    def test_case_1(self):\n        # With mutations:\n        #\n        # 1.0          6\n        # 0.7         / \\                                    5\n        #            /   X                                  / \\\n        # 0.5       X     4                4               /   4\n        #          /     / \\              / \\             /   X X\n        # 0.4     X     X   \\            X   3           X   /   \\\n        #        /     /     X          /   / X         /   /     \\\n        # 0.0   0     1       2        1   0   2       0   1       2\n        #          (0.0, 0.2),        (0.2, 0.8),       (0.8, 1.0)\n        #\n        # haplotypes:\n        # site:  0   1 2  3 4         5    6              7   8 9\n        # 0:     0   1 0  1 0         0    0              1   0 0\n        # 1:     1   0 0  0 1         1    0              0   1 0\n        # 2:     1   0 1  0 0         0    1              0   0 1\n       
 branch_true_diversity_01 = 2 * (\n            1 * (0.2 - 0) + 0.5 * (0.8 - 0.2) + 0.7 * (1.0 - 0.8)\n        )\n        branch_true_diversity_02 = 2 * (\n            1 * (0.2 - 0) + 0.4 * (0.8 - 0.2) + 0.7 * (1.0 - 0.8)\n        )\n        branch_true_diversity_12 = 2 * (\n            0.5 * (0.2 - 0) + 0.5 * (0.8 - 0.2) + 0.5 * (1.0 - 0.8)\n        )\n        branch_true_Y = 0.2 * (1 + 0.5) + 0.6 * (0.4) + 0.2 * (0.7 + 0.2)\n        site_true_Y = 3 + 0 + 1\n        node_true_diversity_012 = (\n            np.array(\n                [\n                    0.2 * np.array([2, 2, 2, 0, 2, 0, 0])\n                    + 0.6 * np.array([2, 2, 2, 2, 0, 0, 0])\n                    + 0.2 * np.array([2, 2, 2, 0, 2, 0, 0])\n                ]\n            )\n            / 3\n        )\n        node_true_divergence_0_12 = (\n            np.array(\n                [\n                    0.2 * np.array([2, 1, 1, 0, 2, 0, 0])\n                    + 0.6 * np.array([2, 1, 1, 1, 0, 0, 0])\n                    + 0.2 * np.array([2, 1, 1, 0, 2, 0, 0])\n                ]\n            )\n            / 2\n        )\n        haplotypes = np.array(\n            [\n                [0, 1, 1],\n                [1, 0, 0],\n                [0, 0, 1],\n                [1, 0, 0],\n                [0, 1, 0],\n                [0, 1, 0],\n                [0, 0, 1],\n                [1, 0, 0],\n                [0, 1, 0],\n                [0, 0, 1],\n            ]\n        )\n        traits = np.array([[1, 2, 3, 0], [-5, 0, 1, 1], [3, 4, 1.2, 2]])\n        # nb: verified the following with R\n        true_cov = (\n            np.cov(haplotypes, traits.T)[: haplotypes.shape[0], haplotypes.shape[0] :]\n            ** 2\n        )\n        true_cor = (\n            np.corrcoef(haplotypes, traits.T)[\n                : haplotypes.shape[0], haplotypes.shape[0] :\n            ]\n            ** 2\n        )\n        cov02 = np.cov(np.array([1, 0, 1]), traits.T)[:1, 1:] ** 2\n        true_branch_cov = (\n       
     true_cov[1, :] * 1.0 * 0.2  # branch 0, tree 0\n            + true_cov[4, :] * 0.5 * 0.2  # branch 1, tree 0\n            + true_cov[2, :] * 0.5 * 0.2  # branch 2, tree 0\n            + true_cov[0, :] * 0.5 * 0.2  # branch 4, tree 0\n            + true_cov[1, :] * 0.4 * 0.6  # branch 0, tree 1\n            + true_cov[4, :] * 0.5 * 0.6  # branch 1, tree 1\n            + true_cov[2, :] * 0.4 * 0.6  # branch 2, tree 1\n            + cov02 * 0.1 * 0.6  # branch 3, tree 1\n            + true_cov[1, :] * 0.7 * 0.2  # branch 0, tree 2\n            + true_cov[4, :] * 0.5 * 0.2  # branch 1, tree 2\n            + true_cov[2, :] * 0.5 * 0.2  # branch 2, tree 2\n            + true_cov[0, :] * 0.2 * 0.2  # branch 4, tree 2\n        )\n        cor02 = np.corrcoef(np.array([1, 0, 1]), traits.T)[:1, 1:] ** 2\n        true_branch_cor = (\n            true_cor[1, :] * 1.0 * 0.2  # branch 0, tree 0\n            + true_cor[4, :] * 0.5 * 0.2  # branch 1, tree 0\n            + true_cor[2, :] * 0.5 * 0.2  # branch 2, tree 0\n            + true_cor[0, :] * 0.5 * 0.2  # branch 4, tree 0\n            + true_cor[1, :] * 0.4 * 0.6  # branch 0, tree 1\n            + true_cor[4, :] * 0.5 * 0.6  # branch 1, tree 1\n            + true_cor[2, :] * 0.4 * 0.6  # branch 2, tree 1\n            + cor02 * 0.1 * 0.6  # branch 3, tree 1\n            + true_cor[1, :] * 0.7 * 0.2  # branch 0, tree 2\n            + true_cor[4, :] * 0.5 * 0.2  # branch 1, tree 2\n            + true_cor[2, :] * 0.5 * 0.2  # branch 2, tree 2\n            + true_cor[0, :] * 0.2 * 0.2  # branch 4, tree 2\n        )\n\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       1           0\n        3       0           0.4\n        4       0           0.5\n        5       0           0.7\n        6       0           1.0\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   
parent  child\n        0.2     0.8     3       0,2\n        0.0     0.2     4       1,2\n        0.2     0.8     4       1,3\n        0.8     1.0     4       1,2\n        0.8     1.0     5       0,4\n        0.0     0.2     6       0,4\n        \"\"\"\n        )\n        sites = io.StringIO(\n            \"\"\"\\\n        id  position    ancestral_state\n        0   0.05        0\n        1   0.1         0\n        2   0.11        0\n        3   0.15        0\n        4   0.151       0\n        5   0.3         0\n        6   0.6         0\n        7   0.9         0\n        8   0.95        0\n        9   0.951       0\n        \"\"\"\n        )\n        mutations = io.StringIO(\n            \"\"\"\\\n        site    node    derived_state\n        0       4       1\n        1       0       1\n        2       2       1\n        3       0       1\n        4       1       1\n        5       1       1\n        6       2       1\n        7       0       1\n        8       1       1\n        9       2       1\n        \"\"\"\n        )\n        ts = tskit.load_text(\n            nodes=nodes, edges=edges, sites=sites, mutations=mutations, strict=False\n        )\n\n        # diversity between 0 and 1\n        A = [[0], [1]]\n        n = [len(a) for a in A]\n\n        def f(x):\n            return np.array(\n                [float(x[0] * (n[1] - x[1]) + (n[0] - x[0]) * x[1]) / (2 * n[0] * n[1])]\n            )\n\n        # tree lengths:\n        mode = \"branch\"\n        self.assertAlmostEqual(\n            divergence(ts, [[0], [1]], [(0, 1)], mode=mode), branch_true_diversity_01\n        )\n        self.assertAlmostEqual(\n            ts.divergence([[0], [1]], [(0, 1)], mode=mode), branch_true_diversity_01\n        )\n        self.assertAlmostEqual(\n            ts.sample_count_stat(A, f, 1, mode=mode)[0], branch_true_diversity_01\n        )\n        self.assertAlmostEqual(\n            ts.diversity([[0, 1]], mode=mode)[0], branch_true_diversity_01\n        )\n\n        # 
mean diversity between [0, 1] and [0, 2]:\n        branch_true_mean_diversity = (\n            0\n            + branch_true_diversity_02\n            + branch_true_diversity_01\n            + branch_true_diversity_12\n        ) / 4\n        A = [[0, 1], [0, 2]]\n        n = [len(a) for a in A]\n\n        def f(x):\n            return np.array([float(x[0] * (n[1] - x[1]) + (n[0] - x[0]) * x[1]) / 8.0])\n\n        # tree lengths:\n        self.assertAlmostEqual(\n            divergence(ts, [A[0], A[1]], [(0, 1)], mode=mode),\n            branch_true_mean_diversity,\n        )\n        self.assertAlmostEqual(\n            ts.divergence([A[0], A[1]], [(0, 1)], mode=mode), branch_true_mean_diversity\n        )\n        self.assertAlmostEqual(\n            ts.sample_count_stat(A, f, 1, mode=mode)[0], branch_true_mean_diversity\n        )\n\n        # Y-statistic for (0/12)\n        A = [[0], [1, 2]]\n\n        def f(x):\n            return np.array(\n                [\n                    float(((x[0] == 1) and (x[1] == 0)) or ((x[0] == 0) and (x[1] == 2)))\n                    / 2.0\n                ]\n            )\n\n        # tree lengths:\n        bts_Y = ts.Y3([[0], [1], [2]], mode=mode)\n        py_bsc_Y = Y3(ts, [[0], [1], [2]], [(0, 1, 2)], windows=[0.0, 1.0], mode=mode)\n        self.assertArrayAlmostEqual(bts_Y, branch_true_Y)\n        self.assertArrayAlmostEqual(py_bsc_Y, branch_true_Y)\n        self.assertArrayAlmostEqual(\n            ts.sample_count_stat(A, f, 1, mode=mode)[0], branch_true_Y\n        )\n\n        mode = \"site\"\n        # sites, Y:\n        sts_Y = ts.Y3([[0], [1], [2]], mode=mode)\n        py_ssc_Y = Y3(ts, [[0], [1], [2]], [(0, 1, 2)], windows=[0.0, 1.0], mode=mode)\n        self.assertArrayAlmostEqual(sts_Y, site_true_Y)\n        self.assertArrayAlmostEqual(py_ssc_Y, site_true_Y)\n        self.assertArrayAlmostEqual(\n            ts.sample_count_stat(A, f, 1, mode=mode)[0], site_true_Y\n        )\n\n        A = [[0, 1, 2]]\n        n = 
3\n        W = np.array([[u in A[0]] for u in ts.samples()], dtype=float)\n\n        def f(x):\n            return np.array([x[0] * (n - x[0]) / (n * (n - 1))])\n\n        mode = \"node\"\n        # nodes, diversity in [0,1,2]\n        nodes_div_012 = ts.diversity([[0, 1, 2]], mode=mode).reshape((1, 7))\n        py_nodes_div_012 = diversity(ts, [[0, 1, 2]], mode=mode).reshape((1, 7))\n        py_general_nodes_div_012 = general_stat(ts, W, f, mode=mode).reshape((1, 7))\n        self.assertArrayAlmostEqual(py_nodes_div_012, node_true_diversity_012)\n        self.assertArrayAlmostEqual(py_general_nodes_div_012, node_true_diversity_012)\n        self.assertArrayAlmostEqual(nodes_div_012, node_true_diversity_012)\n\n        # nodes, divergence [0] to [1,2]\n        nodes_div_0_12 = ts.divergence([[0], [1, 2]], mode=mode).reshape((1, 7))\n        py_nodes_div_0_12 = divergence(ts, [[0], [1, 2]], mode=mode).reshape((1, 7))\n        self.assertArrayAlmostEqual(nodes_div_0_12, node_true_divergence_0_12)\n        self.assertArrayAlmostEqual(py_nodes_div_0_12, node_true_divergence_0_12)\n\n        # covariance and correlation\n        ts_sitewise_cov = ts.trait_covariance(\n            traits, mode=\"site\", windows=\"sites\", span_normalise=False\n        )\n        py_sitewise_cov = site_trait_covariance(\n            ts, traits, windows=\"sites\", span_normalise=False\n        )\n        self.assertArrayAlmostEqual(py_sitewise_cov, true_cov)\n        self.assertArrayAlmostEqual(ts_sitewise_cov, true_cov)\n        ts_sitewise_cor = ts.trait_correlation(\n            traits, mode=\"site\", windows=\"sites\", span_normalise=False\n        )\n        py_sitewise_cor = site_trait_correlation(\n            ts, traits, windows=\"sites\", span_normalise=False\n        )\n        self.assertArrayAlmostEqual(py_sitewise_cor, true_cor)\n        self.assertArrayAlmostEqual(ts_sitewise_cor, true_cor)\n        # mean\n        ts_mean_cov = ts.trait_covariance(\n            traits, 
mode=\"site\", windows=[0, ts.sequence_length]\n        )\n        py_mean_cov = site_trait_covariance(ts, traits)\n        self.assertArrayAlmostEqual(ts_mean_cov, np.array([np.sum(true_cov, axis=0)]))\n        self.assertArrayAlmostEqual(ts_mean_cov, py_mean_cov)\n        ts_mean_cor = ts.trait_correlation(\n            traits, mode=\"site\", windows=[0, ts.sequence_length]\n        )\n        py_mean_cor = site_trait_correlation(ts, traits)\n        self.assertArrayAlmostEqual(ts_mean_cor, np.array([np.sum(true_cor, axis=0)]))\n        self.assertArrayAlmostEqual(ts_mean_cor, py_mean_cor)\n        # mode = 'branch'\n        ts_mean_cov = ts.trait_covariance(\n            traits, mode=\"branch\", windows=[0, ts.sequence_length]\n        )\n        py_mean_cov = branch_trait_covariance(ts, traits)\n        self.assertArrayAlmostEqual(ts_mean_cov, true_branch_cov)\n        self.assertArrayAlmostEqual(ts_mean_cov, py_mean_cov)\n        ts_mean_cor = ts.trait_correlation(\n            traits, mode=\"branch\", windows=[0, ts.sequence_length]\n        )\n        py_mean_cor = branch_trait_correlation(ts, traits)\n        self.assertArrayAlmostEqual(ts_mean_cor, true_branch_cor)\n        self.assertArrayAlmostEqual(ts_mean_cor, py_mean_cor)\n\n        # trait linear_model:\n        # r = cor * sd(y) / sd(x) = cov / var(x)\n        # geno_var = allele_freqs * (1 - allele_freqs) * (3 / (3 - 1))\n        geno_var = np.var(haplotypes, axis=1) * (3 / (3 - 1))\n        trait_var = np.var(traits, axis=0) * (3 / (3 - 1))\n        py_r = trait_linear_model(\n            ts, traits, None, mode=\"site\", windows=\"sites\", span_normalise=False\n        )\n        ts_r = ts.trait_linear_model(\n            traits, None, mode=\"site\", windows=\"sites\", span_normalise=False\n        )\n        self.assertArrayAlmostEqual(py_r, ts_r)\n        self.assertArrayAlmostEqual(true_cov, py_r * (geno_var[:, np.newaxis] ** 2))\n        self.assertArrayAlmostEqual(true_cor, ts_r * geno_var[:, 
np.newaxis] / trait_var)\n\n    def test_case_odds_and_ends(self):\n        # Tests having (a) the first site after the first window, and\n        # (b) no samples having the ancestral state.\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       0           0.5\n        3       0           1.0\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.0     0.5     2       0,1\n        0.5     1.0     3       0,1\n        \"\"\"\n        )\n        sites = io.StringIO(\n            \"\"\"\\\n        id  position    ancestral_state\n        0   0.65        0\n        \"\"\"\n        )\n        mutations = io.StringIO(\n            \"\"\"\\\n        site    node    derived_state   parent\n        0       0       1               -1\n        0       1       2               -1\n        \"\"\"\n        )\n        ts = tskit.load_text(\n            nodes=nodes, edges=edges, sites=sites, mutations=mutations, strict=False\n        )\n\n        mode = \"site\"\n        py_div = divergence(\n            ts, [[0], [1]], indexes=[(0, 1)], windows=[0.0, 0.5, 1.0], mode=mode\n        )\n        div = ts.divergence(\n            [[0], [1]], indexes=[(0, 1)], windows=[0.0, 0.5, 1.0], mode=mode\n        )\n        self.assertArrayEqual(py_div, div)\n\n    def test_case_four_taxa(self, four_taxa_test_case):\n        #\n        # 1.0          7\n        # 0.7         / \\                                    6\n        #            /   \\                                  / \\\n        # 0.5       /     5              5                 /   5\n        #          /     / \\            / \\__             /   / \\\n        # 0.4     /     8   \\          8     4           /   8   \\\n        #        /     / \\   \\        / \\   / \\         /   / \\   \\\n        # 0.0   0     1   3   2      1   3 0   2       0  
 1   3   2\n        #          (0.0, 0.2),        (0.2, 0.8),       (0.8, 2.5)\n        ts = four_taxa_test_case\n\n        # f4(0, 1, 2, 3): (0 -> 1)(2 -> 3)\n        branch_true_f4_0123 = (0.1 * 0.2 + (0.1 + 0.1) * 0.6 + 0.1 * 1.7) / 2.5\n        windows = [0.0, 0.4, 2.5]\n        branch_true_f4_0123_windowed = np.array(\n            [\n                (0.1 * 0.2 + (0.1 + 0.1) * 0.2) / 0.4,\n                ((0.1 + 0.1) * 0.4 + 0.1 * 1.7) / 2.1,\n            ]\n        )\n        # f4(0, 3, 2, 1): (0 -> 3)(2 -> 1)\n        branch_true_f4_0321 = (0.1 * 0.2 + (0.1 + 0.1) * 0.6 + 0.1 * 1.7) / 2.5\n        # f2([0,2], [1,3]) = (1/2) (f4(0,1,2,3) + f4(0,3,2,1))\n        branch_true_f2_02_13 = (branch_true_f4_0123 + branch_true_f4_0321) / 2\n        # diversity([0,1,2,3])\n        branch_true_diversity_windowed = (2 / 6) * np.array(\n            [\n                [\n                    (\n                        0.2 * (1 + 1 + 1 + 0.5 + 0.4 + 0.5)\n                        + (0.4 - 0.2) * (0.5 + 0.4 + 0.5 + 0.5 + 0.4 + 0.5)\n                    )\n                    / 0.4\n                ],\n                [\n                    (\n                        (0.8 - 0.4) * (0.5 + 0.4 + 0.5 + 0.5 + 0.4 + 0.5)\n                        + (2.5 - 0.8) * (0.7 + 0.7 + 0.7 + 0.5 + 0.4 + 0.5)\n                    )\n                    / (2.5 - 0.4)\n                ],\n            ]\n        )\n\n        mode = \"branch\"\n        A = [[0], [1], [2], [3]]\n        self.assertAlmostEqual(branch_true_f4_0123, f4(ts, A, mode=mode)[0][0])\n        self.assertAlmostEqual(branch_true_f4_0123, ts.f4(A, mode=mode))\n        self.assertArrayAlmostEqual(\n            branch_true_f4_0123_windowed, ts.f4(A, windows=windows, mode=mode).flatten()\n        )\n        A = [[0], [3], [2], [1]]\n        self.assertAlmostEqual(\n            branch_true_f4_0321, f4(ts, A, [(0, 1, 2, 3)], mode=mode)[0][0]\n        )\n        self.assertAlmostEqual(branch_true_f4_0321, ts.f4(A, mode=mode))\n        
A = [[0], [2], [1], [3]]\n        self.assertAlmostEqual(0.0, f4(ts, A, [(0, 1, 2, 3)], mode=mode)[0])\n        self.assertAlmostEqual(0.0, ts.f4(A, mode=mode))\n        A = [[0, 2], [1, 3]]\n        self.assertAlmostEqual(\n            branch_true_f2_02_13, f2(ts, A, [(0, 1)], mode=mode)[0][0]\n        )\n        self.assertAlmostEqual(branch_true_f2_02_13, ts.f2(A, mode=mode))\n\n        # diversity\n        A = [[0, 1, 2, 3]]\n        self.assertArrayAlmostEqual(\n            branch_true_diversity_windowed, diversity(ts, A, windows=windows, mode=mode)\n        )\n        self.assertArrayAlmostEqual(\n            branch_true_diversity_windowed, ts.diversity(A, windows=windows, mode=mode)\n        )\n\n    def test_case_recurrent_muts(self):\n        # With mutations:\n        #\n        # 1.0          6\n        # 0.7         / \\                                    5\n        #           (0)  \\                                  /(6)\n        # 0.5      (1)    4                4               /   4\n        #          /     / \\              / \\             /  (7|8)\n        # 0.4    (2)   (3)  \\           (4)  3           /   /   \\\n        #        /     /     \\          /   /(5)        /   /     \\\n        # 0.0   0     1       2        1   0   2       0   1       2\n        #          (0.0, 0.2),        (0.2, 0.8),       (0.8, 1.0)\n        # genotypes:\n        #       0     2       0        1   0   1       0   2       3\n        site_true_Y = 0 + 1 + 1\n\n        nodes = io.StringIO(\n            \"\"\"\\\n        id      is_sample   time\n        0       1           0\n        1       1           0\n        2       1           0\n        3       0           0.4\n        4       0           0.5\n        5       0           0.7\n        6       0           1.0\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.2     0.8     3       0,2\n        0.0     0.2     4       1,2\n     
   0.2     0.8     4       1,3\n        0.8     1.0     4       1,2\n        0.8     1.0     5       0,4\n        0.0     0.2     6       0,4\n        \"\"\"\n        )\n        sites = io.StringIO(\n            \"\"\"\\\n        id  position    ancestral_state\n        0   0.05        0\n        1   0.3         0\n        2   0.9         0\n        \"\"\"\n        )\n        mutations = io.StringIO(\n            \"\"\"\\\n        site    node    derived_state   parent\n        0       0       1               -1\n        0       0       2               0\n        0       0       0               1\n        0       1       2               -1\n        1       1       1               -1\n        1       2       1               -1\n        2       4       1               -1\n        2       1       2               6\n        2       2       3               6\n        \"\"\"\n        )\n        ts = tskit.load_text(\n            nodes=nodes, edges=edges, sites=sites, mutations=mutations, strict=False\n        )\n\n        # Y3:\n        site_tsc_Y = ts.Y3([[0], [1], [2]], mode=\"site\")\n        py_ssc_Y = Y3(ts, [[0], [1], [2]], [(0, 1, 2)], windows=[0.0, 1.0], mode=\"site\")\n        self.assertAlmostEqual(site_tsc_Y, site_true_Y)\n        self.assertAlmostEqual(py_ssc_Y, site_true_Y)\n\n    def test_case_2(self):\n        # Here are the trees:\n        # t                  |              |              |             |            |\n        #\n        # 0       --3--      |     --3--    |     --3--    |    --3--    |    --3--   |\n        #        /  |  \\     |    /  |  \\   |    /     \\   |   /     \\   |   /     \\  |\n        # 1     4   |   5    |   4   |   5  |   4       5  |  4       5  |  4       5 |\n        #       |\\ / \\ /|    |   |\\   \\     |   |\\     /   |  |\\     /   |  |\\     /| |\n        # 2     | 6   7 |    |   | 6   7    |   | 6   7    |  | 6   7    |  | 6   7 | |\n        #       | |\\ /| |    |   *  \\  |    |   |  \\  |    |  |  \\       | 
 |  \\    | |\n        # 3     | | 8 | |    |   |   8 *    |   |   8 |    |  |   8      |  |   8   | |\n        #       | |/ \\| |    |   |  /  |    |   |  /  |    |  |  / \\     |  |  / \\  | |\n        # 4     | 9  10 |    |   * 9  10    |   | 9  10    |  | 9  10    |  | 9  10 | |\n        #       |/ \\ / \\|    |   |  \\   \\   |   |  \\   \\   |  |  \\   \\   |  |  \\    | |\n        # 5     0   1   2    |   0   1   2  |   0   1   2  |  0   1   2  |  0   1   2 |\n        #\n        #                    |   0.0 - 0.1  |   0.1 - 0.2  |  0.2 - 0.4  |  0.4 - 0.5 |\n        # ... continued:\n        # t                  |             |             |             |\n        #\n        # 0         --3--    |    --3--    |    --3--    |    --3--    |    --3--\n        #          /     \\   |   /     \\   |   /     \\   |   /     \\   |   /  |  \\\n        # 1       4       5  |  4       5  |  4       5  |  4       5  |  4   |   5\n        #         |\\     /|  |   \\     /|  |   \\     /|  |   \\     /|  |     /   /|\n        # 2       | 6   7 |  |    6   7 |  |    6   7 |  |    6   7 |  |    6   7 |\n        #         |  *    *  |     \\    |  |       *  |  |    |  /  |  |    |  /  |\n        # 3  ...  
|   8   |  |      8   |  |      8   |  |    | 8   |  |    | 8   |\n        #         |  / \\  |  |     / \\  |  |     * \\  |  |    |  \\  |  |    |  \\  |\n        # 4       | 9  10 |  |    9  10 |  |    9  10 |  |    9  10 |  |    9  10 |\n        #         |    /  |  |   /   /  |  |   /   /  |  |   /   /  |  |   /   /  |\n        # 5       0   1   2  |  0   1   2  |  0   1   2  |  0   1   2  |  0   1   2\n        #\n        #         0.5 - 0.6  |  0.6 - 0.7  |  0.7 - 0.8  |  0.8 - 0.9  |  0.9 - 1.0\n        #\n        # Above, subsequent mutations are backmutations.\n\n        # divergence betw 0 and 1\n        branch_true_diversity_01 = 2 * (0.6 * 4 + 0.2 * 2 + 0.2 * 5)\n        # divergence betw 1 and 2\n        branch_true_diversity_12 = 2 * (0.2 * 5 + 0.2 * 2 + 0.3 * 5 + 0.3 * 4)\n        # divergence betw 0 and 2\n        branch_true_diversity_02 = 2 * (0.2 * 5 + 0.2 * 4 + 0.3 * 5 + 0.1 * 4 + 0.2 * 5)\n        # Y(0;1, 2)\n        branch_true_Y = 0.2 * 4 + 0.2 * (4 + 2) + 0.2 * 4 + 0.2 * 2 + 0.2 * (5 + 1)\n\n        # site stats\n        # Y(0;1, 2)\n        site_true_Y = 1\n\n        nodes = io.StringIO(\n            \"\"\"\\\n        is_sample       time    population\n        1       0.000000        0\n        1       0.000000        0\n        1       0.000000        0\n        0       5.000000        0\n        0       4.000000        0\n        0       4.000000        0\n        0       3.000000        0\n        0       3.000000        0\n        0       2.000000        0\n        0       1.000000        0\n        0       1.000000        0\n        \"\"\"\n        )\n        edges = io.StringIO(\n            \"\"\"\\\n        left    right   parent  child\n        0.500000        1.000000        10      1\n        0.000000        0.400000        10      2\n        0.600000        1.000000        9       0\n        0.000000        0.500000        9       1\n        0.800000        1.000000        8       10\n        0.200000        0.800000        8  
     9,10\n        0.000000        0.200000        8       9\n        0.700000        1.000000        7       8\n        0.000000        0.200000        7       10\n        0.800000        1.000000        6       9\n        0.000000        0.700000        6       8\n        0.400000        1.000000        5       2,7\n        0.100000        0.400000        5       7\n        0.600000        0.900000        4       6\n        0.000000        0.600000        4       0,6\n        0.900000        1.000000        3       4,5,6\n        0.100000        0.900000        3       4,5\n        0.000000        0.100000        3       4,5,7\n        \"\"\"\n        )\n        sites = io.StringIO(\n            \"\"\"\\\n        id  position    ancestral_state\n        0   0.0         0\n        1   0.55        0\n        2   0.75        0\n        3   0.85        0\n        \"\"\"\n        )\n        mutations = io.StringIO(\n            \"\"\"\\\n        site    node    derived_state   parent\n        0       0       1               -1\n        0       10      1               -1\n        0       0       0               0\n        1       8       1               -1\n        1       2       1               -1\n        2       8       1               -1\n        2       9       0               5\n        \"\"\"\n        )\n        ts = tskit.load_text(\n            nodes=nodes, edges=edges, sites=sites, mutations=mutations, strict=False\n        )\n\n        def f(x):\n            return np.array([float(x[0] == 1) / 2.0])\n\n        # divergence between 0 and 1\n        mode = \"branch\"\n        for A, truth in zip(\n            [[[0, 1]], [[1, 2]], [[0, 2]]],\n            [\n                branch_true_diversity_01,\n                branch_true_diversity_12,\n                branch_true_diversity_02,\n            ],\n        ):\n            self.assertAlmostEqual(diversity(ts, A, mode=mode)[0][0], truth)\n            self.assertAlmostEqual(ts.sample_count_stat(A, f, 1, 
mode=mode)[0], truth)\n            self.assertAlmostEqual(ts.diversity(A, mode=\"branch\")[0], truth)\n\n        # Y-statistic for (0/12)\n        A = [[0], [1, 2]]\n\n        def f(x):\n            return np.array(\n                [\n                    float(((x[0] == 1) and (x[1] == 0)) or ((x[0] == 0) and (x[1] == 2)))\n                    / 2.0\n                ]\n            )\n\n        # tree lengths:\n        self.assertArrayAlmostEqual(\n            Y3(ts, [[0], [1], [2]], [(0, 1, 2)], mode=mode), branch_true_Y\n        )\n        self.assertArrayAlmostEqual(\n            ts.Y3([[0], [1], [2]], [(0, 1, 2)], mode=mode), branch_true_Y\n        )\n        self.assertArrayAlmostEqual(\n            ts.sample_count_stat(A, f, 1, mode=mode)[0], branch_true_Y\n        )\n\n        # sites:\n        mode = \"site\"\n        site_tsc_Y = ts.Y3([[0], [1], [2]], mode=mode)\n        py_ssc_Y = Y3(ts, [[0], [1], [2]], [(0, 1, 2)], windows=[0.0, 1.0])\n        self.assertAlmostEqual(site_tsc_Y, site_true_Y)\n        self.assertAlmostEqual(py_ssc_Y, site_true_Y)\n        self.assertAlmostEqual(ts.sample_count_stat(A, f, 1, mode=mode)[0], site_true_Y)\n\n\nclass TestOutputDimensions(StatsTestCase):\n    \"\"\"\n    Tests for the dimension stripping behaviour of the stats functions.\n    \"\"\"\n\n    def test_one_way_no_window_scalar_stat(self, ts_10_mut_fixture):\n        ts = ts_10_mut_fixture\n        x = ts.diversity()\n        assert isinstance(x, np.floating)\n\n    def test_one_way_one_list_scalar_stat(self, ts_10_mut_fixture):\n        ts = ts_10_mut_fixture\n        x = ts.diversity(sample_sets=list(ts.samples()))\n        assert isinstance(x, np.floating)\n\n    def test_one_way_nested_list_not_scalar_stat(self, ts_10_mut_fixture):\n        ts = ts_10_mut_fixture\n        x = ts.diversity(sample_sets=[list(ts.samples())])\n        assert x.shape == (1,)\n\n    def test_one_way_one_window_scalar_stat(self, ts_10_mut_fixture):\n        ts = ts_10_mut_fixture\n    
    x = ts.diversity(windows=[0, ts.sequence_length])\n        assert x.shape == (1,)\n        for samples in (None, list(ts.samples())):\n            x = ts.diversity(sample_sets=samples, windows=[0, ts.sequence_length])\n            assert x.shape == (1,)\n\n    def test_multi_way_no_window_scalar_stat(self, ts_10_mut_fixture):\n        ts = ts_10_mut_fixture\n        n = ts.num_samples\n        x = ts.f2(\n            sample_sets=[\n                [i for i in range(0, int(n / 2))],\n                [i for i in range(int(n / 2), n)],\n            ]\n        )\n        assert isinstance(x, np.floating)\n\n    def test_multi_way_one_window_not_scalar_stat(self, ts_10_mut_fixture):\n        ts = ts_10_mut_fixture\n        n = ts.num_samples\n        x = ts.f2(\n            sample_sets=[\n                [i for i in range(0, int(n / 2))],\n                [i for i in range(int(n / 2), n)],\n            ],\n            windows=[0, ts.sequence_length],\n        )\n        assert x.shape == (1,)\n\n    def test_multi_way_no_indexes_scalar_stat(self, ts_10_mut_fixture):\n        ts = ts_10_mut_fixture\n        n = ts.num_samples\n        x = ts.f2(\n            sample_sets=[\n                [i for i in range(0, int(n / 2))],\n                [i for i in range(int(n / 2), n)],\n            ],\n        )\n        assert isinstance(x, np.floating)\n\n    def test_multi_way_indexes_not_scalar_stat(self, ts_10_mut_fixture):\n        ts = ts_10_mut_fixture\n        n = ts.num_samples\n        x = ts.f2(\n            sample_sets=[\n                [i for i in range(0, int(n / 2))],\n                [i for i in range(int(n / 2), n)],\n            ],\n            indexes=[(0, 1)],\n        )\n        assert x.shape == (1,)\n\n    def test_afs_default_windows(self, ts_10_mut_fixture):\n        ts = ts_10_mut_fixture\n        n = ts.num_samples\n        A = ts.samples()[:4]\n        B = ts.samples()[6:]\n        for mode in [\"site\", \"branch\"]:\n            x = 
ts.allele_frequency_spectrum(mode=mode)\n            # x is a 1D numpy array with n + 1 values\n            assert x.shape == (n + 1,)\n            self.assertArrayEqual(\n                x, ts.allele_frequency_spectrum([ts.samples()], mode=mode)\n            )\n            x = ts.allele_frequency_spectrum([A, B], mode=mode)\n            assert x.shape == (len(A) + 1, len(B) + 1)\n\n    def test_afs_windows(self, ts_10_mut_fixture):\n        ts = ts_10_mut_fixture\n        L = ts.sequence_length\n\n        windows = [0, L / 4, L / 2, L]\n        A = ts.samples()[:4]\n        B = ts.samples()[6:]\n        for mode in [\"site\", \"branch\"]:\n            x = ts.allele_frequency_spectrum([A, B], windows=windows, mode=mode)\n            assert x.shape == (3, len(A) + 1, len(B) + 1)\n\n            x = ts.allele_frequency_spectrum([A], windows=windows, mode=mode)\n            assert x.shape == (3, len(A) + 1)\n\n            x = ts.allele_frequency_spectrum(windows=windows, mode=mode)\n            # Default returns this for all samples\n            assert x.shape == (3, ts.num_samples + 1)\n            y = ts.allele_frequency_spectrum([ts.samples()], windows=windows, mode=mode)\n            self.assertArrayEqual(x, y)\n\n    def test_one_way_stat_default_windows(self, ts_10_mut_fixture):\n        ts = ts_10_mut_fixture\n        # Use diversity as the example one-way stat.\n        for mode in [\"site\", \"branch\"]:\n            x = ts.diversity(mode=mode)\n            # x is a zero-d numpy value\n            assert np.shape(x) == tuple()\n            assert x == float(x)\n            assert x == ts.diversity(ts.samples(), mode=mode)\n            self.assertArrayEqual([x], ts.diversity([ts.samples()], mode=mode))\n\n        mode = \"node\"\n        x = ts.diversity(mode=mode)\n        # x is a 1D numpy array with N values\n        assert x.shape == (ts.num_nodes,)\n        self.assertArrayEqual(x, ts.diversity(ts.samples(), mode=mode))\n        y = 
ts.diversity([ts.samples()], mode=mode)\n        # We're adding on the *last* dimension, so must reshape\n        self.assertArrayEqual(x.reshape(ts.num_nodes, 1), y)\n\n    def verify_one_way_stat_windows(self, ts, method):\n        L = ts.sequence_length\n        N = ts.num_nodes\n\n        windows = [0, L / 4, L / 2, 0.75 * L, L]\n        A = ts.samples()[:6]\n        B = ts.samples()[6:]\n        for mode in [\"site\", \"branch\"]:\n            x = method([A, B], windows=windows, mode=mode)\n            # Four windows, 2 sets.\n            assert x.shape == (4, 2)\n\n            x = method([A], windows=windows, mode=mode)\n            # Four windows, 1 set.\n            assert x.shape == (4, 1)\n\n            x = method(A, windows=windows, mode=mode)\n            # Dropping the outer list removes the last dimension\n            assert x.shape == (4,)\n\n            x = method(windows=windows, mode=mode)\n            # Default returns this for all samples\n            assert x.shape == (4,)\n            y = method(ts.samples(), windows=windows, mode=mode)\n            self.assertArrayEqual(x, y)\n\n        mode = \"node\"\n        x = method([A, B], windows=windows, mode=mode)\n        # Four windows, N nodes and 2 sets.\n        assert x.shape == (4, N, 2)\n\n        x = method([A], windows=windows, mode=mode)\n        # Four windows, N nodes and 1 set.\n        assert x.shape == (4, N, 1)\n\n        x = method(A, windows=windows, mode=mode)\n        # Drop the outer list, so we lose the last dimension\n        assert x.shape == (4, N)\n\n        x = method(windows=windows, mode=mode)\n        # The default sample sets also drop the last dimension\n        assert x.shape == (4, N)\n\n        assert ts.num_trees == 1\n        # In this example, we know that the trees are all the same so check this\n        # for sanity.\n        self.assertArrayEqual(x[0], x[1])\n        self.assertArrayEqual(x[0], x[2])\n\n    def test_diversity_windows(self, 
ts_10_mut_fixture):\n        ts = ts_10_mut_fixture\n        self.verify_one_way_stat_windows(ts, ts.diversity)\n\n    def test_Tajimas_D_windows(self, ts_10_mut_fixture):\n        ts = ts_10_mut_fixture\n        self.verify_one_way_stat_windows(ts, ts.Tajimas_D)\n\n    def test_segregating_sites_windows(self, ts_10_mut_fixture):\n        ts = ts_10_mut_fixture\n        self.verify_one_way_stat_windows(ts, ts.segregating_sites)\n\n    def test_two_way_stat_default_windows(self, ts_10_mut_fixture):\n        ts = ts_10_mut_fixture\n        # Use divergence as the example one-way stat.\n        A = ts.samples()[:6]\n        B = ts.samples()[6:]\n        for mode in [\"site\", \"branch\"]:\n            x = ts.divergence([A, B], mode=mode)\n            # x is a zero-d numpy value\n            assert np.shape(x) == tuple()\n            assert x == float(x)\n            # If indexes is a 1D array, we also drop the outer dimension\n            assert x == ts.divergence([A, B, A], indexes=[0, 1], mode=mode)\n            # But, if it's a 2D array we keep the outer dimension\n            assert [x] == ts.divergence([A, B], indexes=[[0, 1]], mode=mode)\n\n        mode = \"node\"\n        x = ts.divergence([A, B], mode=mode)\n        # x is a 1D numpy array with N values\n        assert x.shape == (ts.num_nodes,)\n        self.assertArrayEqual(x, ts.divergence([A, B], indexes=[0, 1], mode=mode))\n        y = ts.divergence([A, B], indexes=[[0, 1]], mode=mode)\n        # We're adding on the *last* dimension, so must reshape\n        self.assertArrayEqual(x.reshape(ts.num_nodes, 1), y)\n\n    def verify_two_way_stat_windows(self, ts, method):\n        L = ts.sequence_length\n        N = ts.num_nodes\n\n        windows = [0, L / 4, L / 2, L]\n        A = ts.samples()[:7]\n        B = ts.samples()[7:]\n        for mode in [\"site\", \"branch\"]:\n            x = method([A, B, A], indexes=[[0, 1], [0, 2]], windows=windows, mode=mode)\n            # Three windows, 2 pairs\n            
assert x.shape == (3, 2)\n\n            x = method([A, B], indexes=[[0, 1]], windows=windows, mode=mode)\n            # Three windows, 1 pair\n            assert x.shape == (3, 1)\n\n            x = method([A, B], indexes=[0, 1], windows=windows, mode=mode)\n            # Dropping the outer list removes the last dimension\n            assert x.shape == (3,)\n\n            y = method([A, B], windows=windows, mode=mode)\n            assert y.shape == (3,)\n            self.assertArrayEqual(x, y)\n\n        mode = \"node\"\n        x = method([A, B], indexes=[[0, 1], [0, 1]], windows=windows, mode=mode)\n        # Three windows, N nodes and 2 pairs\n        assert x.shape == (3, N, 2)\n\n        x = method([A, B], indexes=[[0, 1]], windows=windows, mode=mode)\n        # Three windows, N nodes and 1 pairs\n        assert x.shape == (3, N, 1)\n\n        x = method([A, B], indexes=[0, 1], windows=windows, mode=mode)\n        # Drop the outer list, so we lose the last dimension\n        assert x.shape == (3, N)\n\n        x = method([A, B], windows=windows, mode=mode)\n        # The default sample sets also drops the last dimension\n        assert x.shape == (3, N)\n\n        assert ts.num_trees == 1\n        # In this example, we know that the trees are all the same so check this\n        # for sanity.\n        self.assertArrayEqual(x[0], x[1])\n        self.assertArrayEqual(x[0], x[2])\n\n    def test_divergence_windows(self, ts_10_mut_fixture):\n        ts = ts_10_mut_fixture\n        self.verify_two_way_stat_windows(ts, ts.divergence)\n\n    def test_Fst_windows(self, ts_10_mut_fixture):\n        ts = ts_10_mut_fixture\n        self.verify_two_way_stat_windows(ts, ts.Fst)\n\n    def test_f2_windows(self, ts_10_mut_fixture):\n        ts = ts_10_mut_fixture\n        self.verify_two_way_stat_windows(ts, ts.f2)\n\n    def verify_three_way_stat_windows(self, ts, method):\n        L = ts.sequence_length\n        N = ts.num_nodes\n\n        windows = [0, L / 4, L / 2, L]\n   
     A = ts.samples()[:2]\n        B = ts.samples()[2:4]\n        C = ts.samples()[4:]\n        for mode in [\"site\", \"branch\"]:\n            x = method(\n                [A, B, C], indexes=[[0, 1, 2], [0, 2, 1]], windows=windows, mode=mode\n            )\n            # Three windows, 2 triple\n            assert x.shape == (3, 2)\n\n            x = method([A, B, C], indexes=[[0, 1, 2]], windows=windows, mode=mode)\n            # Three windows, 1 triple\n            assert x.shape == (3, 1)\n\n            x = method([A, B, C], indexes=[0, 1, 2], windows=windows, mode=mode)\n            # Dropping the outer list removes the last dimension\n            assert x.shape == (3,)\n\n            y = method([A, B, C], windows=windows, mode=mode)\n            assert y.shape == (3,)\n            self.assertArrayEqual(x, y)\n\n        mode = \"node\"\n        x = method([A, B, C], indexes=[[0, 1, 2], [0, 2, 1]], windows=windows, mode=mode)\n        # Three windows, N nodes and 2 triples\n        assert x.shape == (3, N, 2)\n\n        x = method([A, B, C], indexes=[[0, 1, 2]], windows=windows, mode=mode)\n        # Three windows, N nodes and 1 triples\n        assert x.shape == (3, N, 1)\n\n        x = method([A, B, C], indexes=[0, 1, 2], windows=windows, mode=mode)\n        # Drop the outer list, so we lose the last dimension\n        assert x.shape == (3, N)\n\n        x = method([A, B, C], windows=windows, mode=mode)\n        # The default sample sets also drops the last dimension\n        assert x.shape == (3, N)\n\n        assert ts.num_trees == 1\n        # In this example, we know that the trees are all the same so check this\n        # for sanity.\n        self.assertArrayEqual(x[0], x[1])\n        self.assertArrayEqual(x[0], x[2])\n\n    def test_Y3_windows(self, ts_10_mut_fixture):\n        ts = ts_10_mut_fixture\n        self.verify_three_way_stat_windows(ts, ts.Y3)\n\n    def test_f3_windows(self, ts_10_mut_fixture):\n        ts = ts_10_mut_fixture\n        
self.verify_three_way_stat_windows(ts, ts.f3)\n\n\nclass TestTimeUncalibratedErrors:\n    def test_uncalibrated_time_allele_frequency_spectrum(self, ts_fixture):\n        ts_fixture.allele_frequency_spectrum(mode=\"branch\")\n        tables = ts_fixture.dump_tables()\n        tables.time_units = tskit.TIME_UNITS_UNCALIBRATED\n        ts_uncalibrated = tables.tree_sequence()\n        ts_uncalibrated.allele_frequency_spectrum(mode=\"site\")\n        with pytest.raises(\n            tskit.LibraryError,\n            match=\"Statistics using branch lengths cannot be calculated when time_units\"\n            \" is 'uncalibrated'\",\n        ):\n            ts_uncalibrated.allele_frequency_spectrum(mode=\"branch\")\n\n    def test_uncalibrated_time_general_stat(self, ts_fixture):\n        W = np.ones((ts_fixture.num_samples, 2))\n        ts_fixture.general_stat(\n            W, lambda x: x * (x < ts_fixture.num_samples), W.shape[1], mode=\"branch\"\n        )\n        tables = ts_fixture.dump_tables()\n        tables.time_units = tskit.TIME_UNITS_UNCALIBRATED\n        ts_uncalibrated = tables.tree_sequence()\n        ts_uncalibrated.general_stat(\n            W, lambda x: x * (x < ts_uncalibrated.num_samples), W.shape[1], mode=\"site\"\n        )\n        with pytest.raises(\n            tskit.LibraryError,\n            match=\"Statistics using branch lengths cannot be calculated when time_units\"\n            \" is 'uncalibrated'\",\n        ):\n            ts_uncalibrated.general_stat(\n                W,\n                lambda x: x * (x < ts_uncalibrated.num_samples),\n                W.shape[1],\n                mode=\"branch\",\n            )\n\n\nclass TestGeneralStatCallbackErrors:\n    def test_zero_d(self, ts_fixture):\n        def f_0d(_):\n            return 0\n\n        msg = \"Array returned by general_stat callback is 0 dimensional; must be 1D\"\n        with pytest.raises(ValueError, match=msg):\n            ts_fixture.sample_count_stat(\n                
sample_sets=[ts_fixture.samples()], f=f_0d, output_dim=1, strict=False\n            )\n\n    def test_two_d(self, ts_fixture):\n        def f_2d(x):\n            return np.array([x])\n\n        msg = \"Array returned by general_stat callback is 2 dimensional; must be 1D\"\n        with pytest.raises(ValueError, match=msg):\n            ts_fixture.sample_count_stat(\n                sample_sets=[ts_fixture.samples()], f=f_2d, output_dim=1, strict=False\n            )\n\n    def test_wrong_length(self, ts_fixture):\n        def f_too_long(_):\n            return np.array([0, 0])\n\n        msg = \"Array returned by general_stat callback is of length 2; must be 1\"\n        with pytest.raises(ValueError, match=msg):\n            ts_fixture.sample_count_stat(\n                sample_sets=[ts_fixture.samples()],\n                f=f_too_long,\n                output_dim=1,\n                strict=False,\n            )\n\n\nclass TestTimeWindows:\n    def test_bad_time_windows(self, four_taxa_test_case):\n        ts = four_taxa_test_case\n        for bad_windows in ([0], [-1], [math.inf]):\n            with pytest.raises(ValueError, match=\"must have at least 2\"):\n                ts.allele_frequency_spectrum(\n                    sample_sets=[[0, 1, 2, 3]], time_windows=bad_windows, mode=\"branch\"\n                )\n        for bad_windows in (\n            [0, -1],\n            [4, 2, math.inf],\n            [0, math.inf, math.inf],\n            [0, np.inf, math.inf],\n        ):\n            with pytest.raises(\n                exceptions.LibraryError, match=\"TSK_ERR_BAD_TIME_WINDOWS\"\n            ):\n                ts.allele_frequency_spectrum(\n                    sample_sets=[[0, 1, 2, 3]], time_windows=bad_windows, mode=\"branch\"\n                )\n\n    @pytest.mark.parametrize(\"mode\", [\"branch\"])\n    def test_drop_dimension(self, four_taxa_test_case, mode):\n        ts = four_taxa_test_case\n        L = ts.sequence_length\n        s = 
list(ts.samples())\n        n = len(s)\n        x = ts.allele_frequency_spectrum(mode=mode)\n        assert x.shape == (n + 1,)\n        x = ts.allele_frequency_spectrum(s, mode=mode)\n        assert x.shape == (n + 1,)\n        x = ts.allele_frequency_spectrum(\n            sample_sets=s,\n            time_windows=[0, math.inf],\n            mode=mode,\n        )\n        assert x.shape == (1, n + 1)\n        x = ts.allele_frequency_spectrum(\n            sample_sets=s,\n            time_windows=[0, 10, math.inf],\n            mode=mode,\n        )\n        assert x.shape == (2, n + 1)\n        x = ts.allele_frequency_spectrum(\n            sample_sets=s,\n            windows=[0, L],\n            mode=mode,\n        )\n        assert x.shape == (1, n + 1)\n        x = ts.allele_frequency_spectrum(\n            sample_sets=s,\n            windows=[0, L / 2, L],\n            mode=mode,\n        )\n        assert x.shape == (2, n + 1)\n        x = ts.allele_frequency_spectrum(\n            sample_sets=s,\n            windows=[0, L / 2, L],\n            time_windows=[0, math.inf],\n            mode=mode,\n        )\n        assert x.shape == (2, 1, n + 1)\n        x = ts.allele_frequency_spectrum(\n            sample_sets=s,\n            windows=[0, L / 2, L],\n            time_windows=[0, 10, 20, 30, 40],\n            mode=mode,\n        )\n        assert x.shape == (2, 4, n + 1)\n\n    def test_four_taxon_example(self, four_taxa_test_case_afs):\n        ts, examples = four_taxa_test_case_afs\n        for params, afs in examples:\n            ts_afs = ts.allele_frequency_spectrum(**params)\n            py_afs = allele_frequency_spectrum(ts, **params)\n            np.testing.assert_allclose(afs, ts_afs)\n            np.testing.assert_allclose(afs, py_afs)\n"
  },
  {
    "path": "python/tests/test_util.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2024 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTests for functions in util.py\n\"\"\"\n\nimport collections\nimport itertools\nimport math\nimport pickle\nimport textwrap\n\nimport numpy as np\nimport pytest\nfrom numpy.testing import assert_array_equal\n\nimport tests.tsutil as tsutil\nimport tskit\nimport tskit.util as util\nfrom tskit import UNKNOWN_TIME\n\n\nclass TestCanonicalJSON:\n    def test_canonical_json(self):\n        assert util.canonical_json([3, 2, 1]) == \"[3,2,1]\"\n        assert (\n            util.canonical_json(collections.OrderedDict(c=3, b=2, a=1))\n            == '{\"a\":1,\"b\":2,\"c\":3}'\n        )\n        assert (\n            util.canonical_json(\n                collections.OrderedDict(\n                    c=\"3\",\n                    b=collections.OrderedDict(\n                        {\n                            \"b\": 1,\n         
                   \"z\": {},\n                            \" space\": 42,\n                            \"1\": \"number\",\n                            \"_\": \"underscore\",\n                        }\n                    ),\n                    a=\"1\",\n                )\n            )\n            == '{\"a\":\"1\",\"b\":{\" space\":42,\"1\":\"number\",'\n            '\"_\":\"underscore\",\"b\":1,\"z\":{}},\"c\":\"3\"}'\n        )\n\n\nclass TestUnknownTime:\n    def test_unknown_time_bad_types(self):\n        with pytest.raises(ValueError):\n            util.is_unknown_time(\"bad\")\n        with pytest.raises(ValueError):\n            util.is_unknown_time(np.array([\"bad\"]))\n        with pytest.raises(ValueError):\n            util.is_unknown_time([\"bad\"])\n\n    def test_unknown_time_scalar(self):\n        assert math.isnan(UNKNOWN_TIME)\n        assert util.is_unknown_time(UNKNOWN_TIME)\n        assert not util.is_unknown_time(math.nan)\n        assert not util.is_unknown_time(np.nan)\n        assert not util.is_unknown_time(0)\n        assert not util.is_unknown_time(math.inf)\n        assert not util.is_unknown_time(1)\n        assert not util.is_unknown_time(None)\n        assert not util.is_unknown_time([None])\n\n    def test_unknown_time_array(self):\n        test_arrays = (\n            [],\n            [True],\n            [False],\n            [True, False] * 5,\n            [[True], [False]],\n            [[[True, False], [True, False]], [[False, True], [True, False]]],\n        )\n        for spec in test_arrays:\n            spec = np.asarray(spec, dtype=bool)\n            array = np.zeros(shape=spec.shape)\n            array[spec] = UNKNOWN_TIME\n            assert_array_equal(spec, util.is_unknown_time(array))\n\n        weird_array = [0, UNKNOWN_TIME, np.nan, 1, math.inf]\n        assert_array_equal(\n            [False, True, False, False, False], util.is_unknown_time(weird_array)\n        )\n\n\nclass TestNumpyArrayCasting:\n    \"\"\"\n 
   Tests that the safe_np_int_cast() function works.\n    \"\"\"\n\n    dtypes_to_test = [np.int32, np.uint32, np.int8, np.uint8]\n\n    def test_basic_arrays(self):\n        # Simple array\n        for dtype in self.dtypes_to_test:\n            target = np.array([0, 1], dtype=dtype)\n            for test_array in [[0, 1], (0, 1), np.array([0, 1]), target]:\n                converted = util.safe_np_int_cast(test_array, dtype=dtype)\n                # Use pickle to test exact equality including dtype\n                assert pickle.dumps(converted) == pickle.dumps(target)\n            # Nested array\n            target = np.array([[0, 1], [2, 3]], dtype=dtype)\n            for test_array in [[[0, 1], [2, 3]], np.array([[0, 1], [2, 3]]), target]:\n                converted = util.safe_np_int_cast(test_array, dtype=dtype)\n                assert pickle.dumps(converted) == pickle.dumps(target)\n\n    def test_copy(self):\n        # Check that a copy is not returned if copy=False & the original matches\n        # the specs\n        for dtype in self.dtypes_to_test:\n            for orig in (np.array([0, 1], dtype=dtype), np.array([], dtype=dtype)):\n                converted = util.safe_np_int_cast(orig, dtype=dtype, copy=True)\n                assert id(orig) != id(converted)\n                converted = util.safe_np_int_cast(orig, dtype=dtype, copy=False)\n                assert id(orig) == id(converted)\n        for dtype in [d for d in self.dtypes_to_test if d != np.int64]:\n            # non numpy arrays, or arrays of a different dtype don't get converted\n            for orig in ([0, 1], np.array([0, 1], dtype=np.int64)):\n                converted = util.safe_np_int_cast(orig, dtype=dtype, copy=False)\n                assert id(orig) != id(converted)\n\n    def test_empty_arrays(self):\n        # Empty arrays of any type (including float) should be allowed\n        for dtype in self.dtypes_to_test:\n            target = np.array([], dtype=dtype)\n            
converted = util.safe_np_int_cast([], dtype=dtype)\n            assert pickle.dumps(converted) == pickle.dumps(target)\n            target = np.array([[]], dtype=dtype)\n            converted = util.safe_np_int_cast([[]], dtype=dtype)\n            assert pickle.dumps(converted) == pickle.dumps(target)\n\n    def test_bad_types(self):\n        # Shouldn't be able to convert a float (possibility of rounding error)\n        for dtype in self.dtypes_to_test:\n            for bad_type in [\n                [0.1],\n                [\"str\"],\n                {},\n                [{}],\n                np.array([0, 1], dtype=float),\n            ]:\n                with pytest.raises(TypeError):\n                    util.safe_np_int_cast(bad_type, dtype)\n\n    def test_overflow(self):\n        for dtype in self.dtypes_to_test:\n            for bad_node in [np.iinfo(dtype).min - 1, np.iinfo(dtype).max + 1]:\n                with pytest.raises(OverflowError):\n                    util.safe_np_int_cast([0, bad_node], dtype)\n                with pytest.raises(OverflowError):\n                    util.safe_np_int_cast(np.array([0, bad_node]), dtype)\n            for good_node in [np.iinfo(dtype).min, np.iinfo(dtype).max]:\n                target = np.array([good_node], dtype=dtype)\n                assert pickle.dumps(target) == pickle.dumps(\n                    util.safe_np_int_cast([good_node], dtype)\n                )\n                assert pickle.dumps(target) == pickle.dumps(\n                    util.safe_np_int_cast(np.array([good_node]), dtype)\n                )\n\n    def test_nonrectangular_input(self):\n        bad_inputs = [\n            [0, 1, [2]],\n            [[0, 1, 2], []],\n            [(0, 1, 2), [2, 3]],\n            [(0, 1, 2), tuple()],\n            [(0, 1, 2), (2,)],\n            [(0, 1, 2), [2, 3]],\n        ]\n        for dtype in self.dtypes_to_test:\n            for bad_input in bad_inputs:\n                # On some platforms and Python / 
numpy versions, a ValueError\n                # occurs instead\n                with pytest.raises((TypeError, ValueError)):\n                    util.safe_np_int_cast(bad_input, dtype)\n\n\nclass TestIntervalOps:\n    \"\"\"\n    Test cases for the interval operations used in masks and slicing operations.\n    \"\"\"\n\n    def test_bad_intervals(self):\n        for bad_type in [{}, Exception]:\n            with pytest.raises(TypeError):\n                util.intervals_to_np_array(bad_type, 0, 1)\n        for bad_depth in [[[[]]]]:\n            with pytest.raises(ValueError):\n                util.intervals_to_np_array(bad_depth, 0, 1)\n        for bad_shape in [[[0], [0]], [[[0, 1, 2], [0, 1]]]]:\n            with pytest.raises(ValueError):\n                util.intervals_to_np_array(bad_shape, 0, 1)\n\n        # Out of bounds\n        with pytest.raises(ValueError):\n            util.intervals_to_np_array([[-1, 0]], 0, 1)\n        with pytest.raises(ValueError):\n            util.intervals_to_np_array([[0, 1]], 1, 2)\n        with pytest.raises(ValueError):\n            util.intervals_to_np_array([[0, 1]], 0, 0.5)\n\n        # Overlapping intervals\n        with pytest.raises(ValueError):\n            util.intervals_to_np_array([[0, 1], [0.9, 2.0]], 0, 10)\n\n        # Empty intervals\n        for bad_interval in [[0, 0], [1, 0]]:\n            with pytest.raises(ValueError):\n                util.intervals_to_np_array([bad_interval], 0, 10)\n\n    def test_empty_interval_list(self):\n        intervals = util.intervals_to_np_array([], 0, 10)\n        assert len(intervals) == 0\n\n    def test_negate_intervals(self):\n        L = 10\n        cases = [\n            ([], [[0, L]]),\n            ([[0, 5], [6, L]], [[5, 6]]),\n            ([[0, 5]], [[5, L]]),\n            ([[5, L]], [[0, 5]]),\n            ([[0, 1], [2, 3], [3, 4], [5, 6]], [[1, 2], [4, 5], [6, L]]),\n        ]\n        for source, dest in cases:\n            assert 
np.array_equal(util.negate_intervals(source, 0, L), dest)\n\n\nclass TestStringPacking:\n    \"\"\"\n    Tests the code for packing and unpacking unicode string data into numpy arrays.\n    \"\"\"\n\n    def test_simple_string_case(self):\n        strings = [\"hello\", \"world\"]\n        packed, offset = util.pack_strings(strings)\n        assert list(offset) == [0, 5, 10]\n        assert packed.shape == (10,)\n        returned = util.unpack_strings(packed, offset)\n        assert returned == strings\n\n    def verify_packing(self, strings):\n        packed, offset = util.pack_strings(strings)\n        assert packed.dtype == np.int8\n        assert offset.dtype == np.uint32\n        assert packed.shape[0] == offset[-1]\n        returned = util.unpack_strings(packed, offset)\n        assert strings == returned\n\n    def test_regular_cases(self):\n        for n in range(10):\n            strings = [\"a\" * j for j in range(n)]\n            self.verify_packing(strings)\n\n    def test_random_cases(self):\n        for n in range(100):\n            strings = [tsutil.random_strings(10) for _ in range(n)]\n            self.verify_packing(strings)\n\n    def test_unicode(self):\n        self.verify_packing([\"abcdé\", \"€\"])\n\n\nclass TestBytePacking:\n    \"\"\"\n    Tests the code for packing and unpacking binary data into numpy arrays.\n    \"\"\"\n\n    def test_simple_string_case(self):\n        strings = [b\"hello\", b\"world\"]\n        packed, offset = util.pack_bytes(strings)\n        assert list(offset) == [0, 5, 10]\n        assert packed.shape == (10,)\n        returned = util.unpack_bytes(packed, offset)\n        assert returned == strings\n\n    def verify_packing(self, data):\n        packed, offset = util.pack_bytes(data)\n        assert packed.dtype == np.int8\n        assert offset.dtype == np.uint32\n        assert packed.shape[0] == offset[-1]\n        returned = util.unpack_bytes(packed, offset)\n        assert data == returned\n        return 
returned\n\n    def test_random_cases(self):\n        for n in range(100):\n            data = [tsutil.random_bytes(10) for _ in range(n)]\n            self.verify_packing(data)\n\n    def test_pickle_packing(self):\n        data = [list(range(j)) for j in range(10)]\n        # Pickle each of these in turn\n        pickled = [pickle.dumps(d) for d in data]\n        unpacked = self.verify_packing(pickled)\n        unpickled = [pickle.loads(p) for p in unpacked]\n        assert data == unpickled\n\n\nclass TestArrayPacking:\n    \"\"\"\n    Tests the code for packing and unpacking numpy data into numpy arrays.\n    \"\"\"\n\n    def test_simple_case(self):\n        lists = [[0], [1.125, 1.25]]\n        packed, offset = util.pack_arrays(lists)\n        assert list(offset) == [0, 1, 3]\n        assert list(packed) == [0, 1.125, 1.25]\n        returned = util.unpack_arrays(packed, offset)\n        for a1, a2 in itertools.zip_longest(lists, returned):\n            assert a1 == list(a2)\n\n    def verify_packing(self, data):\n        packed, offset = util.pack_arrays(data)\n        assert packed.dtype == np.float64\n        assert offset.dtype == np.uint32\n        assert packed.shape[0] == offset[-1]\n        returned = util.unpack_arrays(packed, offset)\n        for a1, a2 in itertools.zip_longest(data, returned):\n            assert np.array_equal(a1, a2)\n        return returned\n\n    def test_regular_cases(self):\n        for n in range(100):\n            data = [np.arange(n) for _ in range(n)]\n            self.verify_packing(data)\n            data = [1 / (1 + np.arange(n)) for _ in range(n)]\n            self.verify_packing(data)\n\n\n@pytest.mark.parametrize(\n    \"value, expected\",\n    [\n        (0, \"0 Bytes\"),\n        (1, \"1 Byte\"),\n        (300, \"300 Bytes\"),\n        (3000, \"2.9 KiB\"),\n        (3000000, \"2.9 MiB\"),\n        (10**26 * 30, \"2481.5 YiB\"),\n    ],\n)\ndef test_naturalsize(value, expected):\n    assert util.naturalsize(value) 
== expected\n    if value != 0:\n        assert util.naturalsize(-value) == \"-\" + expected\n    else:\n        assert util.naturalsize(-value) == expected\n\n\ndef test_format_number():\n    assert util.format_number(0) == \"0\"\n    assert util.format_number(\"1.23\") == \"1.23\"\n    assert util.format_number(3216546.34) == \"3 216 546.3\"\n    assert util.format_number(3216546.34, 9) == \"3 216 546.34\"\n    assert util.format_number(-3456.23) == \"-3 456.23\"\n    assert util.format_number(-3456.23, sep=\",\") == \"-3,456.23\"\n\n    with pytest.raises(TypeError) as e_info:\n        util.format_number(\"bad\")\n        assert str(e_info.value) == \"The string cannot be converted to a number\"\n\n\n@pytest.mark.parametrize(\n    \"obj, expected\",\n    [\n        (0, \"Test: 0\"),\n        (\n            {\"a\": 1},\n            '<div><span class=\"tskit-details-label\">Test:</span><details open>'\n            \"<summary>dict</summary>a: 1<br/></details></div>\",\n        ),\n        (\n            {\"b\": [1, 2, 3]},\n            '<div><span class=\"tskit-details-label\">Test:</span><details open>'\n            '<summary>dict</summary><div><span class=\"tskit-details-label\">b:'\n            \"</span><details ><summary>list</summary> 1<br/> 2<br/> 3<br/></\"\n            \"details></div><br/></details></div>\",\n        ),\n        (\n            {\"b\": [1, 2, {\"c\": 1}]},\n            '<div><span class=\"tskit-details-label\">Test:</span><details open>'\n            '<summary>dict</summary><div><span class=\"tskit-details-label\">b:'\n            \"</span><details ><summary>list</summary> 1<br/> 2<br/><div><span\"\n            ' class=\"tskit-details-label\"></span><details ><summary>dict</'\n            \"summary>c: 1<br/></details></div><br/></details></div><br/></\"\n            \"details></div>\",\n        ),\n        (\n            {\"a\": \"1\", \"b\": \"2\"},\n            '<div><span class=\"tskit-details-label\">Test:</span><details open>'\n        
    \"<summary>dict</summary>a: 1<br/>b: 2<br/></details></div>\",\n        ),\n        (\n            {\"a\": \"a very long thing that is broken in the output\"},\n            '<div><span class=\"tskit-details-label\">Test:</span><details open>'\n            \"<summary>dict</summary>a: a very long thing that is<br/>broken in\"\n            \" the output<br/></details></div>\",\n        ),\n    ],\n    ids=[\n        \"integer\",\n        \"simple_dict\",\n        \"dict_with_list\",\n        \"nested_dict_list\",\n        \"dict_with_strings\",\n        \"dict_with_multiline_strings\",\n    ],\n)\ndef test_obj_to_collapsed_html(obj, expected):\n    assert (\n        util.obj_to_collapsed_html(obj, \"Test\", 1).replace(\"  \", \"\").replace(\"\\n\", \"\")\n        == expected\n    )\n\n\ndef test_truncate_string_end():\n    assert util.truncate_string_end(\"testing\", 40) == \"testing\"\n    assert util.truncate_string_end(\"testing\", 7) == \"testing\"\n    assert util.truncate_string_end(\"testing\", 5) == \"te...\"\n\n\ndef test_render_metadata():\n    assert util.render_metadata({}) == \"{}\"\n    assert util.render_metadata(\"testing\") == \"testing\"\n    assert util.render_metadata(b\"testing\") == \"b'testing'\"\n    assert util.render_metadata(b\"testing\", 6) == \"b't...\"\n    assert util.render_metadata(b\"\") == \"\"\n\n\ndef test_unicode_table():\n    assert (\n        util.unicode_table(\n            [[\"5\", \"6\", \"7\", \"8\"], [\"90\", \"10\", \"11\", \"12\"]],\n            header=[\"1\", \"2\", \"3\", \"4\"],\n        )\n        == textwrap.dedent(\n            \"\"\"\n           ╔══╤══╤══╤══╗\n           ║1 │2 │3 │4 ║\n           ╠══╪══╪══╪══╣\n           ║5 │ 6│ 7│ 8║\n           ╟──┼──┼──┼──╢\n           ║90│10│11│12║\n           ╚══╧══╧══╧══╝\n        \"\"\"\n        )[1:]\n    )\n\n    assert (\n        util.unicode_table(\n            [\n                [\"1\", \"2\", \"3\", \"4\"],\n                [\"5\", \"6\", \"7\", \"8\"],\n          
      \"__skipped__\",\n                [\"90\", \"10\", \"11\", \"12\"],\n            ],\n            title=\"TITLE\",\n        )\n        == textwrap.dedent(\n            \"\"\"\n           ╔═══════════╗\n           ║TITLE      ║\n           ╠══╤══╤══╤══╣\n           ║1 │ 2│ 3│ 4║\n           ╟──┼──┼──┼──╢\n           ║5 │ 6│ 7│ 8║\n           ╟──┴──┴──┴──╢\n           ║ rows skipp║\n           ╟──┬──┬──┬──╢\n           ║90│10│11│12║\n           ╚══╧══╧══╧══╝\n        \"\"\"\n        )[1:]\n    )\n\n    assert (\n        util.unicode_table(\n            [[\"1\", \"2\", \"3\", \"4\"], [\"5\", \"6\", \"7\", \"8\"], [\"90\", \"10\", \"11\", \"12\"]],\n            title=\"TITLE\",\n            row_separator=False,\n        )\n        == textwrap.dedent(\n            \"\"\"\n           ╔═══════════╗\n           ║TITLE      ║\n           ╠══╤══╤══╤══╣\n           ║1 │ 2│ 3│ 4║\n           ║5 │ 6│ 7│ 8║\n           ║90│10│11│12║\n           ╚══╧══╧══╧══╝\n        \"\"\"\n        )[1:]\n    )\n\n\ndef test_unicode_table_column_alignments():\n    assert (\n        util.unicode_table(\n            [[\"5\", \"6\", \"7\", \"8\"], [\"90\", \"10\", \"11\", \"12\"]],\n            header=[\"1\", \"2\", \"3\", \"4\"],\n            column_alignments=\"<>><\",\n        )\n        == textwrap.dedent(\n            \"\"\"\n           ╔══╤══╤══╤══╗\n           ║1 │2 │3 │4 ║\n           ╠══╪══╪══╪══╣\n           ║5 │ 6│ 7│8 ║\n           ╟──┼──┼──┼──╢\n           ║90│10│11│12║\n           ╚══╧══╧══╧══╝\n        \"\"\"\n        )[1:]\n    )\n\n\ndef test_set_printoptions():\n    assert tskit._print_options == {\"max_lines\": 40}\n    util.set_print_options(max_lines=None)\n    assert tskit._print_options == {\"max_lines\": None}\n    util.set_print_options(max_lines=40)\n    assert tskit._print_options == {\"max_lines\": 40}\n    with pytest.raises(TypeError):\n        util.set_print_options(40)\n\n\nclass TestRandomNuceotides:\n    @pytest.mark.parametrize(\"length\", [0, 1, 10, 10.0, 
np.array([10])[0]])\n    def test_length(self, length):\n        s = tskit.random_nucleotides(length, seed=42)\n        assert len(s) == length\n        assert isinstance(s, str)\n\n    def test_default_alphabet(self):\n        s = tskit.random_nucleotides(100, seed=42)\n        assert \"\".join(sorted(set(s))) == \"ACGT\"\n\n    def test_length_keyword(self):\n        s1 = tskit.random_nucleotides(length=10, seed=42)\n        s2 = tskit.random_nucleotides(length=10, seed=42)\n        assert s1 == s2\n\n    def test_length_required(self):\n        with pytest.raises(TypeError, match=\"required positional\"):\n            tskit.random_nucleotides()\n\n    def test_seed_keyword_only(self):\n        with pytest.raises(TypeError, match=\"1 positional\"):\n            tskit.random_nucleotides(10, 42)\n\n    @pytest.mark.parametrize(\"seed\", [1, 2, 3])\n    def test_seed_equality(self, seed):\n        s1 = tskit.random_nucleotides(10, seed=seed)\n        s2 = tskit.random_nucleotides(10, seed=seed)\n        assert s1 == s2\n\n    def test_different_seed_not_equal(self):\n        s1 = tskit.random_nucleotides(20, seed=1)\n        s2 = tskit.random_nucleotides(20, seed=2)\n        assert s1 != s2\n\n    def test_no_seed_different_values(self):\n        s1 = tskit.random_nucleotides(20)\n        s2 = tskit.random_nucleotides(20)\n        assert s1 != s2\n\n    @pytest.mark.parametrize(\"length\", [\"0\", 0.1, np.array([1.1])[0]])\n    def test_length_bad_value(self, length):\n        with pytest.raises(ValueError, match=\"must be an integer\"):\n            tskit.random_nucleotides(length)\n\n    @pytest.mark.parametrize(\"length\", [{}, None])\n    def test_length_bad_type(self, length):\n        with pytest.raises(TypeError, match=\"argument must be a string\"):\n            tskit.random_nucleotides(length)\n"
  },
  {
    "path": "python/tests/test_utilities.py",
    "content": "# MIT License\n#\n# Copyright (c) 2019-2022 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTests for the various testing utilities.\n\"\"\"\n\nimport msprime\nimport numpy as np\nimport pytest\n\nimport tests.tsutil as tsutil\nimport tskit\n\n\nclass TestJukesCantor:\n    \"\"\"\n    Check that the we get useable tree sequences.\n    \"\"\"\n\n    def verify(self, ts):\n        tables = ts.dump_tables()\n        tables.compute_mutation_parents()\n        tables.assert_equals(ts.tables)\n        # This will catch inconsistent mutations.\n        assert ts.genotype_matrix() is not None\n\n    def test_n10_multiroot(self):\n        ts = msprime.simulate(10, random_seed=1)\n        ts = ts.decapitate(np.max(ts.tables.nodes.time) / 2)\n        ts = tsutil.jukes_cantor(ts, 1, 2, seed=7)\n        self.verify(ts)\n\n    def test_n50_multiroot(self):\n        ts = msprime.simulate(50, random_seed=1)\n        ts = 
ts.decapitate(np.max(ts.tables.nodes.time) / 2)\n        ts = tsutil.jukes_cantor(ts, 5, 2, seed=2)\n        self.verify(ts)\n\n    def test_silent_mutations(self):\n        ts = msprime.simulate(50, random_seed=1)\n        ts = tsutil.jukes_cantor(ts, 5, 2, seed=2)\n        num_silent = 0\n        for m in ts.mutations():\n            if m.parent != -1 and ts.mutation(m.parent).derived_state == m.derived_state:\n                num_silent += 1\n        assert num_silent > 20\n\n\nclass TestCaterpillarTree:\n    \"\"\"\n    Tests for the caterpillar tree method.\n    \"\"\"\n\n    def verify(self, ts, n):\n        assert ts.num_trees == 1\n        assert ts.num_nodes == ts.num_samples * 2 - 1\n        tree = ts.first()\n        for j in range(1, n):\n            assert tree.parent(j) == n + j - 1\n        # This will catch inconsistent mutations.\n        assert ts.genotype_matrix() is not None\n\n    def test_n_2(self):\n        ts = tsutil.caterpillar_tree(2)\n        self.verify(ts, 2)\n\n    def test_n_3(self):\n        ts = tsutil.caterpillar_tree(3)\n        self.verify(ts, 3)\n\n    def test_n_50(self):\n        ts = tsutil.caterpillar_tree(50)\n        self.verify(ts, 50)\n\n    def test_n_5_sites(self):\n        ts = tsutil.caterpillar_tree(5, num_sites=4)\n        self.verify(ts, 5)\n        assert ts.num_sites == 4\n        assert ts.num_mutations == 4\n        assert list(ts.tables.sites.position) == [0.2, 0.4, 0.6, 0.8]\n        ts = tsutil.caterpillar_tree(5, num_sites=1, num_mutations=1)\n        assert ts.num_sites == 1\n        assert ts.num_mutations == 1\n        site = ts.site(0)\n        assert site.mutations[0].node == 7\n\n    def test_n_5_mutations(self):\n        ts = tsutil.caterpillar_tree(5, num_sites=1, num_mutations=3)\n        self.verify(ts, 5)\n        assert ts.num_sites == 1\n        assert ts.num_mutations == 3\n        node = ts.tables.mutations.node\n        assert list(node) == [7, 6, 5]\n\n    def 
test_n_many_mutations(self):\n        for n in range(10, 15):\n            for num_mutations in range(0, n - 1):\n                ts = tsutil.caterpillar_tree(n, num_sites=1, num_mutations=num_mutations)\n                self.verify(ts, n)\n                assert ts.num_sites == 1\n                assert ts.num_mutations == num_mutations\n            for num_mutations in range(n - 1, n + 2):\n                with pytest.raises(ValueError):\n                    tsutil.caterpillar_tree(n, num_sites=1, num_mutations=num_mutations)\n\n\nclass TestInsertIndividuals:\n    \"\"\"\n    Test that we insert individuals correctly.\n    \"\"\"\n\n    def test_ploidy_1(self):\n        ts = msprime.simulate(10, random_seed=1)\n        assert ts.num_individuals == 0\n        ts = tsutil.insert_individuals(ts, ploidy=1)\n        assert ts.num_individuals == 10\n        for j, ind in enumerate(ts.individuals()):\n            assert list(ind.nodes) == [j]\n\n    def test_ploidy_2(self):\n        ts = msprime.simulate(10, random_seed=1)\n        assert ts.num_individuals == 0\n        ts = tsutil.insert_individuals(ts, ploidy=2)\n        assert ts.num_individuals == 5\n        for j, ind in enumerate(ts.individuals()):\n            assert list(ind.nodes) == [2 * j, 2 * j + 1]\n\n    def test_ploidy_2_reversed(self):\n        ts = msprime.simulate(10, random_seed=1)\n        assert ts.num_individuals == 0\n        samples = ts.samples()[::-1]\n        ts = tsutil.insert_individuals(ts, nodes=samples, ploidy=2)\n        assert ts.num_individuals == 5\n        for j, ind in enumerate(ts.individuals()):\n            assert list(ind.nodes) == [samples[2 * j + 1], samples[2 * j]]\n\n\nclass TestSortIndividuals:\n    def test_sort_individuals(self):\n        tables = tskit.TableCollection()\n        tables.individuals.add_row(parents=[1], metadata=b\"0\")\n        tables.individuals.add_row(parents=[-1], metadata=b\"1\")\n        tsutil.sort_individual_table(tables)\n        assert 
tables.individuals.metadata.tobytes() == b\"10\"\n\n        tables = tskit.TableCollection()\n        tables.individuals.add_row(parents=[2, 3], metadata=b\"0\")\n        tables.individuals.add_row(parents=[5], metadata=b\"1\")\n        tables.individuals.add_row(parents=[-1], metadata=b\"2\")\n        tables.individuals.add_row(parents=[-1], metadata=b\"3\")\n        tables.individuals.add_row(parents=[3], metadata=b\"4\")\n        tables.individuals.add_row(parents=[4], metadata=b\"5\")\n\n        tsutil.sort_individual_table(tables)\n        assert tables.individuals.metadata.tobytes() == b\"342501\"\n\n        tables = tskit.TableCollection()\n        tables.individuals.add_row(parents=[1], metadata=b\"0\")\n        tables.individuals.add_row(parents=[0], metadata=b\"1\")\n        with pytest.raises(ValueError, match=\"Individual pedigree has cycles\"):\n            tsutil.sort_individual_table(tables)\n\n\nclass TestQuintuplyLinkedTrees:\n    def test_branch_operations_num_children(self):\n        qlt = tsutil.QuintuplyLinkedTree(3)\n        assert np.sum(qlt.num_children) == 0\n        qlt.insert_branch(2, 0)\n        assert qlt.num_children[2] == 1\n        assert np.sum(qlt.num_children) == 1\n\n        qlt.remove_branch(2, 0)\n        assert qlt.num_children[2] == 0\n\n    def test_edge_operations(self):\n        tt = tskit.Tree.generate_balanced(3)\n        tts = tt.tree_sequence\n\n        for _, qlt in tsutil.algorithm_R(tts):\n            assert np.sum(qlt.edge != -1) == tt.num_edges\n            self.verify_tree_edges(qlt, tts)\n\n    def verify_tree_edges(self, quintuply_linked_tree, tts):\n        for edge in tts.edges():\n            assert quintuply_linked_tree.edge[edge.child] == edge.id\n            assert quintuply_linked_tree.parent[edge.child] == edge.parent\n"
  },
  {
    "path": "python/tests/test_vcf.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2024 Tskit Developers\n# Copyright (c) 2016 University of Oxford\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTest cases for VCF output in tskit.\n\"\"\"\n\nimport contextlib\nimport io\nimport math\nimport os\nimport tempfile\nimport textwrap\nimport warnings\n\nimport msprime\nimport numpy as np\nimport pytest\n\nimport tests\nimport tests.test_wright_fisher as wf\nimport tskit\nfrom tests import tsutil\n\n# Pysam is not available on windows, so we don't make it mandatory here.\n_pysam_imported = False\ntry:\n    import pysam\n\n    _pysam_imported = True\nexcept ImportError:\n    pass\n\n\n@contextlib.contextmanager\ndef ts_to_pysam(ts, *args, **kwargs):\n    \"\"\"\n    Returns a pysam VariantFile for the specified tree sequence and arguments.\n    \"\"\"\n    with tempfile.TemporaryDirectory() as temp_dir:\n        vcf_path = os.path.join(temp_dir, \"file.vcf\")\n        with 
open(vcf_path, \"w\") as f:\n            ts.write_vcf(f, *args, **kwargs, allow_position_zero=True)\n        yield pysam.VariantFile(vcf_path)\n\n\ndef example_individuals(ts, ploidy=1):\n    if ts.num_individuals == 0:\n        yield None, ts.num_samples / ploidy\n    else:\n        yield None, ts.num_individuals\n        yield list(range(ts.num_individuals)), ts.num_individuals\n    if ts.num_individuals > 3:\n        n = ts.num_individuals - 2\n        yield list(range(n)), n\n        yield 2 + np.random.choice(np.arange(n), n, replace=False), n\n\n\ndef legacy_write_vcf(tree_sequence, output, ploidy, contig_id):\n    \"\"\"\n    Writes a VCF under the legacy conversion rules used in versions before 0.2.0.\n    \"\"\"\n    if tree_sequence.get_sample_size() % ploidy != 0:\n        raise ValueError(\"Sample size must a multiple of ploidy\")\n    n = tree_sequence.get_sample_size() // ploidy\n    sample_names = [f\"msp_{j}\" for j in range(n)]\n    last_pos = 0\n    positions = []\n    for variant in tree_sequence.variants():\n        pos = int(round(variant.position))\n        if pos <= last_pos:\n            pos = last_pos + 1\n        positions.append(pos)\n        last_pos = pos\n    contig_length = int(math.ceil(tree_sequence.get_sequence_length()))\n    if len(positions) > 0:\n        contig_length = max(positions[-1], contig_length)\n    print(\"##fileformat=VCFv4.2\", file=output)\n    print(f\"##source=tskit {tskit.__version__}\", file=output)\n    print('##FILTER=<ID=PASS,Description=\"All filters passed\">', file=output)\n    print(f\"##contig=<ID={contig_id},length={contig_length}>\", file=output)\n    print('##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">', file=output)\n    print(\n        \"#CHROM\",\n        \"POS\",\n        \"ID\",\n        \"REF\",\n        \"ALT\",\n        \"QUAL\",\n        \"FILTER\",\n        \"INFO\",\n        \"FORMAT\",\n        sep=\"\\t\",\n        end=\"\",\n        file=output,\n    )\n    for 
sample_name in sample_names:\n        print(\"\\t\", sample_name, sep=\"\", end=\"\", file=output)\n    print(file=output)\n    for variant in tree_sequence.variants():\n        pos = positions[variant.index]\n        site_id = variant.site.id\n        assert variant.num_alleles == 2\n        print(\n            contig_id,\n            pos,\n            site_id,\n            variant.alleles[0],\n            variant.alleles[1],\n            \".\",\n            \"PASS\",\n            \".\",\n            \"GT\",\n            sep=\"\\t\",\n            end=\"\",\n            file=output,\n        )\n        for j in range(n):\n            genotype = \"|\".join(\n                str(g) for g in variant.genotypes[j * ploidy : j * ploidy + ploidy]\n            )\n            print(\"\\t\", genotype, end=\"\", sep=\"\", file=output)\n        print(file=output)\n\n\nclass TestLegacyOutput:\n    \"\"\"\n    Tests if the VCF file produced by the low level code is the\n    same as one we generate here.\n    \"\"\"\n\n    def verify(self, ts, ploidy=1, contig_id=\"1\"):\n        assert ts.num_sites > 0\n        f = io.StringIO()\n        legacy_write_vcf(ts, f, ploidy=ploidy, contig_id=contig_id)\n        vcf1 = f.getvalue()\n\n        num_individuals = ts.num_samples // ploidy\n        individual_names = [f\"msp_{j}\" for j in range(num_individuals)]\n        f = io.StringIO()\n        ts.write_vcf(\n            f,\n            ploidy=ploidy,\n            contig_id=contig_id,\n            position_transform=\"legacy\",\n            individual_names=individual_names,\n        )\n        vcf2 = f.getvalue()\n        assert vcf1 == vcf2\n\n    def test_msprime_length_1(self):\n        ts = msprime.simulate(10, mutation_rate=1, random_seed=666)\n        self.verify(ts, ploidy=1)\n        self.verify(ts, ploidy=2)\n        self.verify(ts, ploidy=5)\n\n    def test_msprime_length_10(self):\n        ts = msprime.simulate(9, length=10, mutation_rate=0.1, random_seed=666)\n        
self.verify(ts, ploidy=1)\n        self.verify(ts, ploidy=3)\n\n    def test_contig_id(self):\n        ts = msprime.simulate(10, mutation_rate=1, random_seed=666)\n        self.verify(ts, ploidy=1, contig_id=\"X\")\n        self.verify(ts, ploidy=2, contig_id=\"X\" * 10)\n\n\nclass ExamplesMixin:\n    \"\"\"\n    Mixin defining tests on various example tree sequences.\n    \"\"\"\n\n    def test_simple_infinite_sites_random_ploidy(self):\n        ts = msprime.simulate(10, mutation_rate=1, random_seed=2)\n        ts = tsutil.insert_random_ploidy_individuals(ts, min_ploidy=1, samples_only=True)\n        assert ts.num_sites > 2\n        self.verify(ts)\n\n    def test_simple_infinite_sites_ploidy_2(self):\n        ts = msprime.simulate(10, mutation_rate=1, random_seed=2)\n        ts = tsutil.insert_individuals(ts, ploidy=2)\n        assert ts.num_sites > 2\n        self.verify(ts)\n\n    def test_simple_infinite_sites_ploidy_2_reversed_samples(self):\n        ts = msprime.simulate(10, mutation_rate=1, random_seed=2)\n        samples = ts.samples()[::-1]\n        ts = tsutil.insert_individuals(ts, nodes=samples, ploidy=2)\n        assert ts.num_sites > 2\n        self.verify(ts)\n\n    def test_simple_jukes_cantor_random_ploidy(self):\n        ts = msprime.simulate(10, random_seed=2)\n        ts = tsutil.jukes_cantor(ts, num_sites=10, mu=1, seed=2)\n        ts = tsutil.insert_random_ploidy_individuals(ts, min_ploidy=1, samples_only=True)\n        self.verify(ts)\n\n    def test_single_tree_multichar_mutations(self):\n        ts = msprime.simulate(6, random_seed=1, mutation_rate=1)\n        ts = tsutil.insert_multichar_mutations(ts)\n        ts = tsutil.insert_individuals(ts, ploidy=2)\n        self.verify(ts)\n\n    def test_many_trees_infinite_sites(self):\n        ts = msprime.simulate(6, recombination_rate=2, mutation_rate=2, random_seed=1)\n        assert ts.num_sites > 0\n        assert ts.num_trees > 2\n        ts = tsutil.insert_individuals(ts, ploidy=2)\n       
 self.verify(ts)\n\n    def test_many_trees_sequence_length_infinite_sites(self):\n        for L in [0.5, 1.5, 3.3333]:\n            ts = msprime.simulate(\n                6, length=L, recombination_rate=2, mutation_rate=1, random_seed=1\n            )\n            assert ts.num_sites > 0\n            ts = tsutil.insert_individuals(ts, ploidy=2)\n            self.verify(ts)\n\n    def test_wright_fisher_unsimplified(self):\n        tables = wf.wf_sim(\n            4,\n            5,\n            seed=1,\n            deep_history=True,\n            initial_generation_samples=False,\n            num_loci=10,\n        )\n        tables.sort()\n        ts = msprime.mutate(tables.tree_sequence(), rate=0.05, random_seed=234)\n        assert ts.num_sites > 0\n        ts = tsutil.insert_individuals(ts, ploidy=4)\n        self.verify(ts)\n\n    def test_wright_fisher_initial_generation(self):\n        tables = wf.wf_sim(\n            6, 5, seed=3, deep_history=True, initial_generation_samples=True, num_loci=2\n        )\n        tables.sort()\n        tables.simplify()\n        ts = msprime.mutate(tables.tree_sequence(), rate=0.08, random_seed=2)\n        assert ts.num_sites > 0\n        ts = tsutil.insert_individuals(ts, ploidy=3)\n        self.verify(ts)\n\n    def test_wright_fisher_unsimplified_multiple_roots(self):\n        tables = wf.wf_sim(\n            8,\n            15,\n            seed=1,\n            deep_history=False,\n            initial_generation_samples=False,\n            num_loci=20,\n        )\n        tables.sort()\n        ts = msprime.mutate(tables.tree_sequence(), rate=0.006, random_seed=2)\n        assert ts.num_sites > 0\n        ts = tsutil.insert_individuals(ts, ploidy=2)\n        self.verify(ts)\n\n    def test_wright_fisher_simplified(self):\n        tables = wf.wf_sim(\n            9,\n            10,\n            seed=1,\n            deep_history=True,\n            initial_generation_samples=False,\n            num_loci=5,\n        )\n    
    tables.sort()\n        ts = tables.tree_sequence().simplify()\n        ts = msprime.mutate(ts, rate=0.2, random_seed=1234)\n        assert ts.num_sites > 0\n        ts = tsutil.insert_individuals(ts, ploidy=3)\n        self.verify(ts)\n\n\n@pytest.mark.skipif(not _pysam_imported, reason=\"pysam not available\")\nclass TestParseHeaderPysam(ExamplesMixin):\n    \"\"\"\n    Test that pysam can parse the headers correctly.\n    \"\"\"\n\n    def verify(self, ts):\n        contig_id = \"pysam\"\n        for indivs, num_indivs in example_individuals(ts):\n            with ts_to_pysam(ts, contig_id=contig_id, individuals=indivs) as bcf_file:\n                assert bcf_file.format == \"VCF\"\n                assert bcf_file.version == (4, 2)\n                header = bcf_file.header\n                assert len(header.contigs) == 1\n                contig = header.contigs[0]\n                assert contig.name == contig_id\n                assert contig.length > 0\n                assert len(header.filters) == 1\n                p = header.filters[\"PASS\"]\n                assert p.name == \"PASS\"\n                assert p.description == \"All filters passed\"\n                assert len(header.info) == 0\n                assert len(header.formats) == 1\n                fmt = header.formats[\"GT\"]\n                assert fmt.name == \"GT\"\n                assert fmt.number == 1\n                assert fmt.type == \"String\"\n                assert fmt.description == \"Genotype\"\n                assert len(bcf_file.header.samples) == num_indivs\n\n\nclass TestInterface:\n    \"\"\"\n    Tests for the interface.\n    \"\"\"\n\n    def test_bad_ploidy(self):\n        ts = msprime.simulate(10, mutation_rate=0.1, random_seed=2)\n        for bad_ploidy in [-1, 0]:\n            with pytest.raises(ValueError, match=\"Ploidy must be a positive integer\"):\n                ts.write_vcf(io.StringIO, bad_ploidy)\n        # Non divisible\n        for bad_ploidy in [3, 7]:\n    
        with pytest.raises(\n                ValueError,\n                match=\"Number of sample nodes 10 is not a multiple of ploidy\",\n            ):\n                ts.write_vcf(io.StringIO, bad_ploidy)\n\n    def test_individuals_no_nodes_default_args(self):\n        ts1 = msprime.simulate(10, mutation_rate=0.1, random_seed=2)\n        tables = ts1.dump_tables()\n        tables.individuals.add_row()\n        ts2 = tables.tree_sequence()\n        # ts1 should work as it has no individuals\n        ts1.as_vcf(allow_position_zero=True)\n        # ts2 should fail as it has individuals but no nodes\n        with warnings.catch_warnings(record=True) as w:\n            with pytest.raises(ValueError, match=\"No samples in resulting VCF model\"):\n                ts2.as_vcf(allow_position_zero=True)\n            assert len(w) == 1\n            assert \"At least one sample node does not have an individual ID\" in str(\n                w[0].message\n            )\n\n    def test_individuals_no_nodes_as_argument(self):\n        ts1 = msprime.simulate(10, mutation_rate=0.1, random_seed=2)\n        tables = ts1.dump_tables()\n        tables.individuals.add_row()\n        ts2 = tables.tree_sequence()\n        with warnings.catch_warnings(record=True) as w:\n            with pytest.raises(ValueError, match=\"No samples in resulting VCF model\"):\n                ts2.as_vcf(individuals=[0])\n            assert len(w) == 1\n            assert \"At least one sample node does not have an individual ID\" in str(\n                w[0].message\n            )\n\n    def test_ploidy_with_sample_individuals(self):\n        ts = msprime.sim_ancestry(3, random_seed=2)\n        ts = tsutil.insert_branch_sites(ts)\n        with pytest.raises(ValueError, match=\"Cannot specify ploidy when individuals\"):\n            ts.write_vcf(io.StringIO(), ploidy=2)\n\n    def test_ploidy_with_no_node_individuals(self):\n        ts1 = msprime.simulate(10, mutation_rate=0.1, random_seed=2)\n        
tables = ts1.dump_tables()\n        tables.individuals.add_row()\n        ts2 = tables.tree_sequence()\n        with pytest.raises(ValueError, match=\"Cannot specify ploidy when individuals\"):\n            ts2.as_vcf(ploidy=2)\n\n    def test_empty_individuals(self):\n        ts = msprime.sim_ancestry(3, random_seed=2)\n        ts = tsutil.insert_branch_sites(ts)\n        with pytest.raises(ValueError, match=\"No individuals specified\"):\n            ts.as_vcf(individuals=[])\n\n    def test_duplicate_individuals(self):\n        ts = msprime.sim_ancestry(3, random_seed=2)\n        ts = tsutil.insert_branch_sites(ts)\n        with pytest.raises(tskit.LibraryError, match=\"TSK_ERR_DUPLICATE_SAMPLE\"):\n            ts.as_vcf(individuals=[0, 0], allow_position_zero=True)\n\n    def test_samples_with_and_without_individuals(self):\n        ts = tskit.Tree.generate_balanced(3).tree_sequence\n        tables = ts.dump_tables()\n        tables.individuals.add_row()\n        # Add a reference to an individual from one sample\n        individual = tables.nodes.individual\n        individual[0] = 0\n        tables.nodes.individual = individual\n        ts = tables.tree_sequence()\n        ts = tsutil.insert_branch_sites(ts)\n        with warnings.catch_warnings(record=True) as w:\n            ts.as_vcf(allow_position_zero=True)\n            assert len(w) == 1\n            assert \"At least one sample node does not have an individual ID\" in str(\n                w[0].message\n            )\n\n    def test_bad_individuals(self):\n        ts = msprime.simulate(10, mutation_rate=0.1, random_seed=2)\n        ts = tsutil.insert_individuals(ts, ploidy=2)\n        with pytest.raises(ValueError, match=\"Invalid individual ID\"):\n            ts.write_vcf(io.StringIO(), individuals=[0, -1])\n        with pytest.raises(ValueError, match=\"Invalid individual ID\"):\n            ts.write_vcf(io.StringIO(), individuals=[1, 2, ts.num_individuals])\n\n    def 
test_ploidy_positional(self):\n        ts = msprime.simulate(2, mutation_rate=2, random_seed=1)\n        assert ts.as_vcf(2, allow_position_zero=True) == ts.as_vcf(\n            ploidy=2, allow_position_zero=True\n        )\n\n    def test_only_ploidy_positional(self):\n        ts = msprime.simulate(2, mutation_rate=2, random_seed=1)\n        with pytest.raises(TypeError, match=\"positional arguments\"):\n            assert ts.as_vcf(2, \"chr2\")\n\n\nclass TestLimitations:\n    \"\"\"\n    Verify the correct error behaviour in cases we don't support.\n    \"\"\"\n\n    def test_many_alleles(self):\n        ts = msprime.simulate(20, random_seed=45)\n        tables = ts.dump_tables()\n        tables.sites.add_row(0.5, \"0\")\n        # 9 alleles should be fine\n        for j in range(8):\n            tables.mutations.add_row(0, node=j, derived_state=str(j + 1))\n        ts = tables.tree_sequence()\n        ts.write_vcf(io.StringIO(), allow_position_zero=True)\n        for j in range(9, 15):\n            tables.mutations.add_row(0, node=j, derived_state=str(j))\n            ts = tables.tree_sequence()\n            with pytest.raises(\n                ValueError, match=\"More than 9 alleles not currently supported\"\n            ):\n                ts.write_vcf(io.StringIO(), allow_position_zero=True)\n\n\nclass TestPositionTransformErrors:\n    \"\"\"\n    Tests what happens when we provide bad position transforms\n    \"\"\"\n\n    def get_example_ts(self):\n        ts = msprime.simulate(11, mutation_rate=1, random_seed=11)\n        assert ts.num_sites > 1\n        return ts\n\n    def test_wrong_output_dimensions(self):\n        ts = self.get_example_ts()\n        for bad_func in [np.sum, lambda x: []]:\n            with pytest.raises(ValueError):\n                ts.write_vcf(io.StringIO(), position_transform=bad_func)\n\n    def test_bad_func(self):\n        ts = self.get_example_ts()\n        for bad_func in [\"\", Exception]:\n            with 
pytest.raises(TypeError):\n                ts.write_vcf(io.StringIO(), position_transform=bad_func)\n\n\nclass TestZeroPositionErrors:\n    \"\"\"\n    Tests for handling zero position sites\n    \"\"\"\n\n    def test_zero_position_error(self):\n        ts = msprime.sim_ancestry(3, random_seed=2, sequence_length=10)\n        ts = msprime.sim_mutations(ts, rate=1, random_seed=2)\n        assert ts.sites_position[0] == 0\n\n        with pytest.raises(ValueError, match=\"A variant position of 0\"):\n            ts.write_vcf(io.StringIO())\n\n        # Should succeed if we allow it, or the site is masked or transformed\n        ts.write_vcf(io.StringIO(), allow_position_zero=True)\n        ts.write_vcf(io.StringIO(), position_transform=lambda pos: [x + 1 for x in pos])\n        mask = np.zeros(ts.num_sites, dtype=bool)\n        mask[0] = True\n        ts.write_vcf(io.StringIO(), site_mask=mask)\n\n    def test_no_position_zero_ok(self):\n        ts = msprime.sim_ancestry(3, random_seed=2, sequence_length=10)\n        ts = msprime.sim_mutations(ts, rate=0.25, random_seed=4)\n        assert ts.num_sites > 0\n        assert ts.sites_position[0] != 0\n        ts.write_vcf(io.StringIO(), allow_position_zero=True)\n        ts.write_vcf(io.StringIO())\n\n\nclass TestIndividualNames:\n    \"\"\"\n    Tests for the individual names argument.\n    \"\"\"\n\n    def test_bad_length_individuals(self):\n        ts = msprime.simulate(6, mutation_rate=2, random_seed=1)\n        assert ts.num_sites > 0\n        ts = tsutil.insert_individuals(ts, ploidy=2)\n        with pytest.raises(\n            ValueError,\n            match=\"The number of individuals does not match the number of names\",\n        ):\n            ts.write_vcf(io.StringIO(), individual_names=[])\n        with pytest.raises(\n            ValueError,\n            match=\"The number of individuals does not match the number of names\",\n        ):\n            ts.write_vcf(io.StringIO(), individual_names=[\"x\" for _ 
in range(4)])\n        with pytest.raises(\n            ValueError,\n            match=\"The number of individuals does not match the number of names\",\n        ):\n            ts.write_vcf(\n                io.StringIO(),\n                individuals=list(range(ts.num_individuals)),\n                individual_names=[\"x\" for _ in range(ts.num_individuals - 1)],\n            )\n        with pytest.raises(\n            ValueError,\n            match=\"The number of individuals does not match the number of names\",\n        ):\n            ts.write_vcf(\n                io.StringIO(),\n                individuals=list(range(ts.num_individuals - 1)),\n                individual_names=[\"x\" for _ in range(ts.num_individuals)],\n            )\n\n    def test_bad_length_ploidy(self):\n        ts = msprime.simulate(6, mutation_rate=2, random_seed=1)\n        assert ts.num_sites > 0\n        with pytest.raises(\n            ValueError,\n            match=\"The number of individuals does not match the number of names\",\n        ):\n            ts.write_vcf(io.StringIO(), ploidy=2, individual_names=[])\n        with pytest.raises(\n            ValueError,\n            match=\"The number of individuals does not match the number of names\",\n        ):\n            ts.write_vcf(\n                io.StringIO(), ploidy=2, individual_names=[\"x\" for _ in range(4)]\n            )\n\n    def test_bad_type(self):\n        ts = msprime.simulate(2, mutation_rate=2, random_seed=1)\n        with pytest.raises(\n            TypeError, match=\"sequence item 0: expected str instance, NoneType found\"\n        ):\n            ts.write_vcf(\n                io.StringIO(), individual_names=[None, \"b\"], allow_position_zero=True\n            )\n        with pytest.raises(\n            TypeError, match=\"sequence item 0: expected str instance, bytes found\"\n        ):\n            ts.write_vcf(\n                io.StringIO(), individual_names=[b\"a\", \"b\"], allow_position_zero=True\n  
          )\n\n\ndef drop_header(s):\n    return \"\\n\".join(line for line in s.splitlines() if not line.startswith(\"##\"))\n\n\nclass TestMasking:\n    @tests.cached_example\n    def ts(self):\n        ts = tskit.Tree.generate_balanced(3, span=10).tree_sequence\n        ts = tsutil.insert_branch_sites(ts)\n        return ts\n\n    @pytest.mark.parametrize(\"mask\", [[True], np.zeros(5, dtype=bool), []])\n    def test_site_mask_wrong_size(self, mask):\n        with pytest.raises(ValueError, match=\"Site mask must be\"):\n            self.ts().as_vcf(site_mask=mask)\n\n    @pytest.mark.parametrize(\"mask\", [[[0, 1], [1, 0]], \"abcd\"])\n    def test_site_mask_bad_type(self, mask):\n        # converting to a bool array is pretty lax in what's allows.\n        with pytest.raises(ValueError, match=\"Site mask must be\"):\n            self.ts().as_vcf(site_mask=mask)\n\n    @pytest.mark.parametrize(\"mask\", [[[0, 1], [1, 0]], \"abcd\"])\n    def test_sample_mask_bad_type(self, mask):\n        # converting to a bool array is pretty lax in what's allows.\n        with pytest.raises(ValueError, match=\"Sample mask must be\"):\n            self.ts().as_vcf(sample_mask=mask, allow_position_zero=True)\n\n    def test_no_masks(self):\n        s = \"\"\"\\\n        #CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\ttsk_0\\ttsk_1\\ttsk_2\n        1\\t0\\t0\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t1\\t0\\t0\n        1\\t2\\t1\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t1\\t1\n        1\\t4\\t2\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t1\\t0\n        1\\t6\\t3\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t0\\t1\"\"\"\n        expected = textwrap.dedent(s)\n        assert drop_header(self.ts().as_vcf(allow_position_zero=True)) == expected\n\n    def test_no_masks_triploid(self):\n        s = \"\"\"\\\n        #CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\ttsk_0\n        1\\t0\\t0\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t1|0|0\n        1\\t2\\t1\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0|1|1\n        
1\\t4\\t2\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0|1|0\n        1\\t6\\t3\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0|0|1\"\"\"\n        expected = textwrap.dedent(s)\n        assert (\n            drop_header(self.ts().as_vcf(ploidy=3, allow_position_zero=True)) == expected\n        )\n\n    def test_site_0_masked(self):\n        s = \"\"\"\\\n        #CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\ttsk_0\\ttsk_1\\ttsk_2\n        1\\t2\\t1\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t1\\t1\n        1\\t4\\t2\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t1\\t0\n        1\\t6\\t3\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t0\\t1\"\"\"\n        expected = textwrap.dedent(s)\n        actual = self.ts().as_vcf(\n            site_mask=[True, False, False, False], allow_position_zero=True\n        )\n        assert drop_header(actual) == expected\n\n    def test_site_0_masked_triploid(self):\n        s = \"\"\"\\\n        #CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\ttsk_0\n        1\\t2\\t1\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0|1|1\n        1\\t4\\t2\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0|1|0\n        1\\t6\\t3\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0|0|1\"\"\"\n        expected = textwrap.dedent(s)\n        actual = self.ts().as_vcf(\n            ploidy=3, site_mask=[True, False, False, False], allow_position_zero=True\n        )\n        assert drop_header(actual) == expected\n\n    def test_site_1_masked(self):\n        s = \"\"\"\\\n        #CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\ttsk_0\\ttsk_1\\ttsk_2\n        1\\t0\\t0\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t1\\t0\\t0\n        1\\t4\\t2\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t1\\t0\n        1\\t6\\t3\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t0\\t1\"\"\"\n        expected = textwrap.dedent(s)\n        actual = self.ts().as_vcf(\n            site_mask=[False, True, False, False], allow_position_zero=True\n        )\n        assert drop_header(actual) == expected\n\n    def test_all_sites_masked(self):\n        s = \"\"\"\\\n        
#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\ttsk_0\\ttsk_1\\ttsk_2\"\"\"\n        expected = textwrap.dedent(s)\n        actual = self.ts().as_vcf(\n            site_mask=[True, True, True, True], allow_position_zero=True\n        )\n        assert drop_header(actual) == expected\n\n    def test_all_sites_not_masked(self):\n        s = \"\"\"\\\n        #CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\ttsk_0\\ttsk_1\\ttsk_2\n        1\\t0\\t0\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t1\\t0\\t0\n        1\\t2\\t1\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t1\\t1\n        1\\t4\\t2\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t1\\t0\n        1\\t6\\t3\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t0\\t1\"\"\"\n        expected = textwrap.dedent(s)\n        actual = self.ts().as_vcf(\n            site_mask=[False, False, False, False], allow_position_zero=True\n        )\n        assert drop_header(actual) == expected\n\n    @pytest.mark.parametrize(\n        \"mask\",\n        [[False, False, False], [0, 0, 0], lambda _: [False, False, False]],\n    )\n    def test_all_samples_not_masked(self, mask):\n        s = \"\"\"\\\n        #CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\ttsk_0\\ttsk_1\\ttsk_2\n        1\\t0\\t0\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t1\\t0\\t0\n        1\\t2\\t1\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t1\\t1\n        1\\t4\\t2\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t1\\t0\n        1\\t6\\t3\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t0\\t1\"\"\"\n        expected = textwrap.dedent(s)\n        actual = self.ts().as_vcf(sample_mask=mask, allow_position_zero=True)\n        assert drop_header(actual) == expected\n\n    @pytest.mark.parametrize(\n        \"mask\", [[True, False, False], [1, 0, 0], lambda _: [True, False, False]]\n    )\n    def test_sample_0_masked(self, mask):\n        s = \"\"\"\\\n        #CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\ttsk_0\\ttsk_1\\ttsk_2\n        1\\t0\\t0\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t.\\t0\\t0\n        
1\\t2\\t1\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t.\\t1\\t1\n        1\\t4\\t2\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t.\\t1\\t0\n        1\\t6\\t3\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t.\\t0\\t1\"\"\"\n        expected = textwrap.dedent(s)\n        actual = self.ts().as_vcf(sample_mask=mask, allow_position_zero=True)\n        assert drop_header(actual) == expected\n\n    @pytest.mark.parametrize(\n        \"mask\", [[False, True, False], [0, 1, 0], lambda _: [False, True, False]]\n    )\n    def test_sample_1_masked(self, mask):\n        s = \"\"\"\\\n        #CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\ttsk_0\\ttsk_1\\ttsk_2\n        1\\t0\\t0\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t1\\t.\\t0\n        1\\t2\\t1\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t.\\t1\n        1\\t4\\t2\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t.\\t0\n        1\\t6\\t3\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t.\\t1\"\"\"\n        expected = textwrap.dedent(s)\n        actual = self.ts().as_vcf(sample_mask=mask, allow_position_zero=True)\n        assert drop_header(actual) == expected\n\n    @pytest.mark.parametrize(\n        \"mask\", [[True, True, True], [1, 1, 1], lambda _: [True, True, True]]\n    )\n    def test_all_samples_masked(self, mask):\n        s = \"\"\"\\\n        #CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\ttsk_0\\ttsk_1\\ttsk_2\n        1\\t0\\t0\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t.\\t.\\t.\n        1\\t2\\t1\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t.\\t.\\t.\n        1\\t4\\t2\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t.\\t.\\t.\n        1\\t6\\t3\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t.\\t.\\t.\"\"\"\n        expected = textwrap.dedent(s)\n        actual = self.ts().as_vcf(sample_mask=mask, allow_position_zero=True)\n        assert drop_header(actual) == expected\n\n    def test_all_functional_sample_mask(self):\n        s = \"\"\"\\\n        #CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\ttsk_0\\ttsk_1\\ttsk_2\n        1\\t0\\t0\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t.\\t0\\t0\n        
1\\t2\\t1\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t.\\t1\n        1\\t4\\t2\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t1\\t.\n        1\\t6\\t3\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t.\\t0\\t1\"\"\"\n\n        def mask(variant):\n            a = [0, 0, 0]\n            a[variant.site.id % 3] = 1\n            return a\n\n        expected = textwrap.dedent(s)\n        actual = self.ts().as_vcf(sample_mask=mask, allow_position_zero=True)\n        assert drop_header(actual) == expected\n\n    @pytest.mark.skipif(not _pysam_imported, reason=\"pysam not available\")\n    def test_mask_ok_with_pysam(self):\n        with ts_to_pysam(self.ts(), sample_mask=[0, 0, 1]) as records:\n            variants = list(records)\n            assert len(variants) == 4\n            samples = [\"tsk_0\", \"tsk_1\", \"tsk_2\"]\n            gts = [variants[0].samples[key][\"GT\"] for key in samples]\n            assert gts == [(1,), (0,), (None,)]\n\n            gts = [variants[1].samples[key][\"GT\"] for key in samples]\n            assert gts == [(0,), (1,), (None,)]\n\n            gts = [variants[2].samples[key][\"GT\"] for key in samples]\n            assert gts == [(0,), (1,), (None,)]\n\n            gts = [variants[3].samples[key][\"GT\"] for key in samples]\n            assert gts == [(0,), (0,), (None,)]\n\n\nclass TestMissingData:\n    @tests.cached_example\n    def ts(self):\n        tables = tskit.Tree.generate_balanced(2, span=10).tree_sequence.dump_tables()\n        tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE)\n        ts = tsutil.insert_branch_sites(tables.tree_sequence())\n        return ts\n\n    def test_defaults(self):\n        s = \"\"\"\\\n        #CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\ttsk_0\\ttsk_1\\ttsk_2\n        1\\t0\\t0\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t1\\t0\\t.\n        1\\t2\\t1\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t1\\t.\"\"\"\n        expected = textwrap.dedent(s)\n        assert drop_header(self.ts().as_vcf(allow_position_zero=True)) == expected\n\n    def 
test_isolated_as_missing_true(self):\n        s = \"\"\"\\\n        #CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\ttsk_0\\ttsk_1\\ttsk_2\n        1\\t0\\t0\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t1\\t0\\t.\n        1\\t2\\t1\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t1\\t.\"\"\"\n        expected = textwrap.dedent(s)\n        assert (\n            drop_header(\n                self.ts().as_vcf(isolated_as_missing=True, allow_position_zero=True)\n            )\n            == expected\n        )\n\n    def test_isolated_as_missing_false(self):\n        s = \"\"\"\\\n        #CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\ttsk_0\\ttsk_1\\ttsk_2\n        1\\t0\\t0\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t1\\t0\\t0\n        1\\t2\\t1\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t1\\t0\"\"\"\n        expected = textwrap.dedent(s)\n        assert (\n            drop_header(\n                self.ts().as_vcf(isolated_as_missing=False, allow_position_zero=True)\n            )\n            == expected\n        )\n\n    @pytest.mark.skipif(not _pysam_imported, reason=\"pysam not available\")\n    def test_ok_with_pysam(self):\n        with ts_to_pysam(self.ts(), sample_mask=[0, 0, 1]) as records:\n            variants = list(records)\n            assert len(variants) == 2\n            samples = [\"tsk_0\", \"tsk_1\", \"tsk_2\"]\n            gts = [variants[0].samples[key][\"GT\"] for key in samples]\n            assert gts == [(1,), (0,), (None,)]\n\n            gts = [variants[1].samples[key][\"GT\"] for key in samples]\n            assert gts == [(0,), (1,), (None,)]\n\n\ndef drop_individuals(ts):\n    tables = ts.dump_tables()\n    individual = tables.nodes.individual\n    individual[:] = -1\n    tables.individuals.clear()\n    tables.nodes.individual = individual\n    return tables.tree_sequence()\n\n\nclass TestSampleOptions:\n    @tests.cached_example\n    def ts(self):\n        ts = tskit.Tree.generate_balanced(3, span=10).tree_sequence\n        ts = 
tsutil.insert_branch_sites(ts)\n        tables = ts.dump_tables()\n        tables.individuals.add_row()\n        tables.individuals.add_row()\n        individual = tables.nodes.individual\n        # One diploid and one haploid, not in adjacent individuals\n        individual[0] = 0\n        individual[1] = 1\n        individual[2] = 0\n        tables.nodes.individual = individual\n        return tables.tree_sequence()\n\n    def test_no_individuals_defaults(self):\n        ts = drop_individuals(self.ts())\n        s = \"\"\"\\\n        #CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\ttsk_0\\ttsk_1\\ttsk_2\n        1\\t0\\t0\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t1\\t0\\t0\n        1\\t2\\t1\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t1\\t1\n        1\\t4\\t2\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t1\\t0\n        1\\t6\\t3\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t0\\t1\"\"\"\n        expected = textwrap.dedent(s)\n        assert drop_header(ts.as_vcf(allow_position_zero=True)) == expected\n\n    def test_no_individuals_ploidy_3(self):\n        ts = drop_individuals(self.ts())\n        s = \"\"\"\\\n        #CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\ttsk_0\n        1\\t0\\t0\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t1|0|0\n        1\\t2\\t1\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0|1|1\n        1\\t4\\t2\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0|1|0\n        1\\t6\\t3\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0|0|1\"\"\"\n        expected = textwrap.dedent(s)\n        assert drop_header(ts.as_vcf(ploidy=3, allow_position_zero=True)) == expected\n\n    def test_no_individuals_ploidy_3_names(self):\n        ts = drop_individuals(self.ts())\n        s = \"\"\"\\\n        #CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\tA\n        1\\t0\\t0\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t1|0|0\n        1\\t2\\t1\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0|1|1\n        1\\t4\\t2\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0|1|0\n        1\\t6\\t3\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0|0|1\"\"\"\n        expected = textwrap.dedent(s)\n        
assert (\n            drop_header(\n                ts.as_vcf(ploidy=3, individual_names=[\"A\"], allow_position_zero=True)\n            )\n            == expected\n        )\n\n    def test_defaults(self):\n        ts = self.ts()\n        s = \"\"\"\\\n        #CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\ttsk_0\\ttsk_1\n        1\\t0\\t0\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t1|0\\t0\n        1\\t2\\t1\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0|1\\t1\n        1\\t4\\t2\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0|0\\t1\n        1\\t6\\t3\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0|1\\t0\"\"\"\n        expected = textwrap.dedent(s)\n        assert drop_header(ts.as_vcf(allow_position_zero=True)) == expected\n\n    def test_individual_0(self):\n        ts = self.ts()\n        s = \"\"\"\\\n        #CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\ttsk_0\n        1\\t0\\t0\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t1|0\n        1\\t2\\t1\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0|1\n        1\\t4\\t2\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0|0\n        1\\t6\\t3\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0|1\"\"\"\n        expected = textwrap.dedent(s)\n        assert (\n            drop_header(ts.as_vcf(individuals=[0], allow_position_zero=True)) == expected\n        )\n\n    def test_individual_1(self):\n        ts = self.ts()\n        s = \"\"\"\\\n        #CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\ttsk_1\n        1\\t0\\t0\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\n        1\\t2\\t1\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t1\n        1\\t4\\t2\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t1\n        1\\t6\\t3\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\"\"\"\n        expected = textwrap.dedent(s)\n        assert (\n            drop_header(ts.as_vcf(individuals=[1], allow_position_zero=True)) == expected\n        )\n\n    def test_reversed(self):\n        ts = self.ts()\n        s = \"\"\"\\\n        #CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\ttsk_1\\ttsk_0\n        1\\t0\\t0\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t1|0\n        
1\\t2\\t1\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t1\\t0|1\n        1\\t4\\t2\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t1\\t0|0\n        1\\t6\\t3\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t0|1\"\"\"\n        expected = textwrap.dedent(s)\n        assert (\n            drop_header(ts.as_vcf(individuals=[1, 0], allow_position_zero=True))\n            == expected\n        )\n\n    def test_reversed_names(self):\n        ts = self.ts()\n        s = \"\"\"\\\n        #CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\tA\\tB\n        1\\t0\\t0\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t1|0\n        1\\t2\\t1\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t1\\t0|1\n        1\\t4\\t2\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t1\\t0|0\n        1\\t6\\t3\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t0|1\"\"\"\n        expected = textwrap.dedent(s)\n        assert (\n            drop_header(\n                ts.as_vcf(\n                    individuals=[1, 0],\n                    individual_names=[\"A\", \"B\"],\n                    allow_position_zero=True,\n                ),\n            )\n            == expected\n        )\n\n\nclass TestVcfMapping:\n    def test_mix_sample_non_sample(self):\n        ts = tskit.Tree.generate_balanced(5, span=10).tree_sequence\n        ts = tsutil.insert_branch_sites(ts)\n        assert ts.num_nodes >= 8\n        tables = ts.dump_tables()\n        tables.individuals.add_row()\n        tables.individuals.add_row()\n        tables.individuals.add_row()\n        tables.individuals.add_row()\n        individual = tables.nodes.individual\n        assert np.all(individual == -1)\n        # First has only non-sample nodes\n        individual[7] = 0\n        # Second has 2 sample nodes\n        individual[0] = 1\n        individual[1] = 1\n        # Third has 1 non-sample and 1 sample\n        individual[5] = 2\n        individual[2] = 2\n        # Fourth has sandwiched non-sample\n        individual[3] = 3\n        individual[6] = 3\n        individual[4] = 3\n        tables.nodes.individual = individual\n        
ts = tables.tree_sequence()\n\n        # Individual \"A\" is omitted as it has no sample nodes\n        s = \"\"\"\\\n        #CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\tB\\tC\\tD\n        1\\t0\\t0\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t1|1\\t0\\t0|0\n        1\\t1\\t1\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t1|0\\t0\\t0|0\n        1\\t2\\t2\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0|1\\t0\\t0|0\n        1\\t3\\t3\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0|0\\t1\\t1|1\n        1\\t4\\t4\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0|0\\t1\\t0|0\n        1\\t6\\t5\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0|0\\t0\\t1|1\n        1\\t7\\t6\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0|0\\t0\\t1|0\n        1\\t8\\t7\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0|0\\t0\\t0|1\"\"\"\n        expected = textwrap.dedent(s)\n        assert (\n            drop_header(\n                ts.as_vcf(\n                    individual_names=[\"A\", \"B\", \"C\", \"D\"],\n                    allow_position_zero=True,\n                ),\n            )\n            == expected\n        )\n\n        # Now with non-sample nodes, so A is included, C becomes diploid\n        # and D is triploid\n        s = \"\"\"\\\n        #CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\tA\\tB\\tC\\tD\n        1\\t0\\t0\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t1|1\\t0|1\\t0|0|0\n        1\\t1\\t1\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t1|0\\t0|0\\t0|0|0\n        1\\t2\\t2\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t0|1\\t0|0\\t0|0|0\n        1\\t3\\t3\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t1\\t0|0\\t1|0\\t1|1|1\n        1\\t4\\t4\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t0|0\\t1|0\\t0|0|0\n        1\\t6\\t5\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t0|0\\t0|0\\t1|1|1\n        1\\t7\\t6\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t0|0\\t0|0\\t1|0|0\n        1\\t8\\t7\\t0\\t1\\t.\\tPASS\\t.\\tGT\\t0\\t0|0\\t0|0\\t0|1|0\"\"\"\n        expected = textwrap.dedent(s)\n        assert (\n            drop_header(\n                ts.as_vcf(\n                    individual_names=[\"A\", \"B\", \"C\", \"D\"],\n                    
allow_position_zero=True,\n                    include_non_sample_nodes=True,\n                    isolated_as_missing=False,\n                ),\n            )\n            == expected\n        )\n"
  },
  {
    "path": "python/tests/test_version.py",
    "content": "# MIT License\n#\n# Copyright (c) 2020-2024 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTest python package versioning\n\"\"\"\n\nfrom packaging.version import Version\n\nfrom tskit import _version\n\n\nclass TestPythonVersion:\n    \"\"\"\n    Test that the version is PEP440 compliant\n    \"\"\"\n\n    def test_version(self):\n        assert str(Version(_version.tskit_version)) == _version.tskit_version\n"
  },
  {
    "path": "python/tests/test_wright_fisher.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2021 Tskit Developers\n# Copyright (C) 2017 University of Oxford\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTest various functions using messy tables output by a forwards-time simulator.\n\"\"\"\n\nimport itertools\nimport random\n\nimport msprime\nimport numpy as np\nimport numpy.testing as nt\nimport pytest\n\nimport tests as tests\nimport tests.tsutil as tsutil\nimport tskit\n\n\nclass WrightFisherSimulator:\n    \"\"\"\n    SIMPLE simulation of `num_pops` bisexual, haploid Wright-Fisher populations\n    of size `N` for `ngens` generations, in which each individual survives with\n    probability `survival` and only those who die are replaced. If `num_pops` is\n    greater than 1, the individual to be replaced has a chance `mig_rate` of\n    being the offspring of nodes from a different and randomly chosen\n    population. If `num_loci` is None, the chromosome is 1.0 Morgans long. 
If\n    `num_loci` is not None, a discrete recombination model is used where\n    breakpoints are chosen uniformly from 1 to `num_loci` - 1. If\n    `deep_history` is True, a history to coalescence of just one population of\n    `self.N` samples is added at the beginning.\n    \"\"\"\n\n    def __init__(\n        self,\n        N,\n        survival=0.0,\n        seed=None,\n        deep_history=True,\n        debug=False,\n        initial_generation_samples=False,\n        num_loci=None,\n        num_pops=1,\n        mig_rate=0.0,\n        record_migrations=False,\n        record_individuals=True,\n    ):\n        self.N = N\n        self.num_pops = num_pops\n        self.num_loci = num_loci\n        self.survival = survival\n        self.mig_rate = mig_rate\n        self.record_migrations = record_migrations\n        self.record_individuals = record_individuals\n        self.deep_history = deep_history\n        self.debug = debug\n        self.initial_generation_samples = initial_generation_samples\n        self.seed = seed\n        self.rng = random.Random(seed)\n\n    def random_breakpoint(self):\n        if self.num_loci is None:\n            return min(1.0, max(0.0, 2 * self.rng.random() - 0.5))\n        else:\n            return self.rng.randint(1, self.num_loci - 1)\n\n    def run(self, ngens):\n        L = 1\n        if self.num_loci is not None:\n            L = self.num_loci\n        tables = tskit.TableCollection(sequence_length=L)\n        for _ in range(self.num_pops):\n            tables.populations.add_row()\n        if self.deep_history:\n            # initial population\n            population_configurations = [\n                msprime.PopulationConfiguration(sample_size=self.N)\n            ]\n            init_ts = msprime.simulate(\n                population_configurations=population_configurations,\n                recombination_rate=1.0,\n                length=L,\n                random_seed=self.seed,\n            )\n            init_tables = 
init_ts.dump_tables()\n            flags = init_tables.nodes.flags\n            if not self.initial_generation_samples:\n                flags = np.zeros_like(init_tables.nodes.flags)\n            tables.nodes.set_columns(time=init_tables.nodes.time + ngens, flags=flags)\n            tables.edges.set_columns(\n                left=init_tables.edges.left,\n                right=init_tables.edges.right,\n                parent=init_tables.edges.parent,\n                child=init_tables.edges.child,\n            )\n        else:\n            flags = 0\n            if self.initial_generation_samples:\n                flags = tskit.NODE_IS_SAMPLE\n            for p in range(self.num_pops):\n                for _ in range(self.N):\n                    individual = -1\n                    if self.record_individuals:\n                        individual = tables.individuals.add_row(parents=[-1, -1])\n                    tables.nodes.add_row(\n                        flags=flags, time=ngens, population=p, individual=individual\n                    )\n\n        pops = [\n            list(range(p * self.N, (p * self.N) + self.N)) for p in range(self.num_pops)\n        ]\n        pop_ids = list(range(self.num_pops))\n        for t in range(ngens - 1, -1, -1):\n            if self.debug:\n                print(\"t:\", t)\n                print(\"pops:\", pops)\n            dead = [[self.rng.random() > self.survival for _ in pop] for pop in pops]\n            # sample these first so that all parents are from the previous gen\n            parent_pop = []\n            new_parents = []\n            for p in pop_ids:\n                w = [\n                    1 - self.mig_rate if i == p else self.mig_rate / (self.num_pops - 1)\n                    for i in pop_ids\n                ]\n                parent_pop.append(self.rng.choices(pop_ids, w, k=sum(dead[p])))\n                new_parents.append(\n                    [\n                        
self.rng.choices(pops[parent_pop[p][k]], k=2)\n                        for k in range(sum(dead[p]))\n                    ]\n                )\n\n            if self.debug:\n                for p in pop_ids:\n                    print(\"Replacing\", sum(dead[p]), \"individuals from pop\", p)\n            for p in pop_ids:\n                k = 0\n                for j in range(self.N):\n                    if dead[p][j]:\n                        lparent, rparent = new_parents[p][k]\n                        individual = -1\n                        if self.record_individuals:\n                            individual = tables.individuals.add_row(\n                                parents=[\n                                    tables.nodes[lparent].individual,\n                                    tables.nodes[rparent].individual,\n                                ]\n                            )\n                        offspring = tables.nodes.add_row(\n                            time=t, population=p, individual=individual\n                        )\n                        if parent_pop[p][k] != p and self.record_migrations:\n                            tables.migrations.add_row(\n                                left=0.0,\n                                right=L,\n                                node=offspring,\n                                source=parent_pop[p][k],\n                                dest=p,\n                                time=t,\n                            )\n                        k += 1\n                        bp = self.random_breakpoint()\n                        if self.debug:\n                            print(\"--->\", offspring, lparent, rparent, bp)\n                        pops[p][j] = offspring\n                        if bp > 0.0:\n                            tables.edges.add_row(\n                                left=0.0, right=bp, parent=lparent, child=offspring\n                            )\n                        if bp < L:\n       
                     tables.edges.add_row(\n                                left=bp, right=L, parent=rparent, child=offspring\n                            )\n\n        if self.debug:\n            print(\"Done! Final pop:\")\n            print(pops)\n        flags = tables.nodes.flags\n        flattened = [n for pop in pops for n in pop]\n        flags[flattened] = tskit.NODE_IS_SAMPLE\n        tables.nodes.flags = flags\n        tables.time_units = \"generations\"\n        return tables\n\n\ndef wf_sim(\n    N,\n    ngens,\n    survival=0.0,\n    deep_history=True,\n    debug=False,\n    seed=None,\n    initial_generation_samples=False,\n    num_loci=None,\n    num_pops=1,\n    mig_rate=0.0,\n    record_migrations=False,\n    record_individuals=True,\n):\n    sim = WrightFisherSimulator(\n        N,\n        survival=survival,\n        deep_history=deep_history,\n        debug=debug,\n        seed=seed,\n        initial_generation_samples=initial_generation_samples,\n        num_loci=num_loci,\n        num_pops=num_pops,\n        mig_rate=mig_rate,\n        record_migrations=record_migrations,\n        record_individuals=record_individuals,\n    )\n    return sim.run(ngens)\n\n\nclass TestSimulation:\n    \"\"\"\n    Tests that the simulations produce the output we expect.\n    \"\"\"\n\n    random_seed = 5678\n\n    def test_one_gen_multipop_mig_no_deep(self):\n        tables = wf_sim(\n            N=5,\n            ngens=1,\n            num_pops=4,\n            mig_rate=1.0,\n            deep_history=False,\n            seed=self.random_seed,\n            record_migrations=True,\n        )\n        assert tables.time_units == \"generations\"\n        assert tables.nodes.num_rows == 5 * 4 * (1 + 1)\n        assert tables.edges.num_rows > 0\n        assert tables.migrations.num_rows == 5 * 4\n        assert tables.individuals.num_rows == tables.nodes.num_rows\n\n    def test_multipop_mig_deep(self):\n        N = 10\n        ngens = 20\n        num_pops = 3\n        
tables = wf_sim(\n            N=N,\n            ngens=ngens,\n            num_pops=num_pops,\n            mig_rate=1.0,\n            seed=self.random_seed,\n            record_migrations=True,\n        )\n        assert tables.nodes.num_rows > (num_pops * N * ngens) + N\n        assert tables.edges.num_rows > 0\n        assert tables.sites.num_rows == 0\n        assert tables.mutations.num_rows == 0\n        assert tables.migrations.num_rows >= N * num_pops * ngens\n        assert tables.populations.num_rows == num_pops\n        assert tables.individuals.num_rows >= num_pops * N * ngens\n\n        # sort does not support mig\n        tables.migrations.clear()\n        # making sure trees are valid\n        tables.sort()\n        tables.simplify()\n        ts = tables.tree_sequence()\n        sample_pops = tables.nodes.population[ts.samples()]\n        assert np.unique(sample_pops).size == num_pops\n\n    def test_multipop_mig_no_deep(self):\n        N = 5\n        ngens = 5\n        num_pops = 2\n        tables = wf_sim(\n            N=N,\n            ngens=ngens,\n            num_pops=num_pops,\n            mig_rate=1.0,\n            deep_history=False,\n            seed=self.random_seed,\n            record_migrations=True,\n        )\n        assert tables.nodes.num_rows == num_pops * N * (ngens + 1)\n        assert tables.edges.num_rows > 0\n        assert tables.sites.num_rows == 0\n        assert tables.mutations.num_rows == 0\n        assert tables.migrations.num_rows == N * num_pops * ngens\n        assert tables.populations.num_rows == num_pops\n        assert tables.individuals.num_rows == tables.nodes.num_rows\n        # FIXME this is no longer needed.\n        # sort does not support mig\n        tables.migrations.clear()\n        # making sure trees are valid\n        tables.sort()\n        tables.simplify()\n        ts = tables.tree_sequence()\n        sample_pops = tables.nodes.population[ts.samples()]\n        assert np.unique(sample_pops).size == 
num_pops\n\n    def test_non_overlapping_generations(self):\n        tables = wf_sim(N=10, ngens=10, survival=0.0, seed=self.random_seed)\n        assert tables.nodes.num_rows > 0\n        assert tables.edges.num_rows > 0\n        assert tables.sites.num_rows == 0\n        assert tables.mutations.num_rows == 0\n        assert tables.migrations.num_rows == 0\n        assert tables.individuals.num_rows > 0\n        tables.sort()\n        tables.simplify()\n        ts = tables.tree_sequence()\n        # All trees should have exactly one root and all internal nodes should\n        # have arity > 1\n        for tree in ts.trees():\n            assert tree.num_roots == 1\n            leaves = set(tree.leaves(tree.root))\n            assert leaves == set(ts.samples())\n            for u in tree.nodes():\n                if tree.is_internal(u):\n                    assert len(tree.children(u)) > 1\n\n    def test_overlapping_generations(self):\n        tables = wf_sim(N=30, ngens=10, survival=0.85, seed=self.random_seed)\n        assert tables.nodes.num_rows > 0\n        assert tables.edges.num_rows > 0\n        assert tables.sites.num_rows == 0\n        assert tables.mutations.num_rows == 0\n        assert tables.migrations.num_rows == 0\n        assert tables.individuals.num_rows > 0\n        tables.sort()\n        tables.simplify()\n        ts = tables.tree_sequence()\n        for tree in ts.trees():\n            assert tree.num_roots == 1\n\n    def test_one_generation_no_deep_history(self):\n        N = 20\n        tables = wf_sim(N=N, ngens=1, deep_history=False, seed=self.random_seed)\n        assert tables.nodes.num_rows == 2 * N\n        assert tables.edges.num_rows > 0\n        assert tables.sites.num_rows == 0\n        assert tables.mutations.num_rows == 0\n        assert tables.migrations.num_rows == 0\n        assert tables.individuals.num_rows > 0\n        tables.sort()\n        tables.simplify()\n        ts = tables.tree_sequence()\n        assert 
tables.nodes.num_rows > 0\n        assert tables.edges.num_rows > 0\n        ts = tables.tree_sequence()\n        for tree in ts.trees():\n            all_samples = set()\n            for root in tree.roots:\n                root_samples = set(tree.samples(root))\n                assert len(root_samples & all_samples) == 0\n                all_samples |= root_samples\n            assert all_samples == set(ts.samples())\n\n    def test_many_generations_no_deep_history(self):\n        N = 10\n        ngens = 100\n        tables = wf_sim(N=N, ngens=ngens, deep_history=False, seed=self.random_seed)\n        assert tables.nodes.num_rows == N * (ngens + 1)\n        assert tables.edges.num_rows > 0\n        assert tables.sites.num_rows == 0\n        assert tables.mutations.num_rows == 0\n        assert tables.migrations.num_rows == 0\n        assert tables.individuals.num_rows > 0\n        tables.sort()\n        tables.simplify()\n        ts = tables.tree_sequence()\n        assert tables.nodes.num_rows > 0\n        assert tables.edges.num_rows > 0\n        ts = tables.tree_sequence()\n        # We are assuming that everything has coalesced and we have single-root trees\n        for tree in ts.trees():\n            assert tree.num_roots == 1\n\n    def test_with_mutations(self):\n        N = 10\n        ngens = 100\n        tables = wf_sim(N=N, ngens=ngens, deep_history=False, seed=self.random_seed)\n        tables.sort()\n        ts = tables.tree_sequence()\n        ts = tsutil.jukes_cantor(ts, 10, 0.1, seed=self.random_seed)\n        tables = ts.dump_tables()\n        assert tables.sites.num_rows > 0\n        assert tables.mutations.num_rows > 0\n        samples = np.where(tables.nodes.flags == tskit.NODE_IS_SAMPLE)[0].astype(\n            np.int32\n        )\n        tables.sort()\n        tables.simplify(samples)\n        assert tables.nodes.num_rows > 0\n        assert tables.edges.num_rows > 0\n        assert tables.nodes.num_rows > 0\n        assert 
tables.edges.num_rows > 0\n        assert tables.sites.num_rows > 0\n        assert tables.mutations.num_rows > 0\n        ts = tables.tree_sequence()\n        assert ts.sample_size == N\n        for hap in ts.haplotypes():\n            assert len(hap) == ts.num_sites\n\n    def test_with_recurrent_mutations(self):\n        # actually with only ONE site, at 0.0\n        N = 10\n        ngens = 100\n        tables = wf_sim(N=N, ngens=ngens, deep_history=False, seed=self.random_seed)\n        tables.sort()\n        ts = tables.tree_sequence()\n        ts = tsutil.jukes_cantor(ts, 1, 10, seed=self.random_seed)\n        tables = ts.dump_tables()\n        assert tables.sites.num_rows == 1\n        assert tables.mutations.num_rows > 0\n        # before simplify\n        for h in ts.haplotypes():\n            assert len(h) == 1\n        # after simplify\n        tables.sort()\n        tables.simplify()\n        assert tables.nodes.num_rows > 0\n        assert tables.edges.num_rows > 0\n        assert tables.sites.num_rows == 1\n        assert tables.mutations.num_rows > 0\n        ts = tables.tree_sequence()\n        assert ts.sample_size == N\n        for hap in ts.haplotypes():\n            assert len(hap) == ts.num_sites\n\n    def test_record_individuals_initial_state(self):\n        N = 10\n        tables = wf_sim(N=N, ngens=0, seed=12345, deep_history=False)\n        tables.sort()\n        assert len(tables.individuals) == N\n        assert len(tables.nodes) == N\n        for individual in list(tables.individuals)[:N]:\n            assert list(individual.parents) == [-1, -1]\n        for j, node in enumerate(tables.nodes):\n            assert node.individual == j\n\n    def test_record_individuals(self):\n        N = 10\n        tables = wf_sim(N=N, ngens=10, seed=12345, deep_history=False)\n        assert len(tables.individuals) == len(tables.nodes)\n        for node_id, individual in enumerate(tables.nodes.individual):\n            assert node_id == individual\n   
     tables.sort()\n        ts = tables.tree_sequence()\n        for tree in ts.trees():\n            for u in tree.nodes():\n                individual = ts.individual(ts.node(u).individual)\n                parent_node = tree.parent(u)\n                if parent_node != tskit.NULL:\n                    parent_individual = ts.individual(ts.node(parent_node).individual)\n                    assert parent_individual.id in individual.parents\n\n\ndef get_wf_sims(seed):\n    wf_sims = []\n    for N in [5, 10, 20]:\n        for surv in [0.0, 0.5, 0.9]:\n            for mut in [0.01, 1.0]:\n                for nloci in [1, 2, 3]:\n                    tables = wf_sim(N=N, ngens=N, survival=surv, seed=seed)\n                    tables.sort()\n                    ts = tables.tree_sequence()\n                    ts = tsutil.jukes_cantor(ts, num_sites=nloci, mu=mut, seed=seed)\n                    wf_sims.append(ts)\n    return wf_sims\n\n\n# List of simulations used to parametrize tests.\nwf_sims = get_wf_sims(1234)\n\n\nclass TestSimplify:\n    \"\"\"\n    Tests for simplify on cases generated by the Wright-Fisher simulator.\n    \"\"\"\n\n    def verify_simplify(self, ts, new_ts, samples, node_map):\n        \"\"\"\n        Check that trees in `ts` match `new_ts` using the specified node_map.\n        Modified from `verify_simplify_topology`.  
Also check that the `parent`\n        and `time` column in the MutationTable is correct.\n        \"\"\"\n        # check trees agree at these points\n        locs = [random.random() for _ in range(20)]\n        locs += random.sample(list(ts.breakpoints())[:-1], min(20, ts.num_trees))\n        locs.sort()\n        old_trees = ts.trees()\n        new_trees = new_ts.trees()\n        old_right = -1\n        new_right = -1\n        for loc in locs:\n            while old_right <= loc:\n                old_tree = next(old_trees)\n                old_left, old_right = old_tree.get_interval()\n            assert old_left <= loc < old_right\n            while new_right <= loc:\n                new_tree = next(new_trees)\n                new_left, new_right = new_tree.get_interval()\n            assert new_left <= loc < new_right\n            # print(\"comparing trees\")\n            # print(\"interval:\", old_tree.interval)\n            # print(old_tree.draw(format=\"unicode\"))\n            # print(\"interval:\", new_tree.interval)\n            # print(new_tree.draw(format=\"unicode\"))\n            pairs = itertools.islice(itertools.combinations(samples, 2), 500)\n            for pair in pairs:\n                mapped_pair = [node_map[u] for u in pair]\n                mrca1 = old_tree.get_mrca(*pair)\n                assert mrca1 != tskit.NULL\n                mrca2 = new_tree.get_mrca(*mapped_pair)\n                assert mrca2 != tskit.NULL\n                assert node_map[mrca1] == mrca2\n        mut_parent = tsutil.compute_mutation_parent(ts=ts)\n        nt.assert_equal(mut_parent, ts.tables.mutations.parent)\n\n    def verify_haplotypes(self, ts, samples):\n        \"\"\"\n        Check that haplotypes are unchanged by simplify.\n        \"\"\"\n        sub_ts, node_map = ts.simplify(samples, map_nodes=True, filter_sites=False)\n        # Sites tables should be equal\n        assert ts.tables.sites == sub_ts.tables.sites\n        sub_haplotypes = 
dict(zip(sub_ts.samples(), sub_ts.haplotypes()))\n        all_haplotypes = dict(zip(ts.samples(), ts.haplotypes()))\n        mapped_ids = []\n        for node_id, h in all_haplotypes.items():\n            mapped_node_id = node_map[node_id]\n            if mapped_node_id in sub_haplotypes:\n                assert h == sub_haplotypes[mapped_node_id]\n                mapped_ids.append(mapped_node_id)\n        assert sorted(mapped_ids) == sorted(sub_ts.samples())\n\n    @pytest.mark.parametrize(\"ts\", wf_sims)\n    def test_python_simplify_all_samples(self, ts):\n        s = tests.Simplifier(ts, ts.samples())\n        py_full_ts, py_full_map = s.simplify()\n        full_ts, full_map = ts.simplify(ts.samples(), map_nodes=True)\n        assert all(py_full_map == full_map)\n        full_ts.tables.assert_equals(py_full_ts.tables, ignore_provenance=True)\n\n    @pytest.mark.parametrize(\"ts\", wf_sims)\n    @pytest.mark.parametrize(\"nsamples\", [2, 5, 10])\n    def test_python_simplify_sample_subset(self, ts, nsamples):\n        sub_samples = random.sample(list(ts.samples()), min(nsamples, ts.sample_size))\n        s = tests.Simplifier(ts, sub_samples)\n        py_small_ts, py_small_map = s.simplify()\n        small_ts, small_map = ts.simplify(samples=sub_samples, map_nodes=True)\n        small_ts.tables.assert_equals(py_small_ts.tables, ignore_provenance=True)\n        self.verify_simplify(ts, small_ts, sub_samples, small_map)\n        self.verify_haplotypes(ts, samples=sub_samples)\n\n    @pytest.mark.parametrize(\"ts\", wf_sims)\n    @pytest.mark.parametrize(\"nsamples\", [2, 5, 10])\n    def test_simplify_tables(self, ts, nsamples):\n        tables = ts.dump_tables()\n        sub_samples = random.sample(list(ts.samples()), min(nsamples, ts.num_samples))\n        node_map = tables.simplify(samples=sub_samples)\n        small_ts = tables.tree_sequence()\n        other_tables = small_ts.dump_tables()\n        tables.assert_equals(other_tables, ignore_provenance=True)\n   
     self.verify_simplify(ts, small_ts, sub_samples, node_map)\n\n    @pytest.mark.parametrize(\"ts\", wf_sims)\n    @pytest.mark.parametrize(\"nsamples\", [2, 5])\n    def test_simplify_keep_unary(self, ts, nsamples):\n        np.random.seed(123)\n        ts = tsutil.mark_metadata(ts, \"nodes\")\n        sub_samples = random.sample(list(ts.samples()), min(nsamples, ts.num_samples))\n        random_nodes = np.random.choice(ts.num_nodes, ts.num_nodes // 2)\n        ts = tsutil.insert_individuals(ts, random_nodes)\n        ts = tsutil.mark_metadata(ts, \"individuals\")\n\n        for params in [{}, {\"keep_unary\": True}, {\"keep_unary_in_individuals\": True}]:\n            sts = ts.simplify(sub_samples, **params)\n            # check samples match\n            assert sts.num_samples == len(sub_samples)\n            for n, sn in zip(sub_samples, sts.samples()):\n                assert ts.node(n).metadata == sts.node(sn).metadata\n\n            # check that nodes are correctly retained: only nodes ancestral to\n            # retained samples, and: by default, only coalescent events; if\n            # keep_unary_in_individuals then also nodes in individuals; if\n            # keep_unary then all such nodes.\n            for t in ts.trees(tracked_samples=sub_samples):\n                st = sts.at(t.interval.left)\n                visited = [False for _ in sts.nodes()]\n                for n, sn in zip(sub_samples, sts.samples()):\n                    last_n = t.num_tracked_samples(n)\n                    while n != tskit.NULL:\n                        ind = ts.node(n).individual\n                        keep = False\n                        if t.num_tracked_samples(n) > last_n:\n                            # a coalescent node\n                            keep = True\n                        if \"keep_unary_in_individuals\" in params and ind != tskit.NULL:\n                            keep = True\n                        if \"keep_unary\" in params:\n                     
       keep = True\n                        if (n in sub_samples) or keep:\n                            visited[sn] = True\n                            assert sn != tskit.NULL\n                            assert ts.node(n).metadata == sts.node(sn).metadata\n                            assert t.num_tracked_samples(n) == st.num_samples(sn)\n                            if ind != tskit.NULL:\n                                sind = sts.node(sn).individual\n                                assert sind != tskit.NULL\n                                assert (\n                                    ts.individual(ind).metadata\n                                    == sts.individual(sind).metadata\n                                )\n                            sn = st.parent(sn)\n                        last_n = t.num_tracked_samples(n)\n                        n = t.parent(n)\n                st_nodes = list(st.nodes())\n                for k, v in enumerate(visited):\n                    assert v == (k in st_nodes)\n"
  },
  {
    "path": "python/tests/tsutil.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2024 Tskit Developers\n# Copyright (C) 2017 University of Oxford\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nA collection of utilities to edit and construct tree sequences.\n\"\"\"\n\nimport collections\nimport dataclasses\nimport functools\nimport json\nimport random\nimport string\nimport struct\n\nimport msprime\nimport numpy as np\nimport pytest\n\nimport tskit\nimport tskit.provenance as provenance\n\n\ndef random_bytes(max_length):\n    \"\"\"\n    Returns a random bytearray of the specified maximum length.\n    \"\"\"\n    length = random.randint(0, max_length)\n    return bytearray(random.randint(0, 255) for _ in range(length))\n\n\ndef random_strings(max_length):\n    \"\"\"\n    Returns a random bytearray of the specified maximum length.\n    \"\"\"\n    length = random.randint(0, max_length)\n    return \"\".join(random.choice(string.printable) for _ in range(length))\n\n\ndef 
add_provenance(provenance_table, method_name):\n    d = provenance.get_provenance_dict({\"command\": f\"tsutil.{method_name}\"})\n    provenance_table.add_row(json.dumps(d))\n\n\ndef subsample_sites(ts, num_sites):\n    \"\"\"\n    Returns a copy of the specified tree sequence with a random subsample of the\n    specified number of sites.\n    \"\"\"\n    t = ts.dump_tables()\n    t.sites.reset()\n    t.mutations.reset()\n    sites_to_keep = set(random.sample(list(range(ts.num_sites)), num_sites))\n    for site in ts.sites():\n        if site.id in sites_to_keep:\n            site_id = t.sites.append(site)\n            for mutation in site.mutations:\n                t.mutations.append(mutation.replace(site=site_id))\n    add_provenance(t.provenances, \"subsample_sites\")\n    return t.tree_sequence()\n\n\ndef insert_branch_mutations(ts, mutations_per_branch=1, num_states=2):\n    \"\"\"\n    Returns a copy of the specified tree sequence with a mutation on every branch\n    in every tree.\n    \"\"\"\n    if mutations_per_branch == 0:\n        return ts\n    tables = ts.dump_tables()\n    tables.sites.clear()\n    tables.mutations.clear()\n    for tree in ts.trees():\n        site = tables.sites.add_row(position=tree.interval.left, ancestral_state=\"0\")\n        for root in tree.roots:\n            state = {tskit.NULL: 0}\n            mutation = {tskit.NULL: -1}\n            stack = [root]\n            while len(stack) > 0:\n                u = stack.pop()\n                stack.extend(tree.children(u))\n                v = tree.parent(u)\n                state[u] = state[v]\n                parent = mutation[v]\n                for _ in range(mutations_per_branch):\n                    state[u] = (state[u] + 1) % num_states\n                    metadata = f\"{len(tables.mutations)}\".encode()\n                    mutation[u] = tables.mutations.add_row(\n                        site=site,\n                        node=u,\n                        
derived_state=str(state[u]),\n                        parent=parent,\n                        metadata=metadata,\n                    )\n                    parent = mutation[u]\n    add_provenance(tables.provenances, \"insert_branch_mutations\")\n    return tables.tree_sequence()\n\n\ndef remove_mutation_times(ts):\n    tables = ts.dump_tables()\n    tables.mutations.time = np.full_like(tables.mutations.time, tskit.UNKNOWN_TIME)\n    return tables.tree_sequence()\n\n\ndef insert_discrete_time_mutations(ts, num_times=4, num_sites=10):\n    \"\"\"\n    Inserts mutations in the tree sequence at regularly-spaced num_sites\n    positions, at only a discrete set of times (the same for all trees): at\n    num_times times evenly spaced between 0 and the maximum time.\n    \"\"\"\n    tables = ts.dump_tables()\n    tables.sites.clear()\n    tables.mutations.clear()\n    height = max(t.time(t.roots[0]) for t in ts.trees())\n    for j, pos in enumerate(np.linspace(0, tables.sequence_length, num_sites + 1)[:-1]):\n        anc = \"X\" * j\n        tables.sites.add_row(position=pos, ancestral_state=anc)\n        t = ts.at(pos)\n        for k, s in enumerate(np.linspace(0, height, num_times)):\n            for n in t.nodes():\n                if t.time(n) <= s and (\n                    (t.parent(n) == tskit.NULL) or (t.time(t.parent(n)) > s)\n                ):\n                    tables.mutations.add_row(\n                        site=j, node=n, derived_state=anc + str(k), time=s\n                    )\n                    k += 1\n    tables.sort()\n    tables.build_index()\n    tables.compute_mutation_parents()\n    return tables.tree_sequence()\n\n\ndef insert_branch_sites(ts, m=1):\n    \"\"\"\n    Returns a copy of the specified tree sequence with m sites on every branch\n    of every tree.\n    \"\"\"\n    if m == 0:\n        return ts\n    tables = ts.dump_tables()\n    tables.sites.clear()\n    tables.mutations.clear()\n    for tree in ts.trees():\n        left, right 
= tree.interval\n        delta = (right - left) / (m * len(list(tree.nodes())))\n        x = left\n        for u in tree.nodes():\n            if tree.parent(u) != tskit.NULL:\n                for _ in range(m):\n                    site = tables.sites.add_row(position=x, ancestral_state=\"0\")\n                    tables.mutations.add_row(site=site, node=u, derived_state=\"1\")\n                    x += delta\n    add_provenance(tables.provenances, \"insert_branch_sites\")\n    return tables.tree_sequence()\n\n\ndef insert_multichar_mutations(ts, seed=1, max_len=10):\n    \"\"\"\n    Returns a copy of the specified tree sequence with multiple chararacter\n    mutations on a randomly chosen branch in every tree.\n    \"\"\"\n    rng = random.Random(seed)\n    letters = [\"A\", \"C\", \"T\", \"G\"]\n    tables = ts.dump_tables()\n    tables.sites.clear()\n    tables.mutations.clear()\n    for tree in ts.trees():\n        ancestral_state = rng.choice(letters) * rng.randint(0, max_len)\n        site = tables.sites.add_row(\n            position=tree.interval.left, ancestral_state=ancestral_state\n        )\n        nodes = list(tree.nodes())\n        nodes.remove(tree.root)\n        u = rng.choice(nodes)\n        derived_state = ancestral_state\n        while ancestral_state == derived_state:\n            derived_state = rng.choice(letters) * rng.randint(0, max_len)\n        tables.mutations.add_row(site=site, node=u, derived_state=derived_state)\n    add_provenance(tables.provenances, \"insert_multichar_mutations\")\n    return tables.tree_sequence()\n\n\ndef insert_random_ploidy_individuals(\n    ts, min_ploidy=0, max_ploidy=5, max_dimension=3, samples_only=True, seed=1\n):\n    \"\"\"\n    Takes random contiguous subsets of the samples and assigns them to individuals.\n    Also creates random locations in variable dimensions in the unit interval,\n    and assigns random parents (including NULL parents). 
Note that resulting\n    individuals will often have nodes with inconsistent populations and/or time.\n    \"\"\"\n    rng = random.Random(seed)\n    if samples_only:\n        node_ids = np.array(ts.samples(), dtype=\"int\")\n    else:\n        node_ids = np.arange(ts.num_nodes)\n    j = 0\n    tables = ts.dump_tables()\n    tables.individuals.clear()\n    individual = tables.nodes.individual[:]\n    individual[:] = tskit.NULL\n    ind_id = -1\n    while j < len(node_ids):\n        ploidy = rng.randint(min_ploidy, max_ploidy)\n        nodes = node_ids[j : min(j + ploidy, len(node_ids))]\n        dimension = rng.randint(0, max_dimension)\n        location = [rng.random() for _ in range(dimension)]\n        parents = rng.sample(range(-1, 1 + ind_id), min(1 + ind_id, rng.randint(0, 3)))\n        ind_id = tables.individuals.add_row(location=location, parents=parents)\n        individual[nodes] = ind_id\n        j += ploidy\n    tables.nodes.individual = individual\n    return tables.tree_sequence()\n\n\ndef insert_random_consistent_individuals(\n    ts, min_ploidy=0, max_ploidy=5, min_dimension=0, max_dimension=3, seed=1\n):\n    \"\"\"\n    Takes random subsets of nodes having the same time and population and\n    assigns them to individuals.  
Also creates random locations in variable\n    dimensions in the unit interval, and assigns random parents (including NULL\n    parents).\n    \"\"\"\n    rng = random.Random(seed)\n    tables = ts.dump_tables()\n    tables.individuals.clear()\n    individual = tables.nodes.individual[:]\n    individual[:] = tskit.NULL\n    ind_id = -1\n    pops = np.arange(ts.num_populations)\n    for pop in pops:\n        n = tables.nodes.population == pop\n        times = np.unique(tables.nodes.time[n])\n        for t in times:\n            nn = np.where(np.logical_and(n, tables.nodes.time == t))[0]\n            rng.shuffle(nn)\n            j = 0\n            while j < len(nn):\n                ploidy = rng.randint(min_ploidy, max_ploidy)\n                nodes = nn[j : min(j + ploidy, len(nn))]\n                dimension = rng.randint(min_dimension, max_dimension)\n                location = [rng.random() for _ in range(dimension)]\n                parents = rng.sample(\n                    range(-1, 1 + ind_id), min(1 + ind_id, rng.randint(0, 3))\n                )\n                ind_id = tables.individuals.add_row(location=location, parents=parents)\n                individual[nodes] = ind_id\n                j += ploidy\n                j += rng.randint(0, 2)  # skip a random number\n    tables.nodes.individual = individual\n    return tables.tree_sequence()\n\n\ndef insert_individuals(ts, nodes=None, ploidy=1):\n    \"\"\"\n    Inserts individuals into the tree sequence using the specified list\n    of node (or use all sample nodes if None) with the specified ploidy by combining\n    ploidy-sized chunks of the list. 
Add metadata to the individuals so we can\n    track them\n    \"\"\"\n    if nodes is None:\n        nodes = ts.samples()\n    assert len(nodes) % ploidy == 0  # To allow mixed ploidies we could comment this out\n    tables = ts.dump_tables()\n    tables.individuals.clear()\n    individual = tables.nodes.individual[:]\n    individual[:] = tskit.NULL\n    j = 0\n    while j < len(nodes):\n        nodes_in_individual = nodes[j : min(len(nodes), j + ploidy)]\n        # should we warn here if nodes[j : j + ploidy] are at different times?\n        # probably not, as although this is unusual, it is actually allowed\n        ind_id = tables.individuals.add_row(\n            metadata=f\"orig_id {tables.individuals.num_rows}\".encode()\n        )\n        individual[nodes_in_individual] = ind_id\n        j += ploidy\n    tables.nodes.individual = individual\n    return tables.tree_sequence()\n\n\ndef mark_metadata(ts, table_name, prefix=\"orig_id:\"):\n    \"\"\"\n    Add metadata to all rows of the form prefix + row_number\n    \"\"\"\n    tables = ts.dump_tables()\n    table = getattr(tables, table_name)\n    table.packset_metadata([(prefix + str(i)).encode() for i in range(table.num_rows)])\n    return tables.tree_sequence()\n\n\ndef permute_nodes(ts, node_map):\n    \"\"\"\n    Returns a copy of the specified tree sequence such that the nodes are\n    permuted according to the specified map.\n    \"\"\"\n    tables = ts.dump_tables()\n    tables.nodes.clear()\n    tables.edges.clear()\n    tables.mutations.clear()\n    # Mapping from nodes in the new tree sequence back to nodes in the original\n    reverse_map = [0 for _ in node_map]\n    for j in range(ts.num_nodes):\n        reverse_map[node_map[j]] = j\n    old_nodes = list(ts.nodes())\n    for j in range(ts.num_nodes):\n        old_node = old_nodes[reverse_map[j]]\n        tables.nodes.append(old_node)\n    for edge in ts.edges():\n        tables.edges.append(\n            edge.replace(parent=node_map[edge.parent], 
child=node_map[edge.child])\n        )\n    for site in ts.sites():\n        for mutation in site.mutations:\n            tables.mutations.append(\n                mutation.replace(site=site.id, node=node_map[mutation.node])\n            )\n    tables.sort()\n    add_provenance(tables.provenances, \"permute_nodes\")\n    return tables.tree_sequence()\n\n\ndef insert_redundant_breakpoints(ts):\n    \"\"\"\n    Builds a new tree sequence containing redundant breakpoints.\n    \"\"\"\n    tables = ts.dump_tables()\n    tables.edges.reset()\n    for r in ts.edges():\n        x = r.left + (r.right - r.left) / 2\n        tables.edges.append(r.replace(right=x))\n        tables.edges.append(r.replace(left=x))\n    add_provenance(tables.provenances, \"insert_redundant_breakpoints\")\n    new_ts = tables.tree_sequence()\n    assert new_ts.num_edges == 2 * ts.num_edges\n    return new_ts\n\n\ndef single_childify(ts):\n    \"\"\"\n    Builds a new equivalent tree sequence which contains an extra node in the\n    middle of all existing branches.\n    \"\"\"\n    tables = ts.dump_tables()\n\n    mutations_above_node = collections.defaultdict(list)\n    for mut in tables.mutations:\n        mutations_above_node[mut.node].append(mut)\n\n    mutations_on_edge = collections.defaultdict(list)\n    for edge_idx, edge in enumerate(tables.edges):\n        for mut in mutations_above_node[edge.child]:\n            if edge.left <= tables.sites[mut.site].position < edge.right:\n                mutations_on_edge[edge_idx].append(mut)\n\n    time = tables.nodes.time[:]\n    tables.edges.reset()\n    tables.mutations.reset()\n    for edge in ts.edges():\n        # Insert a new node in between the parent and child.\n        t = time[edge.child] + (time[edge.parent] - time[edge.child]) / 2\n        u = tables.nodes.add_row(time=t)\n        tables.edges.append(edge.replace(parent=u))\n        tables.edges.append(edge.replace(child=u))\n        for mut in mutations_on_edge[edge.id]:\n            
if mut.time < t:\n                tables.mutations.append(mut)\n            else:\n                tables.mutations.append(mut.replace(node=u))\n    tables.sort()\n    add_provenance(tables.provenances, \"insert_redundant_breakpoints\")\n    return tables.tree_sequence()\n\n\ndef add_random_metadata(ts, seed=1, max_length=10):\n    \"\"\"\n    Returns a copy of the specified tree sequence with random metadata assigned\n    to the nodes, sites and mutations.\n    \"\"\"\n    tables = ts.dump_tables()\n    np.random.seed(seed)\n\n    length = np.random.randint(0, max_length, ts.num_nodes)\n    offset = np.cumsum(np.hstack(([0], length)), dtype=np.uint32)\n    # Older versions of numpy didn't have a dtype argument for randint, so\n    # must use astype instead.\n    metadata = np.random.randint(-127, 127, offset[-1]).astype(np.int8)\n    nodes = tables.nodes\n    nodes.set_columns(\n        flags=nodes.flags,\n        population=nodes.population,\n        time=nodes.time,\n        metadata_offset=offset,\n        metadata=metadata,\n        individual=nodes.individual,\n    )\n\n    length = np.random.randint(0, max_length, ts.num_sites)\n    offset = np.cumsum(np.hstack(([0], length)), dtype=np.uint32)\n    metadata = np.random.randint(-127, 127, offset[-1]).astype(np.int8)\n    sites = tables.sites\n    sites.set_columns(\n        position=sites.position,\n        ancestral_state=sites.ancestral_state,\n        ancestral_state_offset=sites.ancestral_state_offset,\n        metadata_offset=offset,\n        metadata=metadata,\n    )\n\n    length = np.random.randint(0, max_length, ts.num_mutations)\n    offset = np.cumsum(np.hstack(([0], length)), dtype=np.uint32)\n    metadata = np.random.randint(-127, 127, offset[-1]).astype(np.int8)\n    mutations = tables.mutations\n    mutations.set_columns(\n        site=mutations.site,\n        node=mutations.node,\n        time=mutations.time,\n        parent=mutations.parent,\n        derived_state=mutations.derived_state,\n   
     derived_state_offset=mutations.derived_state_offset,\n        metadata_offset=offset,\n        metadata=metadata,\n    )\n\n    length = np.random.randint(0, max_length, ts.num_individuals)\n    offset = np.cumsum(np.hstack(([0], length)), dtype=np.uint32)\n    metadata = np.random.randint(-127, 127, offset[-1]).astype(np.int8)\n    individuals = tables.individuals\n    individuals.set_columns(\n        flags=individuals.flags,\n        location=individuals.location,\n        location_offset=individuals.location_offset,\n        parents=individuals.parents,\n        parents_offset=individuals.parents_offset,\n        metadata_offset=offset,\n        metadata=metadata,\n    )\n\n    length = np.random.randint(0, max_length, ts.num_populations)\n    offset = np.cumsum(np.hstack(([0], length)), dtype=np.uint32)\n    metadata = np.random.randint(-127, 127, offset[-1]).astype(np.int8)\n    populations = tables.populations\n    populations.set_columns(metadata_offset=offset, metadata=metadata)\n\n    add_provenance(tables.provenances, \"add_random_metadata\")\n    ts = tables.tree_sequence()\n    return ts\n\n\ndef jiggle_samples(ts):\n    \"\"\"\n    Returns a copy of the specified tree sequence with the sample nodes switched\n    around. The first n / 2 existing samples become non samples, and the last\n    n / 2 node become samples.\n    \"\"\"\n    tables = ts.dump_tables()\n    nodes = tables.nodes\n    flags = nodes.flags\n    oldest_parent = tables.edges.parent[-1]\n    n = ts.sample_size\n    flags[: n // 2] = 0\n    flags[oldest_parent - n // 2 : oldest_parent] = 1\n    nodes.set_columns(flags, nodes.time)\n    add_provenance(tables.provenances, \"jiggle_samples\")\n    return tables.tree_sequence()\n\n\ndef generate_site_mutations(\n    tree, position, mu, site_table, mutation_table, multiple_per_node=True\n):\n    \"\"\"\n    Generates mutations for the site at the specified position on the specified\n    tree. 
Mutations happen at rate mu along each branch. The site and mutation\n    information are recorded in the specified tables.  Note that this records\n    more than one mutation per edge.\n    \"\"\"\n    assert tree.interval.left <= position < tree.interval.right\n    states = [\"A\", \"C\", \"G\", \"T\"]\n    ancestral_state = random.choice(states)\n    site_table.add_row(position, ancestral_state)\n    site = site_table.num_rows - 1\n    for root in tree.roots:\n        stack = [(root, ancestral_state, tskit.NULL)]\n        while len(stack) != 0:\n            u, state, parent = stack.pop()\n            if u != root:\n                branch_length = tree.branch_length(u)\n                x = random.expovariate(mu)\n                new_state = state\n                while x < branch_length:\n                    new_state = random.choice(states)\n                    if multiple_per_node:\n                        mutation_table.add_row(site, u, new_state, parent)\n                        parent = mutation_table.num_rows - 1\n                        state = new_state\n                    x += random.expovariate(mu)\n                else:\n                    if not multiple_per_node:\n                        mutation_table.add_row(site, u, new_state, parent)\n                        parent = mutation_table.num_rows - 1\n                        state = new_state\n            stack.extend(reversed([(v, state, parent) for v in tree.children(u)]))\n\n\ndef jukes_cantor(ts, num_sites, mu, multiple_per_node=True, seed=None):\n    \"\"\"\n    Returns a copy of the specified tree sequence with Jukes-Cantor mutations\n    applied at the specified rate at the specified number of sites. 
Site positions\n    are chosen uniformly.\n    \"\"\"\n    random.seed(seed)\n    positions = [ts.sequence_length * random.random() for _ in range(num_sites)]\n    positions.sort()\n    tables = ts.dump_tables()\n    tables.sites.clear()\n    tables.mutations.clear()\n    trees = ts.trees()\n    t = next(trees)\n    for position in positions:\n        while position >= t.interval.right:\n            t = next(trees)\n        generate_site_mutations(\n            t,\n            position,\n            mu,\n            tables.sites,\n            tables.mutations,\n            multiple_per_node=multiple_per_node,\n        )\n    add_provenance(tables.provenances, \"jukes_cantor\")\n    new_ts = tables.tree_sequence()\n    return new_ts\n\n\ndef caterpillar_tree(n, num_sites=0, num_mutations=1):\n    \"\"\"\n    Returns caterpillar tree with n samples. For each of the sites and\n    path of at most n - 2 mutations are put down along the internal\n    nodes. Each site gets exactly the same set of mutations.\n    \"\"\"\n    if num_sites > 0 and num_mutations > n - 2:\n        raise ValueError(\"At most n - 2 mutations allowed\")\n    tables = tskit.TableCollection(1)\n    for _ in range(n):\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n    last_node = 0\n    # Add the internal nodes\n    for j in range(n - 1):\n        u = tables.nodes.add_row(time=j + 1)\n        tables.edges.add_row(0, tables.sequence_length, u, last_node)\n        tables.edges.add_row(0, tables.sequence_length, u, j + 1)\n        last_node = u\n    for j in range(num_sites):\n        tables.sites.add_row(position=(j + 1) / n, ancestral_state=\"0\")\n        node = 2 * n - 3\n        state = 0\n        for _ in range(num_mutations):\n            state = (state + 1) % 2\n            tables.mutations.add_row(site=j, derived_state=str(state), node=node)\n            node -= 1\n\n    tables.sort()\n    tables.build_index()\n    tables.compute_mutation_parents()\n    return 
tables.tree_sequence()\n\n\ndef compute_mutation_parent(ts):\n    \"\"\"\n    Compute the `parent` column of a MutationTable. Correct computation uses\n    topological information in the nodes and edges, as well as the fact that\n    each mutation must be listed after the mutation on whose background it\n    occurred (i.e., its parent).\n\n    :param TreeSequence ts: The tree sequence to compute for.  Need not\n        have a valid mutation parent column.\n    \"\"\"\n    mutation_parent = np.zeros(ts.num_mutations, dtype=np.int32) - 1\n    # Maps nodes to the bottom mutation on each branch\n    bottom_mutation = np.zeros(ts.num_nodes, dtype=np.int32) - 1\n    for tree in ts.trees():\n        for site in tree.sites():\n            # Go forward through the mutations creating a mapping from the\n            # mutations to the nodes. If we see more than one mutation\n            # at a node, then these must be parents since we're assuming\n            # they are in order.\n            for mutation in site.mutations:\n                if bottom_mutation[mutation.node] != tskit.NULL:\n                    mutation_parent[mutation.id] = bottom_mutation[mutation.node]\n                bottom_mutation[mutation.node] = mutation.id\n            # There's no point in checking the first mutation, since this cannot\n            # have a parent.\n            for mutation in site.mutations[1:]:\n                if mutation_parent[mutation.id] == tskit.NULL:\n                    v = tree.parent(mutation.node)\n                    # Traverse upwards until we find a another mutation or root.\n                    while v != tskit.NULL and bottom_mutation[v] == tskit.NULL:\n                        v = tree.parent(v)\n                    if v != tskit.NULL:\n                        mutation_parent[mutation.id] = bottom_mutation[v]\n            # Reset the maps for the next site.\n            for mutation in site.mutations:\n                bottom_mutation[mutation.node] = tskit.NULL\n    
        assert np.all(bottom_mutation == -1)\n    return mutation_parent\n\n\ndef py_subset(\n    tables,\n    nodes,\n    record_provenance=True,\n    reorder_populations=True,\n    remove_unreferenced=True,\n):\n    \"\"\"\n    Naive implementation of the TableCollection.subset method using the Python API.\n    \"\"\"\n    if np.any(nodes > tables.nodes.num_rows) or np.any(nodes < 0):\n        raise ValueError(\"Nodes out of bounds.\")\n    full = tables.copy()\n    tables.clear()\n    # mapping from old to new ids\n    node_map = {}\n    ind_map = {tskit.NULL: tskit.NULL}\n    pop_map = {tskit.NULL: tskit.NULL}\n    if not reorder_populations:\n        for j, pop in enumerate(full.populations):\n            pop_map[j] = j\n            tables.populations.append(pop)\n    # first build individual map\n    if not remove_unreferenced:\n        keep_ind = [True for _ in full.individuals]\n    else:\n        keep_ind = [False for _ in full.individuals]\n        for old_id in nodes:\n            i = full.nodes[old_id].individual\n            if i != tskit.NULL:\n                keep_ind[i] = True\n    new_ind_id = 0\n    for j, k in enumerate(keep_ind):\n        if k:\n            ind_map[j] = new_ind_id\n            new_ind_id += 1\n    # now the individual table\n    for j, k in enumerate(keep_ind):\n        if k:\n            ind = full.individuals[j]\n            new_ind_id = tables.individuals.append(\n                ind.replace(parents=[ind_map[i] for i in ind.parents if i in ind_map])\n            )\n\n            assert new_ind_id == ind_map[j]\n\n    for old_id in nodes:\n        node = full.nodes[old_id]\n        if node.population not in pop_map and node.population != tskit.NULL:\n            pop = full.populations[node.population]\n            new_pop_id = tables.populations.append(pop)\n            pop_map[node.population] = new_pop_id\n        new_id = tables.nodes.append(\n            node.replace(\n                population=pop_map[node.population],\n 
               individual=ind_map[node.individual],\n            )\n        )\n        node_map[old_id] = new_id\n    if not remove_unreferenced:\n        for j, ind in enumerate(full.populations):\n            if j not in pop_map:\n                pop_map[j] = tables.populations.append(ind)\n    for edge in full.edges:\n        if edge.child in nodes and edge.parent in nodes:\n            tables.edges.append(\n                edge.replace(parent=node_map[edge.parent], child=node_map[edge.child])\n            )\n    if full.migrations.num_rows > 0:\n        raise ValueError(\"Migrations are currently not supported in this operation.\")\n    site_map = {}\n    if not remove_unreferenced:\n        for j, site in enumerate(full.sites):\n            site_map[j] = tables.sites.append(site)\n    mutation_map = {tskit.NULL: tskit.NULL}\n    for i, mut in enumerate(full.mutations):\n        if mut.node in nodes:\n            if mut.site not in site_map:\n                site = full.sites[mut.site]\n                new_site = tables.sites.append(site)\n                site_map[mut.site] = new_site\n            new_mut = tables.mutations.append(\n                mut.replace(\n                    site=site_map[mut.site],\n                    node=node_map[mut.node],\n                    parent=mutation_map.get(mut.parent, tskit.NULL),\n                )\n            )\n            mutation_map[i] = new_mut\n\n    tables.sort()\n\n\ndef py_union(tables, other, nodes, record_provenance=True, add_populations=True):\n    \"\"\"\n    Python implementation of TableCollection.union().\n    \"\"\"\n    # mappings of id in other to new id in tables\n    # the +1 is to take care of mapping tskit.NULL(-1) to tskit.NULL\n    pop_map = [tskit.NULL for _ in range(other.populations.num_rows + 1)]\n    ind_map = [tskit.NULL for _ in range(other.individuals.num_rows + 1)]\n    node_map = [tskit.NULL for _ in range(other.nodes.num_rows + 1)]\n    site_map = [tskit.NULL for _ in 
range(other.sites.num_rows + 1)]\n    mut_map = [tskit.NULL for _ in range(other.mutations.num_rows + 1)]\n    original_num_individuals = tables.individuals.num_rows\n\n    for other_id, node in enumerate(other.nodes):\n        if nodes[other_id] != tskit.NULL and node.individual != tskit.NULL:\n            ind_map[node.individual] = tables.nodes[nodes[other_id]].individual\n\n    for other_id, node in enumerate(other.nodes):\n        if nodes[other_id] != tskit.NULL:\n            node_map[other_id] = nodes[other_id]\n        else:\n            if ind_map[node.individual] == tskit.NULL and node.individual != tskit.NULL:\n                ind = other.individuals[node.individual]\n                ind_id = tables.individuals.append(ind)\n                ind_map[node.individual] = ind_id\n            if pop_map[node.population] == tskit.NULL and node.population != tskit.NULL:\n                if not add_populations:\n                    pop_map[node.population] = node.population\n                else:\n                    pop = other.populations[node.population]\n                    pop_id = tables.populations.append(pop)\n                    pop_map[node.population] = pop_id\n            node_id = tables.nodes.append(\n                node.replace(\n                    population=pop_map[node.population],\n                    individual=ind_map[node.individual],\n                )\n            )\n            node_map[other_id] = node_id\n    individuals = tables.individuals\n    new_parents = individuals.parents\n    for i in range(\n        individuals.parents_offset[original_num_individuals], len(individuals.parents)\n    ):\n        new_parents[i] = ind_map[individuals.parents[i]]\n    individuals.parents = new_parents\n    for edge in other.edges:\n        if (nodes[edge.parent] == tskit.NULL) or (nodes[edge.child] == tskit.NULL):\n            tables.edges.append(\n                edge.replace(parent=node_map[edge.parent], child=node_map[edge.child])\n            
)\n    for other_id, mut in enumerate(other.mutations):\n        if nodes[mut.node] == tskit.NULL:\n            # add site: may already be in tables, but we deduplicate\n            if site_map[mut.site] == tskit.NULL:\n                site = other.sites[mut.site]\n                site_id = tables.sites.append(site)\n                site_map[mut.site] = site_id\n            mut_id = tables.mutations.append(\n                mut.replace(\n                    site=site_map[mut.site],\n                    node=node_map[mut.node],\n                    parent=tskit.NULL,\n                )\n            )\n            mut_map[other_id] = mut_id\n    # migration table\n    # grafting provenance table\n    if record_provenance:\n        pass\n    # sorting, deduplicating sites, and re-computing mutation parents\n    tables.sort()\n    tables.deduplicate_sites()\n    # need to sort again since after deduplicating sites, mutations may not be\n    # sorted by time within sites\n    tables.sort()\n    tables.build_index()\n    tables.compute_mutation_parents()\n\n\ndef compute_mutation_times(ts):\n    \"\"\"\n    Compute the `time` column of a MutationTable in a TableCollection.\n    Finds the set of mutations on an edge that share a site and spreads\n    the times evenly over the edge.\n\n    :param TreeSequence ts: The tree sequence to compute for.  
Need not\n        have a valid mutation time column.\n    \"\"\"\n    tables = ts.dump_tables()\n    mutations = tables.mutations\n\n    mutations_above_node = collections.defaultdict(list)\n    for mut_idx, mut in enumerate(mutations):\n        mutations_above_node[mut.node].append((mut_idx, mut))\n\n    mutations_at_site_on_edge = collections.defaultdict(list)\n    for edge_idx, edge in enumerate(tables.edges):\n        for mut_idx, mut in mutations_above_node[edge.child]:\n            if edge.left <= tables.sites[mut.site].position < edge.right:\n                mutations_at_site_on_edge[(mut.site, edge_idx)].append(mut_idx)\n\n    edges = tables.edges\n    nodes = tables.nodes\n    times = np.full(len(mutations), -1, dtype=np.float64)\n    for (_, edge_idx), edge_mutations in mutations_at_site_on_edge.items():\n        start_time = nodes[edges[edge_idx].child].time\n        end_time = nodes[edges[edge_idx].parent].time\n        duration = end_time - start_time\n        for i, mut_idx in enumerate(edge_mutations):\n            times[mut_idx] = end_time - (\n                duration * ((i + 1) / (len(edge_mutations) + 1))\n            )\n\n    # Mutations not on an edge (i.e. above a root) get given their node's time\n    for i in range(len(mutations)):\n        if times[i] == -1:\n            times[i] = nodes[mutations[i].node].time\n    tables.mutations.time = times\n    tables.sort()\n    return tables.mutations.time\n\n\ndef shuffle_tables(\n    tables,\n    seed,\n    shuffle_edges=True,\n    shuffle_populations=True,\n    shuffle_individuals=True,\n    shuffle_sites=True,\n    shuffle_mutations=True,\n    shuffle_migrations=True,\n    keep_mutation_parent_order=False,\n):\n    \"\"\"\n    Randomizes the order of rows in (possibly) all except the Node table.  
Note\n    that if mutations are completely shuffled, then TableCollection.sort() will\n    not necessarily produce valid tables (unless all mutation times are present\n    and distinct), since currently only canonicalise puts parent mutations\n    before children.  However, setting keep_mutation_parent_order to True will\n    maintain the order of mutations within each site.\n\n    :param TableCollection tables: The table collection that is shuffled (in place).\n    \"\"\"\n    rng = random.Random(seed)\n    orig = tables.copy()\n    tables.nodes.clear()\n    tables.individuals.clear()\n    tables.populations.clear()\n    tables.edges.clear()\n    tables.sites.clear()\n    tables.mutations.clear()\n    tables.drop_index()\n    # populations\n    randomised_pops = list(enumerate(orig.populations))\n    if shuffle_populations:\n        rng.shuffle(randomised_pops)\n    pop_id_map = {tskit.NULL: tskit.NULL}\n    for j, p in randomised_pops:\n        pop_id_map[j] = tables.populations.append(p)\n    # individuals\n    randomised_inds = list(enumerate(orig.individuals))\n    if shuffle_individuals:\n        rng.shuffle(randomised_inds)\n    ind_id_map = {tskit.NULL: tskit.NULL}\n    for j, i in randomised_inds:\n        ind_id_map[j] = tables.individuals.append(i)\n    tables.individuals.parents = [\n        tskit.NULL if i == tskit.NULL else ind_id_map[i]\n        for i in tables.individuals.parents\n    ]\n    # nodes (same order, but remapped populations and individuals)\n    for n in orig.nodes:\n        tables.nodes.append(\n            n.replace(\n                population=pop_id_map[n.population],\n                individual=ind_id_map[n.individual],\n            )\n        )\n    # edges\n    randomised_edges = list(orig.edges)\n    if shuffle_edges:\n        rng.shuffle(randomised_edges)\n    for e in randomised_edges:\n        tables.edges.append(e)\n    # migrations\n    randomised_migrations = list(orig.migrations)\n    if shuffle_migrations:\n        
rng.shuffle(randomised_migrations)\n    for m in randomised_migrations:\n        tables.migrations.append(\n            m.replace(source=pop_id_map[m.source], dest=pop_id_map[m.dest])\n        )\n    # sites\n    randomised_sites = list(enumerate(orig.sites))\n    if shuffle_sites:\n        rng.shuffle(randomised_sites)\n    site_id_map = {}\n    for j, s in randomised_sites:\n        site_id_map[j] = tables.sites.append(s)\n    # mutations\n    randomised_mutations = list(enumerate(orig.mutations))\n    if shuffle_mutations:\n        if keep_mutation_parent_order:\n            # randomise *except* keeping parent mutations before children\n            mut_site_order = [mut.site for mut in orig.mutations]\n            rng.shuffle(mut_site_order)\n            mut_by_site = {s: [] for s in mut_site_order}\n            for j, m in enumerate(orig.mutations):\n                mut_by_site[m.site].insert(0, (j, m))\n            randomised_mutations = []\n            for s in mut_site_order:\n                randomised_mutations.append(mut_by_site[s].pop())\n        else:\n            rng.shuffle(randomised_mutations)\n    mut_id_map = {tskit.NULL: tskit.NULL}\n    for j, (k, _) in enumerate(randomised_mutations):\n        mut_id_map[k] = j\n    for _, m in randomised_mutations:\n        tables.mutations.append(\n            m.replace(site=site_id_map[m.site], parent=mut_id_map[m.parent])\n        )\n    if keep_mutation_parent_order:\n        assert np.all(tables.mutations.parent < np.arange(tables.mutations.num_rows))\n    return tables\n\n\ndef cmp_site(i, j, tables):\n    ret = tables.sites.position[i] - tables.sites.position[j]\n    if ret == 0:\n        ret = i - j\n    return ret\n\n\ndef cmp_mutation(i, j, tables, site_order, num_descendants=None):\n    site_i = tables.mutations.site[i]\n    site_j = tables.mutations.site[j]\n    ret = site_order[site_i] - site_order[site_j]\n    # Within a particular site sort by time if known, then node time fallback\n    if (\n   
     ret == 0\n        and (not tskit.is_unknown_time(tables.mutations.time[i]))\n        and (not tskit.is_unknown_time(tables.mutations.time[j]))\n    ):\n        ret = tables.mutations.time[j] - tables.mutations.time[i]\n    if ret == 0:\n        # Use node times as fallback when mutation times are unknown or equal\n        node_time_i = tables.nodes.time[tables.mutations.node[i]]\n        node_time_j = tables.nodes.time[tables.mutations.node[j]]\n        ret = node_time_j - node_time_i\n    if ret == 0:\n        ret = num_descendants[j] - num_descendants[i]\n    # Tiebreaker: node\n    if ret == 0:\n        ret = tables.mutations.node[i] - tables.mutations.node[j]\n    # Final tiebreaker: ID\n    if ret == 0:\n        ret = i - j\n    return ret\n\n\ndef cmp_edge(i, j, tables):\n    ret = (\n        tables.nodes.time[tables.edges.parent[i]]\n        - tables.nodes.time[tables.edges.parent[j]]\n    )\n    if ret == 0:\n        ret = tables.edges.parent[i] - tables.edges.parent[j]\n    if ret == 0:\n        ret = tables.edges.child[i] - tables.edges.child[j]\n    if ret == 0:\n        ret = tables.edges.left[i] - tables.edges.left[j]\n    return ret\n\n\ndef cmp_migration(i, j, tables):\n    ret = tables.migrations.time[i] - tables.migrations.time[j]\n    if ret == 0:\n        ret = tables.migrations.source[i] - tables.migrations.source[j]\n    if ret == 0:\n        ret = tables.migrations.dest[i] - tables.migrations.dest[j]\n    if ret == 0:\n        ret = tables.migrations.left[i] - tables.migrations.left[j]\n    if ret == 0:\n        ret = tables.migrations.node[i] - tables.migrations.node[j]\n    return ret\n\n\ndef cmp_individual_canonical(i, j, tables, num_descendants):\n    ret = num_descendants[j] - num_descendants[i]\n    if ret == 0:\n        node_i = node_j = tables.nodes.num_rows\n        ni = np.where(tables.nodes.individual == i)[0]\n        if len(ni) > 0:\n            node_i = np.min(ni)\n        nj = np.where(tables.nodes.individual == j)[0]\n    
    if len(nj) > 0:\n            node_j = np.min(nj)\n        ret = node_i - node_j\n    if ret == 0:\n        ret = i - j\n    return ret\n\n\ndef compute_mutation_num_descendants(tables):\n    mutations = tables.mutations\n    num_descendants = np.zeros(mutations.num_rows)\n    for p in mutations.parent:\n        while p != tskit.NULL:\n            num_descendants[p] += 1\n            p = mutations.parent[p]\n    return num_descendants\n\n\ndef compute_individual_num_descendants(tables):\n    # adapted from sort_individual_table\n    individuals = tables.individuals\n    num_individuals = individuals.num_rows\n    num_descendants = np.zeros((num_individuals,), np.int64)\n\n    # First find the set of individuals that have no children\n    # by creating an array of incoming edge counts\n    incoming_edge_count = np.zeros((num_individuals,), np.int64)\n    for parent in individuals.parents:\n        if parent != tskit.NULL:\n            incoming_edge_count[parent] += 1\n    todo = np.full((num_individuals + 1,), -1, np.int64)\n    current_todo = 0\n    todo_insertion_point = 0\n    for individual, num_edges in enumerate(incoming_edge_count):\n        if num_edges == 0:\n            todo[todo_insertion_point] = individual\n            todo_insertion_point += 1\n\n    # Now process individuals from the set that have no children, updating their\n    # parents' information as we go, and adding their parents to the list if\n    # this was their last child\n    while todo[current_todo] != -1:\n        individual = todo[current_todo]\n        current_todo += 1\n        for parent in individuals.parents[\n            individuals.parents_offset[individual] : individuals.parents_offset[\n                individual + 1\n            ]\n        ]:\n            if parent != tskit.NULL:\n                incoming_edge_count[parent] -= 1\n                num_descendants[parent] += 1 + num_descendants[individual]\n                if incoming_edge_count[parent] == 0:\n                
    todo[todo_insertion_point] = parent\n                    todo_insertion_point += 1\n\n    if num_individuals > 0:\n        assert np.min(incoming_edge_count) >= 0\n        if np.max(incoming_edge_count) > 0:\n            raise ValueError(\"Individual pedigree has cycles\")\n    return num_descendants\n\n\ndef py_canonicalise(tables, remove_unreferenced=True):\n    tables.subset(\n        np.arange(tables.nodes.num_rows),\n        record_provenance=False,\n        remove_unreferenced=remove_unreferenced,\n    )\n    py_sort(tables, canonical=True)\n\n\ndef py_sort(tables, canonical=False):\n    copy = tables.copy()\n    tables.edges.clear()\n    tables.sites.clear()\n    tables.mutations.clear()\n    tables.migrations.clear()\n    edge_key = functools.cmp_to_key(lambda a, b: cmp_edge(a, b, tables=copy))\n    sorted_edges = sorted(range(copy.edges.num_rows), key=edge_key)\n    site_key = functools.cmp_to_key(lambda a, b: cmp_site(a, b, tables=copy))\n    sorted_sites = sorted(range(copy.sites.num_rows), key=site_key)\n    site_id_map = {k: j for j, k in enumerate(sorted_sites)}\n    site_order = np.argsort(sorted_sites)\n    # Canonical sort, and regular sort are the same for mutations\n    mut_num_descendants = compute_mutation_num_descendants(copy)\n    mut_key = functools.cmp_to_key(\n        lambda a, b: cmp_mutation(\n            a,\n            b,\n            tables=copy,\n            site_order=site_order,\n            num_descendants=mut_num_descendants,\n        )\n    )\n    sorted_muts = sorted(range(copy.mutations.num_rows), key=mut_key)\n    mut_id_map = {k: j for j, k in enumerate(sorted_muts)}\n    mut_id_map[tskit.NULL] = tskit.NULL\n    mig_key = functools.cmp_to_key(lambda a, b: cmp_migration(a, b, tables=copy))\n    sorted_migs = sorted(range(copy.migrations.num_rows), key=mig_key)\n    for edge_id in sorted_edges:\n        tables.edges.append(copy.edges[edge_id])\n    for site_id in sorted_sites:\n        
tables.sites.append(copy.sites[site_id])\n    for mut_id in sorted_muts:\n        tables.mutations.append(\n            copy.mutations[mut_id].replace(\n                site=site_id_map[copy.mutations[mut_id].site],\n                parent=mut_id_map[copy.mutations[mut_id].parent],\n            )\n        )\n    for mig_id in sorted_migs:\n        tables.migrations.append(copy.migrations[mig_id])\n\n    # individuals\n    if canonical:\n        tables.individuals.clear()\n        ind_num_descendants = compute_individual_num_descendants(copy)\n        ind_key = functools.cmp_to_key(\n            lambda a, b: cmp_individual_canonical(\n                a,\n                b,\n                tables=copy,\n                num_descendants=ind_num_descendants,\n            )\n        )\n        sorted_inds = sorted(range(copy.individuals.num_rows), key=ind_key)\n        ind_id_map = {k: j for j, k in enumerate(sorted_inds)}\n        ind_id_map[tskit.NULL] = tskit.NULL\n        for ind_id in sorted_inds:\n            tables.individuals.append(\n                copy.individuals[ind_id].replace(\n                    parents=[ind_id_map[p] for p in copy.individuals[ind_id].parents],\n                )\n            )\n        tables.nodes.individual = [ind_id_map[i] for i in tables.nodes.individual]\n\n\ndef algorithm_T(ts):\n    \"\"\"\n    Simple implementation of algorithm T from the PLOS paper, taking into\n    account tree sequences with gaps and other complexities.\n    \"\"\"\n    sequence_length = ts.sequence_length\n    edges = list(ts.edges())\n    M = len(edges)\n    time = [ts.node(edge.parent).time for edge in edges]\n    in_order = sorted(\n        range(M),\n        key=lambda j: (edges[j].left, time[j], edges[j].parent, edges[j].child),\n    )\n    out_order = sorted(\n        range(M),\n        key=lambda j: (edges[j].right, -time[j], -edges[j].parent, -edges[j].child),\n    )\n    j = 0\n    k = 0\n    left = 0\n    parent = [-1 for _ in 
range(ts.num_nodes)]\n    while j < M or left < sequence_length:\n        while k < M and edges[out_order[k]].right == left:\n            edge = edges[out_order[k]]\n            parent[edge.child] = -1\n            k += 1\n        while j < M and edges[in_order[j]].left == left:\n            edge = edges[in_order[j]]\n            parent[edge.child] = edge.parent\n            j += 1\n        right = sequence_length\n        if j < M:\n            right = min(right, edges[in_order[j]].left)\n        if k < M:\n            right = min(right, edges[out_order[k]].right)\n        yield (left, right), parent\n        left = right\n\n\nclass QuintuplyLinkedTree:\n    def __init__(self, n, root_threshold=1):\n        self.root_threshold = root_threshold\n        self.parent = np.zeros(n + 1, dtype=np.int32) - 1\n        self.left_child = np.zeros(n + 1, dtype=np.int32) - 1\n        self.right_child = np.zeros(n + 1, dtype=np.int32) - 1\n        self.left_sib = np.zeros(n + 1, dtype=np.int32) - 1\n        self.right_sib = np.zeros(n + 1, dtype=np.int32) - 1\n        self.num_samples = np.zeros(n + 1, dtype=np.int32)\n        self.num_edges = 0\n        self.num_children = np.zeros(n + 1, dtype=np.int32)\n        self.edge = np.zeros(n + 1, dtype=np.int32) - 1\n\n    def __str__(self):\n        s = \"id\\tparent\\tlchild\\trchild\\tlsib\\trsib\\tnsamp\\tnchild\\tedge\\n\"\n        for j in range(len(self.parent)):\n            s += (\n                f\"{j}\\t{self.parent[j]}\\t\"\n                f\"{self.left_child[j]}\\t{self.right_child[j]}\\t\"\n                f\"{self.left_sib[j]}\\t{self.right_sib[j]}\\t\"\n                f\"{self.num_samples[j]}\\t\"\n                f\"{self.num_children[j]}\\t\"\n                f\"{self.edge[j]}\\n\"\n            )\n        return s\n\n    def roots(self):\n        roots = []\n        u = self.left_child[-1]\n        while u != -1:\n            roots.append(u)\n            u = self.right_sib[u]\n        return roots\n\n    def 
remove_branch(self, p, c):\n        lsib = self.left_sib[c]\n        rsib = self.right_sib[c]\n        if lsib == -1:\n            self.left_child[p] = rsib\n        else:\n            self.right_sib[lsib] = rsib\n        if rsib == -1:\n            self.right_child[p] = lsib\n        else:\n            self.left_sib[rsib] = lsib\n        self.parent[c] = -1\n        self.left_sib[c] = -1\n        self.right_sib[c] = -1\n        self.num_children[p] -= 1\n\n    def insert_branch(self, p, c):\n        assert self.parent[c] == -1, \"contradictory edges\"\n        self.parent[c] = p\n        u = self.right_child[p]\n        if u == -1:\n            self.left_child[p] = c\n            self.left_sib[c] = -1\n            self.right_sib[c] = -1\n        else:\n            self.right_sib[u] = c\n            self.left_sib[c] = u\n            self.right_sib[c] = -1\n        self.right_child[p] = c\n        self.num_children[p] += 1\n\n    def is_potential_root(self, u):\n        return self.num_samples[u] >= self.root_threshold\n\n    # Note we cheat a bit here and use the -1 == last element semantics from Python.\n    # We could use self.insert_branch(N, root) and then set self.parent[root] = -1.\n    def insert_root(self, root):\n        self.insert_branch(-1, root)\n\n    def remove_root(self, root):\n        self.remove_branch(-1, root)\n\n    def remove_edge(self, edge):\n        self.remove_branch(edge.parent, edge.child)\n        self.num_edges -= 1\n        self.edge[edge.child] = -1\n\n        u = edge.parent\n        while u != -1:\n            path_end = u\n            path_end_was_root = self.is_potential_root(u)\n            self.num_samples[u] -= self.num_samples[edge.child]\n            u = self.parent[u]\n\n        if path_end_was_root and not self.is_potential_root(path_end):\n            self.remove_root(path_end)\n        if self.is_potential_root(edge.child):\n            self.insert_root(edge.child)\n\n    def insert_edge(self, edge):\n        u = 
edge.parent\n        while u != -1:\n            path_end = u\n            path_end_was_root = self.is_potential_root(u)\n            self.num_samples[u] += self.num_samples[edge.child]\n            u = self.parent[u]\n\n        if self.is_potential_root(edge.child):\n            self.remove_root(edge.child)\n        if self.is_potential_root(path_end) and not path_end_was_root:\n            self.insert_root(path_end)\n\n        self.insert_branch(edge.parent, edge.child)\n        self.num_edges += 1\n        self.edge[edge.child] = edge.id\n\n\ndef algorithm_R(ts, root_threshold=1):\n    \"\"\"\n    Quintuply linked tree with root tracking.\n    \"\"\"\n    sequence_length = ts.sequence_length\n    N = ts.num_nodes\n    M = ts.num_edges\n    tree = QuintuplyLinkedTree(N, root_threshold=root_threshold)\n    edges = list(ts.edges())\n    in_order = ts.tables.indexes.edge_insertion_order\n    out_order = ts.tables.indexes.edge_removal_order\n\n    # Initialise the tree\n    for u in ts.samples():\n        tree.num_samples[u] = 1\n        if tree.is_potential_root(u):\n            tree.insert_root(u)\n\n    j = 0\n    k = 0\n    left = 0\n    while j < M or left < sequence_length:\n        while k < M and edges[out_order[k]].right == left:\n            tree.remove_edge(edges[out_order[k]])\n            k += 1\n        while j < M and edges[in_order[j]].left == left:\n            tree.insert_edge(edges[in_order[j]])\n            j += 1\n        right = sequence_length\n        if j < M:\n            right = min(right, edges[in_order[j]].left)\n        if k < M:\n            right = min(right, edges[out_order[k]].right)\n        yield (left, right), tree\n        left = right\n\n\nclass SampleListTree:\n    \"\"\"\n    Straightforward implementation of the quintuply linked tree for developing\n    and testing the sample lists feature.\n\n    NOTE: The interface is pretty awkward; it's not intended for anything other\n    than testing.\n    \"\"\"\n\n    def 
__init__(self, tree_sequence, tracked_samples=None):\n        self.tree_sequence = tree_sequence\n        num_nodes = tree_sequence.num_nodes\n        # Quintuply linked tree.\n        self.parent = [-1 for _ in range(num_nodes)]\n        self.left_sib = [-1 for _ in range(num_nodes)]\n        self.right_sib = [-1 for _ in range(num_nodes)]\n        self.left_child = [-1 for _ in range(num_nodes)]\n        self.right_child = [-1 for _ in range(num_nodes)]\n        self.left_sample = [-1 for _ in range(num_nodes)]\n        self.right_sample = [-1 for _ in range(num_nodes)]\n        # This is too long, but it's convenient for printing.\n        self.next_sample = [-1 for _ in range(num_nodes)]\n\n        self.sample_index_map = [-1 for _ in range(num_nodes)]\n        samples = tracked_samples\n        if tracked_samples is None:\n            samples = list(tree_sequence.samples())\n        for j in range(len(samples)):\n            u = samples[j]\n            self.sample_index_map[u] = j\n            self.left_sample[u] = j\n            self.right_sample[u] = j\n\n    def __str__(self):\n        fmt = \"{:<5}{:>8}{:>8}{:>8}{:>8}{:>8}{:>8}{:>8}{:>8}\\n\"\n        s = fmt.format(\n            \"node\",\n            \"parent\",\n            \"lsib\",\n            \"rsib\",\n            \"lchild\",\n            \"rchild\",\n            \"nsamp\",\n            \"lsamp\",\n            \"rsamp\",\n        )\n        for u in range(self.tree_sequence.num_nodes):\n            s += fmt.format(\n                u,\n                self.parent[u],\n                self.left_sib[u],\n                self.right_sib[u],\n                self.left_child[u],\n                self.right_child[u],\n                self.next_sample[u],\n                self.left_sample[u],\n                self.right_sample[u],\n            )\n        # Strip off trailing newline\n        return s[:-1]\n\n    def remove_edge(self, edge):\n        p = edge.parent\n        c = edge.child\n        lsib = 
self.left_sib[c]\n        rsib = self.right_sib[c]\n        if lsib == -1:\n            self.left_child[p] = rsib\n        else:\n            self.right_sib[lsib] = rsib\n        if rsib == -1:\n            self.right_child[p] = lsib\n        else:\n            self.left_sib[rsib] = lsib\n        self.parent[c] = -1\n        self.left_sib[c] = -1\n        self.right_sib[c] = -1\n\n    def insert_edge(self, edge):\n        p = edge.parent\n        c = edge.child\n        assert self.parent[c] == -1, \"contradictory edges\"\n        self.parent[c] = p\n        u = self.right_child[p]\n        if u == -1:\n            self.left_child[p] = c\n            self.left_sib[c] = -1\n            self.right_sib[c] = -1\n        else:\n            self.right_sib[u] = c\n            self.left_sib[c] = u\n            self.right_sib[c] = -1\n        self.right_child[p] = c\n\n    def update_sample_list(self, parent):\n        # This can surely be done more efficiently and elegantly. We are iterating\n        # up the tree and iterating over all the siblings of the nodes we visit,\n        # rebuilding the links as we go. This results in visiting the same nodes\n        # over again, which if we have nodes with many siblings will surely be\n        # expensive. Another consequence of the current approach is that the\n        # next pointer contains an arbitrary value for the rightmost sample of\n        # every root. This should point to NULL ideally, but it's quite tricky\n        # to do in practise. 
It's easier to have a slightly uglier iteration\n        # over samples.\n        #\n        # In the future it would be good to have a more efficient version of this\n        # algorithm using next and prev pointers that we keep up to date at all\n        # times, and which we use to patch the lists together more efficiently.\n        u = parent\n        while u != -1:\n            sample_index = self.sample_index_map[u]\n            if sample_index != -1:\n                self.right_sample[u] = self.left_sample[u]\n            else:\n                self.right_sample[u] = -1\n                self.left_sample[u] = -1\n            v = self.left_child[u]\n            while v != -1:\n                if self.left_sample[v] != -1:\n                    assert self.right_sample[v] != -1\n                    if self.left_sample[u] == -1:\n                        self.left_sample[u] = self.left_sample[v]\n                        self.right_sample[u] = self.right_sample[v]\n                    else:\n                        self.next_sample[self.right_sample[u]] = self.left_sample[v]\n                        self.right_sample[u] = self.right_sample[v]\n                v = self.right_sib[v]\n            u = self.parent[u]\n\n    def sample_lists(self):\n        \"\"\"\n        Iterate over the trees in this tree sequence, yielding the (left, right)\n        interval tuples. 
The tree state is maintained internally.\n\n        See note above about the cruddiness of this interface.\n        \"\"\"\n        ts = self.tree_sequence\n        sequence_length = ts.sequence_length\n        edges = list(ts.edges())\n        M = len(edges)\n        in_order = ts.tables.indexes.edge_insertion_order\n        out_order = ts.tables.indexes.edge_removal_order\n        j = 0\n        k = 0\n        left = 0\n\n        while j < M or left < sequence_length:\n            while k < M and edges[out_order[k]].right == left:\n                edge = edges[out_order[k]]\n                self.remove_edge(edge)\n                self.update_sample_list(edge.parent)\n                k += 1\n            while j < M and edges[in_order[j]].left == left:\n                edge = edges[in_order[j]]\n                self.insert_edge(edge)\n                self.update_sample_list(edge.parent)\n                j += 1\n            right = sequence_length\n            if j < M:\n                right = min(right, edges[in_order[j]].left)\n            if k < M:\n                right = min(right, edges[out_order[k]].right)\n            yield left, right\n            left = right\n\n\nclass LegacyRootThresholdTree:\n    \"\"\"\n    Implementation of the quintuply linked tree with root tracking using the\n    pre C 1.0/Python 0.4.0 algorithm. 
We keep this version around to make sure\n    that we can be clear what the differences in the semantics of the new\n    and old versions are.\n\n    NOTE: The interface is pretty awkward; it's not intended for anything other\n    than testing.\n    \"\"\"\n\n    def __init__(self, tree_sequence, root_threshold=1):\n        self.tree_sequence = tree_sequence\n        self.root_threshold = root_threshold\n        num_nodes = tree_sequence.num_nodes\n        # Quintuply linked tree.\n        self.parent = [-1 for _ in range(num_nodes)]\n        self.left_sib = [-1 for _ in range(num_nodes)]\n        self.right_sib = [-1 for _ in range(num_nodes)]\n        self.left_child = [-1 for _ in range(num_nodes)]\n        self.right_child = [-1 for _ in range(num_nodes)]\n        self.num_samples = [0 for _ in range(num_nodes)]\n        self.left_root = -1\n        for u in tree_sequence.samples()[::-1]:\n            self.num_samples[u] = 1\n            if self.root_threshold == 1:\n                self.add_root(u)\n\n    def __str__(self):\n        fmt = \"{:<5}{:>8}{:>8}{:>8}{:>8}{:>8}{:>8}\\n\"\n        s = f\"roots = {self.roots()}\\n\"\n        s += fmt.format(\"node\", \"parent\", \"lsib\", \"rsib\", \"lchild\", \"rchild\", \"nsamp\")\n        for u in range(self.tree_sequence.num_nodes):\n            s += fmt.format(\n                u,\n                self.parent[u],\n                self.left_sib[u],\n                self.right_sib[u],\n                self.left_child[u],\n                self.right_child[u],\n                self.num_samples[u],\n            )\n        # Strip off trailing newline\n        return s[:-1]\n\n    def is_root(self, u):\n        return self.num_samples[u] >= self.root_threshold\n\n    def roots(self):\n        roots = []\n        u = self.left_root\n        while u != -1:\n            roots.append(u)\n            u = self.right_sib[u]\n        return roots\n\n    def add_root(self, root):\n        if self.left_root != tskit.NULL:\n       
     lroot = self.left_sib[self.left_root]\n            if lroot != tskit.NULL:\n                self.right_sib[lroot] = root\n            self.left_sib[root] = lroot\n            self.left_sib[self.left_root] = root\n        self.right_sib[root] = self.left_root\n        self.left_root = root\n\n    def remove_root(self, root):\n        lroot = self.left_sib[root]\n        rroot = self.right_sib[root]\n        self.left_root = tskit.NULL\n        if lroot != tskit.NULL:\n            self.right_sib[lroot] = rroot\n            self.left_root = lroot\n        if rroot != tskit.NULL:\n            self.left_sib[rroot] = lroot\n            self.left_root = rroot\n        self.left_sib[root] = tskit.NULL\n        self.right_sib[root] = tskit.NULL\n\n    def remove_edge(self, edge):\n        p = edge.parent\n        c = edge.child\n        lsib = self.left_sib[c]\n        rsib = self.right_sib[c]\n        if lsib == -1:\n            self.left_child[p] = rsib\n        else:\n            self.right_sib[lsib] = rsib\n        if rsib == -1:\n            self.right_child[p] = lsib\n        else:\n            self.left_sib[rsib] = lsib\n        self.parent[c] = -1\n        self.left_sib[c] = -1\n        self.right_sib[c] = -1\n\n        u = edge.parent\n        while u != -1:\n            path_end = u\n            path_end_was_root = self.is_root(u)\n            self.num_samples[u] -= self.num_samples[c]\n            u = self.parent[u]\n        if path_end_was_root and not self.is_root(path_end):\n            self.remove_root(path_end)\n        if self.is_root(c):\n            self.add_root(c)\n\n    def insert_edge(self, edge):\n        p = edge.parent\n        c = edge.child\n        assert self.parent[c] == -1, \"contradictory edges\"\n        self.parent[c] = p\n        u = self.right_child[p]\n        lsib = self.left_sib[c]\n        rsib = self.right_sib[c]\n        if u == -1:\n            self.left_child[p] = c\n            self.left_sib[c] = -1\n            
self.right_sib[c] = -1\n        else:\n            self.right_sib[u] = c\n            self.left_sib[c] = u\n            self.right_sib[c] = -1\n        self.right_child[p] = c\n\n        u = edge.parent\n        while u != -1:\n            path_end = u\n            path_end_was_root = self.is_root(u)\n            self.num_samples[u] += self.num_samples[c]\n            u = self.parent[u]\n\n        if self.is_root(c):\n            if path_end_was_root:\n                # Remove c from root list.\n                # Note: we don't use the remove_root function here because\n                # it assumes that the node is at the end of a path\n                self.left_root = tskit.NULL\n                if lsib != tskit.NULL:\n                    self.right_sib[lsib] = rsib\n                    self.left_root = lsib\n                if rsib != tskit.NULL:\n                    self.left_sib[rsib] = lsib\n                    self.left_root = rsib\n            else:\n                # Replace c with path_end in the root list\n                if lsib != tskit.NULL:\n                    self.right_sib[lsib] = path_end\n                if rsib != tskit.NULL:\n                    self.left_sib[rsib] = path_end\n                self.left_sib[path_end] = lsib\n                self.right_sib[path_end] = rsib\n                self.left_root = path_end\n        else:\n            if self.is_root(path_end) and not path_end_was_root:\n                self.add_root(path_end)\n\n    def iterate(self):\n        \"\"\"\n        Iterate over the trees in this tree sequence, yielding the (left, right)\n        interval tuples. 
The tree state is maintained internally.\n        \"\"\"\n        ts = self.tree_sequence\n        sequence_length = ts.sequence_length\n        edges = list(ts.edges())\n        M = len(edges)\n        in_order = ts.tables.indexes.edge_insertion_order\n        out_order = ts.tables.indexes.edge_removal_order\n        j = 0\n        k = 0\n        left = 0\n\n        while j < M or left < sequence_length:\n            while k < M and edges[out_order[k]].right == left:\n                edge = edges[out_order[k]]\n                self.remove_edge(edge)\n                k += 1\n            while j < M and edges[in_order[j]].left == left:\n                edge = edges[in_order[j]]\n                self.insert_edge(edge)\n                j += 1\n            if self.left_root != tskit.NULL:\n                while self.left_sib[self.left_root] != tskit.NULL:\n                    self.left_root = self.left_sib[self.left_root]\n            right = sequence_length\n            if j < M:\n                right = min(right, edges[in_order[j]].left)\n            if k < M:\n                right = min(right, edges[out_order[k]].right)\n            yield left, right\n            left = right\n\n\nFORWARD = 1\nREVERSE = -1\n\n\n@dataclasses.dataclass\nclass Interval:\n    left: float\n    right: float\n\n    def __iter__(self):\n        yield self.left\n        yield self.right\n\n\n@dataclasses.dataclass\nclass EdgeRange:\n    start: int\n    stop: int\n    order: list\n\n\nclass TreeIndexes:\n    def __init__(self, ts):\n        self.ts = ts\n        self.index = -1\n        self.direction = 0\n        self.interval = Interval(0, 0)\n        self.in_range = EdgeRange(0, 0, None)\n        self.out_range = EdgeRange(0, 0, None)\n\n    def __str__(self):\n        s = f\"index: {self.index}\\ninterval: {self.interval}\\n\"\n        s += f\"direction: {self.direction}\\n\"\n        s += f\"in_range: {self.in_range}\\n\"\n        s += f\"out_range: {self.out_range}\\n\"\n        
return s\n\n    def assert_equal(self, other):\n        assert self.index == other.index\n        assert self.direction == other.direction\n        assert self.interval == other.interval\n\n    def set_null(self):\n        self.index = -1\n        self.interval.left = 0\n        self.interval.right = 0\n\n    def next(self):  # NOQA: A003\n        M = self.ts.num_edges\n        breakpoints = self.ts.breakpoints(as_array=True)\n        left_coords = self.ts.edges_left\n        left_order = self.ts.indexes_edge_insertion_order\n        right_coords = self.ts.edges_right\n        right_order = self.ts.indexes_edge_removal_order\n\n        if self.index == -1:\n            self.interval.right = 0\n            self.out_range.stop = 0\n            self.in_range.stop = 0\n            self.direction = FORWARD\n\n        if self.direction == FORWARD:\n            left_current_index = self.in_range.stop\n            right_current_index = self.out_range.stop\n        else:\n            left_current_index = self.out_range.stop + 1\n            right_current_index = self.in_range.stop + 1\n\n        left = self.interval.right\n\n        j = right_current_index\n        self.out_range.start = j\n        while j < M and right_coords[right_order[j]] == left:\n            j += 1\n        self.out_range.stop = j\n        self.out_range.order = right_order\n\n        j = left_current_index\n        self.in_range.start = j\n        while j < M and left_coords[left_order[j]] == left:\n            j += 1\n        self.in_range.stop = j\n        self.in_range.order = left_order\n\n        self.direction = FORWARD\n        self.index += 1\n        if self.index == self.ts.num_trees:\n            self.set_null()\n        else:\n            self.interval.left = left\n            self.interval.right = breakpoints[self.index + 1]\n        return self.index != -1\n\n    def prev(self):\n        M = self.ts.num_edges\n        breakpoints = self.ts.breakpoints(as_array=True)\n        
right_coords = self.ts.edges_right\n        right_order = self.ts.indexes_edge_removal_order\n        left_coords = self.ts.edges_left\n        left_order = self.ts.indexes_edge_insertion_order\n\n        if self.index == -1:\n            self.index = self.ts.num_trees\n            self.interval.left = self.ts.sequence_length\n            self.in_range.stop = M - 1\n            self.out_range.stop = M - 1\n            self.direction = REVERSE\n\n        if self.direction == REVERSE:\n            left_current_index = self.out_range.stop\n            right_current_index = self.in_range.stop\n        else:\n            left_current_index = self.in_range.stop - 1\n            right_current_index = self.out_range.stop - 1\n\n        right = self.interval.left\n\n        j = left_current_index\n        self.out_range.start = j\n        while j >= 0 and left_coords[left_order[j]] == right:\n            j -= 1\n        self.out_range.stop = j\n        self.out_range.order = left_order\n\n        j = right_current_index\n        self.in_range.start = j\n        while j >= 0 and right_coords[right_order[j]] == right:\n            j -= 1\n        self.in_range.stop = j\n        self.in_range.order = right_order\n\n        self.direction = REVERSE\n        self.index -= 1\n        if self.index == -1:\n            self.set_null()\n        else:\n            self.interval.left = breakpoints[self.index]\n            self.interval.right = right\n        return self.index != -1\n\n    def seek_forward(self, index):\n        # NOTE this is still in development and not fully tested.\n        assert index >= self.index and index < self.ts.num_trees\n        M = self.ts.num_edges\n        breakpoints = self.ts.breakpoints(as_array=True)\n        left_coords = self.ts.edges_left\n        left_order = self.ts.indexes_edge_insertion_order\n        right_coords = self.ts.edges_right\n        right_order = self.ts.indexes_edge_removal_order\n\n        if self.index == -1:\n            
self.interval.right = 0\n            self.out_range.stop = 0\n            self.in_range.stop = 0\n            self.direction = FORWARD\n\n        if self.direction == FORWARD:\n            left_current_index = self.in_range.stop\n            right_current_index = self.out_range.stop\n        else:\n            left_current_index = self.out_range.stop + 1\n            right_current_index = self.in_range.stop + 1\n\n        self.direction = FORWARD\n        left = breakpoints[index]\n\n        # The range of edges we need consider for removal starts\n        # at the current right index and ends at the first edge\n        # where the right coordinate is equal to the new tree's\n        # left coordinate.\n        j = right_current_index\n        self.out_range.start = j\n        # TODO This could be done with binary search\n        while j < M and right_coords[right_order[j]] <= left:\n            j += 1\n        self.out_range.stop = j\n\n        if self.index == -1:\n            # No edges, so out_range should be empty\n            self.out_range.start = self.out_range.stop\n\n        # The range of edges we need to consider for the new tree\n        # must have right coordinate > left\n        j = left_current_index\n        while j < M and right_coords[left_order[j]] <= left:\n            j += 1\n        self.in_range.start = j\n        # TODO this could be done with a binary search\n        while j < M and left_coords[left_order[j]] <= left:\n            j += 1\n        self.in_range.stop = j\n\n        self.interval.left = left\n        self.interval.right = breakpoints[index + 1]\n        self.out_range.order = right_order\n        self.in_range.order = left_order\n        self.index = index\n\n    def seek_backward(self, index):\n        # NOTE this is still in development and not fully tested.\n        assert index >= 0\n        M = self.ts.num_edges\n        breakpoints = self.ts.breakpoints(as_array=True)\n        left_coords = self.ts.edges_left\n        
left_order = self.ts.indexes_edge_insertion_order\n        right_coords = self.ts.edges_right\n        right_order = self.ts.indexes_edge_removal_order\n\n        if self.index == -1:\n            assert index < self.ts.num_trees\n            self.index = self.ts.num_trees\n            self.interval.left = self.ts.sequence_length\n            self.in_range.stop = M - 1\n            self.out_range.stop = M - 1\n            self.direction = REVERSE\n        else:\n            assert index <= self.index\n\n        if self.direction == REVERSE:\n            left_current_index = self.out_range.stop\n            right_current_index = self.in_range.stop\n        else:\n            left_current_index = self.in_range.stop - 1\n            right_current_index = self.out_range.stop - 1\n\n        self.direction = REVERSE\n        right = breakpoints[index + 1]\n\n        # The range of edges we need consider for removal starts\n        # at the current left index and ends at the first edge\n        # where the left coordinate is equal to the new tree's\n        # right coordinate.\n        j = left_current_index\n        self.out_range.start = j\n        # TODO This could be done with binary search\n        while j >= 0 and left_coords[left_order[j]] >= right:\n            j -= 1\n        self.out_range.stop = j\n\n        if self.index == self.ts.num_trees:\n            # No edges, so out_range should be empty\n            self.out_range.start = self.out_range.stop\n\n        # The range of edges we need to consider for the new tree\n        # must have left coordinate < right\n        j = right_current_index\n        while j >= 0 and left_coords[right_order[j]] >= right:\n            j -= 1\n        self.in_range.start = j\n        # We stop at the first edge with right coordinate < right\n        while j >= 0 and right_coords[right_order[j]] >= right:\n            j -= 1\n        self.in_range.stop = j\n\n        self.interval.right = right\n        self.interval.left = 
breakpoints[index]\n        self.out_range.order = left_order\n        self.in_range.order = right_order\n        self.index = index\n\n    def step(self, direction):\n        if direction == FORWARD:\n            return self.next()\n        elif direction == REVERSE:\n            return self.prev()\n        else:\n            raise ValueError(\"Direction must be FORWARD (+1) or REVERSE (-1)\")\n\n\ndef mean_descendants(ts, reference_sets):\n    \"\"\"\n    Returns the mean number of nodes from the specified reference sets\n    where the node is ancestral to at least one of the reference nodes. Returns a\n    ``(ts.num_nodes, len(reference_sets))`` dimensional numpy array.\n    \"\"\"\n    # Check the inputs (could be done more efficiently here)\n    all_reference_nodes = set()\n    for reference_set in reference_sets:\n        U = set(reference_set)\n        if len(U) != len(reference_set):\n            raise ValueError(\"Cannot have duplicate values within set\")\n        if len(all_reference_nodes & U) != 0:\n            raise ValueError(\"Sample sets must be disjoint\")\n        all_reference_nodes |= U\n\n    K = len(reference_sets)\n    C = np.zeros((ts.num_nodes, K))\n    parent = np.zeros(ts.num_nodes, dtype=int) - 1\n    # The -1th element of ref_count is for all nodes in the reference set.\n    ref_count = np.zeros((ts.num_nodes, K + 1), dtype=int)\n    last_update = np.zeros(ts.num_nodes)\n    total_span = np.zeros(ts.num_nodes)\n\n    def update_counts(edge, left, sign):\n        # Update the counts and statistics for a given node. Before we change the\n        # node counts in the given direction, check to see if we need to update\n        # statistics for that node. 
When a node count changes, we add the\n        # accumulated statistic value for the span since that node was last updated.\n        v = edge.parent\n        while v != -1:\n            if last_update[v] != left:\n                if ref_count[v, K] > 0:\n                    span = left - last_update[v]\n                    C[v] += span * ref_count[v, :K]\n                    total_span[v] += span\n                last_update[v] = left\n            ref_count[v] += sign * ref_count[edge.child]\n            v = parent[v]\n\n    # Set the initial conditions.\n    for j in range(K):\n        ref_count[reference_sets[j], j] = 1\n    ref_count[ts.samples(), K] = 1\n\n    for (left, _right), edges_out, edges_in in ts.edge_diffs():\n        for edge in edges_out:\n            parent[edge.child] = -1\n            update_counts(edge, left, -1)\n        for edge in edges_in:\n            parent[edge.child] = edge.parent\n            update_counts(edge, left, +1)\n\n    # Finally, add the stats for the last tree and divide by the total\n    # span that each node was an ancestor to > 0 samples.\n    for v in range(ts.num_nodes):\n        if ref_count[v, K] > 0:\n            span = ts.sequence_length - last_update[v]\n            total_span[v] += span\n            C[v] += span * ref_count[v, :K]\n        if total_span[v] != 0:\n            C[v] /= total_span[v]\n    return C\n\n\ndef genealogical_nearest_neighbours(ts, focal, reference_sets):\n    reference_set_map = np.zeros(ts.num_nodes, dtype=int) - 1\n    for k, reference_set in enumerate(reference_sets):\n        for u in reference_set:\n            if reference_set_map[u] != -1:\n                raise ValueError(\"Duplicate value in reference sets\")\n            reference_set_map[u] = k\n\n    K = len(reference_sets)\n    A = np.zeros((len(focal), K))\n    L = np.zeros(len(focal))\n    parent = np.zeros(ts.num_nodes, dtype=int) - 1\n    sample_count = np.zeros((ts.num_nodes, K), dtype=int)\n\n    # Set the initial 
conditions.\n    for j in range(K):\n        sample_count[reference_sets[j], j] = 1\n\n    for (left, right), edges_out, edges_in in ts.edge_diffs():\n        for edge in edges_out:\n            parent[edge.child] = -1\n            v = edge.parent\n            while v != -1:\n                sample_count[v] -= sample_count[edge.child]\n                v = parent[v]\n        for edge in edges_in:\n            parent[edge.child] = edge.parent\n            v = edge.parent\n            while v != -1:\n                sample_count[v] += sample_count[edge.child]\n                v = parent[v]\n\n        # Process this tree.\n        for j, u in enumerate(focal):\n            focal_reference_set = reference_set_map[u]\n            delta = int(focal_reference_set != -1)\n            p = u\n            while p != tskit.NULL:\n                total = np.sum(sample_count[p])\n                if total > delta:\n                    break\n                p = parent[p]\n            if p != tskit.NULL:\n                span = right - left\n                L[j] += span\n                scale = span / (total - delta)\n                for k, _reference_set in enumerate(reference_sets):\n                    n = sample_count[p, k] - int(focal_reference_set == k)\n                    A[j, k] += n * scale\n\n    # Avoid division by zero\n    L[L == 0] = 1\n    A /= L.reshape((len(focal), 1))\n    return A\n\n\ndef sort_individual_table(tables):\n    \"\"\"\n    Sorts the individual table by parents-before-children.\n    \"\"\"\n\n    individuals = tables.individuals\n    num_individuals = individuals.num_rows\n\n    # First find the set of individuals that have no children\n    # by creating an array of incoming edge counts\n    incoming_edge_count = np.zeros((num_individuals,), np.int64)\n    for parent in individuals.parents:\n        if parent != tskit.NULL:\n            incoming_edge_count[parent] += 1\n\n    todo = collections.deque()\n    sorted_order = []\n    for individual, 
num_edges in reversed(list(enumerate(incoming_edge_count))):\n        if num_edges == 0:\n            todo.append(individual)\n            sorted_order.append(individual)\n    # Now emit individuals from the set that have no children, removing their edges\n    # as we go adding new individuals to the no children set.\n    while len(todo) > 0:\n        individual = todo.popleft()\n        for parent in individuals[individual].parents:\n            if parent != tskit.NULL:\n                incoming_edge_count[parent] -= 1\n                if incoming_edge_count[parent] == 0:\n                    todo.append(parent)\n                    sorted_order.append(parent)\n\n    if np.sum(incoming_edge_count) > 0:\n        raise ValueError(\"Individual pedigree has cycles\")\n\n    ind_id_map = {tskit.NULL: tskit.NULL}\n\n    individuals_copy = tables.copy().individuals\n    tables.individuals.clear()\n    for row in reversed(sorted_order):\n        ind_id_map[row] = tables.individuals.append(individuals_copy[row])\n    tables.individuals.parents = [ind_id_map[i] for i in tables.individuals.parents]\n    tables.nodes.individual = [ind_id_map[i] for i in tables.nodes.individual]\n\n    return tables\n\n\ndef insert_unique_metadata(tables, table=None, offset=0):\n    if isinstance(tables, tskit.TreeSequence):\n        tables = tables.dump_tables()\n    else:\n        tables = tables.copy()\n    if table is None:\n        table = [t for t in tskit.TABLE_NAMES if t != \"provenances\"]\n    for t in table:\n        getattr(tables, t).packset_metadata(\n            [struct.pack(\"I\", offset + i) for i in range(getattr(tables, t).num_rows)]\n        )\n    return tables.tree_sequence()\n\n\ndef metadata_map(tables):\n    # builds a mapping from metadata (as produced by insert_unique_metadata)\n    # to ID for all the tables (except provenance)\n    if isinstance(tables, tskit.TreeSequence):\n        tables = tables.dump_tables()\n    out = {}\n    for t in [t for t in 
tskit.TABLE_NAMES if t != \"provenances\"]:\n        out[t] = {}\n        for j, x in enumerate(getattr(tables, t)):\n            out[t][x.metadata] = j\n    return out\n\n\n@functools.cache\ndef all_trees_ts(n):\n    \"\"\"\n    Generate a tree sequence that corresponds to the lexicographic listing\n    of all trees with n leaves (i.e. from tskit.all_trees(n)).\n\n    Note: it would be nice to include a version of this in the combinatorics\n    module at some point but the implementation is quite inefficient. Also\n    it's not entirely clear that the way we're allocating node times is\n    guaranteed to work.\n    \"\"\"\n    tables = tskit.TableCollection(0)\n    for _ in range(n):\n        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n    for j in range(1, n):\n        tables.nodes.add_row(flags=0, time=j)\n\n    L = 0\n    for tree in tskit.all_trees(n):\n        for u in tree.preorder()[1:]:\n            tables.edges.add_row(L, L + 1, tree.parent(u), u)\n        L += 1\n    tables.sequence_length = L\n    tables.sort()\n    tables.simplify()\n    return tables.tree_sequence()\n\n\ndef all_fields_ts(edge_metadata=True, migrations=True):\n    \"\"\"\n    A tree sequence with data in all fields (except edge metadata is not set if\n    edge_metadata is False and migrations are not defined if migrations is False;\n    this is needed to test simplify, which doesn't allow either).\n\n    \"\"\"\n    demography = msprime.Demography()\n    demography.add_population(name=\"A\", initial_size=10_000)\n    demography.add_population(name=\"B\", initial_size=5_000)\n    demography.add_population(name=\"C\", initial_size=1_000)\n    demography.add_population(name=\"D\", initial_size=500)\n    demography.add_population(name=\"E\", initial_size=100)\n    demography.add_population_split(time=1000, derived=[\"A\", \"B\"], ancestral=\"C\")\n    ts = msprime.sim_ancestry(\n        samples={\"A\": 10, \"B\": 10},\n        demography=demography,\n        
sequence_length=5,\n        random_seed=42,\n        recombination_rate=1,\n        record_migrations=migrations,\n        record_provenance=True,\n    )\n    ts = msprime.sim_mutations(ts, rate=0.001, random_seed=42)\n    tables = ts.dump_tables()\n    # Add locations to individuals\n    individuals_copy = tables.individuals.copy()\n    tables.individuals.clear()\n    for i, individual in enumerate(individuals_copy):\n        tables.individuals.append(\n            individual.replace(flags=i, location=[i, i + 1], parents=[i - 1, i - 1])\n        )\n    # Ensure all columns have unique values\n    nodes_copy = tables.nodes.copy()\n    tables.nodes.clear()\n    for i, node in enumerate(nodes_copy):\n        tables.nodes.append(\n            node.replace(\n                flags=i,\n                time=node.time + 0.00001 * i,\n                individual=i % len(tables.individuals),\n                population=i % len(tables.populations),\n            )\n        )\n    if migrations:\n        tables.migrations.add_row(left=0, right=1, node=21, source=1, dest=3, time=1001)\n\n    # Add metadata\n    for name, table in tables.table_name_map.items():\n        if name == \"provenances\":\n            continue\n        if name == \"migrations\" and not migrations:\n            continue\n        if name == \"edges\" and not edge_metadata:\n            continue\n        table.metadata_schema = tskit.MetadataSchema.permissive_json()\n        metadatas = [f'{{\"foo\":\"n_{name}_{u}\"}}' for u in range(len(table))]\n        metadata, metadata_offset = tskit.pack_strings(metadatas)\n        table.set_columns(\n            **{\n                **table.asdict(),\n                \"metadata\": metadata,\n                \"metadata_offset\": metadata_offset,\n            }\n        )\n    tables.metadata_schema = tskit.MetadataSchema.permissive_json()\n    tables.metadata = \"Test metadata\"\n    tables.time_units = \"Test time units\"\n\n    
tables.reference_sequence.metadata_schema = tskit.MetadataSchema.permissive_json()\n    tables.reference_sequence.metadata = \"Test reference metadata\"\n    tables.reference_sequence.data = \"A\" * int(ts.sequence_length)\n    # NOTE: it's unclear whether we'll want to have this set at the same time as\n    # 'data', but it's useful to have something in all columns for now.\n    tables.reference_sequence.url = \"http://example.com/a_reference\"\n\n    # Add some more rows to provenance to have enough for testing.\n    for i in range(3):\n        tables.provenances.add_row(record=\"A\", timestamp=str(i))\n\n    return tables.tree_sequence()\n\n\ndef insert_uniform_mutations(tables, num_mutations, nodes):\n    \"\"\"\n    Returns n evenly mutations over the specified list of nodes.\n    \"\"\"\n    for j in range(num_mutations):\n        tables.sites.add_row(\n            position=j * (tables.sequence_length / num_mutations),\n            ancestral_state=\"0\",\n            metadata=json.dumps({\"index\": j}).encode(),\n        )\n        tables.mutations.add_row(\n            site=j,\n            derived_state=\"1\",\n            node=nodes[j % len(nodes)],\n            metadata=json.dumps({\"index\": j}).encode(),\n        )\n\n\ndef get_table_collection_copy(tables, sequence_length):\n    \"\"\"\n    Returns a copy of the specified table collection with the specified\n    sequence length.\n    \"\"\"\n    table_dict = tables.asdict()\n    table_dict[\"sequence_length\"] = sequence_length\n    return tskit.TableCollection.fromdict(table_dict)\n\n\ndef insert_gap(ts, position, length):\n    \"\"\"\n    Inserts a gap of the specified size into the specified tree sequence.\n    This involves: (1) breaking all edges that intersect with this point;\n    and (2) shifting all coordinates greater than this value up by the\n    gap length.\n    \"\"\"\n    new_edges = []\n    for e in ts.edges():\n        if e.left < position < e.right:\n            
new_edges.append([e.left, position, e.parent, e.child])\n            new_edges.append([position, e.right, e.parent, e.child])\n        else:\n            new_edges.append([e.left, e.right, e.parent, e.child])\n\n    # Now shift up all coordinates.\n    for e in new_edges:\n        # Left coordinates == position get shifted\n        if e[0] >= position:\n            e[0] += length\n        # Right coordinates == position do not get shifted\n        if e[1] > position:\n            e[1] += length\n    tables = ts.dump_tables()\n    L = ts.sequence_length + length\n    tables = get_table_collection_copy(tables, L)\n    tables.edges.clear()\n    tables.sites.clear()\n    tables.mutations.clear()\n    for left, right, parent, child in new_edges:\n        tables.edges.add_row(left, right, parent, child)\n    tables.sort()\n    # Throw in a bunch of mutations over the whole sequence on the samples.\n    insert_uniform_mutations(tables, 100, list(ts.samples()))\n    return tables.tree_sequence()\n\n\n@functools.lru_cache\ndef get_decapitated_examples(custom_max=None):\n    \"\"\"\n    Returns example tree sequences in which the oldest edges have been removed.\n    \"\"\"\n    ret = []\n    if custom_max is None:\n        n_list = [10, 20]\n    else:\n        n_list = [custom_max // 2, custom_max]\n    ts = msprime.simulate(n_list[0], random_seed=1234)\n    # yield ts.decapitate(ts.tables.nodes.time[-1] / 2)\n    ts = msprime.simulate(n_list[1], recombination_rate=1, random_seed=1234)\n    assert ts.num_trees > 2\n    ret.append((\"decapitate_recomb\", ts.decapitate(ts.tables.nodes.time[-1] / 4)))\n    return ret\n\n\n@functools.lru_cache\ndef get_gap_examples(custom_max=None):\n    \"\"\"\n    Returns example tree sequences that contain gaps within the list of\n    edges.\n    \"\"\"\n    ret = []\n    if custom_max is None:\n        n_list = [20, 10]\n    else:\n        n_list = [custom_max, custom_max // 2]\n\n    ts = msprime.simulate(n_list[0], random_seed=56, 
recombination_rate=1)\n\n    assert ts.num_trees > 1\n\n    gap = 0.0125\n    for x in [0, 0.1, 0.5, 0.75]:\n        ts = insert_gap(ts, x, gap)\n        found = False\n        for t in ts.trees():\n            if t.interval.left == x:\n                assert t.interval.right == x + gap\n                assert len(t.parent_dict) == 0\n                found = True\n        assert found\n        ret.append((f\"gap_{x}\", ts))\n    # Give an example with a gap at the end.\n    ts = msprime.simulate(n_list[1], random_seed=5, recombination_rate=1)\n    tables = get_table_collection_copy(ts.dump_tables(), 2)\n    tables.sites.clear()\n    tables.mutations.clear()\n    insert_uniform_mutations(tables, 100, list(ts.samples()))\n    ret.append((\"gap_at_end\", tables.tree_sequence()))\n    return ret\n\n\n@functools.lru_cache\ndef get_internal_samples_examples():\n    \"\"\"\n    Returns example tree sequences with internal samples.\n    \"\"\"\n    ret = []\n    n = 5\n    ts = msprime.simulate(n, random_seed=10, mutation_rate=5)\n    assert ts.num_mutations > 0\n    tables = ts.dump_tables()\n    nodes = tables.nodes\n    flags = nodes.flags\n    # Set all nodes to be samples.\n    flags[:] = tskit.NODE_IS_SAMPLE\n    nodes.flags = flags\n    ret.append((\"all_nodes_samples\", tables.tree_sequence()))\n\n    # Set just internal nodes to be samples.\n    flags[:] = 0\n    flags[n:] = tskit.NODE_IS_SAMPLE\n    nodes.flags = flags\n    ret.append((\"internal_nodes_samples\", tables.tree_sequence()))\n\n    # Set a mixture of internal and leaf samples.\n    flags[:] = 0\n    flags[n // 2 : n + n // 2] = tskit.NODE_IS_SAMPLE\n    nodes.flags = flags\n    ret.append((\"mixed_internal_leaf_samples\", tables.tree_sequence()))\n    return ret\n\n\n@functools.lru_cache\ndef get_bottleneck_examples(custom_max=None):\n    \"\"\"\n    Returns an iterator of example tree sequences with nonbinary trees.\n    \"\"\"\n    bottlenecks = [\n        msprime.SimpleBottleneck(0.01, 0, 
proportion=0.05),\n        msprime.SimpleBottleneck(0.02, 0, proportion=0.25),\n        msprime.SimpleBottleneck(0.03, 0, proportion=1),\n    ]\n    if custom_max is None:\n        n_list = [3, 10, 100]\n    else:\n        n_list = [i * custom_max // 3 for i in range(1, 4)]\n    for n in n_list:\n        ts = msprime.simulate(\n            n,\n            length=100,\n            recombination_rate=1,\n            demographic_events=bottlenecks,\n            random_seed=n,\n        )\n        yield (f\"bottleneck_n={n}\", ts)\n\n\n@functools.lru_cache\ndef get_back_mutation_examples():\n    \"\"\"\n    Returns an iterator of example tree sequences with nonbinary trees.\n    \"\"\"\n    ts = msprime.simulate(10, random_seed=1)\n    for j in [1, 2, 3]:\n        yield insert_branch_mutations(ts, mutations_per_branch=j)\n    for ts in get_bottleneck_examples():\n        yield insert_branch_mutations(ts)\n\n\ndef make_example_tree_sequences(custom_max=None):\n    yield from get_decapitated_examples(custom_max=custom_max)\n    yield from get_gap_examples(custom_max=custom_max)\n    yield from get_internal_samples_examples()\n    seed = 1\n    if custom_max is None:\n        n_list = [2, 3, 10, 100]\n    else:\n        n_list = [i * custom_max // 4 for i in range(1, 5)]\n    for n in n_list:\n        for m in [1, 2, 32]:\n            for rho in [0, 0.1, 0.5]:\n                recomb_map = msprime.RecombinationMap.uniform_map(m, rho, num_loci=m)\n                ts = msprime.simulate(\n                    recombination_map=recomb_map,\n                    mutation_rate=0.1,\n                    random_seed=seed,\n                    population_configurations=[\n                        msprime.PopulationConfiguration(n),\n                        msprime.PopulationConfiguration(0),\n                    ],\n                    migration_matrix=[[0, 1], [1, 0]],\n                )\n                ts = insert_random_ploidy_individuals(ts, 4, seed=seed)\n                yield 
(\n                    f\"n={n}_m={m}_rho={rho}\",\n                    add_random_metadata(ts, seed=seed),\n                )\n                seed += 1\n    for name, ts in get_bottleneck_examples(custom_max=custom_max):\n        yield (\n            f\"{name}_mutated\",\n            msprime.mutate(\n                ts,\n                rate=0.1,\n                random_seed=seed,\n                model=msprime.InfiniteSites(msprime.NUCLEOTIDES),\n            ),\n        )\n    ts = tskit.Tree.generate_balanced(8).tree_sequence\n    yield (\"rev_node_order\", ts.subset(np.arange(ts.num_nodes - 1, -1, -1)))\n    ts = msprime.sim_ancestry(\n        8, sequence_length=40, recombination_rate=0.1, random_seed=seed\n    )\n    tables = ts.dump_tables()\n    tables.populations.metadata_schema = tskit.MetadataSchema(None)\n    ts = tables.tree_sequence()\n    assert ts.num_trees > 1\n    yield (\n        \"back_mutations\",\n        insert_branch_mutations(ts, mutations_per_branch=2),\n    )\n    ts = insert_multichar_mutations(ts)\n    yield (\"multichar\", ts)\n    yield (\"multichar_no_metadata\", add_random_metadata(ts))\n    tables = ts.dump_tables()\n    tables.nodes.flags = np.zeros_like(tables.nodes.flags)\n    yield (\"no_samples\", tables.tree_sequence())  # no samples\n    tables = ts.dump_tables()\n    tables.edges.clear()\n    yield (\"empty_tree\", tables.tree_sequence())  # empty tree\n    yield (\n        \"empty_ts\",\n        tskit.TableCollection(sequence_length=1).tree_sequence(),\n    )  # empty tree seq\n    yield (\"all_fields\", all_fields_ts())\n\n\n_examples = tuple(make_example_tree_sequences(custom_max=None))\n\n\ndef get_example_tree_sequences(pytest_params=True, custom_max=None):\n    if pytest_params:\n        return [pytest.param(ts, id=name) for name, ts in _examples]\n    else:\n        return [ts for _, ts in _examples]\n"
  },
  {
    "path": "python/tskit/__init__.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2025 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\nimport _tskit\n\n#: Special reserved value representing a null ID.\nNULL = _tskit.NULL\n\n#: Special value representing missing data in a genotype array\nMISSING_DATA = _tskit.MISSING_DATA\n\n#: Node flag value indicating that it is a sample.\nNODE_IS_SAMPLE = _tskit.NODE_IS_SAMPLE\n\n#: Constant representing the forward direction of travel (i.e.,\n#: increasing genomic coordinate values).\nFORWARD = _tskit.FORWARD\n\n#: Constant representing the reverse direction of travel (i.e.,\n#: decreasing genomic coordinate values).\nREVERSE = _tskit.REVERSE\n\n#: The allele mapping where the strings \"0\" and \"1\" map to genotype\n#: values 0 and 1.\nALLELES_01 = (\"0\", \"1\")\n\n#: The allele mapping where the four nucleotides A, C, G and T map to\n#: the genotype integers 0, 1, 2, and 3, respectively.\nALLELES_ACGT = (\"A\", \"C\", 
\"G\", \"T\")\n\n#: Special NAN value used to indicate unknown mutation times. Since this is a\n#: NAN value, you cannot use `==` to test for it. Use :func:`is_unknown_time` instead.\nUNKNOWN_TIME = _tskit.UNKNOWN_TIME\n\n#: Default value of ts.time_units\nTIME_UNITS_UNKNOWN = _tskit.TIME_UNITS_UNKNOWN\n\n#: ts.time_units value when dimension is uncalibrated\nTIME_UNITS_UNCALIBRATED = _tskit.TIME_UNITS_UNCALIBRATED\n\n#: Options for printing to strings and HTML, modify with tskit.set_print_options.\n_print_options = {\"max_lines\": 40}\n\nTABLE_NAMES = [\n    \"individuals\",\n    \"nodes\",\n    \"edges\",\n    \"migrations\",\n    \"sites\",\n    \"mutations\",\n    \"populations\",\n    \"provenances\",\n]\n\n\nfrom tskit.provenance import __version__  # NOQA\nfrom tskit.provenance import validate_provenance  # NOQA\nfrom tskit.trees import *  # NOQA\nfrom tskit.genotypes import Variant  # NOQA\nfrom tskit.tables import *  # NOQA\nfrom tskit.stats import *  # NOQA\nfrom tskit.combinatorics import (  # NOQA\n    all_trees,\n    all_tree_shapes,\n    all_tree_labellings,\n    TopologyCounter,\n    Rank,\n)\nfrom tskit.drawing import SVGString  # NOQA\nfrom tskit.exceptions import *  # NOQA\nfrom tskit.util import *  # NOQA\nfrom tskit.metadata import *  # NOQA\nfrom tskit.text_formats import *  # NOQA\nfrom tskit.intervals import RateMap  # NOQA\n"
  },
  {
    "path": "python/tskit/__main__.py",
    "content": "from . import cli\n\nif __name__ == \"__main__\":\n    cli.tskit_main()\n"
  },
  {
    "path": "python/tskit/_version.py",
    "content": "# Definitive location for the version number.\n# During development, should be x.y.z.devN\n# For beta should be x.y.zbN\ntskit_version = \"1.0.3.dev1\"\n"
  },
  {
    "path": "python/tskit/cli.py",
    "content": "#\n# MIT License\n#\n# Copyright (c) 2018-2025 Tskit Developers\n# Copyright (c) 2015-2018 University of Oxford\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nCommand line utilities for tskit.\n\"\"\"\n\nimport argparse\nimport json\nimport os\nimport signal\nimport sys\n\nimport tskit\n\n\ndef set_sigpipe_handler():\n    if os.name == \"posix\":\n        # Set signal handler for SIGPIPE to quietly kill the program.\n        signal.signal(signal.SIGPIPE, signal.SIG_DFL)\n\n\ndef sys_exit(message):\n    sys.exit(message)\n\n\ndef load_tree_sequence(path):\n    try:\n        return tskit.load(path)\n    except OSError as e:\n        sys_exit(f\"Load error: {e}\")\n\n\ndef run_info(args):\n    print(load_tree_sequence(args.tree_sequence))\n\n\ndef run_trees(args):\n    ts = load_tree_sequence(args.tree_sequence)\n    for tree in ts.trees():\n        print(f\"tree {tree.index}:\")\n        print(f\"  num_sites: {tree.num_sites}\")\n 
       print(\n            \"  interval:  ({0:.{2}f}, {1:.{2}f})\".format(\n                tree.interval.left, tree.interval.right, args.precision\n            )\n        )\n        if args.draw:\n            print(tree.draw(format=\"unicode\"))\n\n\ndef run_individuals(args):\n    tree_sequence = load_tree_sequence(args.tree_sequence)\n    tree_sequence.dump_text(individuals=sys.stdout, precision=args.precision)\n\n\ndef run_nodes(args):\n    tree_sequence = load_tree_sequence(args.tree_sequence)\n    tree_sequence.dump_text(nodes=sys.stdout, precision=args.precision)\n\n\ndef run_edges(args):\n    tree_sequence = load_tree_sequence(args.tree_sequence)\n    tree_sequence.dump_text(edges=sys.stdout, precision=args.precision)\n\n\ndef run_sites(args):\n    tree_sequence = load_tree_sequence(args.tree_sequence)\n    tree_sequence.dump_text(sites=sys.stdout, precision=args.precision)\n\n\ndef run_mutations(args):\n    tree_sequence = load_tree_sequence(args.tree_sequence)\n    tree_sequence.dump_text(mutations=sys.stdout, precision=args.precision)\n\n\ndef run_populations(args):\n    tree_sequence = load_tree_sequence(args.tree_sequence)\n    tree_sequence.dump_text(populations=sys.stdout)\n\n\ndef run_migrations(args):\n    tree_sequence = load_tree_sequence(args.tree_sequence)\n    tree_sequence.dump_text(migrations=sys.stdout, precision=args.precision)\n\n\ndef run_provenances(args):\n    tree_sequence = load_tree_sequence(args.tree_sequence)\n    if args.human:\n        for provenance in tree_sequence.provenances():\n            d = json.loads(provenance.record)\n            print(\n                \"id={}, timestamp={}, record={}\".format(\n                    provenance.id, provenance.timestamp, json.dumps(d, indent=4)\n                )\n            )\n    else:\n        tree_sequence.dump_text(provenances=sys.stdout)\n\n\ndef run_fasta(args):\n    tree_sequence = load_tree_sequence(args.tree_sequence)\n    tree_sequence.write_fasta(sys.stdout, 
wrap_width=args.wrap)\n\n\ndef run_vcf(args):\n    tree_sequence = load_tree_sequence(args.tree_sequence)\n    tree_sequence.write_vcf(\n        sys.stdout,\n        ploidy=args.ploidy,\n        contig_id=args.contig_id,\n        allow_position_zero=args.allow_position_zero,\n    )\n\n\ndef add_tree_sequence_argument(parser):\n    parser.add_argument(\"tree_sequence\", help=\"The tskit tree sequence file\")\n\n\ndef add_precision_argument(parser):\n    parser.add_argument(\n        \"--precision\",\n        \"-p\",\n        type=int,\n        default=6,\n        help=\"The number of decimal places to print in records\",\n    )\n\n\ndef get_tskit_parser():\n    top_parser = argparse.ArgumentParser(\n        prog=\"python3 -m tskit\", description=\"Command line interface for tskit.\"\n    )\n    top_parser.add_argument(\n        \"-V\", \"--version\", action=\"version\", version=f\"%(prog)s {tskit.__version__}\"\n    )\n    subparsers = top_parser.add_subparsers(dest=\"subcommand\")\n    subparsers.required = True\n\n    parser = subparsers.add_parser(\n        \"info\", help=\"Print summary information about a tree sequence.\"\n    )\n    add_tree_sequence_argument(parser)\n    parser.set_defaults(runner=run_info)\n\n    parser = subparsers.add_parser(\"trees\", help=\"Print information about trees.\")\n    add_tree_sequence_argument(parser)\n    add_precision_argument(parser)\n    parser.add_argument(\n        \"--draw\", \"-d\", action=\"store_true\", default=False, help=\"Draw the trees\"\n    )\n    parser.set_defaults(runner=run_trees)\n\n    # suppress fasta visibility until we have a reference sequence\n    # See https://github.com/tskit-dev/tskit/issues/1888\n    # parser = subparsers.add_parser(\n    #    \"fasta\",\n    #     help=\"Convert the tree sequence haplotypes to fasta format\")\n    # add_tree_sequence_argument(parser)\n    # parser.add_argument(\n    #     \"--wrap\", \"-w\", type=int, default=60,\n    #     help=(\"line-wrapping width for 
printed sequences\"))\n    # parser.set_defaults(runner=run_fasta)\n    parser = subparsers.add_parser(\n        \"vcf\", help=\"Convert the tree sequence genotypes to VCF format.\"\n    )\n    add_tree_sequence_argument(parser)\n    parser.add_argument(\n        \"--ploidy\",\n        \"-P\",\n        type=int,\n        default=None,\n        help=(\n            \"If the tree sequence does not contain information about \"\n            \"individuals, create them by combining adjacent samples nodes \"\n            \"into individuals of the specified ploidy. It is an error \"\n            \"to provide this argument if the tree sequence does contain \"\n            \"individuals\"\n        ),\n    )\n    parser.add_argument(\n        \"--contig-id\", \"-c\", type=str, default=\"1\", help=\"Specify the contig id\"\n    )\n    parser.add_argument(\n        \"--allow-position-zero\",\n        \"-0\",\n        action=\"store_true\",\n        default=False,\n        help=\"Allow position 0 sites\",\n    )\n    parser.set_defaults(runner=run_vcf)\n\n    parser = subparsers.add_parser(\n        \"individuals\", help=\"Output individuals in tabular format.\"\n    )\n    add_tree_sequence_argument(parser)\n    add_precision_argument(parser)\n    parser.set_defaults(runner=run_individuals)\n\n    parser = subparsers.add_parser(\"nodes\", help=\"Output nodes in tabular format.\")\n    add_tree_sequence_argument(parser)\n    add_precision_argument(parser)\n    parser.set_defaults(runner=run_nodes)\n\n    parser = subparsers.add_parser(\"edges\", help=\"Output edges in tabular format.\")\n    add_tree_sequence_argument(parser)\n    add_precision_argument(parser)\n    parser.set_defaults(runner=run_edges)\n\n    parser = subparsers.add_parser(\"sites\", help=\"Output sites in tabular format.\")\n    add_tree_sequence_argument(parser)\n    add_precision_argument(parser)\n    parser.set_defaults(runner=run_sites)\n\n    parser = subparsers.add_parser(\n        \"mutations\", 
help=\"Output mutations in tabular format.\"\n    )\n    add_tree_sequence_argument(parser)\n    add_precision_argument(parser)\n    parser.set_defaults(runner=run_mutations)\n\n    parser = subparsers.add_parser(\n        \"populations\", help=\"Output population information in tabular format.\"\n    )\n    add_tree_sequence_argument(parser)\n    parser.set_defaults(runner=run_populations)\n\n    parser = subparsers.add_parser(\n        \"migrations\", help=\"Output migration information in tabular format.\"\n    )\n    add_tree_sequence_argument(parser)\n    add_precision_argument(parser)\n    parser.set_defaults(runner=run_migrations)\n\n    parser = subparsers.add_parser(\n        \"provenances\", help=\"Output provenance information in tabular format.\"\n    )\n    add_tree_sequence_argument(parser)\n    parser.add_argument(\n        \"-H\",\n        \"--human\",\n        action=\"store_true\",\n        help=\"Print out the provenances in a human readable format\",\n    )\n    parser.set_defaults(runner=run_provenances)\n\n    return top_parser\n\n\ndef tskit_main(arg_list=None):\n    set_sigpipe_handler()\n    parser = get_tskit_parser()\n    args = parser.parse_args(arg_list)\n    args.runner(args)\n"
  },
  {
    "path": "python/tskit/combinatorics.py",
    "content": "#\n# MIT License\n#\n# Copyright (c) 2020-2024 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nModule for ranking and unranking trees. Trees are considered only\nleaf-labelled and unordered, so order of children does not influence equality.\n\"\"\"\n\nimport collections\nimport functools\nimport heapq\nimport itertools\nimport json\nimport random\nfrom typing import NamedTuple\n\nimport attr\nimport numpy as np\n\nimport tskit\n\n\nclass Rank(NamedTuple):\n    \"\"\"\n    A tuple of 2 numbers, ``(shape, label)``, together defining a unique\n    topology for a labeled tree. 
See :ref:`sec_combinatorics`.\n    \"\"\"\n\n    shape: int\n    \"\"\"\n    A non-negative integer representing the (unlabelled) topology of a tree with a\n    defined number of tips.\n    \"\"\"\n    label: int\n    \"\"\"\n    A non-negative integer representing the order of labels for a given tree topology.\n    \"\"\"\n\n\ndef equal_chunks(lst, k):\n    \"\"\"\n    Yield k successive equally sized chunks from lst of size n.\n\n    If k >= n, we return n chunks of size 1.\n\n    Otherwise, we always return k chunks. The first k - 1 chunks will\n    contain exactly n // k items, and the last chunk the remainder.\n    \"\"\"\n    n = len(lst)\n    if k <= 0 or int(k) != k:\n        raise ValueError(\"Number of chunks must be a positive integer\")\n\n    if n > 0:\n        chunk_size = max(1, n // k)\n        offset = 0\n        j = 0\n        while offset < n - chunk_size and j < k - 1:\n            yield lst[offset : offset + chunk_size]\n            offset += chunk_size\n            j += 1\n        yield lst[offset:]\n\n\n@attr.s(eq=False)\nclass TreeNode:\n    \"\"\"\n    Simple linked tree class used to generate tree topologies.\n    \"\"\"\n\n    parent = attr.ib(default=None)\n    children = attr.ib(factory=list)\n    label = attr.ib(default=None)\n\n    def as_tables(self, *, num_leaves, span, branch_length):\n        \"\"\"\n        Convert the tree rooted at this node into an equivalent\n        TableCollection. 
Internal nodes are allocated in postorder.\n        \"\"\"\n        tables = tskit.TableCollection(span)\n        for _ in range(num_leaves):\n            tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n\n        def assign_internal_labels(node):\n            if len(node.children) == 0:\n                node.time = 0\n            else:\n                max_child_time = 0\n                for child in node.children:\n                    assign_internal_labels(child)\n                    max_child_time = max(max_child_time, child.time)\n                node.time = max_child_time + branch_length\n                node.label = tables.nodes.add_row(time=node.time)\n                for child in node.children:\n                    tables.edges.add_row(0, span, node.label, child.label)\n\n        # Do a postorder traversal to assign the internal node labels and times.\n        assign_internal_labels(self)\n        tables.sort()\n        return tables\n\n    @staticmethod\n    def random_binary_tree(leaf_labels, rng):\n        \"\"\"\n        Returns a random binary tree where the leaves have the specified\n        labels using the specified random.Random instance. The root node\n        of this tree is returned.\n\n        Based on the description of Remy's method of generating \"decorated\"\n        random binary trees in TAOCP 7.2.1.6. 
This is not a direct\n        implementation of Algorithm R, because we are interested in\n        the leaf node labellings.\n\n        The pre-fascicle text is available here, page 16:\n        http://www.cs.utsa.edu/~wagner/knuth/fasc4a.pdf\n        \"\"\"\n        nodes = [TreeNode(label=leaf_labels[0])]\n        for label in leaf_labels[1:]:\n            # Choose a node x randomly and insert a new internal node above\n            # it with the (n + 1)th labelled leaf as its sibling.\n            x = rng.choice(nodes)\n            new_leaf = TreeNode(label=label)\n            new_internal = TreeNode(parent=x.parent, children=[x, new_leaf])\n            if x.parent is not None:\n                index = x.parent.children.index(x)\n                x.parent.children[index] = new_internal\n            rng.shuffle(new_internal.children)\n            x.parent = new_internal\n            new_leaf.parent = new_internal\n            nodes.extend([new_leaf, new_internal])\n\n        root = nodes[0]\n        while root.parent is not None:\n            root = root.parent\n\n        # Canonicalise the order of the children within a node. This\n        # is given by (num_leaves, min_label). See also the\n        # RankTree.canonical_order function for the definition of\n        # how these are ordered during rank/unrank.\n\n        def reorder_children(node):\n            if len(node.children) == 0:\n                return 1, node.label\n            keys = [reorder_children(child) for child in node.children]\n            if keys[0] > keys[1]:\n                node.children = node.children[::-1]\n            return (\n                sum(leaf_count for leaf_count, _ in keys),\n                min(min_label for _, min_label in keys),\n            )\n\n        reorder_children(root)\n        return root\n\n    @classmethod\n    def balanced_tree(cls, leaf_labels, arity):\n        \"\"\"\n        Returns a balanced tree of the specified arity. 
At each node the\n        leaf labels are split equally among the arity children using the\n        equal_chunks method.\n        \"\"\"\n        assert len(leaf_labels) > 0\n        if len(leaf_labels) == 1:\n            root = cls(label=leaf_labels[0])\n        else:\n            children = [\n                cls.balanced_tree(chunk, arity)\n                for chunk in equal_chunks(leaf_labels, arity)\n            ]\n            root = cls(children=children)\n            for child in children:\n                child.parent = root\n        return root\n\n\ndef generate_star(num_leaves, *, span, branch_length, record_provenance, **kwargs):\n    \"\"\"\n    Generate a star tree for the specified number of leaves.\n\n    See the documentation for :meth:`Tree.generate_star` for more details.\n    \"\"\"\n    if num_leaves < 2:\n        raise ValueError(\"The number of leaves must be 2 or greater\")\n    tables = tskit.TableCollection(sequence_length=span)\n    tables.nodes.set_columns(\n        flags=np.full(num_leaves, tskit.NODE_IS_SAMPLE, dtype=np.uint32),\n        time=np.zeros(num_leaves),\n    )\n    root = tables.nodes.add_row(time=branch_length)\n    tables.edges.set_columns(\n        left=np.full(num_leaves, 0),\n        right=np.full(num_leaves, span),\n        parent=np.full(num_leaves, root, dtype=np.int32),\n        child=np.arange(num_leaves, dtype=np.int32),\n    )\n    if record_provenance:\n        # TODO replace with a version of https://github.com/tskit-dev/tskit/pull/243\n        # TODO also make sure we convert all the arguments so that they are\n        # definitely JSON encodable.\n        parameters = {\"command\": \"generate_star\", \"TODO\": \"add parameters\"}\n        tables.provenances.add_row(\n            record=json.dumps(tskit.provenance.get_provenance_dict(parameters))\n        )\n    return tables.tree_sequence().first(**kwargs)\n\n\ndef generate_comb(num_leaves, *, span, branch_length, record_provenance, **kwargs):\n    \"\"\"\n    
Generate a comb tree for the specified number of leaves.\n\n    See the documentation for :meth:`Tree.generate_comb` for more details.\n    \"\"\"\n    if num_leaves < 2:\n        raise ValueError(\"The number of leaves must be 2 or greater\")\n    tables = tskit.TableCollection(sequence_length=span)\n    tables.nodes.set_columns(\n        flags=np.full(num_leaves, tskit.NODE_IS_SAMPLE, dtype=np.uint32),\n        time=np.zeros(num_leaves),\n    )\n    right_child = num_leaves - 1\n    time = branch_length\n    for left_child in range(num_leaves - 2, -1, -1):\n        parent = tables.nodes.add_row(time=time)\n        time += branch_length\n        tables.edges.add_row(0, span, parent, left_child)\n        tables.edges.add_row(0, span, parent, right_child)\n        right_child = parent\n\n    if record_provenance:\n        # TODO replace with a version of https://github.com/tskit-dev/tskit/pull/243\n        # TODO also make sure we convert all the arguments so that they are\n        # definitely JSON encodable.\n        parameters = {\"command\": \"generate_comb\", \"TODO\": \"add parameters\"}\n        tables.provenances.add_row(\n            record=json.dumps(tskit.provenance.get_provenance_dict(parameters))\n        )\n    return tables.tree_sequence().first(**kwargs)\n\n\ndef generate_balanced(\n    num_leaves, *, arity, span, branch_length, record_provenance, **kwargs\n):\n    \"\"\"\n    Generate a balanced tree for the specified number of leaves.\n\n    See the documentation for :meth:`Tree.generate_balanced` for more details.\n    \"\"\"\n    if num_leaves < 1:\n        raise ValueError(\"The number of leaves must be at least 1\")\n    if arity < 2:\n        raise ValueError(\"The arity must be at least 2\")\n\n    root = TreeNode.balanced_tree(range(num_leaves), arity)\n    tables = root.as_tables(\n        num_leaves=num_leaves, span=span, branch_length=branch_length\n    )\n\n    if record_provenance:\n        # TODO replace with a version of 
https://github.com/tskit-dev/tskit/pull/243\n        # TODO also make sure we convert all the arguments so that they are\n        # definitely JSON encodable.\n        parameters = {\"command\": \"generate_balanced\", \"TODO\": \"add parameters\"}\n        tables.provenances.add_row(\n            record=json.dumps(tskit.provenance.get_provenance_dict(parameters))\n        )\n\n    return tables.tree_sequence().first(**kwargs)\n\n\ndef generate_random_binary(\n    num_leaves, *, span, branch_length, random_seed, record_provenance, **kwargs\n):\n    \"\"\"\n    Sample a leaf-labelled binary tree uniformly.\n\n    See the documentation for :meth:`Tree.generate_random_binary` for more details.\n    \"\"\"\n    if num_leaves < 1:\n        raise ValueError(\"The number of leaves must be at least 1\")\n\n    rng = random.Random(random_seed)\n    root = TreeNode.random_binary_tree(range(num_leaves), rng)\n    tables = root.as_tables(\n        num_leaves=num_leaves, span=span, branch_length=branch_length\n    )\n\n    if record_provenance:\n        # TODO replace with a version of https://github.com/tskit-dev/tskit/pull/243\n        # TODO also make sure we convert all the arguments so that they are\n        # definitely JSON encodable.\n        parameters = {\"command\": \"generate_random_binary\", \"TODO\": \"add parameters\"}\n        tables.provenances.add_row(\n            record=json.dumps(tskit.provenance.get_provenance_dict(parameters))\n        )\n    ts = tables.tree_sequence()\n    return ts.first(**kwargs)\n\n\ndef split_polytomies(\n    tree,\n    *,\n    epsilon=None,\n    method=None,\n    record_provenance=True,\n    random_seed=None,\n    **kwargs,\n):\n    \"\"\"\n    Return a new tree where extra nodes and edges have been inserted\n    so that any node with more than two children is resolved into\n    a binary tree.\n\n    See the documentation for :meth:`Tree.split_polytomies` for more details.\n    \"\"\"\n    allowed_methods = [\"random\"]\n    if 
method is None:\n        method = \"random\"\n    if method not in allowed_methods:\n        raise ValueError(f\"Method must be chosen from {allowed_methods}\")\n\n    tables = tree.tree_sequence.dump_tables()\n    tables.keep_intervals([tree.interval], simplify=False)\n    tables.edges.clear()\n    rng = random.Random(random_seed)\n\n    for u in tree.nodes():\n        if tree.num_children(u) > 2:\n            root = TreeNode.random_binary_tree(tree.children(u), rng)\n            root.label = u\n            root_time = tree.time(u)\n            stack = [(child, root_time) for child in root.children]\n            while len(stack) > 0:\n                node, parent_time = stack.pop()\n                if node.label is None:\n                    if epsilon is None:\n                        child_time = np.nextafter(parent_time, -np.inf)\n                    else:\n                        child_time = parent_time - epsilon\n                    node.label = tables.nodes.add_row(time=child_time)\n                else:\n                    assert len(node.children) == 0\n                    # This is a leaf node connecting back into the original tree\n                    child_time = tree.time(node.label)\n                if parent_time <= child_time:\n                    u = root.label\n                    min_child_time = min(tree.time(v) for v in tree.children(u))\n                    min_time = root_time - min_child_time\n                    message = (\n                        f\"Cannot resolve the degree {tree.num_children(u)} \"\n                        f\"polytomy rooted at node {u} with minimum time difference \"\n                        f\"of {min_time} to the resolved leaves.\"\n                    )\n                    if epsilon is None:\n                        message += (\n                            \" The time difference between nodes is so small that \"\n                            \"more nodes cannot be inserted between within the limits \"\n          
                  \"of floating point precision.\"\n                        )\n                    else:\n                        # We can also have parent_time == child_time if epsilon is\n                        # chosen such that we exactly divide up the branch in the\n                        # original tree. We avoid saying this is caused by a\n                        # too-small epsilon by noting it can only happen when we\n                        # are at leaf node in the randomly generated tree.\n                        if parent_time == child_time and len(node.children) > 0:\n                            message += (\n                                f\" The fixed epsilon value of {epsilon} is too small, \"\n                                \"resulting in the parent and child times being equal \"\n                                \"within the limits of numerical precision.\"\n                            )\n                        else:\n                            message += (\n                                f\" The fixed epsilon value of {epsilon} is too large, \"\n                                \"resulting in the parent time being less than the child \"\n                                \"time.\"\n                            )\n                    raise tskit.LibraryError(message)\n                tables.edges.add_row(*tree.interval, node.parent.label, node.label)\n                for child in node.children:\n                    stack.append((child, child_time))\n        else:\n            for v in tree.children(u):\n                tables.edges.add_row(*tree.interval, u, v)\n\n    if record_provenance:\n        parameters = {\"command\": \"split_polytomies\"}\n        tables.provenances.add_row(\n            record=json.dumps(tskit.provenance.get_provenance_dict(parameters))\n        )\n    try:\n        tables.sort()\n        ts = tables.tree_sequence()\n    except tskit.LibraryError as e:\n        msg = str(e)\n        # We should have caught all topology 
time travel above.\n        assert not msg.startswith(\"time[parent] must be greater than time[child]\")\n        if msg.startswith(\n            \"A mutation's time must be < the parent node of the edge on which it occurs\"\n        ):\n            if epsilon is not None:\n                msg = (\n                    f\"epsilon={epsilon} not small enough to create new nodes below a \"\n                    \"polytomy, due to the time of a mutation above a child of the \"\n                    \"polytomy.\"\n                )\n            else:\n                msg = (\n                    \"Cannot split polytomy: mutation with numerical precision \"\n                    \"of the parent time.\"\n                )\n            e.args += (msg,)\n        raise e\n    return ts.at(tree.interval.left, **kwargs)\n\n\ndef treeseq_count_topologies(ts, sample_sets):\n    topology_counter = np.full(ts.num_nodes, None, dtype=object)\n    parent = np.full(ts.num_nodes, -1)\n\n    def update_state(tree, u):\n        stack = [u]\n        while len(stack) > 0:\n            v = stack.pop()\n            children = []\n            for c in tree.children(v):\n                if topology_counter[c] is not None:\n                    children.append(topology_counter[c])\n            if len(children) > 0:\n                topology_counter[v] = combine_child_topologies(children)\n            else:\n                topology_counter[v] = None\n            p = parent[v]\n            if p != -1:\n                stack.append(p)\n\n    for sample_set_index, sample_set in enumerate(sample_sets):\n        for u in sample_set:\n            if not ts.node(u).is_sample():\n                raise ValueError(f\"Node {u} in sample_sets is not a sample.\")\n            topology_counter[u] = TopologyCounter.from_sample(sample_set_index)\n\n    for tree, (_, edges_out, edges_in) in zip(ts.trees(), ts.edge_diffs()):\n        # Avoid recomputing anything for the parent until all child edges\n        # for 
that parent are inserted/removed\n        for p, sibling_edges in itertools.groupby(edges_out, key=lambda e: e.parent):\n            for e in sibling_edges:\n                parent[e.child] = -1\n            update_state(tree, p)\n        for p, sibling_edges in itertools.groupby(edges_in, key=lambda e: e.parent):\n            if tree.is_sample(p):\n                raise ValueError(\"Internal samples not supported.\")\n            for e in sibling_edges:\n                parent[e.child] = p\n            update_state(tree, p)\n\n        counters = []\n        for root in tree.roots:\n            if topology_counter[root] is not None:\n                counters.append(topology_counter[root])\n        yield TopologyCounter.merge(counters)\n\n\ndef tree_count_topologies(tree, sample_sets):\n    for u in tree.samples():\n        if not tree.is_leaf(u):\n            raise ValueError(\"Internal samples not supported.\")\n\n    topology_counter = np.full(tree.tree_sequence.num_nodes, None, dtype=object)\n    for sample_set_index, sample_set in enumerate(sample_sets):\n        for u in sample_set:\n            if not tree.is_sample(u):\n                raise ValueError(f\"Node {u} in sample_sets is not a sample.\")\n            topology_counter[u] = TopologyCounter.from_sample(sample_set_index)\n\n    for u in tree.nodes(order=\"postorder\"):\n        children = []\n        for v in tree.children(u):\n            if topology_counter[v] is not None:\n                children.append(topology_counter[v])\n        if len(children) > 0:\n            topology_counter[u] = combine_child_topologies(children)\n\n    counters = []\n    for root in tree.roots:\n        if topology_counter[root] is not None:\n            counters.append(topology_counter[root])\n    return TopologyCounter.merge(counters)\n\n\ndef combine_child_topologies(topology_counters):\n    \"\"\"\n    Select all combinations of topologies from different\n    counters in ``topology_counters`` that are capable of\n   
 being combined into a single topology. This includes\n    any combination of at least two topologies, all from\n    different children, where no topologies share a\n    sample set index.\n    \"\"\"\n    partial_topologies = PartialTopologyCounter()\n    for tc in topology_counters:\n        partial_topologies.add_sibling_topologies(tc)\n\n    return partial_topologies.join_all_combinations()\n\n\nclass TopologyCounter:\n    \"\"\"\n    Contains the distributions of embedded topologies for every combination\n    of the sample sets used to generate the ``TopologyCounter``. It is\n    indexable by a combination of sample set indexes and returns a\n    ``collections.Counter`` whose keys are topology ranks\n    (see :ref:`sec_tree_ranks`). See :meth:`Tree.count_topologies` for more\n    detail on how this structure is used.\n    \"\"\"\n\n    def __init__(self):\n        self.topologies = collections.defaultdict(collections.Counter)\n\n    def __getitem__(self, sample_set_indexes):\n        k = TopologyCounter._to_key(sample_set_indexes)\n        return self.topologies[k]\n\n    def __setitem__(self, sample_set_indexes, counter):\n        k = TopologyCounter._to_key(sample_set_indexes)\n        self.topologies[k] = counter\n\n    def __iter__(self):\n        raise TypeError(\n            \"TopologyCounter object is not iterable, iterate over '.topologies' instead\"\n        )\n\n    @staticmethod\n    def _to_key(sample_set_indexes):\n        if not isinstance(sample_set_indexes, collections.abc.Iterable):\n            sample_set_indexes = (sample_set_indexes,)\n        return tuple(sorted(sample_set_indexes))\n\n    def __eq__(self, other):\n        return self.__class__ == other.__class__ and self.topologies == other.topologies\n\n    @staticmethod\n    def merge(topology_counters):\n        \"\"\"\n        Union together independent topology counters into one.\n        \"\"\"\n        total = TopologyCounter()\n        for tc in topology_counters:\n            for 
k, v in tc.topologies.items():\n                total.topologies[k] += v\n\n        return total\n\n    @staticmethod\n    def from_sample(sample_set_index):\n        \"\"\"\n        Generate the topologies covered by a single sample. This\n        is the single-leaf topology representing the single sample\n        set.\n        \"\"\"\n        rank_tree = RankTree(children=[], label=sample_set_index)\n        tc = TopologyCounter()\n        tc[sample_set_index][rank_tree.rank()] = 1\n        return tc\n\n\nclass PartialTopologyCounter:\n    \"\"\"\n    Represents the possible combinations of children under a node in a tree\n    and the combinations of embedded topologies that are rooted at the node.\n    This allows an efficient way of calculating which unique embedded\n    topologies arise by only ever storing a given pairing of sibling topologies\n    once.\n    ``partials`` is a dictionary where a key is a tuple of sample set indexes,\n    and the value is a ``collections.Counter`` that counts combinations of\n    sibling topologies whose tips represent the sample sets in the key.\n    Each element of the counter is a homogeneous tuple where each element represents\n    a topology. The topology is itself a tuple of the sample set indexes in that\n    topology and the rank.\n    \"\"\"\n\n    def __init__(self):\n        self.partials = collections.defaultdict(collections.Counter)\n\n    def add_sibling_topologies(self, topology_counter):\n        \"\"\"\n        Combine each topology in the given TopologyCounter with every existing\n        combination of topologies whose sample set indexes are disjoint from the\n        topology from the counter. 
This also includes adding the topologies from\n        the counter without joining them to any existing combinations.\n        \"\"\"\n        merged = collections.defaultdict(collections.Counter)\n        for sample_set_indexes, topologies in topology_counter.topologies.items():\n            for rank, count in topologies.items():\n                topology = ((sample_set_indexes, rank),)\n                # Cross with existing topology combinations\n                for sibling_sample_set_indexes, siblings in self.partials.items():\n                    if isdisjoint(sample_set_indexes, sibling_sample_set_indexes):\n                        for sib_topologies, sib_count in siblings.items():\n                            merged_topologies = merge_tuple(sib_topologies, topology)\n                            merged_sample_set_indexes = merge_tuple(\n                                sibling_sample_set_indexes, sample_set_indexes\n                            )\n                            merged[merged_sample_set_indexes][merged_topologies] += (\n                                count * sib_count\n                            )\n                # Propagate without combining\n                merged[sample_set_indexes][topology] += count\n\n        for sample_set_indexes, counter in merged.items():\n            self.partials[sample_set_indexes] += counter\n\n    def join_all_combinations(self):\n        \"\"\"\n        For each pairing of child topologies, join them together into a new\n        tree and count the resulting topologies.\n        \"\"\"\n        topology_counter = TopologyCounter()\n        for sample_set_indexes, sibling_topologies in self.partials.items():\n            for topologies, count in sibling_topologies.items():\n                # A node must have at least two children\n                if len(topologies) >= 2:\n                    rank = PartialTopologyCounter.join_topologies(topologies)\n                    topology_counter[sample_set_indexes][rank] += 
count\n                else:\n                    # Pass on the single tree without adding a parent\n                    for _, rank in topologies:\n                        topology_counter[sample_set_indexes][rank] += count\n\n        return topology_counter\n\n    @staticmethod\n    def join_topologies(child_topologies):\n        children = []\n        for sample_set_indexes, rank in child_topologies:\n            n = len(sample_set_indexes)\n            t = RankTree.unrank(n, rank, list(sample_set_indexes))\n            children.append(t)\n        children.sort(key=RankTree.canonical_order)\n        return RankTree(children).rank()\n\n\ndef all_trees(num_leaves, span=1):\n    \"\"\"\n    Generates all unique leaf-labelled trees with ``num_leaves``\n    leaves. See :ref:`sec_combinatorics` on the details of this\n    enumeration. The leaf labels are selected from the set\n    ``[0, num_leaves)``. The times and labels on internal nodes are\n    chosen arbitrarily.\n\n    :param int num_leaves: The number of leaves of the tree to generate.\n    :param float span: The genomic span of each returned tree.\n    :rtype: tskit.Tree\n    \"\"\"\n    for rank_tree in RankTree.all_labelled_trees(num_leaves):\n        yield rank_tree.to_tsk_tree(span=span)\n\n\ndef all_tree_shapes(num_leaves, span=1):\n    \"\"\"\n    Generates all unique shapes of trees with ``num_leaves`` leaves.\n\n    :param int num_leaves: The number of leaves of the tree to generate.\n    :param float span: The genomic span of each returned tree.\n    :rtype: tskit.Tree\n    \"\"\"\n    for rank_tree in RankTree.all_unlabelled_trees(num_leaves):\n        default_labelling = rank_tree.label_unrank(0)\n        yield default_labelling.to_tsk_tree(span=span)\n\n\ndef all_tree_labellings(tree, span=1):\n    \"\"\"\n    Generates all unique labellings of the leaves of a\n    :class:`tskit.Tree`. 
Leaves are labelled from the set\n    ``[0, n)`` where ``n`` is the number of leaves of ``tree``.\n\n    :param tskit.Tree tree: The tree used to generate\n        labelled trees of the same shape.\n    :param float span: The genomic span of each returned tree.\n    :rtype: tskit.Tree\n    \"\"\"\n    rank_tree = RankTree.from_tsk_tree(tree)\n    for labelling in RankTree.all_labellings(rank_tree):\n        yield labelling.to_tsk_tree(span=span)\n\n\nclass RankTree:\n    \"\"\"\n    A tree class that maintains the topological ranks of each node in the tree.\n    This structure can be used to efficiently compute the rank of a tree of\n    n labelled leaves and produce a tree given a rank.\n    \"\"\"\n\n    def __init__(self, children, label=None):\n        # Children are assumed to be sorted by RankTree.canonical_order\n        self.children = children\n        if len(children) == 0:\n            self.num_leaves = 1\n            self.labels = [label]\n        else:\n            self.num_leaves = sum(c.num_leaves for c in children)\n            self.labels = list(heapq.merge(*(c.labels for c in children)))\n\n        self._shape_rank = None\n        self._label_rank = None\n\n    def compute_shape_rank(self):\n        \"\"\"\n        Mirroring the way in which unlabelled trees are enumerated, we must\n        first calculate the number of trees whose partitions of number of leaves\n        rank lesser than this tree's partition.\n\n        Once we reach the partition of leaves in this tree, we examine the\n        groups of child subtrees assigned to subsequences of the partition.\n        For each group of children with the same number of leaves, k, the trees\n        in that group were selected according to a combination with replacement\n        of those trees from S(k). 
By finding the rank of that combination,\n        we find how many combinations preceded the current one in that group.\n        That rank is then multiplied by the total number of arrangements that\n        could be made in the following groups, added to the total rank,\n        and then we recur on the rest of the group and groups.\n        \"\"\"\n        part = self.leaf_partition()\n        total = 0\n        for prev_part in partitions(self.num_leaves):\n            if prev_part == part:\n                break\n            total += num_tree_pairings(prev_part)\n\n        child_groups = self.group_children_by_num_leaves()\n        next_child_idx = 0\n        for g in child_groups:\n            next_child_idx += len(g)\n            k = g[0].num_leaves\n            S_k = num_shapes(k)\n\n            child_ranks = [c.shape_rank() for c in g]\n            g_rank = Combination.with_replacement_rank(child_ranks, S_k)\n\n            # TODO precompute vector before loop\n            rest_part = part[next_child_idx:]\n            total_rest = num_tree_pairings(rest_part)\n\n            total += g_rank * total_rest\n\n        return total\n\n    def compute_label_rank(self):\n        \"\"\"\n        Again mirroring how we've labeled a particular tree, T, we can rank the\n        labelling on T.\n\n        We group the children into symmetric groups. In the context of labelling,\n        symmetric groups contain child trees that are of the same shape. Each\n        group contains a combination of labels selected from all the labels\n        available to T.\n\n        The different variables to consider are:\n        1. How to assign a combination of labels to the first group.\n        2. Given a combination of labels assigned to the group, how can we\n            distribute those labels to each tree in the group.\n        3. 
Given an assignment of the labels to each tree in the group, how many\n            distinct ways could all the trees in the group be labelled.\n\n        These steps for generating labelled trees break down the stages of\n        ranking them.\n        For each group G, we can find the rank of the combination of labels\n        assigned to G. This rank times the number of ways the trees in G\n        could be labelled, times the number of possible labellings of the\n        rest of the trees, gives the number of labellings that precede those with\n        the given combination of labels assigned to G. This process repeats and\n        breaks down to give the rank of the assignment of labels to trees in G,\n        and the label ranks of the trees themselves in G.\n        \"\"\"\n        all_labels = self.labels\n        child_groups = self.group_children_by_shape()\n        total = 0\n        for i, g in enumerate(child_groups):\n            rest_groups = child_groups[i + 1 :]\n            g_labels = list(heapq.merge(*(t.labels for t in g)))\n            num_rest_labellings = num_list_of_group_labellings(rest_groups)\n\n            # Preceded by all of the ways to label all the groups\n            # with a lower ranking combination given to g.\n            comb_rank = Combination.rank(g_labels, all_labels)\n            num_g_labellings = num_group_labellings(g)\n            preceding_comb = comb_rank * num_g_labellings * num_rest_labellings\n\n            # Preceded then by all the configurations of g ranking less than\n            # the current one\n            rank_from_g = group_rank(g) * num_rest_labellings\n\n            total += preceding_comb + rank_from_g\n            all_labels = set_minus(all_labels, g_labels)\n\n        return total\n\n    # TODO I think this would boost performance if it were a field and not\n    # recomputed.\n    def num_labellings(self):\n        child_groups = self.group_children_by_shape()\n        return 
num_list_of_group_labellings(child_groups)\n\n    def rank(self):\n        return Rank(self.shape_rank(), self.label_rank())\n\n    def shape_rank(self):\n        if self._shape_rank is None:\n            self._shape_rank = self.compute_shape_rank()\n        return self._shape_rank\n\n    def label_rank(self):\n        if self._label_rank is None:\n            assert self.shape_rank() is not None\n            self._label_rank = self.compute_label_rank()\n        return self._label_rank\n\n    @staticmethod\n    def unrank(num_leaves, rank, labels=None):\n        \"\"\"\n        Produce a ``RankTree`` of the given ``rank`` with ``num_leaves`` leaves,\n        labelled with ``labels``. Labels must be sorted, and if ``None`` default\n        to ``[0, num_leaves)``.\n        \"\"\"\n        shape_rank, label_rank = rank\n        if shape_rank < 0 or label_rank < 0:\n            raise ValueError(\"Rank is out of bounds.\")\n        unlabelled = RankTree.shape_unrank(num_leaves, shape_rank)\n        return unlabelled.label_unrank(label_rank, labels)\n\n    @staticmethod\n    def shape_unrank(n, shape_rank):\n        \"\"\"\n        Generate an unlabelled tree with n leaves with a shape corresponding to\n        the `shape_rank`.\n        \"\"\"\n        part, child_shape_ranks = children_shape_ranks(shape_rank, n)\n        children = [\n            RankTree.shape_unrank(k, rk) for k, rk in zip(part, child_shape_ranks)\n        ]\n\n        t = RankTree(children=children)\n        t._shape_rank = shape_rank\n        return t\n\n    def label_unrank(self, label_rank, labels=None):\n        \"\"\"\n        Generate a tree with the same shape, whose leaves are labelled\n        from ``labels`` with the labelling corresponding to ``label_rank``.\n        \"\"\"\n        if labels is None:\n            labels = list(range(self.num_leaves))\n\n        if self.is_leaf():\n            if label_rank != 0:\n                raise ValueError(\"Rank is out of bounds.\")\n            
return RankTree(children=[], label=labels[0])\n\n        child_groups = self.group_children_by_shape()\n        child_labels, child_label_ranks = children_label_ranks(\n            child_groups, label_rank, labels\n        )\n\n        children = self.children\n        labelled_children = [\n            RankTree.label_unrank(c, c_rank, c_labels)\n            for c, c_rank, c_labels in zip(children, child_label_ranks, child_labels)\n        ]\n\n        t = RankTree(children=labelled_children)\n        t._shape_rank = self.shape_rank()\n        t._label_rank = label_rank\n        return t\n\n    @staticmethod\n    def canonical_order(c):\n        \"\"\"\n        Defines the canonical ordering of sibling subtrees.\n        \"\"\"\n        return c.num_leaves, c.shape_rank(), c.min_label()\n\n    @staticmethod\n    def from_tsk_tree_node(tree, u):\n        if tree.is_leaf(u):\n            return RankTree(children=[], label=u)\n\n        if tree.num_children(u) == 1:\n            raise ValueError(\"Cannot rank trees with unary nodes\")\n\n        children = list(\n            sorted(\n                (RankTree.from_tsk_tree_node(tree, c) for c in tree.children(u)),\n                key=RankTree.canonical_order,\n            )\n        )\n        return RankTree(children=children)\n\n    @staticmethod\n    def from_tsk_tree(tree):\n        if tree.num_roots != 1:\n            raise ValueError(\"Cannot rank trees with multiple roots\")\n\n        return RankTree.from_tsk_tree_node(tree, tree.root)\n\n    def to_tsk_tree(self, span=1, branch_length=1):\n        \"\"\"\n        Convert a ``RankTree`` into the only tree in a new tree sequence. Internal\n        nodes and their times are assigned via a postorder traversal of the tree.\n\n        :param float span: The genomic span of the returned tree. 
The tree will cover\n            the interval :math:`[0, span)` and the :attr:`~Tree.tree_sequence` from which\n            the tree is taken will have its :attr:`~tskit.TreeSequence.sequence_length`\n            equal to ``span``.\n        :param float branch_length: The minimum length of a branch in the returned\n            tree.\n        \"\"\"\n        if set(self.labels) != set(range(self.num_leaves)):\n            raise ValueError(\"Labels set must be equivalent to [0, num_leaves)\")\n\n        tables = tskit.TableCollection(span)\n\n        def add_node(node):\n            if node.is_leaf():\n                assert node.label is not None\n                return node.label\n\n            child_ids = [add_node(child) for child in node.children]\n            max_child_time = max(tables.nodes.time[c] for c in child_ids)\n            parent_id = tables.nodes.add_row(time=max_child_time + branch_length)\n            for child_id in child_ids:\n                tables.edges.add_row(0, span, parent_id, child_id)\n\n            return parent_id\n\n        for _ in range(self.num_leaves):\n            tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)\n        add_node(self)\n\n        # The way in which we're inserting nodes doesn't necessarily\n        # adhere to the ordering constraint on edges, so we have\n        # to sort.\n        tables.sort()\n        return tables.tree_sequence().first()\n\n    @staticmethod\n    def all_labelled_trees(n):\n        \"\"\"\n        Generate all unordered, leaf-labelled trees with n leaves.\n        \"\"\"\n        for tree in RankTree.all_unlabelled_trees(n):\n            yield from RankTree.all_labellings(tree)\n\n    @staticmethod\n    def all_unlabelled_trees(n):\n        \"\"\"\n        Generate all tree shapes with n leaves. 
See :ref:`sec_combinatorics`\n        for how tree shapes are enumerated.\n        \"\"\"\n        if n == 1:\n            yield RankTree(children=[])\n        else:\n            for part in partitions(n):\n                for subtree_pairing in RankTree.all_subtree_pairings(\n                    group_partition(part)\n                ):\n                    yield RankTree(children=subtree_pairing)\n\n    @staticmethod\n    def all_subtree_pairings(grouped_part):\n        if len(grouped_part) == 0:\n            yield []\n        else:\n            g = grouped_part[0]\n            k = g[0]\n            all_k_leaf_trees = RankTree.all_unlabelled_trees(k)\n            num_k_leaf_trees = len(g)\n            g_trees = itertools.combinations_with_replacement(\n                all_k_leaf_trees, num_k_leaf_trees\n            )\n            for first_trees in g_trees:\n                for rest in RankTree.all_subtree_pairings(grouped_part[1:]):\n                    yield list(first_trees) + rest\n\n    @staticmethod\n    def all_labellings(tree, labels=None):\n        \"\"\"\n        Given a tree, generate all the unique labellings of that tree.\n        See :ref:`sec_combinatorics` for how labellings of a tree are\n        enumerated.\n        \"\"\"\n        if labels is None:\n            labels = list(range(tree.num_leaves))\n\n        if tree.is_leaf():\n            assert len(labels) == 1\n            yield RankTree(children=[], label=labels[0])\n        else:\n            groups = tree.group_children_by_shape()\n            for labeled_children in RankTree.label_all_groups(groups, labels):\n                yield RankTree(children=labeled_children)\n\n    @staticmethod\n    def label_all_groups(groups, labels):\n        if len(groups) == 0:\n            yield []\n        else:\n            g, rest = groups[0], groups[1:]\n            x = len(g)\n            k = g[0].num_leaves\n            for g_labels in itertools.combinations(labels, x * k):\n                
rest_labels = set_minus(labels, g_labels)\n                for labeled_g in RankTree.label_tree_group(g, g_labels):\n                    for labeled_rest in RankTree.label_all_groups(rest, rest_labels):\n                        yield labeled_g + labeled_rest\n\n    @staticmethod\n    def label_tree_group(trees, labels):\n        if len(trees) == 0:\n            assert len(labels) == 0\n            yield []\n        else:\n            first, rest = trees[0], trees[1:]\n            k = first.num_leaves\n            min_label = labels[0]\n            for first_other_labels in itertools.combinations(labels[1:], k - 1):\n                first_labels = [min_label] + list(first_other_labels)\n                rest_labels = set_minus(labels, first_labels)\n                for labeled_first in RankTree.all_labellings(first, first_labels):\n                    for labeled_rest in RankTree.label_tree_group(rest, rest_labels):\n                        yield [labeled_first] + labeled_rest\n\n    def _newick(self):\n        if self.is_leaf():\n            return str(self.label) if self.labelled() else \"\"\n        return \"(\" + \",\".join(c._newick() for c in self.children) + \")\"\n\n    def newick(self):\n        return self._newick() + \";\"\n\n    @property\n    def label(self):\n        return self.labels[0]\n\n    def labelled(self):\n        return all(label is not None for label in self.labels)\n\n    def min_label(self):\n        return self.labels[0]\n\n    def is_leaf(self):\n        return len(self.children) == 0\n\n    def leaf_partition(self):\n        return [c.num_leaves for c in self.children]\n\n    def group_children_by_num_leaves(self):\n        def same_num_leaves(c1, c2):\n            return c1.num_leaves == c2.num_leaves\n\n        return group_by(self.children, same_num_leaves)\n\n    def group_children_by_shape(self):\n        def same_shape(c1, c2):\n            return c1.num_leaves == c2.num_leaves and c1.shape_rank() == c2.shape_rank()\n\n        
return group_by(self.children, same_shape)\n\n    def __eq__(self, other):\n        if self.__class__ != other.__class__:\n            return False\n\n        if self.is_leaf() and other.is_leaf():\n            return self.label == other.label\n\n        if len(self.children) != len(other.children):\n            return False\n\n        return all(c1 == c2 for c1, c2 in zip(self.children, other.children))\n\n    def __ne__(self, other):\n        return not self.__eq__(other)\n\n    def shape_equal(self, other):\n        if self.is_leaf() and other.is_leaf():\n            return True\n\n        if len(self.children) != len(other.children):\n            return False\n\n        return all(c1.shape_equal(c2) for c1, c2 in zip(self.children, other.children))\n\n    def is_canonical(self):\n        if self.is_leaf():\n            return True\n\n        children = self.children\n        for c1, c2 in zip(children, children[1:]):\n            if RankTree.canonical_order(c1) > RankTree.canonical_order(c2):\n                return False\n        return all(c.is_canonical() for c in children)\n\n    def is_symmetrical(self):\n        if self.is_leaf():\n            return True\n\n        even_split_leaves = len(set(self.leaf_partition())) == 1\n        all_same_rank = len({c.shape_rank() for c in self.children}) == 1\n\n        return even_split_leaves and all_same_rank\n\n\n# TODO This is called repeatedly in ranking and unranking and has a perfect\n# substructure for DP. 
It's only every called on n in [0, num_leaves]\n# so we should compute a vector of those results up front instead of using\n# repeated calls to this function.\n# Put an lru_cache on for now as a quick replacement (cuts test time down by 80%)\n@functools.lru_cache\ndef num_shapes(n):\n    \"\"\"\n    The cardinality of the set of unlabelled trees with n leaves,\n    up to isomorphism.\n    \"\"\"\n    if n <= 1:\n        return n\n    return sum(num_tree_pairings(part) for part in partitions(n))\n\n\ndef num_tree_pairings(part):\n    \"\"\"\n    The number of unique tree shapes that could be assembled from\n    a given partition of leaves. If we group the elements of the partition\n    by number of leaves, each group can be independently enumerated and the\n    cardinalities of each group's pairings can be multiplied. Within a group,\n    subsequent trees must have equivalent or greater rank, so the number of\n    ways to select trees follows combinations with replacement from the set\n    of all possible trees for that group.\n    \"\"\"\n    total = 1\n    for g in group_partition(part):\n        k = g[0]\n        total *= Combination.comb_with_replacement(num_shapes(k), len(g))\n    return total\n\n\ndef num_labellings(n, shape_rk):\n    return RankTree.shape_unrank(n, shape_rk).num_labellings()\n\n\ndef children_shape_ranks(rank, n):\n    \"\"\"\n    Return the partition of leaves associated\n    with the children of the tree of rank `rank`, and\n    the ranks of each child tree.\n    \"\"\"\n    part = []\n    for prev_part in partitions(n):\n        num_trees_with_part = num_tree_pairings(prev_part)\n        if rank < num_trees_with_part:\n            part = prev_part\n            break\n        rank -= num_trees_with_part\n    else:\n        if n != 1:\n            raise ValueError(\"Rank is out of bounds.\")\n\n    grouped_part = group_partition(part)\n    child_ranks = []\n    next_child = 0\n    for g in grouped_part:\n        next_child += len(g)\n        
k = g[0]\n\n        # TODO precompute vector up front\n        rest_children = part[next_child:]\n        rest_num_pairings = num_tree_pairings(rest_children)\n\n        shapes_comb_rank = rank // rest_num_pairings\n        g_shape_ranks = Combination.with_replacement_unrank(\n            shapes_comb_rank, num_shapes(k), len(g)\n        )\n        child_ranks += g_shape_ranks\n        rank %= rest_num_pairings\n\n    return part, child_ranks\n\n\ndef children_label_ranks(child_groups, rank, labels):\n    \"\"\"\n    Produces the subsets of labels assigned to each child\n    and the associated label rank of each child.\n    \"\"\"\n    child_labels = []\n    child_label_ranks = []\n\n    for i, g in enumerate(child_groups):\n        k = g[0].num_leaves\n        g_num_leaves = k * len(g)\n        num_g_labellings = num_group_labellings(g)\n        # TODO precompute vector of partial products outside of loop\n        rest_groups = child_groups[i + 1 :]\n        num_rest_labellings = num_list_of_group_labellings(rest_groups)\n\n        num_labellings_per_label_comb = num_g_labellings * num_rest_labellings\n        comb_rank = rank // num_labellings_per_label_comb\n        rank_given_label_comb = rank % num_labellings_per_label_comb\n        g_rank = rank_given_label_comb // num_rest_labellings\n\n        g_labels = Combination.unrank(comb_rank, labels, g_num_leaves)\n\n        g_child_labels, g_child_ranks = group_label_ranks(g_rank, g, g_labels)\n        child_labels += g_child_labels\n        child_label_ranks += g_child_ranks\n\n        labels = set_minus(labels, g_labels)\n        rank %= num_rest_labellings\n\n    return child_labels, child_label_ranks\n\n\ndef group_rank(g):\n    k = g[0].num_leaves\n    n = len(g) * k\n    # Num ways to label a single one of the trees\n    # We can do this once because all the trees in the group\n    # are of the same shape rank\n    y = g[0].num_labellings()\n    all_labels = list(heapq.merge(*(t.labels for t in g)))\n    rank 
= 0\n    for i, t in enumerate(g):\n        u_labels = t.labels\n        curr_trees = len(g) - i\n        # Kind of cheating here leaving the selection of min labels implicit\n        # because the rank of the comb without min labels is the same\n        comb_rank = Combination.rank(u_labels, all_labels)\n\n        # number of ways to distribute labels to rest leaves\n        num_rest_combs = 1\n        remaining_leaves = n - (i + 1) * k\n        for j in range(curr_trees - 1):\n            num_rest_combs *= Combination.comb(remaining_leaves - j * k - 1, k - 1)\n\n        preceding_combs = comb_rank * num_rest_combs * (y**curr_trees)\n        curr_comb = t.label_rank() * num_rest_combs * (y ** (curr_trees - 1))\n        rank += preceding_combs + curr_comb\n        all_labels = set_minus(all_labels, u_labels)\n    return rank\n\n\n# TODO This is only used in a few cases and mostly in a n^2 way. Would\n# be easy and useful to do this DP and produce a list of partial products\ndef num_list_of_group_labellings(groups):\n    \"\"\"\n    Given a set of labels and a list of groups, how many unique ways are there\n    to assign subsets of labels to each group in the list and subsequently\n    label all the trees in all the groups.\n    \"\"\"\n    remaining_leaves = sum(len(g) * g[0].num_leaves for g in groups)\n    total = 1\n    for g in groups:\n        k = g[0].num_leaves\n        x = len(g)\n        num_label_choices = Combination.comb(remaining_leaves, x * k)\n        total *= num_label_choices * num_group_labellings(g)\n        remaining_leaves -= x * k\n\n    return total\n\n\ndef num_group_labellings(g):\n    \"\"\"\n    Given a particular set of labels, how many unique ways are there\n    to assign subsets of labels to each tree in the group and subsequently\n    label those trees.\n    \"\"\"\n    # Shortcut because all the trees are identical and can therefore\n    # be labelled in the same ways\n    num_tree_labelings = g[0].num_labellings() ** len(g)\n    
return num_assignments_in_group(g) * num_tree_labelings\n\n\ndef num_assignments_in_group(g):\n    \"\"\"\n    Given this group of identical trees, how many unique ways\n    are there to divide up a set of n labels?\n    \"\"\"\n    n = sum(t.num_leaves for t in g)\n    total = 1\n    for t in g:\n        k = t.num_leaves\n        # Choose k - 1 from n - 1 because the minimum label must be\n        # assigned to the first tree for a canonical labelling.\n        total *= Combination.comb(n - 1, k - 1)\n        n -= k\n    return total\n\n\ndef group_label_ranks(rank, child_group, labels):\n    \"\"\"\n    Given a group of trees of the same shape, a label rank and list of labels,\n    produce assignment of label subsets to each tree in the group and the\n    label rank of each tree.\n    \"\"\"\n    child_labels = []\n    child_label_ranks = []\n\n    for i, rank_tree in enumerate(child_group):\n        k = rank_tree.num_leaves\n        num_t_labellings = rank_tree.num_labellings()\n        rest_trees = child_group[i + 1 :]\n        num_rest_assignments = num_assignments_in_group(rest_trees)\n        num_rest_labellings = num_rest_assignments * (\n            num_t_labellings ** len(rest_trees)\n        )\n        num_labellings_per_label_comb = num_t_labellings * num_rest_labellings\n\n        comb_rank = rank // num_labellings_per_label_comb\n        rank_given_comb = rank % num_labellings_per_label_comb\n        t_rank = rank_given_comb // num_rest_labellings\n        rank %= num_rest_labellings\n\n        min_label = labels[0]\n        t_labels = [min_label] + Combination.unrank(comb_rank, labels[1:], k - 1)\n        labels = set_minus(labels, t_labels)\n\n        child_labels.append(t_labels)\n        child_label_ranks.append(t_rank)\n\n    return child_labels, child_label_ranks\n\n\nclass Combination:\n    @staticmethod\n    def comb(n, k):\n        \"\"\"\n        The number of times you can select k items from\n        n items without order and without 
replacement.\n\n        FIXME: This function will be available in `math` in Python 3.8\n        and should be replaced eventually.\n        \"\"\"\n        k = min(k, n - k)\n        res = 1\n        for i in range(1, k + 1):\n            res *= n - k + i\n            res //= i\n\n        return res\n\n    @staticmethod\n    def comb_with_replacement(n, k):\n        \"\"\"\n        Also called multichoose, the number of times you can select\n        k items from n items without order but *with* replacement.\n        \"\"\"\n        return Combination.comb(n + k - 1, k)\n\n    @staticmethod\n    def rank(combination, elements):\n        \"\"\"\n        Find the combination of k elements from the given set of elements\n        with the given rank in a lexicographic ordering.\n        \"\"\"\n        indices = [elements.index(x) for x in combination]\n        return Combination.from_range_rank(indices, len(elements))\n\n    @staticmethod\n    def from_range_rank(combination, n):\n        \"\"\"\n        Find the combination of k integers from [0, n)\n        with the given rank in a lexicographic ordering.\n        \"\"\"\n        k = len(combination)\n        if k == 0 or k == n:\n            return 0\n\n        j = combination[0]\n        combination = [x - 1 for x in combination]\n        if j == 0:\n            return Combination.from_range_rank(combination[1:], n - 1)\n\n        first_rank = Combination.comb(n - 1, k - 1)\n        rest_rank = Combination.from_range_rank(combination, n - 1)\n        return first_rank + rest_rank\n\n    @staticmethod\n    def unrank(rank, elements, k):\n        n = len(elements)\n        if k == 0:\n            return []\n        if len(elements) == 0:\n            raise ValueError(\"Rank is out of bounds.\")\n\n        n_rest_combs = Combination.comb(n - 1, k - 1)\n        if rank < n_rest_combs:\n            return elements[:1] + Combination.unrank(rank, elements[1:], k - 1)\n\n        return Combination.unrank(rank - 
n_rest_combs, elements[1:], k)\n\n    @staticmethod\n    def with_replacement_rank(combination, n):\n        \"\"\"\n        Find the rank of ``combination`` in the lexicographic ordering of\n        combinations with replacement of integers from [0, n).\n        \"\"\"\n        k = len(combination)\n        if k == 0:\n            return 0\n        j = combination[0]\n        if k == 1:\n            return j\n\n        if j == 0:\n            return Combination.with_replacement_rank(combination[1:], n)\n\n        rest = [x - j for x in combination[1:]]\n        preceding = 0\n        for i in range(j):\n            preceding += Combination.comb_with_replacement(n - i, k - 1)\n        return preceding + Combination.with_replacement_rank(rest, n - j)\n\n    @staticmethod\n    def with_replacement_unrank(rank, n, k):\n        \"\"\"\n        Find the combination with replacement of k integers from [0, n)\n        with the given rank in a lexicographic ordering.\n        \"\"\"\n        if k == 0:\n            return []\n\n        i = 0\n        preceding = Combination.comb_with_replacement(n, k - 1)\n        while rank >= preceding:\n            rank -= preceding\n            i += 1\n            preceding = Combination.comb_with_replacement(n - i, k - 1)\n\n        rest = Combination.with_replacement_unrank(rank, n - i, k - 1)\n        return [i] + [x + i for x in rest]\n\n\ndef set_minus(arr, subset):\n    return [x for x in arr if x not in set(subset)]\n\n\n# TODO I think we can use part-count form everywhere. 
Right now\n# there's a janky work-around of grouping the partition when\n# we need it in part-count form, but it doesn't look like there's any\n# place that can't just accept it from the start.\ndef partitions(n):\n    \"\"\"\n    Ascending integer partitions of n, excluding the partition [n].\n    Since trees with unary nodes are uncountable, the partition of\n    leaves must be at least size two.\n    \"\"\"\n    if n > 0:\n        # last partition is guaranteed to be length 1.\n        yield from itertools.takewhile(lambda a: len(a) > 1, rule_asc(n))\n\n\ndef rule_asc(n):\n    \"\"\"\n    Produce the integer partitions of n as ascending compositions.\n    See: http://jeromekelleher.net/generating-integer-partitions.html\n    \"\"\"\n    a = [0 for _ in range(n + 1)]\n    k = 1\n    a[1] = n\n    while k != 0:\n        x = a[k - 1] + 1\n        y = a[k] - 1\n        k -= 1\n        while x <= y:\n            a[k] = x\n            y -= x\n            k += 1\n        a[k] = x + y\n        yield a[: k + 1]\n\n\ndef group_by(values, equal):\n    groups = []\n    curr_group = []\n    for x in values:\n        if len(curr_group) == 0 or equal(x, curr_group[0]):\n            curr_group.append(x)\n        else:\n            groups.append(curr_group)\n            curr_group = [x]\n\n    if len(curr_group) != 0:\n        groups.append(curr_group)\n    return groups\n\n\ndef group_partition(part):\n    return group_by(part, lambda x, y: x == y)\n\n\ndef merge_tuple(tup1, tup2):\n    return tuple(heapq.merge(tup1, tup2))\n\n\ndef isdisjoint(iterable1, iterable2):\n    return set(iterable1).isdisjoint(iterable2)\n"
  },
  {
    "path": "python/tskit/drawing.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2025 Tskit Developers\n# Copyright (c) 2015-2017 University of Oxford\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nModule responsible for visualisations.\n\"\"\"\n\nimport collections\nimport itertools\nimport logging\nimport math\nimport numbers\nimport operator\nimport warnings\nimport xml.dom.minidom\nfrom collections.abc import Mapping\nfrom dataclasses import dataclass\n\nimport numpy as np\n\nimport tskit\nimport tskit.util as util\nfrom _tskit import NODE_IS_SAMPLE, NULL\n\nLEFT = \"left\"\nRIGHT = \"right\"\nTOP = \"top\"\nBOTTOM = \"bottom\"\n\n# constants for whether to plot a tree in a tree sequence\nOMIT = 1\nLEFT_CLIP = 2\nRIGHT_CLIP = 4\nOMIT_MIDDLE = 8\n\n\n# Minimal SVG generation module to replace svgwrite for tskit visualization.\n# This implementation provides only the functionality needed for the visualization\n# code while maintaining the same API as 
svgwrite.\n\n\nclass Element:\n    def __init__(self, tag, **kwargs):\n        self.tag = tag\n        self.attrs = {}\n        self.children = []\n\n        # Process kwargs in alphabetical order\n        for key in sorted(kwargs.keys()):\n            value = kwargs[key]\n            # Handle class_ special case for class attribute\n            if key.endswith(\"_\"):\n                key = key[:-1]\n            key = key.replace(\"_\", \"-\")\n            self.attrs[key] = value\n\n    def __getitem__(self, key):\n        return self.attrs.get(key, \"\")\n\n    def __setitem__(self, key, value):\n        self.attrs[key] = value\n\n    def add(self, child):\n        self.children.append(child)\n        return child\n\n    def set_desc(self, **kwargs):\n        if \"title\" in kwargs:\n            title_elem = Element(\"title\")\n            title_elem.children.append(kwargs[\"title\"])\n            self.children.append(title_elem)\n        return self\n\n    def _attr_str(self):\n        result = []\n        for key, value in self.attrs.items():\n            if isinstance(value, (list, tuple)):\n                # Handle points lists (for polygon/polyline)\n                if key == \"points\":\n                    points_str = \" \".join(f\"{x},{y}\" for x, y in value)\n                    result.append(f'{key}=\"{points_str}\"')\n                else:\n                    result.append(f'{key}=\"{\" \".join(map(str, value))}\"')\n            else:\n                result.append(f'{key}=\"{value}\"')\n        return \" \".join(result)\n\n    def tostring(self):\n        stack = [(self, False)]\n        result = []\n\n        while stack:\n            elem, is_closing_tag = stack.pop()\n            if is_closing_tag:\n                result.append(f\"</{elem.tag}>\")\n                continue\n            attr_str = elem._attr_str()\n            start = f\"<{elem.tag}\"\n            if attr_str:\n                start += f\" {attr_str}\"\n            if not 
elem.children:\n                result.append(f\"{start}/>\")\n            else:\n                result.append(f\"{start}>\")\n                stack.append((elem, True))\n                for child in reversed(elem.children):\n                    if isinstance(child, Element):\n                        stack.append((child, False))\n                    else:\n                        result.append(str(child))\n\n        return \"\".join(result)\n\n\nclass Drawing:\n    def __init__(self, size=None, **kwargs):\n        kwargs = {\n            \"version\": \"1.1\",\n            \"xmlns\": \"http://www.w3.org/2000/svg\",\n            \"xmlns:ev\": \"http://www.w3.org/2001/xml-events\",\n            \"xmlns:xlink\": \"http://www.w3.org/1999/xlink\",\n            \"baseProfile\": \"full\",\n            **kwargs,\n        }\n        if size is not None:\n            kwargs[\"width\"] = size[0]\n            kwargs[\"height\"] = size[1]\n\n        self.root = Element(\"svg\", **kwargs)\n        self.root.add(\"\")  # First root elem is a blank preamble\n        self.defs = Element(\"defs\")\n        self.root.add(self.defs)\n\n    def add(self, element):\n        return self.root.add(element)\n\n    def g(self, **kwargs):\n        return Element(\"g\", **kwargs)\n\n    def rect(self, insert=None, size=None, **kwargs):\n        if insert:\n            kwargs[\"x\"] = insert[0]\n            kwargs[\"y\"] = insert[1]\n        if size:\n            kwargs[\"width\"] = size[0]\n            kwargs[\"height\"] = size[1]\n        return Element(\"rect\", **kwargs)\n\n    def circle(self, center=None, r=None, **kwargs):\n        if center:\n            kwargs[\"cx\"] = center[0]\n            kwargs[\"cy\"] = center[1]\n        if r:\n            kwargs[\"r\"] = r\n        return Element(\"circle\", **kwargs)\n\n    def line(self, start=None, end=None, **kwargs):\n        if start:\n            kwargs[\"x1\"] = start[0]\n            kwargs[\"y1\"] = start[1]\n        else:\n            
kwargs[\"x1\"] = 0\n            kwargs[\"y1\"] = 0\n        if end:\n            kwargs[\"x2\"] = end[0]\n            kwargs[\"y2\"] = end[1]\n        else:\n            kwargs[\"x2\"] = 0  # pragma: not covered\n            kwargs[\"y2\"] = 0  # pragma: not covered\n        return Element(\"line\", **kwargs)\n\n    def polyline(self, points=None, **kwargs):\n        if points:\n            kwargs[\"points\"] = points\n        return Element(\"polyline\", **kwargs)\n\n    def polygon(self, points=None, **kwargs):\n        if points:\n            kwargs[\"points\"] = points\n        return Element(\"polygon\", **kwargs)\n\n    def path(self, d=None, **kwargs):\n        if isinstance(d, list):\n            # Convert path commands from tuples to string\n            path_str = \"\"\n            for cmd in d:\n                if isinstance(cmd, tuple) and len(cmd) >= 2:\n                    cmd_letter = cmd[0]\n                    # Handle nested tuples by flattening\n                    params = []\n                    for param in cmd[1:]:\n                        if isinstance(param, tuple):\n                            # Flatten tuple coordinates\n                            params.extend(str(p) for p in param)\n                        else:\n                            params.append(str(param))\n                    path_str += f\"{cmd_letter} {' '.join(params)} \"\n            kwargs[\"d\"] = path_str.strip()\n        elif d:\n            kwargs[\"d\"] = d\n        return Element(\"path\", **kwargs)\n\n    def text(self, text=None, **kwargs):\n        elem = Element(\"text\", **kwargs)\n        if text:\n            elem.children.append(text)\n        return elem\n\n    def style(self, content):\n        elem = Element(\"style\", type=\"text/css\")\n        if content:\n            # Use CDATA to avoid having to escape special characters in CSS\n            elem.children.append(f\"<![CDATA[{content}]]>\")\n        return elem\n\n    def tostring(self, 
pretty=False):\n        if pretty:\n            return xml.dom.minidom.parseString(self.root.tostring()).toprettyxml()\n        return self.root.tostring()\n\n    def saveas(self, path, pretty=False):\n        with open(path, \"w\", encoding=\"utf-8\") as f:\n            f.write(self.tostring(pretty=pretty))\n\n\n@dataclass\nclass Offsets:\n    \"Used when x_lim set, and displayed ts has been cut down by keep_intervals\"\n\n    tree: int = 0\n    site: int = 0\n    mutation: int = 0\n\n\n@dataclass(frozen=True)\nclass Timescaling:\n    \"Class used to transform the time axis\"\n\n    max_time: float\n    min_time: float\n    plot_min: float\n    plot_range: float\n    use_log_transform: bool\n\n    def __post_init__(self):\n        if self.plot_range < 0:\n            raise ValueError(\"Image size too small to allow space to plot tree\")\n        if self.use_log_transform:\n            if self.min_time < 0:\n                raise ValueError(\"Cannot use a log scale if there are negative times\")\n            super().__setattr__(\"transform\", self.log_transform)\n        else:\n            super().__setattr__(\"transform\", self.linear_transform)\n\n    def log_transform(self, y):\n        \"Standard log transform but allowing for values of 0 by adding 1\"\n        delta = 1 if self.min_time == 0 else 0\n        log_max = np.log(self.max_time + delta)\n        log_min = np.log(self.min_time + delta)\n        y_scale = self.plot_range / (log_max - log_min)\n        return self.plot_min - (np.log(y + delta) - log_min) * y_scale\n\n    def linear_transform(self, y):\n        y_scale = self.plot_range / (self.max_time - self.min_time)\n        return self.plot_min - (y - self.min_time) * y_scale\n\n    def with_offset(self, y_offset):\n        return Timescaling(\n            max_time=self.max_time,\n            min_time=self.min_time,\n            plot_min=self.plot_min + y_offset,\n            plot_range=self.plot_range,\n            
use_log_transform=self.use_log_transform,\n        )\n\n\nclass SVGString(str):\n    \"A string containing an SVG representation\"\n\n    def _repr_svg_(self):\n        \"\"\"\n        Simply return the SVG string: called by jupyter notebooks to render trees.\n        \"\"\"\n        return self\n\n\ndef check_orientation(orientation):\n    if orientation is None:\n        orientation = TOP\n    else:\n        orientation = orientation.lower()\n        orientations = [LEFT, RIGHT, TOP, BOTTOM]\n        if orientation not in orientations:\n            raise ValueError(f\"Unknown orientiation: choose from {orientations}\")\n    return orientation\n\n\ndef check_max_time(max_time, allow_numeric=True):\n    if max_time is None:\n        max_time = \"tree\"\n    is_numeric = isinstance(max_time, numbers.Real)\n    if max_time not in [\"tree\", \"ts\"] and not allow_numeric:\n        raise ValueError(\"max_time must be 'tree' or 'ts'\")\n    if max_time not in [\"tree\", \"ts\"] and (allow_numeric and not is_numeric):\n        raise ValueError(\"max_time must be a numeric value or one of 'tree' or 'ts'\")\n    return max_time\n\n\ndef check_min_time(min_time, allow_numeric=True):\n    if min_time is None:\n        min_time = \"tree\"\n    if allow_numeric:\n        is_numeric = isinstance(min_time, numbers.Real)\n        if min_time not in [\"tree\", \"ts\"] and not is_numeric:\n            raise ValueError(\"min_time must be a numeric value or one of 'tree' or 'ts'\")\n    else:\n        if min_time not in [\"tree\", \"ts\"]:\n            raise ValueError(\"min_time must be 'tree' or 'ts'\")\n    return min_time\n\n\ndef check_time_scale(time_scale):\n    if time_scale is None:\n        time_scale = \"time\"\n    if time_scale not in [\"time\", \"log_time\", \"rank\"]:\n        raise ValueError(\"time_scale must be 'time', 'log_time' or 'rank'\")\n    return time_scale\n\n\ndef check_format(format):  # noqa A002\n    if format is None:\n        format = \"SVG\"  # noqa 
A001\n    fmt = format.lower()\n    supported_formats = [\"svg\", \"ascii\", \"unicode\"]\n    if fmt not in supported_formats:\n        raise ValueError(\n            \"Unknown format '{}'. Supported formats are {}\".format(\n                format, supported_formats\n            )\n        )\n    return fmt\n\n\ndef check_order(order):\n    \"\"\"\n    Checks the specified drawing order is valid and returns the corresponding\n    tree traversal order.\n    \"\"\"\n    if order is None:\n        order = \"minlex\"\n    traversal_orders = {\n        \"minlex\": \"minlex_postorder\",\n        \"tree\": \"postorder\",\n    }\n    # Silently accept a tree traversal order as a valid order, so we can\n    # call this check twice if necessary\n    if order in traversal_orders.values():\n        return order\n    if order not in traversal_orders:\n        raise ValueError(\n            f\"Unknown display order '{order}'. \"\n            f\"Supported orders are {list(traversal_orders.keys())}\"\n        )\n    return traversal_orders[order]\n\n\ndef check_x_scale(x_scale):\n    \"\"\"\n    Checks the specified x_scale is valid and sets default if None\n    \"\"\"\n    if x_scale is None:\n        x_scale = \"physical\"\n    x_scales = [\"physical\", \"treewise\"]\n    if x_scale not in x_scales:\n        raise ValueError(\n            f\"Unknown display x_scale '{x_scale}'. 
Supported orders are {x_scales}\"\n        )\n    return x_scale\n\n\ndef check_x_lim(x_lim, max_x):\n    \"\"\"\n    Checks the specified x_limits are valid and sets default if None.\n    \"\"\"\n    if x_lim is None:\n        x_lim = (None, None)\n    if len(x_lim) != 2:\n        raise ValueError(\"The x_lim parameter must be a list of length 2, or None\")\n    try:\n        if x_lim[0] is not None and x_lim[0] < 0:\n            raise ValueError(\"x_lim[0] cannot be negative\")\n        if x_lim[1] is not None and x_lim[1] > max_x:\n            raise ValueError(\"x_lim[1] cannot be greater than the sequence length\")\n        if x_lim[0] is not None and x_lim[1] is not None and x_lim[0] >= x_lim[1]:\n            raise ValueError(\"x_lim[0] must be less than x_lim[1]\")\n    except TypeError:\n        raise TypeError(\"x_lim parameters must be numeric\")\n    return x_lim\n\n\ndef check_y_axis(y_axis):\n    \"\"\"\n    Checks the specified y_axis is valid and sets default if None.\n    \"\"\"\n    if y_axis is None:\n        y_axis = False\n    if y_axis is True:\n        y_axis = \"left\"\n    if y_axis not in [\"left\", \"right\", False]:\n        raise ValueError(f\"Unknown y_axis specification: '{y_axis}'.\")\n    return y_axis\n\n\ndef create_tick_labels(tick_values, decimal_places=2):\n    \"\"\"\n    If tick_values are numeric, round the labels to X decimal_places, but do not print\n    decimals if all values are integers\n    \"\"\"\n    try:\n        integer_ticks = np.all(np.round(tick_values) == tick_values)\n    except TypeError:\n        return tick_values\n    label_precision = 0 if integer_ticks else decimal_places\n    return [f\"{lab:.{label_precision}f}\" for lab in tick_values]\n\n\ndef clip_ts(ts, x_min, x_max, max_num_trees=None):\n    \"\"\"\n    Culls the edges of the tree sequence outside the limits of x_min and x_max if\n    necessary, and flags internal trees for omission if there are more than\n    max_num_trees in the tree sequence\n\n  
  Returns the new tree sequence using the same genomic scale, and an\n    array specifying which trees to actually plot from it. This array contains\n    information about whether a plotted tree was clipped, because clipping can\n    cause the rightmost and leftmost tree in this new TS to have reduced spans, and\n    should be displayed by omitting the appropriate breakpoint.\n\n    If x_min is None, we take it to be 0 if the first tree has edges or sites, or\n    ``min(edges.left)`` if the first tree represents an empty region.\n    Similarly, if x_max is None we take it to be ``ts.sequence_length`` if the last tree\n    has edges or mutations, or ``ts.last().interval.left`` if the last tree represents\n    an empty region.\n\n    To plot the full ts, including empty flanking regions, specify x_limits of\n    [0, seq_len].\n\n    \"\"\"\n    edges = ts.tables.edges\n    sites = ts.tables.sites\n    offsets = Offsets()\n    if x_min is None:\n        if ts.num_edges == 0:\n            if ts.num_sites == 0:\n                raise ValueError(\n                    \"To plot an empty tree sequence, specify x_lim=[0, sequence_length]\"\n                )\n            x_min = 0\n        else:\n            x_min = np.min(edges.left)\n            if ts.num_sites > 0 and np.min(sites.position) < x_min:\n                x_min = 0  # First region has no edges, but does have sites => keep\n    if x_max is None:\n        if ts.num_edges == 0:\n            if ts.num_sites == 0:\n                raise ValueError(\n                    \"To plot an empty tree sequence, specify x_lim=[0, sequence_length]\"\n                )\n            x_max = ts.sequence_length\n        else:\n            x_max = np.max(edges.right)\n            if ts.num_sites > 0 and np.max(sites.position) > x_max:\n                x_max = ts.sequence_length  # Last region has sites but no edges => keep\n\n    if max_num_trees is None:\n        max_num_trees = np.inf\n\n    if max_num_trees < 2:\n        raise 
ValueError(\"Must show at least 2 trees when clipping a tree sequence\")\n\n    if (x_min > 0) or (x_max < ts.sequence_length):\n        old_breaks = ts.breakpoints(as_array=True)\n        offsets.tree = np.searchsorted(old_breaks, x_min, \"right\") - 2\n        offsets.site = np.searchsorted(sites.position, x_min)\n        offsets.mutation = np.searchsorted(ts.tables.mutations.site, offsets.site)\n        ts = ts.keep_intervals([[x_min, x_max]], simplify=False)\n        if ts.num_edges == 0:\n            raise ValueError(\n                f\"Can't limit plotting from {x_min} to {x_max} as whole region is empty\"\n            )\n        edges = ts.tables.edges\n        sites = ts.tables.sites\n        trees_start = np.min(edges.left)\n        trees_end = np.max(edges.right)\n        tree_status = np.zeros(ts.num_trees, dtype=np.uint8)\n        # Are the leftmost/rightmost regions completely empty - if so, don't plot them\n        if 0 < x_min <= trees_start and (\n            ts.num_sites == 0 or trees_start <= np.min(sites.position)\n        ):\n            tree_status[0] = OMIT\n        if trees_end <= x_max < ts.sequence_length and (\n            ts.num_sites == 0 or trees_end >= np.max(sites.position)\n        ):\n            tree_status[-1] = OMIT\n\n        # Which breakpoints are new ones, as a result of clipping\n        new_breaks = np.logical_not(np.isin(ts.breakpoints(as_array=True), old_breaks))\n        tree_status[new_breaks[:-1]] |= LEFT_CLIP\n        tree_status[new_breaks[1:]] |= RIGHT_CLIP\n    else:\n        tree_status = np.zeros(ts.num_trees, dtype=np.uint8)\n\n    first_tree = 1 if tree_status[0] & OMIT else 0\n    last_tree = ts.num_trees - 2 if tree_status[-1] & OMIT else ts.num_trees - 1\n    num_shown_trees = last_tree - first_tree + 1\n    if num_shown_trees > max_num_trees:\n        num_start_trees = max_num_trees // 2 + (1 if max_num_trees % 2 else 0)\n        num_end_trees = max_num_trees // 2\n        assert num_start_trees + 
num_end_trees == max_num_trees\n        tree_status[(first_tree + num_start_trees) : (last_tree - num_end_trees + 1)] = (\n            OMIT | OMIT_MIDDLE\n        )\n\n    return ts, tree_status, offsets\n\n\ndef check_y_ticks(ticks: list | Mapping | None) -> Mapping:\n    \"\"\"\n    Later we might want to implement a tick locator function, such that e.g. ticks=5\n    selects ~5 nicely spaced tick locations (with sensible behaviour for log scales)\n    \"\"\"\n    if ticks is None:\n        return {}\n    if isinstance(ticks, Mapping):\n        return dict(zip(ticks, create_tick_labels(list(ticks.values()))))\n    return dict(zip(ticks, create_tick_labels(ticks)))\n\n\ndef rnd(x):\n    \"\"\"\n    Round a number so that the output SVG doesn't have unneeded precision\n    \"\"\"\n    digits = 6\n    if x == 0 or not math.isfinite(x):\n        return x\n    digits -= math.ceil(math.log10(abs(x)))\n    x = round(x, digits)\n    if int(x) == x:\n        return int(x)\n    return x\n\n\ndef bold_integer(number):\n    # For simple integers, it's easier to use bold unicode characters\n    # than to try to get the SVG to render a bold font for part of a string\n    return \"\".join(\"𝟎𝟏𝟐𝟑𝟒𝟓𝟔𝟕𝟖𝟗\"[int(digit)] for digit in str(number))\n\n\ndef edge_and_sample_nodes(ts, omit_regions=None):\n    \"\"\"\n    Return ids of nodes which are mentioned in an edge in this tree sequence or which\n    are samples: nodes not connected to an edge are often found if x_lim is specified.\n    \"\"\"\n    if omit_regions is None or len(omit_regions) == 0:\n        ids = np.concatenate((ts.edges_child, ts.edges_parent))\n    else:\n        ids = np.array([], dtype=ts.edges_child.dtype)\n        edges = ts.tables.edges\n        assert omit_regions.shape[1] == 2\n        omit_regions = omit_regions.flatten()\n        assert np.all(omit_regions == np.unique(omit_regions))  # Check they're in order\n        use_regions = np.concatenate(([0.0], omit_regions, [ts.sequence_length]))\n        
use_regions = use_regions.reshape(-1, 2)\n        for left, right in use_regions:\n            used_edges = edges[np.logical_and(edges.left >= left, edges.right < right)]\n            ids = np.concatenate((ids, used_edges.child, used_edges.parent))\n    return np.unique(np.concatenate((ids, np.where(ts.nodes_flags & NODE_IS_SAMPLE)[0])))\n\n\ndef _postorder_tracked_node_traversal(tree, root, collapse_tracked, key_dict=None):\n    # Postorder traversal that only descends into subtrees if they contain\n    # a tracked node. Additionally, if collapse_tracked is not None, it is\n    # interpreted as a proportion, so that we do not descend into a subtree if\n    # that proportion or greater of the samples in the subtree are tracked.\n    # If key_dict is provided, use this to sort the children. This allows\n    # us to put e.g. the subtrees containing the most tracked nodes first.\n    # Private function, for use only in drawing.postorder_tracked_minlex_traversal()\n\n    # If we deliberately specify the virtual root, it should also be returned\n    is_virtual_root = root == tree.virtual_root\n    if root == tskit.NULL:\n        root = tree.virtual_root\n    stack = [(root, False)]\n    while stack:\n        u, visited = stack.pop()\n        if visited:\n            if u != tree.virtual_root or is_virtual_root:\n                yield u\n        else:\n            if tree.num_children(u) == 0:\n                yield u\n            elif tree.num_tracked_samples(u) == 0:\n                yield u\n            elif (\n                collapse_tracked is not None\n                and tree.num_children(u) != 1\n                and tree.num_tracked_samples(u) >= collapse_tracked * tree.num_samples(u)\n            ):\n                yield u\n            else:\n                stack.append((u, True))\n                if key_dict is None:\n                    stack.extend((c, False) for c in tree.children(u))\n                else:\n                    stack.extend(\n             
           sorted(\n                            ((c, False) for c in tree.children(u)),\n                            key=lambda v: key_dict[v[0]],\n                            reverse=True,\n                        )\n                    )\n\n\ndef _postorder_tracked_minlex_traversal(tree, root=None, *, collapse_tracked=None):\n    \"\"\"\n    Postorder traversal for drawing purposes that places child nodes with the\n    most tracked sample descendants first (then sorts ties by minlex on leaf node ids).\n    Additionally, this traversal only descends into subtrees if they contain a tracked\n    node, and may not descend into other subtree, if the ``collapse_tracked``\n    parameter is set to a numeric value. More specifically, if the proportion of\n    tracked samples in the subtree is greater than or equal to ``collapse_tracked``,\n    the subtree is not descended into.\n    \"\"\"\n\n    key_dict = {}\n    parent_array = tree.parent_array\n    prev = tree.virtual_root\n    if root is None:\n        root = tskit.NULL\n    for u in _postorder_tracked_node_traversal(tree, root, collapse_tracked):\n        is_tip = parent_array[prev] != u\n        prev = u\n        if is_tip:\n            # Sort by number of tracked samples (desc), then by minlex\n            key_dict[u] = (-tree.num_tracked_samples(u), u)\n        else:\n            min_tip_id = min(key_dict[v][1] for v in tree.children(u) if v in key_dict)\n            key_dict[u] = (-tree.num_tracked_samples(u), min_tip_id)\n\n    return _postorder_tracked_node_traversal(\n        tree, root, collapse_tracked, key_dict=key_dict\n    )\n\n\ndef draw_tree(\n    tree,\n    width=None,\n    height=None,\n    node_labels=None,\n    node_colours=None,\n    mutation_labels=None,\n    mutation_colours=None,\n    format=None,  # noqa A002\n    edge_colours=None,\n    time_scale=None,\n    tree_height_scale=None,\n    max_time=None,\n    min_time=None,\n    max_tree_height=None,\n    order=None,\n    omit_sites=None,\n):\n  
  if time_scale is None and tree_height_scale is not None:\n        time_scale = tree_height_scale\n        # Deprecated in 0.3.6\n        warnings.warn(\n            \"tree_height_scale is deprecated; use time_scale instead\",\n            FutureWarning,\n            stacklevel=4,\n        )\n    if max_time is None and max_tree_height is not None:\n        max_time = max_tree_height\n        # Deprecated in 0.3.6\n        warnings.warn(\n            \"max_tree_height is deprecated; use max_time instead\",\n            FutureWarning,\n            stacklevel=4,\n        )\n\n    # See tree.draw() for documentation on these arguments.\n    fmt = check_format(format)\n    if fmt == \"svg\":\n        if width is None:\n            width = 200\n        if height is None:\n            height = 200\n\n        def remap_style(original_map, new_key, none_value):\n            if original_map is None:\n                return None\n            new_map = {}\n            for key, value in original_map.items():\n                if value is None:\n                    new_map[key] = {\"style\": none_value}\n                else:\n                    new_map[key] = {\"style\": f\"{new_key}:{value};\"}\n            return new_map\n\n        # Set style rather than fill & stroke directly to override top stylesheet\n        # Old semantics were to not draw the node if colour is None.\n        # Setting opacity to zero has the same effect.\n        node_attrs = remap_style(node_colours, \"fill\", \"fill-opacity:0;\")\n        edge_attrs = remap_style(edge_colours, \"stroke\", \"stroke-opacity:0;\")\n        mutation_attrs = remap_style(mutation_colours, \"fill\", \"fill-opacity:0;\")\n\n        node_label_attrs = None\n        tree = SvgTree(\n            tree,\n            (width, height),\n            node_labels=node_labels,\n            mutation_labels=mutation_labels,\n            time_scale=time_scale,\n            max_time=max_time,\n            min_time=min_time,\n            
node_attrs=node_attrs,\n            edge_attrs=edge_attrs,\n            node_label_attrs=node_label_attrs,\n            mutation_attrs=mutation_attrs,\n            order=order,\n            omit_sites=omit_sites,\n        )\n        return SVGString(tree.drawing.tostring())\n\n    else:\n        if width is not None:\n            raise ValueError(\"Text trees do not support width\")\n        if height is not None:\n            raise ValueError(\"Text trees do not support height\")\n        if mutation_labels is not None:\n            raise ValueError(\"Text trees do not support mutation_labels\")\n        if mutation_colours is not None:\n            raise ValueError(\"Text trees do not support mutation_colours\")\n        if node_colours is not None:\n            raise ValueError(\"Text trees do not support node_colours\")\n        if edge_colours is not None:\n            raise ValueError(\"Text trees do not support edge_colours\")\n        if time_scale is not None:\n            raise ValueError(\"Text trees do not support time_scale\")\n\n        use_ascii = fmt == \"ascii\"\n        text_tree = VerticalTextTree(\n            tree,\n            node_labels=node_labels,\n            max_time=max_time,\n            min_time=min_time,\n            use_ascii=use_ascii,\n            orientation=TOP,\n            order=order,\n        )\n        return str(text_tree)\n\n\ndef add_class(attrs_dict, classes_str):\n    \"\"\"Adds the classes_str to the 'class' key in attrs_dict, or creates it\"\"\"\n    try:\n        attrs_dict[\"class\"] += \" \" + classes_str\n    except KeyError:\n        attrs_dict[\"class\"] = classes_str\n\n\n@dataclass\nclass Plotbox:\n    total_size: list\n    pad_top: float = 0\n    pad_left: float = 0\n    pad_bottom: float = 0\n    pad_right: float = 0\n\n    def set_padding(self, top, left, bottom, right):\n        self.pad_top = top\n        self.pad_left = left\n        self.pad_bottom = bottom\n        self.pad_right = right\n        
self._check()\n\n    @property\n    def max_x(self):\n        return self.total_size[0]\n\n    @property\n    def max_y(self):\n        return self.total_size[1]\n\n    @property\n    def top(self):  # Alias for pad_top, for consistency with bottom\n        return self.pad_top\n\n    @property\n    def left(self):  # Alias for pad_left, for consistency with right\n        return self.pad_left\n\n    @property\n    def bottom(self):\n        return self.max_y - self.pad_bottom\n\n    @property\n    def right(self):\n        return self.max_x - self.pad_right\n\n    @property\n    def width(self):\n        return self.right - self.left\n\n    @property\n    def height(self):\n        return self.bottom - self.top\n\n    def __post_init__(self):\n        self._check()\n\n    def _check(self):\n        if self.width < 1 or self.height < 1:\n            raise ValueError(\"Image size too small to fit\")\n\n    def draw(self, dwg, add_to, colour=\"grey\"):\n        # used for debugging\n        add_to.add(\n            dwg.rect(\n                (0, 0),\n                (self.max_x, self.max_y),\n                fill=\"white\",\n                fill_opacity=0,\n                stroke=colour,\n                stroke_dasharray=\"15,15\",\n                class_=\"outer_plotbox\",\n            )\n        )\n        add_to.add(\n            dwg.rect(\n                (self.left, self.top),\n                (self.width, self.height),\n                fill=\"white\",\n                fill_opacity=0,\n                stroke=colour,\n                stroke_dasharray=\"5,5\",\n                class_=\"inner_plotbox\",\n            )\n        )\n\n\nclass SvgPlot:\n    \"\"\"\n    The base class for plotting any box to canvas\n    \"\"\"\n\n    text_height = 14  # May want to calculate this based on a font size\n    line_height = text_height * 1.2  # allowing padding above and below a line\n    default_width = 200  # for a single tree\n    default_height = 200\n\n    def __init__(\n        
self,\n        size,\n        svg_class,\n        root_svg_attributes=None,\n        canvas_size=None,\n        preamble=None,\n    ):\n        \"\"\"\n        Creates self.drawing, an svgwrite.Drawing object for further use, and populates\n        it with a base group. The root_groups will be populated with\n        items that can be accessed from the outside, such as the plotbox, axes, etc.\n        \"\"\"\n\n        if root_svg_attributes is None:\n            root_svg_attributes = {}\n        if canvas_size is None:\n            canvas_size = size\n        dwg = Drawing(size=canvas_size, **root_svg_attributes)\n\n        self.preamble = preamble\n        self.image_size = size\n        self.plotbox = Plotbox(size)\n        self.root_groups = {}\n        self.svg_class = svg_class\n        self.timescaling = None\n        self.root_svg_attributes = root_svg_attributes\n        self.dwg_base = dwg.add(dwg.g(class_=svg_class))\n        self.drawing = dwg\n\n    def draw(self, path=None):\n        if self.preamble is not None:\n            self.drawing.root.children[0] = self.preamble\n        output = self.drawing.tostring()\n        if path is not None:\n            # TODO remove the 'pretty' when we are done debugging this.\n            self.drawing.saveas(path, pretty=True)\n        return SVGString(output)\n\n    def get_plotbox(self):\n        \"\"\"\n        Get the svgwrite plotbox, creating it if necessary.\n        \"\"\"\n        if \"plotbox\" not in self.root_groups:\n            dwg = self.drawing\n            self.root_groups[\"plotbox\"] = self.dwg_base.add(dwg.g(class_=\"plotbox\"))\n        return self.root_groups[\"plotbox\"]\n\n    def add_text_in_group(self, text, add_to, pos, group_class=None, **kwargs):\n        \"\"\"\n        Add the text to the elem within a group; allows text rotations to work smoothly,\n        otherwise, if x & y parameters are used to position text, rotations applied to\n        the text tag occur around the (0,0) 
point of the containing group\n        \"\"\"\n        dwg = self.drawing\n        group_attributes = {\"transform\": f\"translate({rnd(pos[0])} {rnd(pos[1])})\"}\n        if group_class is not None:\n            group_attributes[\"class_\"] = group_class\n        grp = add_to.add(dwg.g(**group_attributes))\n        grp.add(dwg.text(text, **kwargs))\n\n\nclass SvgSkippedPlot(SvgPlot):\n    def __init__(\n        self,\n        size,\n        num_skipped,\n    ):\n        super().__init__(\n            size,\n            svg_class=\"skipped\",\n        )\n        container = self.get_plotbox()\n        x = self.plotbox.width / 2\n        y = self.plotbox.height / 2\n        self.add_text_in_group(\n            f\"{num_skipped} trees\",\n            container,\n            (x, y - self.line_height / 2),\n            text_anchor=\"middle\",\n        )\n        self.add_text_in_group(\n            \"skipped\", container, (x, y + self.line_height / 2), text_anchor=\"middle\"\n        )\n\n\nclass SvgAxisPlot(SvgPlot):\n    \"\"\"\n    The class used for plotting either a tree or a tree sequence as an SVG file\n    \"\"\"\n\n    standard_style = (\n        \".background path {fill: #808080; fill-opacity: 0}\"\n        \".background path:nth-child(odd) {fill-opacity: .1}\"\n        \".x-regions rect {fill: yellow; stroke: black; opacity: 0.5}\"  # opaque 4 overlap\n        \".axes {font-size: 14px}\"\n        \".x-axis .tick .lab {font-weight: bold; dominant-baseline: hanging}\"\n        \".axes, .tree {font-size: 14px; text-anchor: middle}\"\n        \".axes line, .edge {stroke: black; fill: none}\"\n        \".axes .ax-skip {stroke-dasharray: 4}\"\n        \".y-axis .grid {stroke: #FAFAFA}\"\n        \".node > .sym {fill: black; stroke: none}\"\n        \".site > .sym {stroke: black}\"\n        \".mut text {fill: red; font-style: italic}\"\n        \".mut.extra text {fill: hotpink}\"\n        \".mut line {fill: none; stroke: none}\"  # Default hide mut line to expose 
edges\n        \".mut .sym {fill: none; stroke: red}\"\n        \".mut.extra .sym {stroke: hotpink}\"\n        \".node .mut .sym {stroke-width: 1.5px}\"\n        \".tree text, .tree-sequence text {dominant-baseline: central}\"\n        \".plotbox .lab.lft {text-anchor: end}\"\n        \".plotbox .lab.rgt {text-anchor: start}\"\n        \".polytomy line {stroke: black; stroke-dasharray: 1px, 1px}\"\n        \".polytomy text {paint-order:stroke;stroke-width:0.3em;stroke:white}\"\n    )\n\n    # TODO: we may want to make some of the constants below into parameters\n    root_branch_fraction = 1 / 8  # Rel root branch len, unless it has a timed mutation\n    default_tick_length = 5\n    default_tick_length_site = 10\n    # Placement of the axes lines within the padding - not used unless axis is plotted\n    default_x_axis_offset = 20\n    default_y_axis_offset = 40\n\n    def __init__(\n        self,\n        ts,\n        size,\n        root_svg_attributes,\n        style,\n        svg_class,\n        time_scale,\n        x_axis=None,\n        y_axis=None,\n        x_label=None,\n        y_label=None,\n        offsets=None,\n        debug_box=None,\n        omit_sites=None,\n        canvas_size=None,\n        mutation_titles=None,\n        preamble=None,\n    ):\n        super().__init__(\n            size,\n            svg_class,\n            root_svg_attributes,\n            canvas_size,\n            preamble=preamble,\n        )\n        self.ts = ts\n        dwg = self.drawing\n        # Put all styles in a single stylesheet (required for Inkscape 0.92)\n        style = self.standard_style + (\"\" if style is None else style)\n        dwg.defs.add(dwg.style(style))\n        self.debug_box = debug_box\n        self.time_scale = check_time_scale(time_scale)\n        self.y_axis = check_y_axis(y_axis)\n        self.x_axis = x_axis\n        if x_label is None and x_axis:\n            x_label = \"Genome position\"\n        if y_label is None and y_axis:\n            if 
time_scale == \"rank\":\n                y_label = \"Node time\"\n            else:\n                y_label = \"Time ago\"\n            if ts.time_units != tskit.TIME_UNITS_UNKNOWN:\n                y_label += f\" ({ts.time_units})\"\n        self.x_label = x_label\n        self.y_label = y_label\n        self.offsets = Offsets() if offsets is None else offsets\n        self.omit_sites = omit_sites\n        self.mutation_titles = {} if mutation_titles is None else mutation_titles\n        self.mutations_outside_tree = set()  # mutations in here get an additional class\n\n    def set_spacing(self, top=0, left=0, bottom=0, right=0):\n        \"\"\"\n        Set edges, but allow space for axes etc\n        \"\"\"\n        self.x_axis_offset = self.default_x_axis_offset\n        self.y_axis_offset = self.default_y_axis_offset\n        if self.x_label:\n            self.x_axis_offset += self.line_height\n        if self.y_label:\n            self.y_axis_offset += self.line_height\n        if self.x_axis:\n            bottom += self.x_axis_offset\n        if self.y_axis == \"left\":\n            left = (\n                self.y_axis_offset\n            )  # Override user-provided values, so y-axis is at x=0\n        if self.y_axis == \"right\":\n            right = self.y_axis_offset\n        self.plotbox.set_padding(top, left, bottom, right)\n        if self.debug_box:\n            self.root_groups[\"debug\"] = self.dwg_base.add(self.drawing.g(class_=\"debug\"))\n            self.plotbox.draw(self.drawing, self.root_groups[\"debug\"])\n\n    def get_axes(self):\n        if \"axes\" not in self.root_groups:\n            self.root_groups[\"axes\"] = self.dwg_base.add(self.drawing.g(class_=\"axes\"))\n        return self.root_groups[\"axes\"]\n\n    def draw_x_axis(\n        self,\n        tick_positions=None,  # np.array of ax ticks below (+ above if sites is None)\n        tick_labels=None,  # Tick labels below axis. 
If None, use the position value\n        tick_length_lower=default_tick_length,\n        tick_length_upper=None,  # If None, use the same as tick_length_lower\n        site_muts=None,  # A dict of site id => mutation to plot as ticks on the x axis\n        alternate_dash_positions=None,  # Where to alternate the axis from solid to dash\n        x_regions=None,  # A dict of (left, right):label items to place in boxes\n    ):\n        if not self.x_axis:\n            return\n        if alternate_dash_positions is None:\n            alternate_dash_positions = np.array([])\n        if x_regions is None:\n            x_regions = {}\n        dwg = self.drawing\n        axes = self.get_axes()\n        x_axis = axes.add(dwg.g(class_=\"x-axis\"))\n        if self.x_label:\n            self.add_text_in_group(\n                self.x_label,\n                x_axis,\n                pos=((self.plotbox.left + self.plotbox.right) / 2, self.plotbox.max_y),\n                group_class=\"title\",\n                class_=\"lab\",\n                transform=\"translate(0 -11)\",\n                text_anchor=\"middle\",\n            )\n        if len(x_regions) > 0:\n            regions_group = x_axis.add(dwg.g(class_=\"x-regions\"))\n            for i, ((left, right), label) in enumerate(x_regions.items()):\n                if not (0 <= left < right <= self.ts.sequence_length):\n                    raise ValueError(\n                        f\"Invalid coordinates ({left} to {right}) for x-axis region\"\n                    )\n                x1 = self.x_transform(left)\n                x2 = self.x_transform(right)\n                y = self.plotbox.max_y - self.x_axis_offset\n                region = regions_group.add(dwg.g(class_=f\"r{i}\"))\n                region.add(dwg.rect((x1, y), (x2 - x1, self.line_height), class_=\"r{i}\"))\n                self.add_text_in_group(\n                    label,\n                    region,\n                    pos=((x2 + x1) / 2, y + 
self.line_height / 2),\n                    class_=\"lab\",\n                    text_anchor=\"middle\",\n                )\n        if tick_length_upper is None:\n            tick_length_upper = tick_length_lower\n        y = rnd(self.plotbox.max_y - self.x_axis_offset)\n        dash_locs = np.concatenate(\n            (\n                [self.plotbox.left],\n                self.x_transform(alternate_dash_positions),\n                [self.plotbox.right],\n            )\n        )\n        for i, (x1, x2) in enumerate(zip(dash_locs[:-1], dash_locs[1:])):\n            x_axis.add(\n                dwg.line(\n                    (rnd(x1), y),\n                    (rnd(x2), y),\n                    class_=\"ax-skip\" if i % 2 else \"ax-line\",\n                )\n            )\n        if tick_positions is not None:\n            if tick_labels is None or isinstance(tick_labels, np.ndarray):\n                if tick_labels is None:\n                    tick_labels = tick_positions\n                tick_labels = create_tick_labels(tick_labels)  # format integers\n\n            upper_length = -tick_length_upper if site_muts is None else 0\n            ticks_group = x_axis.add(dwg.g(class_=\"ticks\"))\n            for pos, lab in itertools.zip_longest(tick_positions, tick_labels):\n                tick = ticks_group.add(\n                    dwg.g(\n                        class_=\"tick\",\n                        transform=f\"translate({rnd(self.x_transform(pos))} {y})\",\n                    )\n                )\n                tick.add(dwg.line((0, rnd(upper_length)), (0, rnd(tick_length_lower))))\n                self.add_text_in_group(\n                    lab,\n                    tick,\n                    class_=\"lab\",\n                    # place origin at the bottom of the tick plus a single px space\n                    pos=(0, tick_length_lower + 1),\n                )\n        if not self.omit_sites and site_muts is not None:\n            # Add sites as 
vertical lines with overlaid mutations as upper chevrons\n            for s_id, mutations in site_muts.items():\n                s = self.ts.site(s_id)\n                x = self.x_transform(s.position)\n                site = x_axis.add(\n                    dwg.g(\n                        class_=f\"site s{s.id + self.offsets.site}\",\n                        transform=f\"translate({rnd(x)} {y})\",\n                    )\n                )\n                site.add(dwg.line((0, 0), (0, rnd(-tick_length_upper)), class_=\"sym\"))\n                for i, m in enumerate(reversed(mutations)):\n                    mutation_class = f\"mut m{m.id + self.offsets.mutation}\"\n                    if m.id in self.mutations_outside_tree:\n                        mutation_class += \" extra\"\n                    mut = dwg.g(class_=mutation_class)\n                    h = -i * 4 - 1.5\n                    w = tick_length_upper / 4\n                    # Chevron symbol\n                    symbol = mut.add(\n                        dwg.polyline(\n                            [\n                                (rnd(w), rnd(h - 2 * w)),\n                                (0, rnd(h)),\n                                (rnd(-w), rnd(h - 2 * w)),\n                            ],\n                            class_=\"sym\",\n                        )\n                    )\n                    if m.id in self.mutation_titles:\n                        symbol.set_desc(title=self.mutation_titles[m.id])\n                    site.add(mut)\n\n    def draw_y_axis(\n        self,\n        ticks,  # A dict of pos->label\n        upper=None,  # In plot coords\n        lower=None,  # In plot coords\n        tick_length_outer=default_tick_length,  # Positive means towards the outside\n        gridlines=None,\n        side=\"left\",  # 'left' or 'right', where the axis is drawn\n    ):\n        if not self.y_axis and not self.y_label:\n            return\n        if upper is None:\n            upper = 
self.plotbox.top\n        if lower is None:\n            lower = self.plotbox.bottom\n        dwg = self.drawing\n        if side == \"left\":\n            x = rnd(self.y_axis_offset)\n            width = self.plotbox.right - x\n            direction = -1\n            text_anchor = \"end\"\n            pos = (0, (upper + lower) / 2)\n            transform = \"translate(11) rotate(-90)\"\n        else:\n            x = rnd(self.plotbox.max_x - self.y_axis_offset)\n            width = x - self.plotbox.left\n            direction = 1\n            text_anchor = \"start\"\n            pos = (self.plotbox.max_x, (upper + lower) / 2)\n            transform = \"translate(-11) rotate(90)\"\n        axes = self.get_axes()\n        y_axis = axes.add(dwg.g(class_=\"y-axis\"))\n        if self.y_label:\n            self.add_text_in_group(\n                self.y_label,\n                y_axis,\n                pos=pos,\n                group_class=\"title\",\n                class_=\"lab\",\n                text_anchor=\"middle\",\n                transform=transform,\n            )\n        if self.y_axis:\n            y_axis.add(dwg.line((x, rnd(lower)), (x, rnd(upper)), class_=\"ax-line\"))\n            ticks_group = y_axis.add(dwg.g(class_=\"ticks\"))\n            tick_outside_axis = {}\n            for y, label in ticks.items():\n                y_pos = self.timescaling.transform(y)\n                if y_pos > lower or y_pos < upper:  # nb lower > upper in SVG coords\n                    tick_outside_axis[y] = label\n                tick = ticks_group.add(\n                    dwg.g(class_=\"tick\", transform=f\"translate({x} {rnd(y_pos)})\")\n                )\n                if gridlines:\n                    tick.add(dwg.line((0, 0), (rnd(width), 0), class_=\"grid\"))\n                tick.add(dwg.line((0, 0), (rnd(direction * tick_length_outer), 0)))\n                self.add_text_in_group(\n                    # place the origin at the left of the tickmark plus a 
single px space\n                    label,\n                    tick,\n                    pos=(rnd(direction * (tick_length_outer + 1)), 0),\n                    class_=\"lab\",\n                    text_anchor=text_anchor,\n                )\n            if len(tick_outside_axis) > 0:\n                logging.warning(\n                    f\"Ticks {tick_outside_axis} lie outside the plotted axis\"\n                )\n\n    def shade_background(\n        self,\n        breaks,\n        tick_length_lower,\n        tree_width=None,\n        bottom_padding=None,\n    ):\n        if not self.x_axis:\n            return\n        if tree_width is None:\n            tree_width = self.plotbox.width\n        if bottom_padding is None:\n            bottom_padding = self.plotbox.pad_bottom\n        plot_breaks = self.x_transform(np.array(breaks))\n        dwg = self.drawing\n\n        # For tree sequences, we need to add on the background shaded regions\n        self.root_groups[\"background\"] = self.dwg_base.add(dwg.g(class_=\"background\"))\n        y = self.image_size[1] - self.x_axis_offset - self.plotbox.top\n        for i in range(1, len(breaks)):\n            break_x = plot_breaks[i]\n            prev_break_x = plot_breaks[i - 1]\n            tree_x = i * tree_width + self.plotbox.left\n            prev_tree_x = (i - 1) * tree_width + self.plotbox.left\n            # Shift diagonal lines between tree & axis into the treebox a little\n            diag_height = y - (self.image_size[1] - bottom_padding) + self.plotbox.top\n            self.root_groups[\"background\"].add(\n                # NB: the path below draws straight diagonal lines between the tree boxes\n                # and the X axis. 
An alternative implementation using bezier curves could\n                # substitute the following for lines 2 and 4 of the path spec string\n                # \"l0,{box_h:g} c0,{diag_h} {rdiag_x},0 {rdiag_x},{diag_h} \"\n                # \"c0,-{diag_h} {ldiag_x},0 {ldiag_x},-{diag_h} l0,-{box_h:g}z\"\n                dwg.path(\n                    \"M{start_x:g},{top:g} l{box_w:g},0 \"  # Top left to top right of tree\n                    \"l0,{box_h:g} l{rdiag_x:g},{diag_h:g} \"  # Down to axis\n                    \"l0,{tick_h:g} l{ax_x:g},0 l0,-{tick_h:g} \"  # Between axis ticks\n                    \"l{ldiag_x:g},-{diag_h:g} l0,-{box_h:g}z\".format(  # Up from axis\n                        top=rnd(self.plotbox.top),\n                        start_x=rnd(prev_tree_x),\n                        box_w=rnd(tree_x - prev_tree_x),\n                        box_h=rnd(y - diag_height),\n                        rdiag_x=rnd(break_x - tree_x),\n                        diag_h=rnd(diag_height),\n                        tick_h=rnd(tick_length_lower),\n                        ax_x=rnd(prev_break_x - break_x),\n                        ldiag_x=rnd(rnd(prev_tree_x) - rnd(prev_break_x)),\n                    )\n                )\n            )\n\n    def x_transform(self, x):\n        raise NotImplementedError(\n            \"No transform func defined for genome pos -> plot coords\"\n        )\n\n\nclass SvgTreeSequence(SvgAxisPlot):\n    \"\"\"\n    A class to draw a tree sequence in SVG format.\n\n    See :meth:`TreeSequence.draw_svg` for a description of usage and parameters.\n    \"\"\"\n\n    def __init__(\n        self,\n        ts,\n        size,\n        x_scale,\n        time_scale,\n        node_labels,\n        mutation_labels,\n        root_svg_attributes,\n        style,\n        order,\n        force_root_branch,\n        symbol_size,\n        x_axis,\n        y_axis,\n        x_label,\n        y_label,\n        y_ticks,\n        x_regions=None,\n        
y_gridlines=None,\n        x_lim=None,\n        max_time=None,\n        min_time=None,\n        node_attrs=None,\n        mutation_attrs=None,\n        edge_attrs=None,\n        node_label_attrs=None,\n        mutation_label_attrs=None,\n        node_titles=None,\n        mutation_titles=None,\n        tree_height_scale=None,\n        max_tree_height=None,\n        max_num_trees=None,\n        title=None,\n        preamble=None,\n        **kwargs,\n    ):\n        if max_time is None and max_tree_height is not None:\n            max_time = max_tree_height\n            # Deprecated in 0.3.6\n            warnings.warn(\n                \"max_tree_height is deprecated; use max_time instead\",\n                FutureWarning,\n                stacklevel=4,\n            )\n        if time_scale is None and tree_height_scale is not None:\n            time_scale = tree_height_scale\n            # Deprecated in 0.3.6\n            warnings.warn(\n                \"tree_height_scale is deprecated; use time_scale instead\",\n                FutureWarning,\n                stacklevel=4,\n            )\n        x_lim = check_x_lim(x_lim, max_x=ts.sequence_length)\n        ts, self.tree_status, offsets = clip_ts(ts, x_lim[0], x_lim[1], max_num_trees)\n\n        use_tree = self.tree_status & OMIT == 0\n        use_skipped = np.append(np.diff(self.tree_status & OMIT_MIDDLE == 0) == 1, 0)\n        num_plotboxes = np.sum(np.logical_or(use_tree, use_skipped))\n        if size is None:\n            size = (self.default_width * int(num_plotboxes), self.default_height)\n        if max_time is None:\n            max_time = \"ts\"\n        if min_time is None:\n            min_time = \"ts\"\n        # X axis shown by default\n        if x_axis is None:\n            x_axis = True\n        super().__init__(\n            ts,\n            size,\n            root_svg_attributes,\n            style,\n            svg_class=\"tree-sequence\",\n            time_scale=time_scale,\n            
x_axis=x_axis,\n            y_axis=y_axis,\n            x_label=x_label,\n            y_label=y_label,\n            offsets=offsets,\n            mutation_titles=mutation_titles,\n            preamble=preamble,\n            **kwargs,\n        )\n        x_scale = check_x_scale(x_scale)\n        order = check_order(order)\n        if node_labels is None:\n            node_labels = {u: str(u) for u in range(ts.num_nodes)}\n        if force_root_branch is None:\n            force_root_branch = any(\n                any(tree.parent(mut.node) == NULL for mut in tree.mutations())\n                for tree, use in zip(ts.trees(), use_tree)\n                if use\n            )\n\n        # TODO add general padding arguments following matplotlib's terminology.\n        self.set_spacing(\n            top=0 if title is None else self.line_height, left=20, bottom=10, right=20\n        )\n        subplot_size = (self.plotbox.width / num_plotboxes, self.plotbox.height)\n        subplots = []\n        for tree, use, summary in zip(ts.trees(), use_tree, use_skipped):\n            if use:\n                subplots.append(\n                    SvgTree(\n                        tree,\n                        size=subplot_size,\n                        time_scale=time_scale,\n                        node_labels=node_labels,\n                        mutation_labels=mutation_labels,\n                        node_titles=node_titles,\n                        mutation_titles=mutation_titles,\n                        order=order,\n                        force_root_branch=force_root_branch,\n                        symbol_size=symbol_size,\n                        max_time=max_time,\n                        min_time=min_time,\n                        node_attrs=node_attrs,\n                        mutation_attrs=mutation_attrs,\n                        edge_attrs=edge_attrs,\n                        node_label_attrs=node_label_attrs,\n                        
mutation_label_attrs=mutation_label_attrs,\n                        offsets=offsets,\n                        # Do not plot axes on these subplots\n                        **kwargs,  # pass through e.g. debug boxes\n                    )\n                )\n                last_used_index = tree.index\n            elif summary:\n                subplots.append(\n                    SvgSkippedPlot(\n                        size=subplot_size, num_skipped=tree.index - last_used_index\n                    )\n                )\n        y_offset = self.plotbox.top\n        if title is not None:\n            self.add_text_in_group(\n                title,\n                self.drawing,\n                pos=(self.plotbox.max_x / 2, 0),\n                dominant_baseline=\"hanging\",\n                group_class=\"title\",\n                text_anchor=\"middle\",\n            )\n        self.tree_plotbox = subplots[0].plotbox\n        tree_is_used, breaks, skipbreaks = self.find_used_trees()\n        self.draw_x_axis(\n            x_scale,\n            tree_is_used,\n            breaks,\n            skipbreaks,\n            tick_length_lower=self.default_tick_length,  # TODO - parameterize\n            tick_length_upper=self.default_tick_length_site,  # TODO - parameterize\n            x_regions=x_regions,\n        )\n        y_low = self.tree_plotbox.bottom + y_offset\n        if self.y_axis:\n            tscales = {s.timescaling for s in subplots if s.timescaling}\n            if len(tscales) > 1:\n                raise ValueError(\n                    \"Can't draw a tree sequence Y axis if trees vary in timescale\"\n                )\n            # The timescaling of all subplots is used for outer box, but we\n            # need to shift it by the top padding to account for e.g. 
titles\n            self.timescaling = tscales.pop().with_offset(y_offset)\n            y_low = self.timescaling.transform(self.timescaling.min_time)\n            if y_ticks is None:\n                used_nodes = edge_and_sample_nodes(ts, breaks[skipbreaks])\n                y_ticks = np.unique(ts.nodes_time[used_nodes])\n                if self.time_scale == \"rank\":\n                    # Ticks labelled by time not rank\n                    y_ticks = dict(enumerate(y_ticks))\n\n        self.draw_y_axis(\n            ticks=check_y_ticks(y_ticks),\n            upper=self.tree_plotbox.top + y_offset,\n            lower=y_low,\n            tick_length_outer=self.default_tick_length,\n            gridlines=y_gridlines,\n            side=\"right\" if self.y_axis == \"right\" else \"left\",\n        )\n\n        subplot_x = self.plotbox.left\n        container = self.get_plotbox()  # Top-level TS plotbox contains all trees\n        container[\"class\"] = container[\"class\"] + \" trees\"\n        for subplot in subplots:\n            svg_subplot = container.add(\n                self.drawing.g(\n                    class_=subplot.svg_class,\n                    transform=f\"translate({rnd(subplot_x)} {y_offset})\",\n                )\n            )\n            for svg_items in subplot.root_groups.values():\n                svg_subplot.add(svg_items)\n            subplot_x += subplot.image_size[0]\n\n    def find_used_trees(self):\n        \"\"\"\n        Return a boolean array of which trees are actually plotted,\n        a list of which breakpoints are used to transition between plotted trees,\n        and a 2 x n array (often n=0) of indexes into these breakpoints delimiting\n        the regions that should be plotted as \"skipped\"\n        \"\"\"\n        tree_is_used = (self.tree_status & OMIT) != OMIT\n        break_used_as_tree_left = np.append(tree_is_used, False)\n        break_used_as_tree_right = np.insert(tree_is_used, 0, False)\n        break_used = 
np.logical_or(break_used_as_tree_left, break_used_as_tree_right)\n        all_breaks = self.ts.breakpoints(True)\n        used_breaks = all_breaks[break_used]\n        mark_skip_transitions = np.concatenate(\n            ([False], np.diff(self.tree_status & OMIT_MIDDLE) != 0, [False])\n        )\n        skipregion_indexes = np.where(mark_skip_transitions[break_used])[0]\n        assert len(skipregion_indexes) % 2 == 0  # all skipped regions have start, end\n        return tree_is_used, used_breaks, skipregion_indexes.reshape((-1, 2))\n\n    def draw_x_axis(\n        self,\n        x_scale,\n        tree_is_used,\n        breaks,\n        skipbreaks,\n        x_regions,\n        tick_length_lower=SvgAxisPlot.default_tick_length,\n        tick_length_upper=SvgAxisPlot.default_tick_length_site,\n    ):\n        \"\"\"\n        Add extra functionality to the original draw_x_axis method in SvgAxisPlot,\n        to account for the background shading that is displayed in a tree sequence\n        and in case trees are omitted from the middle of the tree sequence\n        \"\"\"\n        if not self.x_axis and not self.x_label:\n            return\n        if x_scale == \"physical\":\n            # In a tree sequence plot, the x_transform is used for the ticks, background\n            # shading positions, and sites along the x-axis. Each tree will have its own\n            # separate x_transform function for node positions within the tree.\n\n            # For a plot with a break on the x-axis (representing \"skipped\" trees), the\n            # x_transform is a piecewise function. 
We need to identify the breakpoints\n            # where the x-scale transitions from the standard scale to the scale(s) used\n            # within a skipped region\n\n            skipregion_plot_width = self.tree_plotbox.width\n            skipregion_span = np.diff(breaks[skipbreaks]).T[0]\n            std_scale = (\n                self.plotbox.width - skipregion_plot_width * len(skipregion_span)\n            ) / (breaks[-1] - breaks[0] - np.sum(skipregion_span))\n            skipregion_pos = breaks[skipbreaks].flatten()\n            genome_pos = np.concatenate(([breaks[0]], skipregion_pos, [breaks[-1]]))\n            plot_step = np.full(len(genome_pos) - 1, skipregion_plot_width)\n            plot_step[::2] = std_scale * np.diff(genome_pos)[::2]\n            plot_pos = np.cumsum(np.insert(plot_step, 0, self.plotbox.left))\n            # Convert to slope + intercept form\n            slope = np.diff(plot_pos) / np.diff(genome_pos)\n            intercept = plot_pos[1:] - slope * genome_pos[1:]\n            self.x_transform = lambda y: (\n                y * slope[np.searchsorted(skipregion_pos, y)]\n                + intercept[np.searchsorted(skipregion_pos, y)]\n            )\n            tick_positions = breaks\n            site_muts = {\n                s.id: s.mutations\n                for tree, use in zip(self.ts.trees(), tree_is_used)\n                for s in tree.sites()\n                if use\n            }\n\n            self.shade_background(\n                breaks,\n                tick_length_lower,\n                self.tree_plotbox.max_x,\n                self.plotbox.pad_bottom + self.tree_plotbox.pad_bottom,\n            )\n        else:\n            # For a treewise plot, the only time the x_transform is used is to apply\n            # to tick positions, so simply use positions 0..num_used_breaks for the\n            # positions, and a simple transform\n            self.x_transform = lambda x: (\n                self.plotbox.left + x / 
(len(breaks) - 1) * self.plotbox.width\n            )\n            tick_positions = np.arange(len(breaks))\n\n            site_muts = None  # It doesn't make sense to plot sites for \"treewise\" plots\n            tick_length_upper = None  # No sites plotted, so use the default upper tick\n            if x_regions is not None and len(x_regions) > 0:\n                raise ValueError(\"x_regions are not supported for treewise plots\")\n\n            # NB: no background shading needed if x_scale is \"treewise\"\n\n            skipregion_pos = skipbreaks.flatten()\n\n        first_tick = 1 if np.any(self.tree_status[tree_is_used] & LEFT_CLIP) else 0\n        last_tick = -1 if np.any(self.tree_status[tree_is_used] & RIGHT_CLIP) else None\n\n        super().draw_x_axis(\n            tick_positions=tick_positions[first_tick:last_tick],\n            tick_labels=breaks[first_tick:last_tick],\n            tick_length_lower=tick_length_lower,\n            tick_length_upper=tick_length_upper,\n            site_muts=site_muts,\n            alternate_dash_positions=skipregion_pos,\n            x_regions=x_regions,\n        )\n\n\nclass SvgTree(SvgAxisPlot):\n    \"\"\"\n    A class to draw a tree in SVG format.\n\n    See :meth:`Tree.draw_svg` for a description of usage and frequently used parameters.\n    \"\"\"\n\n    PolytomyLine = collections.namedtuple(\n        \"PolytomyLine\", \"num_branches, num_samples, line_pos\"\n    )\n    margin_left = 20\n    margin_right = 20\n    margin_top = 10  # oldest point is line_height below or 2*line_height if title given\n    margin_bottom = 15  # youngest plot points are line_height above this bottom margin\n\n    def __init__(\n        self,\n        tree,\n        size=None,\n        max_time=None,\n        min_time=None,\n        max_tree_height=None,\n        node_labels=None,\n        mutation_labels=None,\n        node_titles=None,\n        mutation_titles=None,\n        root_svg_attributes=None,\n        style=None,\n        
order=None,\n        force_root_branch=None,\n        symbol_size=None,\n        x_axis=None,\n        y_axis=None,\n        x_label=None,\n        y_label=None,\n        title=None,\n        x_regions=None,\n        y_ticks=None,\n        y_gridlines=None,\n        all_edge_mutations=None,\n        time_scale=None,\n        tree_height_scale=None,\n        node_attrs=None,\n        mutation_attrs=None,\n        edge_attrs=None,\n        node_label_attrs=None,\n        mutation_label_attrs=None,\n        offsets=None,\n        omit_sites=None,\n        pack_untracked_polytomies=None,\n        preamble=None,\n        **kwargs,\n    ):\n        if max_time is None and max_tree_height is not None:\n            max_time = max_tree_height\n            # Deprecated in 0.3.6\n            warnings.warn(\n                \"max_tree_height is deprecated; use max_time instead\",\n                FutureWarning,\n                stacklevel=4,\n            )\n        if time_scale is None and tree_height_scale is not None:\n            time_scale = tree_height_scale\n            # Deprecated in 0.3.6\n            warnings.warn(\n                \"tree_height_scale is deprecated; use time_scale instead\",\n                FutureWarning,\n                stacklevel=4,\n            )\n        if size is None:\n            size = (self.default_width, self.default_height)\n        if symbol_size is None:\n            symbol_size = 6\n        self.symbol_size = symbol_size\n        self.pack_untracked_polytomies = pack_untracked_polytomies\n        ts = tree.tree_sequence\n        tree_index = tree.index\n        if offsets is not None:\n            tree_index += offsets.tree\n        super().__init__(\n            ts,\n            size,\n            root_svg_attributes,\n            style,\n            svg_class=f\"tree t{tree_index}\",\n            time_scale=time_scale,\n            x_axis=x_axis,\n            y_axis=y_axis,\n            x_label=x_label,\n            
y_label=y_label,\n            offsets=offsets,\n            omit_sites=omit_sites,\n            preamble=preamble,\n            **kwargs,\n        )\n        self.tree = tree\n        if order is None or isinstance(order, str):\n            # Can't use the Tree.postorder array as we need minlex\n            self.postorder_nodes = list(tree.nodes(order=check_order(order)))\n        else:\n            # Currently undocumented feature: we can pass a (postorder) list\n            # of nodes to plot, which allows us to draw a subset of nodes, or\n            # stop traversing certain subtrees\n            self.postorder_nodes = order\n\n        # Create some instance variables for later use in plotting\n        self.node_mutations = collections.defaultdict(list)\n        self.edge_attrs = {}\n        self.node_attrs = {}\n        self.node_label_attrs = {}\n        self.mutation_attrs = {}\n        self.mutation_label_attrs = {}\n        self.node_titles = {} if node_titles is None else node_titles\n        self.mutation_titles = {} if mutation_titles is None else mutation_titles\n        self.mutations_over_roots = False\n        # mutations collected per node\n        nodes = set(tree.nodes())\n        unplotted = []\n        if not omit_sites:\n            for site in tree.sites():\n                for mutation in site.mutations:\n                    if mutation.node in nodes:\n                        self.node_mutations[mutation.node].append(mutation)\n                        if tree.parent(mutation.node) == NULL:\n                            self.mutations_over_roots = True\n                    else:\n                        unplotted.append(mutation.id + self.offsets.mutation)\n        if len(unplotted) > 0:\n            warnings.warn(\n                f\"Mutations {unplotted} are above nodes which are not present in the \"\n                \"displayed tree, so are not plotted on the topology.\",\n                UserWarning,\n                stacklevel=2,\n       
     )\n        self.left_extent = tree.interval.left\n        self.right_extent = tree.interval.right\n        if not omit_sites and all_edge_mutations:\n            tree_left = tree.interval.left\n            tree_right = tree.interval.right\n            edge_left = ts.tables.edges.left\n            edge_right = ts.tables.edges.right\n            node_edges = tree.edge_array\n            # whittle mutations down so we only need look at those above the tree nodes\n            mut_t = ts.tables.mutations\n            focal_mutations = np.isin(mut_t.node, np.fromiter(nodes, mut_t.node.dtype))\n            mutation_nodes = mut_t.node[focal_mutations]\n            mutation_positions = ts.tables.sites.position[mut_t.site][focal_mutations]\n            mutation_ids = np.arange(ts.num_mutations, dtype=int)[focal_mutations]\n            for m_id, node, pos in zip(mutation_ids, mutation_nodes, mutation_positions):\n                curr_edge = node_edges[node]\n                if curr_edge >= 0:\n                    if (\n                        edge_left[curr_edge] <= pos < tree_left\n                    ):  # Mutation on this edge but to left of plotted tree\n                        self.node_mutations[node].append(ts.mutation(m_id))\n                        self.mutations_outside_tree.add(m_id)\n                        self.left_extent = min(self.left_extent, pos)\n                    elif (\n                        tree_right <= pos < edge_right[curr_edge]\n                    ):  # Mutation on this edge but to right of plotted tree\n                        self.node_mutations[node].append(ts.mutation(m_id))\n                        self.mutations_outside_tree.add(m_id)\n                        self.right_extent = max(self.right_extent, pos)\n            if self.right_extent != tree.interval.right:\n                # Use nextafter so extent of plotting incorporates the mutation\n                self.right_extent = np.nextafter(\n                    self.right_extent, 
self.right_extent + 1\n                )\n        # attributes for symbols\n        half_symbol_size = f\"{rnd(symbol_size / 2):g}\"\n        symbol_size = f\"{rnd(symbol_size):g}\"\n        for u in tree.nodes():\n            self.edge_attrs[u] = {}\n            if edge_attrs is not None and u in edge_attrs:\n                self.edge_attrs[u].update(edge_attrs[u])\n            if tree.is_sample(u):\n                # a square: set bespoke svgwrite params\n                self.node_attrs[u] = {\n                    \"size\": (symbol_size,) * 2,\n                    \"insert\": (\"-\" + half_symbol_size,) * 2,\n                }\n            else:\n                # a circle: set bespoke svgwrite param `centre` and default radius\n                self.node_attrs[u] = {\"center\": (0, 0), \"r\": half_symbol_size}\n            if node_attrs is not None and u in node_attrs:\n                self.node_attrs[u].update(node_attrs[u])\n            add_class(self.node_attrs[u], \"sym\")  # class 'sym' for symbol\n            label = \"\"\n            if node_labels is None:\n                label = str(u)\n            elif u in node_labels:\n                label = str(node_labels[u])\n            self.node_label_attrs[u] = {\"text\": label}\n            add_class(self.node_label_attrs[u], \"lab\")  # class 'lab' for label\n            if node_label_attrs is not None and u in node_label_attrs:\n                self.node_label_attrs[u].update(node_label_attrs[u])\n        for _, mutations in self.node_mutations.items():\n            for mutation in mutations:\n                m = mutation.id + self.offsets.mutation\n                # We need to offset the mutation symbol so that it's centred\n                self.mutation_attrs[m] = {\n                    \"d\": \"M -{0},-{0} l {1},{1} M -{0},{0} l {1},-{1}\".format(\n                        half_symbol_size, symbol_size\n                    )\n                }\n                if mutation_attrs is not None and m in 
mutation_attrs:\n                    self.mutation_attrs[m].update(mutation_attrs[m])\n                add_class(self.mutation_attrs[m], \"sym\")  # class 'sym' for symbol\n                label = \"\"\n                if mutation_labels is None:\n                    label = str(m)\n                elif m in mutation_labels:\n                    label = str(mutation_labels[m])\n                self.mutation_label_attrs[m] = {\"text\": label}\n                if mutation_label_attrs is not None and m in mutation_label_attrs:\n                    self.mutation_label_attrs[m].update(mutation_label_attrs[m])\n                add_class(self.mutation_label_attrs[m], \"lab\")\n\n        self.set_spacing(\n            top=self.margin_top + (0 if title is None else self.line_height),\n            left=self.margin_left,\n            bottom=self.margin_bottom,\n            right=self.margin_right,\n        )\n        if title is not None:\n            self.add_text_in_group(\n                title,\n                self.drawing,\n                pos=(self.plotbox.max_x / 2, 0),\n                dominant_baseline=\"hanging\",\n                group_class=\"title\",\n                text_anchor=\"middle\",\n            )\n\n        self.assign_x_coordinates()\n        self.assign_y_coordinates(max_time, min_time, force_root_branch)\n        tick_length_lower = self.default_tick_length  # TODO - parameterize\n        tick_length_upper = self.default_tick_length_site  # TODO - parameterize\n        if all_edge_mutations:\n            self.shade_background(tree.interval, tick_length_lower)\n\n        first_site, last_site = np.searchsorted(\n            self.ts.tables.sites.position, [self.left_extent, self.right_extent]\n        )\n        site_muts = {site_id: [] for site_id in range(first_site, last_site)}\n        # Only use mutations plotted on the tree (not necessarily all at the site)\n        for muts in self.node_mutations.values():\n            for mut in muts:\n         
       site_muts[mut.site].append(mut)\n\n        self.draw_x_axis(\n            tick_positions=np.array(tree.interval),\n            tick_length_lower=tick_length_lower,\n            tick_length_upper=tick_length_upper,\n            site_muts=site_muts,\n            x_regions=x_regions,\n        )\n        if y_ticks is None:\n            y_ticks = {h: ts.node(u).time for u, h in sorted(self.node_height.items())}\n\n        self.draw_y_axis(\n            ticks=check_y_ticks(y_ticks),\n            lower=self.timescaling.transform(self.timescaling.min_time),\n            tick_length_outer=self.default_tick_length,\n            gridlines=y_gridlines,\n            side=\"right\" if y_axis == \"right\" else \"left\",\n        )\n        self.draw_tree()\n\n    def process_mutations_over_node(self, u, low_bound, high_bound, ignore_times=False):\n        \"\"\"\n        Sort the self.node_mutations array for a given node ``u`` in reverse time order.\n        The main complication is with UNKNOWN_TIME values: we replace these with times\n        spaced between the low & high bounds (this is always done if ignore_times=True).\n        We do not currently allow a mix of known & unknown mutation times in a tree\n        sequence, which makes the logic easy. 
If we were to allow it, more complex\n        logic can be neatly encapsulated in this method.\n        \"\"\"\n        mutations = self.node_mutations[u]\n        time_unknown = [util.is_unknown_time(m.time) for m in mutations]\n        if all(time_unknown) or ignore_times is True:\n            # sort by site then within site by parent: will end up with oldest first\n            mutations.sort(key=operator.attrgetter(\"site\", \"parent\"))\n            diff = high_bound - low_bound\n            for i in range(len(mutations)):\n                mutations[i].time = high_bound - diff * (i + 1) / (len(mutations) + 1)\n        else:\n            assert not any(time_unknown)\n            mutations.sort(key=operator.attrgetter(\"time\"), reverse=True)\n\n    def assign_y_coordinates(\n        self,\n        max_time,\n        min_time,\n        force_root_branch,\n        bottom_space=SvgAxisPlot.line_height,\n        top_space=SvgAxisPlot.line_height,\n    ):\n        \"\"\"\n        Create a self.node_height dict, a self.timescaling instance and\n        self.min_root_branch_plot_length for use in plotting. 
Allow extra space within\n        the plotbox, at the bottom for leaf labels, and  (potentially, if no root\n        branches are plotted) above the topmost root node for root labels.\n        \"\"\"\n        max_time = check_max_time(max_time, self.time_scale != \"rank\")\n        min_time = check_min_time(min_time, self.time_scale != \"rank\")\n        node_time = self.ts.nodes_time\n        mut_time = self.ts.mutations_time\n        root_branch_len = 0\n        if self.time_scale == \"rank\":\n            t = np.zeros_like(node_time)\n            if max_time == \"tree\":\n                # We only rank the times within the tree in this case.\n                for u in self.node_x_coord.keys():\n                    t[u] = node_time[u]\n            else:\n                # only rank the nodes that are actually referenced in the edge table\n                # (non-referenced nodes could occur if the user specifies x_lim values)\n                # However, we do include nodes in trees that have been skipped\n                use_time = edge_and_sample_nodes(self.ts)\n                t[use_time] = node_time[use_time]\n            node_time = t\n            times = np.unique(node_time[node_time <= self.ts.max_root_time])\n            max_node_height = len(times)\n            depth = {t: j for j, t in enumerate(times)}\n            if self.mutations_over_roots or force_root_branch:\n                root_branch_len = 1  # Will get scaled later\n            max_time = max(depth.values()) + root_branch_len\n            if min_time in (None, \"tree\", \"ts\"):\n                assert min(depth.values()) == 0\n                min_time = 0\n            # In pathological cases, all the nodes are at the same time\n            if max_time == min_time:\n                max_time = min_time + 1\n            self.node_height = {u: depth[node_time[u]] for u in self.node_x_coord.keys()}\n            for u in self.node_mutations.keys():\n                if u in self.node_height:\n        
            parent = self.tree.parent(u)\n                    if parent == NULL:\n                        top = self.node_height[u] + root_branch_len\n                    else:\n                        top = depth[node_time[parent]]\n                    self.process_mutations_over_node(\n                        u, self.node_height[u], top, ignore_times=True\n                    )\n        else:\n            assert self.time_scale in [\"time\", \"log_time\"]\n            self.node_height = {u: node_time[u] for u in self.node_x_coord.keys()}\n            if max_time == \"tree\":\n                max_node_height = max(self.node_height.values())\n                max_mut_height = np.nanmax(\n                    [0] + [mut.time for m in self.node_mutations.values() for mut in m]\n                )\n                max_time = max(max_node_height, max_mut_height)  # Reuse variable\n            elif max_time == \"ts\":\n                max_node_height = self.ts.max_root_time\n                max_mut_height = np.nanmax(np.append(mut_time, 0))\n                max_time = max(max_node_height, max_mut_height)  # Reuse variable\n            else:\n                max_node_height = max_time\n            if min_time == \"tree\":\n                min_time = min(self.node_height.values())\n                # don't need to check mutation times, as they must be above a node\n            elif min_time == \"ts\":\n                min_time = np.min(self.ts.nodes_time[edge_and_sample_nodes(self.ts)])\n            # In pathological cases, all the nodes are at the same time\n            if min_time == max_time:\n                max_time = min_time + 1\n            if self.mutations_over_roots or force_root_branch:\n                # Define a minimum root branch length, after transformation if necessary\n                if self.time_scale != \"log_time\":\n                    root_branch_len = (max_time - min_time) * self.root_branch_fraction\n                else:\n                    
max_plot_y = np.log(max_time + 1)\n                    diff_plot_y = max_plot_y - np.log(min_time + 1)\n                    root_plot_y = max_plot_y + diff_plot_y * self.root_branch_fraction\n                    root_branch_len = np.exp(root_plot_y) - 1 - max_time\n                # If necessary, allow for this extra branch in max_time\n                if max_node_height + root_branch_len > max_time:\n                    max_time = max_node_height + root_branch_len\n            for u in self.node_mutations.keys():\n                if u in self.node_height:\n                    parent = self.tree.parent(u)\n                    if parent == NULL:\n                        # This is a root: if muts have no times we specify an upper time\n                        top = self.node_height[u] + root_branch_len\n                    else:\n                        top = node_time[parent]\n                    self.process_mutations_over_node(u, self.node_height[u], top)\n\n        assert float(max_time) == max_time\n        assert float(min_time) == min_time\n        # Add extra space above the top and below the bottom of the tree to keep the\n        # node labels within the plotbox (but top label space not needed if the\n        # existence of a root branch pushes the whole tree + labels downwards anyway)\n        top_space = 0 if root_branch_len > 0 else top_space\n        self.timescaling = Timescaling(\n            max_time=max_time,\n            min_time=min_time,\n            plot_min=self.plotbox.height + self.plotbox.top - bottom_space,\n            plot_range=self.plotbox.height - top_space - bottom_space,\n            use_log_transform=(self.time_scale == \"log_time\"),\n        )\n\n        # Calculate default root branch length to use (in plot coords). 
This is a\n        # minimum, as branches with deep root mutations could be longer\n        self.min_root_branch_plot_length = self.timescaling.transform(\n            self.timescaling.max_time\n        ) - self.timescaling.transform(self.timescaling.max_time + root_branch_len)\n\n    def assign_x_coordinates(self):\n        # Set up transformation for genome positions\n        self.x_transform = lambda x: (\n            (x - self.left_extent)\n            / (self.right_extent - self.left_extent)\n            * self.plotbox.width\n            + self.plotbox.left\n        )\n        # Set up x positions for nodes\n        node_xpos = {}\n        untracked_children = collections.defaultdict(list)\n        self.extra_line = {}  # To store a dotted line to represent polytomies\n        leaf_x = 0  # First leaf starts at x=1, to give some space between Y axis & leaf\n        tree = self.tree\n        prev = tree.virtual_root\n        for u in self.postorder_nodes:\n            parent = tree.parent(u)\n            omit = self.pack_untracked_polytomies and tree.num_tracked_samples(u) == 0\n            if parent == prev:\n                raise ValueError(\"Nodes must be passed in postorder to Tree.draw_svg()\")\n            is_tip = tree.parent(prev) != u\n            if is_tip:\n                if not omit:\n                    leaf_x += 1\n                    node_xpos[u] = leaf_x\n            elif not omit:\n                # Untracked children are available for packing into a polytomy summary\n                untracked_children = []\n                if self.pack_untracked_polytomies:\n                    untracked_children += [\n                        c for c in tree.children(u) if tree.num_tracked_samples(c) == 0\n                    ]\n                child_x = [node_xpos[c] for c in tree.children(u) if c in node_xpos]\n                if len(untracked_children) > 0:\n                    if len(untracked_children) <= 1:\n                        # If only a single 
non-focal lineage, treat it as a condensed tip\n                        for child in untracked_children:\n                            leaf_x += 1\n                            node_xpos[child] = leaf_x\n                            child_x.append(leaf_x)\n                    else:\n                        # Otherwise show a horizontal line with the number of lineages\n                        # Extra length of line is equal to log of the polytomy size\n                        self.extra_line[u] = self.PolytomyLine(\n                            len(untracked_children),\n                            sum(tree.num_samples(v) for v in untracked_children),\n                            [leaf_x, leaf_x + 1 + np.log(len(untracked_children))],\n                        )\n                        child_x.append(leaf_x + 1)\n                        leaf_x = self.extra_line[u].line_pos[1]\n                assert len(child_x) != 0  # Must have prev hit something defined as a tip\n                if len(child_x) == 1:\n                    node_xpos[u] = child_x[0]\n                else:\n                    a = min(child_x)\n                    b = max(child_x)\n                    node_xpos[u] = a + (b - a) / 2\n            prev = u\n        # Now rescale to the plot width: leaf_x is the maximum value of the last leaf\n        if len(node_xpos) > 0:\n            scale = self.plotbox.width / leaf_x\n            lft = self.plotbox.left - scale / 2\n        self.node_x_coord = {k: lft + v * scale for k, v in node_xpos.items()}\n        for v in self.extra_line.values():\n            for i in range(len(v.line_pos)):\n                v.line_pos[i] = lft + v.line_pos[i] * scale\n\n    def info_classes(self, focal_node_id):\n        \"\"\"\n        For a focal node id, return a set of classes that encode this useful information:\n            \"a<X>\" or \"root\": where <X> == id of immediate ancestor (parent) node\n            \"i<I>\":           where <I> == individual id\n            
\"p<P>\":           where <P> == population id\n            \"n<Y>\":           where <Y> == focal node id\n            \"m<A>\":           where <A> == mutation id\n            \"s<B>\":           where <B> == site id of all mutations\n            \"c<N>\" or \"leaf\": where <N> == number of direct children of this node\n        \"\"\"\n        # Add a new group for each node, and give it classes for css targetting\n        focal_node = self.ts.node(focal_node_id)\n        classes = set()\n        classes.add(f\"node n{focal_node_id}\")\n        if focal_node.individual != NULL:\n            classes.add(f\"i{focal_node.individual}\")\n        if focal_node.population != NULL:\n            classes.add(f\"p{focal_node.population}\")\n        v = self.tree.parent(focal_node_id)\n        if v == NULL:\n            classes.add(\"root\")\n        else:\n            classes.add(f\"a{v}\")\n        if self.tree.is_sample(focal_node_id):\n            classes.add(\"sample\")\n        if self.tree.is_leaf(focal_node_id):\n            classes.add(\"leaf\")\n        else:\n            classes.add(f\"c{self.tree.num_children(focal_node_id)}\")\n        for mutation in self.node_mutations[focal_node_id]:\n            # Adding mutations and sites above this node allows identification\n            # of the tree under any specific mutation\n            classes.add(f\"m{mutation.id + self.offsets.mutation}\")\n            classes.add(f\"s{mutation.site + self.offsets.site}\")\n        return sorted(classes)\n\n    def text_transform(self, position, dy=0):\n        line_h = self.text_height\n        sym_sz = self.symbol_size\n        transforms = {\n            \"below\": f\"translate(0 {rnd(line_h - sym_sz / 2 + dy)})\",\n            \"above\": f\"translate(0 {rnd(-(line_h - sym_sz / 2) + dy)})\",\n            \"above_left\": f\"translate({rnd(-sym_sz / 2)} {rnd(-line_h / 2 + dy)})\",\n            \"above_right\": f\"translate({rnd(sym_sz / 2)} {-rnd(line_h / 2 + dy)})\",\n          
  \"left\": f\"translate({-rnd(2 + sym_sz / 2)} {rnd(dy)})\",\n            \"right\": f\"translate({rnd(2 + sym_sz / 2)} {rnd(dy)})\",\n        }\n        return transforms[position]\n\n    def draw_tree(self):\n        # Note: the displayed tree may not be the same as self.tree, e.g. if the nodes\n        # have been collapsed, or a subtree is being displayed. The node_x_coord\n        # dictionary keys gives the nodes of the displayed tree, in postorder.\n        NodeDrawInfo = collections.namedtuple(\"NodeDrawInfo\", [\"pos\", \"is_tip\"])\n        dwg = self.drawing\n        tree = self.tree\n        left_child = get_left_child(tree, self.postorder_nodes)\n        parent_array = tree.parent_array\n        edge_array = tree.edge_array\n\n        node_info = {}\n        roots = []  # Roots of the displated tree\n        prev = tree.virtual_root\n        for u, x in self.node_x_coord.items():  # Node ids `u` returned in postorder\n            node_info[u] = NodeDrawInfo(\n                pos=np.array([x, self.timescaling.transform(self.node_height[u])]),\n                # Detect if this is a \"tip\" in the displayed tree, even if\n                # it is not a leaf in the original tree, by looking at the prev parent\n                is_tip=(parent_array[prev] != u),\n            )\n            prev = u\n            if parent_array[u] not in self.node_x_coord:\n                roots.append(u)\n        # Iterate over displayed nodes, adding groups to reflect the tree hierarchy\n        stack = []\n        for u in roots:\n            x, y = node_info[u].pos\n            grp = dwg.g(\n                class_=\" \".join(self.info_classes(u)),\n                transform=f\"translate({rnd(x)} {rnd(y)})\",\n            )\n            stack.append((u, self.get_plotbox().add(grp)))\n\n        # Preorder traversal, so we can create nested groups\n        while len(stack) > 0:\n            u, curr_svg_group = stack.pop()\n            pu, is_tip = node_info[u]\n            
for focal in tree.children(u):\n                if focal not in node_info:\n                    continue\n                fx, fy = node_info[focal].pos - pu\n                new_svg_group = curr_svg_group.add(\n                    dwg.g(\n                        class_=\" \".join(self.info_classes(focal)),\n                        transform=f\"translate({rnd(fx)} {rnd(fy)})\",\n                    )\n                )\n                stack.append((focal, new_svg_group))\n\n            o = (0, 0)\n            v = parent_array[u]\n\n            # Add polytomy line if necessary\n            if u in self.extra_line:\n                info = self.extra_line[u]\n                x2 = info.line_pos[1] - pu[0]\n                poly = dwg.g(class_=\"polytomy\")\n                poly.add(\n                    dwg.line(\n                        start=(0, 0),\n                        end=(x2, 0),\n                    )\n                )\n                label = dwg.text(\n                    f\"+{info.num_samples}/{bold_integer(info.num_branches)}\",\n                    font_style=\"italic\",\n                    x=[rnd(x2)],\n                    dy=[rnd(-self.text_height / 10)],  # make the plus sign line up\n                    text_anchor=\"end\",\n                )\n                label.set_desc(\n                    title=(\n                        f\"This polytomy has {info.num_branches} additional branches, \"\n                        f\"leading to a total of {info.num_samples} descendant samples\"\n                    )\n                )\n                poly.add(label)\n                curr_svg_group.add(poly)\n\n            # Add edge above node first => on layer underneath anything else\n            draw_edge_above_node = False\n            try:\n                dx, dy = node_info[v].pos - pu\n                draw_edge_above_node = True\n            except KeyError:\n                # Must be a root\n                root_branch_l = 
self.min_root_branch_plot_length\n                if root_branch_l > 0:\n                    if len(self.node_mutations[u]) > 0:\n                        mtop = self.timescaling.transform(self.node_mutations[u][0].time)\n                        root_branch_l = max(root_branch_l, pu[1] - mtop)\n                    dx, dy = 0, -root_branch_l\n                    draw_edge_above_node = True\n            if draw_edge_above_node:\n                edge_id_class = (\n                    \"root\" if edge_array[u] == tskit.NULL else f\"e{edge_array[u]}\"\n                )\n                add_class(self.edge_attrs[u], f\"edge {edge_id_class}\")\n                path = dwg.path(\n                    [(\"M\", o), (\"V\", rnd(dy)), (\"H\", rnd(dx))], **self.edge_attrs[u]\n                )\n                curr_svg_group.add(path)\n\n            # Add mutation symbols + labels\n            for mutation in self.node_mutations[u]:\n                # TODO get rid of these manual positioning tweaks and add them\n                # as offsets the user can access via a transform or something.\n                dy = self.timescaling.transform(mutation.time) - pu[1]\n                mutation_id = mutation.id + self.offsets.mutation\n                mutation_class = (\n                    f\"mut m{mutation_id} s{mutation.site + self.offsets.site}\"\n                )\n                # Use the real mutation ID here, since we are referencing into the ts\n                if util.is_unknown_time(self.ts.mutation(mutation.id).time):\n                    mutation_class += \" unknown_time\"\n                if mutation_id in self.mutations_outside_tree:\n                    mutation_class += \" extra\"\n                mut_group = curr_svg_group.add(\n                    dwg.g(class_=mutation_class, transform=f\"translate(0 {rnd(dy)})\")\n                )\n                # A line from the mutation to the node below, normally hidden, but\n                # revealable if we want to flag the 
path below a mutation\n                mut_group.add(dwg.line(end=(0, -rnd(dy))))\n                # Symbols\n                symbol = mut_group.add(dwg.path(**self.mutation_attrs[mutation_id]))\n                if mutation_id in self.mutation_titles:\n                    symbol.set_desc(title=self.mutation_titles[mutation_id])\n                # Labels\n                if u == left_child[parent_array[u]]:\n                    mut_label_class = \"lft\"\n                    transform = self.text_transform(\"left\")\n                else:\n                    mut_label_class = \"rgt\"\n                    transform = self.text_transform(\"right\")\n                add_class(self.mutation_label_attrs[mutation_id], mut_label_class)\n                self.mutation_label_attrs[mutation_id][\"transform\"] = transform\n                mut_group.add(dwg.text(**self.mutation_label_attrs[mutation_id]))\n\n            # Add node symbol + label (visually above the edge subtending this node)\n            # -> symbols\n            if tree.is_sample(u):\n                symbol = curr_svg_group.add(dwg.rect(**self.node_attrs[u]))\n            else:\n                symbol = curr_svg_group.add(dwg.circle(**self.node_attrs[u]))\n            multi_samples = None\n            if is_tip and tree.num_samples(u) > 1:  # Multi-sample tip => trapezium shape\n                multi_samples = tree.num_samples(u)\n                trapezium_attrs = self.node_attrs[u].copy()\n                # Remove the shape-styling attributes\n                for unwanted_attr in (\"size\", \"insert\", \"center\", \"r\"):\n                    trapezium_attrs.pop(unwanted_attr, None)\n                trapezium_attrs[\"points\"] = [  # add a trapezium shape below the symbol\n                    (self.symbol_size / 2, 0),\n                    (self.symbol_size, self.symbol_size),\n                    (-self.symbol_size, self.symbol_size),\n                    (-self.symbol_size / 2, 0),\n                ]\n        
        add_class(trapezium_attrs, \"multi\")\n                curr_svg_group.add(dwg.polygon(**trapezium_attrs))\n            if u in self.node_titles:\n                symbol.set_desc(title=self.node_titles[u])\n            # -> labels\n            node_lab_attr = self.node_label_attrs[u]\n            if is_tip and multi_samples is None:\n                node_lab_attr[\"transform\"] = self.text_transform(\"below\")\n            elif u in roots and self.min_root_branch_plot_length == 0:\n                node_lab_attr[\"transform\"] = self.text_transform(\"above\")\n            else:\n                if multi_samples is not None:\n                    label = dwg.text(\n                        text=f\"+{multi_samples}\",\n                        transform=self.text_transform(\"below\", dy=1),\n                        font_style=\"italic\",\n                        class_=\"lab summary\",\n                    )\n                    title = (\n                        f\"A collapsed {'sample' if tree.is_sample(u) else 'non-sample'} \"\n                        f\"node with {multi_samples} descendant samples in this tree\"\n                    )\n                    label.set_desc(title=title)\n                    curr_svg_group.add(label)\n                if u == left_child[tree.parent(u)]:\n                    add_class(node_lab_attr, \"lft\")\n                    node_lab_attr[\"transform\"] = self.text_transform(\"above_left\")\n                else:\n                    add_class(node_lab_attr, \"rgt\")\n                    node_lab_attr[\"transform\"] = self.text_transform(\"above_right\")\n            curr_svg_group.add(dwg.text(**node_lab_attr))\n\n\nclass TextTreeSequence:\n    \"\"\"\n    Draw a tree sequence as horizontal line of trees.\n    \"\"\"\n\n    def __init__(\n        self,\n        ts,\n        node_labels=None,\n        use_ascii=False,\n        time_label_format=None,\n        position_label_format=None,\n        order=None,\n    ):\n        
self.ts = ts\n\n        time_label_format = \"{:.2f}\" if time_label_format is None else time_label_format\n        tick_labels = ts.breakpoints(as_array=True)\n        if position_label_format is None:\n            position_scale_labels = create_tick_labels(tick_labels)\n        else:\n            position_scale_labels = [\n                position_label_format.format(x) for x in tick_labels\n            ]\n\n        time = ts.tables.nodes.time\n        time_scale_labels = [\n            time_label_format.format(time[u]) for u in range(ts.num_nodes)\n        ]\n\n        trees = [\n            VerticalTextTree(\n                tree,\n                max_time=\"ts\",\n                node_labels=node_labels,\n                use_ascii=use_ascii,\n                order=order,\n            )\n            for tree in self.ts.trees()\n        ]\n\n        self.height = 1 + max(tree.height for tree in trees)\n        self.width = sum(tree.width + 2 for tree in trees) - 1\n        max_time_scale_label_len = max(map(len, time_scale_labels))\n        self.width += 3 + max_time_scale_label_len + len(position_scale_labels[-1]) // 2\n\n        self.canvas = np.zeros((self.height, self.width), dtype=str)\n        self.canvas[:] = \" \"\n\n        vertical_sep = \"|\" if use_ascii else \"┊\"\n        x = 0\n        time_position = trees[0].time_position\n        for u, label in enumerate(map(to_np_unicode, time_scale_labels)):\n            y = time_position[u]\n            self.canvas[y, 0 : label.shape[0]] = label\n        self.canvas[:, max_time_scale_label_len] = vertical_sep\n        x = 2 + max_time_scale_label_len\n\n        for j, tree in enumerate(trees):\n            pos_label = to_np_unicode(position_scale_labels[j])\n            k = len(pos_label)\n            label_x = max(x - k // 2 - 2, 0)\n            self.canvas[-1, label_x : label_x + k] = pos_label\n            h, w = tree.canvas.shape\n            self.canvas[-h - 1 : -1, x : x + w - 1] = tree.canvas[:, 
:-1]\n            x += w\n            self.canvas[:, x] = vertical_sep\n            x += 2\n\n        pos_label = to_np_unicode(position_scale_labels[-1])\n        k = len(pos_label)\n        label_x = max(x - k // 2 - 2, 0)\n        self.canvas[-1, label_x : label_x + k] = pos_label\n        self.canvas[:, -1] = \"\\n\"\n\n    def __str__(self):\n        return \"\".join(self.canvas.reshape(self.width * self.height))\n\n\ndef to_np_unicode(string):\n    \"\"\"\n    Converts the specified string to a numpy unicode array.\n    \"\"\"\n    # TODO: what's the clean of doing this with numpy?\n    # It really wants to create a zero-d Un array here\n    # which breaks the assignment below and we end up\n    # with n copies of the first char.\n    n = len(string)\n    np_string = np.zeros(n, dtype=\"U\")\n    for j in range(n):\n        np_string[j] = string[j]\n    return np_string\n\n\ndef get_left_neighbour(tree, traversal_order):\n    \"\"\"\n    Returns the left-most neighbour of each node in the tree according to the\n    specified traversal order. 
The left neighbour is the closest node in terms\n    of path distance to the left of a given node.\n    \"\"\"\n    # The traversal order will define the order of children and roots.\n    # Root order is defined by this traversal, and the roots are\n    # the children of -1\n    children = collections.defaultdict(list)\n    for u in tree.nodes(order=traversal_order):\n        children[tree.parent(u)].append(u)\n\n    left_neighbour = np.full(tree.tree_sequence.num_nodes + 1, NULL, dtype=int)\n\n    def find_neighbours(u, neighbour):\n        left_neighbour[u] = neighbour\n        for v in children[u]:\n            find_neighbours(v, neighbour)\n            neighbour = v\n\n    # The children of -1 are the roots and the neighbour of all left-most\n    # nodes in the tree is also -1 (NULL)\n    find_neighbours(-1, -1)\n\n    return left_neighbour[:-1]\n\n\ndef get_left_child(tree, postorder_nodes):\n    \"\"\"\n    Returns the left-most child of each node in the tree according to the\n    traversal order listed in postorder_nodes. If a node has no children or\n    NULL is passed in, return NULL.\n    \"\"\"\n    left_child = np.full(tree.tree_sequence.num_nodes + 1, NULL, dtype=int)\n    for u in postorder_nodes:\n        parent = tree.parent(u)\n        if parent != NULL and left_child[parent] == NULL:\n            left_child[parent] = u\n    return left_child\n\n\ndef node_time_depth(tree, min_branch_length=None, max_time=\"tree\"):\n    \"\"\"\n    Returns a dictionary mapping nodes in the specified tree to their depth\n    in the specified tree (from the root direction). If min_branch_len is\n    provided, it specifies the minimum length of each branch. 
If not specified,\n    default to 1.\n    \"\"\"\n    if min_branch_length is None:\n        min_branch_length = {u: 1 for u in range(tree.tree_sequence.num_nodes)}\n    time_node_map = collections.defaultdict(list)\n    current_depth = 0\n    depth = {}\n    # TODO this is basically the same code for the two cases. Refactor so that\n    # we use the same code.\n    if max_time == \"tree\":\n        for u in tree.nodes():\n            time_node_map[tree.time(u)].append(u)\n        for t in sorted(time_node_map.keys()):\n            for u in time_node_map[t]:\n                for v in tree.children(u):\n                    current_depth = max(current_depth, depth[v] + min_branch_length[v])\n            for u in time_node_map[t]:\n                depth[u] = current_depth\n            current_depth += 2\n        for root in tree.roots:\n            current_depth = max(current_depth, depth[root] + min_branch_length[root])\n    else:\n        assert max_time == \"ts\"\n        ts = tree.tree_sequence\n        for node in ts.nodes():\n            time_node_map[node.time].append(node.id)\n        node_edges = collections.defaultdict(list)\n        for edge in ts.edges():\n            node_edges[edge.parent].append(edge)\n\n        for t in sorted(time_node_map.keys()):\n            for u in time_node_map[t]:\n                for edge in node_edges[u]:\n                    v = edge.child\n                    current_depth = max(current_depth, depth[v] + min_branch_length[v])\n            for u in time_node_map[t]:\n                depth[u] = current_depth\n            current_depth += 2\n\n    return depth, current_depth\n\n\nclass TextTree:\n    \"\"\"\n    Draws a reprentation of a tree using unicode drawing characters written\n    to a 2D array.\n    \"\"\"\n\n    def __init__(\n        self,\n        tree,\n        node_labels=None,\n        max_time=None,\n        min_time=None,\n        use_ascii=False,\n        orientation=None,\n        order=None,\n    ):\n        
self.tree = tree\n        self.traversal_order = check_order(order)\n        self.max_time = check_max_time(max_time, allow_numeric=False)\n        self.min_time = check_min_time(min_time, allow_numeric=False)\n        self.use_ascii = use_ascii\n        self.orientation = check_orientation(orientation)\n        self.horizontal_line_char = \"━\"\n        self.vertical_line_char = \"┃\"\n        if use_ascii:\n            self.horizontal_line_char = \"-\"\n            self.vertical_line_char = \"|\"\n        # These are set below by the placement algorithms.\n        self.width = None\n        self.height = None\n        self.canvas = None\n        # Placement of nodes in the 2D space. Nodes are positioned in one\n        # dimension based on traversal ordering and by their time in the\n        # other dimension. These are mapped to x and y coordinates according\n        # to the orientation.\n        self.traversal_position = {}  # Position of nodes in traversal space\n        self.time_position = {}\n        # Labels for nodes\n        self.node_labels = {}\n\n        # Set the node labels\n        for u in tree.nodes():\n            if node_labels is None:\n                # If we don't specify node_labels, default to node ID\n                self.node_labels[u] = str(u)\n            else:\n                # If we do specify node_labels, default to an empty line\n                self.node_labels[u] = self.default_node_label\n        if node_labels is not None:\n            for node, label in node_labels.items():\n                self.node_labels[node] = label\n\n        self._assign_time_positions()\n        self._assign_traversal_positions()\n        self.canvas = np.zeros((self.height, self.width), dtype=str)\n        self.canvas[:] = \" \"\n        self._draw()\n        self.canvas[:, -1] = \"\\n\"\n\n    def __str__(self):\n        return \"\".join(self.canvas.reshape(self.width * self.height))\n\n\nclass VerticalTextTree(TextTree):\n    \"\"\"\n    Text tree 
rendering where root nodes are at the top and time goes downwards\n    into the present.\n    \"\"\"\n\n    @property\n    def default_node_label(self):\n        return self.vertical_line_char\n\n    def _assign_time_positions(self):\n        tree = self.tree\n        # TODO when we add mutations to the text tree we'll need to take it into\n        # account here. Presumably we need to get the maximum number of mutations\n        # per branch.\n        self.time_position, total_depth = node_time_depth(tree, max_time=self.max_time)\n        self.height = total_depth - 1\n\n    def _assign_traversal_positions(self):\n        self.label_x = {}\n        left_neighbour = get_left_neighbour(self.tree, self.traversal_order)\n        x = 0\n        for u in self.tree.nodes(order=self.traversal_order):\n            label_size = len(self.node_labels[u])\n            if self.tree.is_leaf(u):\n                self.traversal_position[u] = x + label_size // 2\n                self.label_x[u] = x\n                x += label_size + 1\n            else:\n                coords = [self.traversal_position[c] for c in self.tree.children(u)]\n                if len(coords) == 1:\n                    self.traversal_position[u] = coords[0]\n                else:\n                    a = min(coords)\n                    b = max(coords)\n                    child_mid = int(round(a + (b - a) / 2))\n                    self.traversal_position[u] = child_mid\n                self.label_x[u] = self.traversal_position[u] - label_size // 2\n                neighbour_x = -1\n                neighbour = left_neighbour[u]\n                if neighbour != NULL:\n                    neighbour_x = self.traversal_position[neighbour]\n                self.label_x[u] = max(neighbour_x + 1, self.label_x[u])\n                x = max(x, self.label_x[u] + label_size + 1)\n            assert self.label_x[u] >= 0\n        self.width = x\n\n    def _draw(self):\n        if self.use_ascii:\n            
left_child = \"+\"\n            right_child = \"+\"\n            mid_parent = \"+\"\n            mid_parent_child = \"+\"\n            mid_child = \"+\"\n        elif self.orientation == TOP:\n            left_child = \"┏\"\n            right_child = \"┓\"\n            mid_parent = \"┻\"\n            mid_parent_child = \"╋\"\n            mid_child = \"┳\"\n        else:\n            left_child = \"┗\"\n            right_child = \"┛\"\n            mid_parent = \"┳\"\n            mid_parent_child = \"╋\"\n            mid_child = \"┻\"\n\n        for u in self.tree.nodes():\n            xu = self.traversal_position[u]\n            yu = self.time_position[u]\n            label = to_np_unicode(self.node_labels[u])\n            label_len = label.shape[0]\n            label_x = self.label_x[u]\n            assert label_x >= 0\n            self.canvas[yu, label_x : label_x + label_len] = label\n            children = self.tree.children(u)\n            if len(children) > 0:\n                if len(children) == 1:\n                    yv = self.time_position[children[0]]\n                    self.canvas[yv:yu, xu] = self.vertical_line_char\n                else:\n                    left = min(self.traversal_position[v] for v in children)\n                    right = max(self.traversal_position[v] for v in children)\n                    y = yu - 1\n                    self.canvas[y, left + 1 : right] = self.horizontal_line_char\n                    self.canvas[y, xu] = mid_parent\n                    for v in children:\n                        xv = self.traversal_position[v]\n                        yv = self.time_position[v]\n                        self.canvas[yv:yu, xv] = self.vertical_line_char\n                        mid_char = mid_parent_child if xv == xu else mid_child\n                        self.canvas[y, xv] = mid_char\n                    self.canvas[y, left] = left_child\n                    self.canvas[y, right] = right_child\n        if self.orientation == 
TOP:\n            self.canvas = np.flip(self.canvas, axis=0)\n            # Reverse the time positions so that we can use them in the tree\n            # sequence drawing as well.\n            flipped_time_position = {\n                u: self.height - y - 1 for u, y in self.time_position.items()\n            }\n            self.time_position = flipped_time_position\n\n\nclass HorizontalTextTree(TextTree):\n    \"\"\"\n    Text tree rendering where root nodes are at the left and time goes\n    rightwards into the present.\n    \"\"\"\n\n    @property\n    def default_node_label(self):\n        return self.horizontal_line_char\n\n    def _assign_time_positions(self):\n        # TODO when we add mutations to the text tree we'll need to take it into\n        # account here. Presumably we need to get the maximum number of mutations\n        # per branch.\n        self.time_position, total_depth = node_time_depth(\n            self.tree, {u: 1 + len(self.node_labels[u]) for u in self.tree.nodes()}\n        )\n        self.width = total_depth\n\n    def _assign_traversal_positions(self):\n        y = 0\n        for root in self.tree.roots:\n            for u in self.tree.nodes(root, order=self.traversal_order):\n                if self.tree.is_leaf(u):\n                    self.traversal_position[u] = y\n                    y += 2\n                else:\n                    coords = [self.traversal_position[c] for c in self.tree.children(u)]\n                    if len(coords) == 1:\n                        self.traversal_position[u] = coords[0]\n                    else:\n                        a = min(coords)\n                        b = max(coords)\n                        child_mid = int(round(a + (b - a) / 2))\n                        self.traversal_position[u] = child_mid\n            y += 1\n        self.height = y - 2\n\n    def _draw(self):\n        if self.use_ascii:\n            top_across = \"+\"\n            bot_across = \"+\"\n            mid_parent = 
\"+\"\n            mid_parent_child = \"+\"\n            mid_child = \"+\"\n        elif self.orientation == LEFT:\n            top_across = \"┏\"\n            bot_across = \"┗\"\n            mid_parent = \"┫\"\n            mid_parent_child = \"╋\"\n            mid_child = \"┣\"\n        else:\n            top_across = \"┓\"\n            bot_across = \"┛\"\n            mid_parent = \"┣\"\n            mid_parent_child = \"╋\"\n            mid_child = \"┫\"\n\n        # Draw in root-right mode as the coordinates go in the expected direction.\n        for u in self.tree.nodes():\n            yu = self.traversal_position[u]\n            xu = self.time_position[u]\n            label = to_np_unicode(self.node_labels[u])\n            if self.orientation == LEFT:\n                # We flip the array at the end so need to reverse the label.\n                label = label[::-1]\n            label_len = label.shape[0]\n            self.canvas[yu, xu : xu + label_len] = label\n            children = self.tree.children(u)\n            if len(children) > 0:\n                if len(children) == 1:\n                    xv = self.time_position[children[0]]\n                    self.canvas[yu, xv:xu] = self.horizontal_line_char\n                else:\n                    bot = min(self.traversal_position[v] for v in children)\n                    top = max(self.traversal_position[v] for v in children)\n                    x = xu - 1\n                    self.canvas[bot + 1 : top, x] = self.vertical_line_char\n                    self.canvas[yu, x] = mid_parent\n                    for v in children:\n                        yv = self.traversal_position[v]\n                        xv = self.time_position[v]\n                        self.canvas[yv, xv:x] = self.horizontal_line_char\n                        mid_char = mid_parent_child if yv == yu else mid_child\n                        self.canvas[yv, x] = mid_char\n                    self.canvas[bot, x] = top_across\n                 
   self.canvas[top, x] = bot_across\n        if self.orientation == LEFT:\n            self.canvas = np.flip(self.canvas, axis=1)\n            # Move the padding to the left.\n            self.canvas[:, :-1] = self.canvas[:, 1:]\n            self.canvas[:, -1] = \" \"\n"
  },
  {
    "path": "python/tskit/exceptions.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2021 Tskit Developers\n# Copyright (c) 2017 University of Oxford\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nExceptions defined in tskit.\n\"\"\"\n\nfrom _tskit import FileFormatError  # noqa F401\nfrom _tskit import IdentityPairsNotStoredError  # noqa F401\nfrom _tskit import IdentitySegmentsNotStoredError  # noqa F401\nfrom _tskit import LibraryError  # noqa F401\nfrom _tskit import TskitException\nfrom _tskit import VersionTooNewError  # noqa F401\nfrom _tskit import VersionTooOldError  # noqa F401\n\n\nclass DuplicatePositionsError(TskitException):\n    \"\"\"\n    Duplicate positions in the list of sites.\n    \"\"\"\n\n\nclass ProvenanceValidationError(TskitException):\n    \"\"\"\n    A JSON document did not validate against the provenance schema.\n    \"\"\"\n\n\nclass MetadataValidationError(TskitException):\n    \"\"\"\n    A metadata object did not validate against the 
metadata schema.\n    \"\"\"\n\n\nclass MetadataSchemaValidationError(TskitException):\n    \"\"\"\n    A metadata schema object did not validate against the metaschema.\n    \"\"\"\n\n\nclass MetadataEncodingError(TskitException):\n    \"\"\"\n    A metadata object was of a type that could not be encoded.\n    \"\"\"\n\n\nclass ImmutableTableError(ValueError):\n    \"\"\"\n    Raised when attempting to modify an immutable table view.\n\n    Use TreeSequence.dump_tables() to get a mutable copy.\n    \"\"\"\n"
  },
  {
    "path": "python/tskit/genotypes.py",
    "content": "#\n# MIT License\n#\n# Copyright (c) 2018-2024 Tskit Developers\n# Copyright (c) 2015-2018 University of Oxford\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\nfrom __future__ import annotations\n\nimport collections\nimport logging\nimport typing\n\nimport numpy as np\n\nimport _tskit\nimport tskit\nimport tskit.trees as trees\nimport tskit.util as util\n\n\nclass Variant:\n    \"\"\"\n    A variant in a tree sequence, describing the observed genetic variation\n    among the specified nodes (by default, the sample nodes) for a given site.\n    A variant consists of (a) a tuple of **alleles** listing the potential\n    allelic states which the requested nodes at this site can possess; (b) an\n    array of **genotypes** mapping node IDs to the observed alleles; (c) a\n    reference to the :class:`Site` at which the Variant has been decoded; and\n    (d) an array of **samples** giving the node ID to which each element of the\n    genotypes 
array corresponds.\n\n    After creation a Variant is not yet decoded, and has no genotypes.\n    To decode a Variant, call the :meth:`decode` method. The Variant class will then\n    use a Tree, internal to the Variant, to seek to the position of the site and\n    decode the genotypes at that site. It is therefore much more efficient to visit\n    sites in sequential genomic order, either in a forwards or backwards direction,\n    than to do so randomly.\n\n    Each element in the ``alleles`` tuple is a string, representing an\n    observed allelic state that may be seen at this site. The ``alleles`` tuple,\n    which is guaranteed not to contain any duplicates, is generated in one of two\n    ways. The first (and default) way is for ``tskit`` to generate the encoding on\n    the fly while generating genotypes. In this case, the first element of this\n    tuple is guaranteed to be the same as the site's ``ancestral_state`` value.\n    Note that allelic values may be listed that are not referred to by any\n    samples. For example, if we have a site that is fixed for the derived state\n    (i.e., we have a mutation over the tree root), all genotypes will be 1, but\n    the alleles list will be equal to ``('0', '1')``. Other than the\n    ancestral state being the first allele, the alleles are listed in\n    no particular order, and the ordering should not be relied upon\n    (but see the notes on missing data below).\n\n    The second way is for the user to define the mapping between\n    genotype values and allelic state strings using the\n    ``alleles`` parameter to the :meth:`TreeSequence.variants` method.\n    In this case, there is no indication of which allele is the ancestral state,\n    as the ordering is determined by the user.\n\n    The ``genotypes`` represent the observed allelic states for each requested\n    node, such that ``var.alleles[var.genotypes[j]]`` gives the string allele\n    for the node at index ``j`` (i.e., for ``variant.samples[j]``). 
Thus, the\n    elements of the genotypes array are\n    indexes into the ``alleles`` list. The genotypes are provided in this\n    way via a numpy numeric array to enable efficient calculations. To obtain a\n    (less efficient) array of allele strings for each node, you can use e.g.\n    ``np.asarray(variant.alleles)[variant.genotypes]``.\n\n    When :ref:`missing data<sec_data_model_missing_data>` is present at a given\n    site, the property ``has_missing_data`` will be True, at least one element\n    of the ``genotypes`` array will be equal to ``tskit.MISSING_DATA``, and the\n    last element of the ``alleles`` array will be ``None``. Note that in this\n    case ``variant.num_alleles`` will **not** be equal to\n    ``len(variant.alleles)``. The rationale for adding ``None`` to the end of\n    the ``alleles`` list is to help code that does not handle missing data\n    correctly fail early rather than introducing subtle and hard-to-find bugs.\n    As ``tskit.MISSING_DATA`` is equal to -1, code that decodes genotypes into\n    allelic values without taking missing data into account would otherwise\n    incorrectly output the last allele in the list.\n\n    :param TreeSequence tree_sequence: The tree sequence to which this variant\n        belongs.\n    :param array_like samples: An array of node IDs for which to generate\n        genotypes, or ``None`` for all sample nodes. Non-sample nodes may also\n        be provided to generate genotypes for internal nodes. Default: ``None``.\n    :param bool isolated_as_missing: If True, the genotype value assigned to\n        isolated nodes without mutations (samples or non-samples) is\n        :data:`.MISSING_DATA` (-1). If False, such nodes will be\n        assigned the allele index for the ancestral state.\n        Default: True.\n    :param tuple alleles: A tuple of strings defining the encoding of\n        alleles as integer genotype values. 
At least one allele must be provided.\n        If duplicate alleles are provided, output genotypes will always be\n        encoded as the first occurrence of the allele. If None (the default),\n        the alleles are encoded as they are encountered during genotype\n        generation.\n\n    \"\"\"\n\n    def __init__(\n        self, tree_sequence, samples=None, isolated_as_missing=None, alleles=None\n    ):\n        if isolated_as_missing is None:\n            isolated_as_missing = True\n        self.tree_sequence = tree_sequence\n        if samples is not None:\n            samples = util.safe_np_int_cast(samples, np.int32)\n        self._ll_variant = _tskit.Variant(\n            tree_sequence._ll_tree_sequence,\n            samples=samples,\n            isolated_as_missing=isolated_as_missing,\n            alleles=alleles,\n        )\n\n    def _check_decoded(self):\n        if self._ll_variant.site_id == tskit.NULL:\n            raise ValueError(\n                \"This variant has not yet been decoded at a specific site, \"\n                \"call Variant.decode to set the site.\"\n            )\n\n    @property\n    def site(self) -> trees.Site:\n        \"\"\"\n        The Site object for the site at which this variant has been decoded.\n        \"\"\"\n        self._check_decoded()\n        return self.tree_sequence.site(self._ll_variant.site_id)\n\n    @property\n    def alleles(self) -> tuple[str | None, ...]:\n        \"\"\"\n        A tuple of the allelic values which nodes can possess at the current\n        site. 
Unless an encoding of alleles is specified when creating this\n        variant instance, the first element of this tuple is always the site's\n        ancestral state.\n        \"\"\"\n        return self._ll_variant.alleles\n\n    @property\n    def samples(self) -> np.ndarray:\n        \"\"\"\n        A numpy array of the node ids whose genotypes will be returned\n        by the :meth:`genotypes` method.\n        \"\"\"\n        return self._ll_variant.samples\n\n    @property\n    def genotypes(self) -> np.ndarray:\n        \"\"\"\n        An array of indexes into the list ``alleles``, giving the\n        state of each requested node at the current site.\n        \"\"\"\n        self._check_decoded()\n        return self._ll_variant.genotypes\n\n    @property\n    def isolated_as_missing(self) -> bool:\n        \"\"\"\n        True if isolated nodes are decoded to missing data. If False, isolated\n        nodes are decoded to the ancestral state.\n        \"\"\"\n        return self._ll_variant.isolated_as_missing\n\n    @property\n    def has_missing_data(self) -> bool:\n        \"\"\"\n        True if there is missing data for any of the\n        requested nodes at the current site.\n        \"\"\"\n        alleles = self._ll_variant.alleles\n        return len(alleles) > 0 and alleles[-1] is None\n\n    @property\n    def num_missing(self) -> int:\n        \"\"\"\n        The number of requested nodes with missing data at this site.\n        \"\"\"\n        return np.sum(self.genotypes == tskit.NULL)\n\n    @property\n    def num_alleles(self) -> int:\n        \"\"\"\n        The number of distinct alleles at this site. 
Note that this may\n        not be the same as the number of distinct values in the genotypes\n        array: firstly missing data is not counted as an allele, and secondly,\n        the site may contain mutations to alternative allele states (which are\n        counted in the number of alleles) without the mutation being inherited\n        by any of the requested nodes.\n        \"\"\"\n        return len(self.alleles) - self.has_missing_data\n\n    # Deprecated alias to avoid breaking existing code.\n    @property\n    def position(self) -> float:\n        return self.site.position\n\n    # Deprecated alias to avoid breaking existing code.\n    @property\n    def index(self) -> int:\n        return self._ll_variant.site_id\n\n    # We need a custom eq for the numpy array\n    def __eq__(self, other) -> bool:\n        return (\n            isinstance(other, Variant)\n            and self.tree_sequence == other.tree_sequence\n            and self._ll_variant.site_id == other._ll_variant.site_id\n            and self._ll_variant.site_id != tskit.NULL\n            and self._ll_variant.alleles == other._ll_variant.alleles\n            and np.array_equal(self._ll_variant.genotypes, other._ll_variant.genotypes)\n        )\n\n    def decode(self, site_id) -> None:\n        \"\"\"\n        Decode the variant at the given site, setting the site ID, genotypes and\n        alleles to those of the site and samples of this Variant.\n\n        :param int site_id: The ID of the site to decode. This must be a valid site ID.\n        \"\"\"\n        self._ll_variant.decode(site_id)\n\n    def copy(self) -> Variant:\n        \"\"\"\n        Create a copy of this Variant. 
Note that calling :meth:`decode` on the\n        copy will fail as it does not take a copy of the internal tree.\n\n        :return: The copy of this Variant.\n        \"\"\"\n        variant_copy = Variant.__new__(Variant)\n        variant_copy.tree_sequence = self.tree_sequence\n        variant_copy._ll_variant = self._ll_variant.restricted_copy()\n        return variant_copy\n\n    def states(self, missing_data_string=None) -> np.ndarray:\n        \"\"\"\n        Returns the allelic states at this site as an array of strings.\n\n        .. warning::\n            Using this method is inefficient compared to working with the\n            underlying integer representation of genotypes as returned by\n            the :attr:`~Variant.genotypes` property.\n\n        :param str missing_data_string: A string that will be used to represent missing\n            data. If any normal allele is equal to this string, an error is raised.\n            Default: `None`, treated as `'N'`.\n        :return: A numpy array of strings, one for each element of ``genotypes``.\n        \"\"\"\n        if missing_data_string is None:\n            missing_data_string = \"N\"\n        elif not isinstance(missing_data_string, str):\n            # Must explicitly test here, otherwise we output a numpy object array\n            raise ValueError(\"Missing data string is not a string\")\n        alleles = self.alleles\n        if alleles[-1] is None:\n            if missing_data_string in alleles:\n                raise ValueError(\n                    \"An existing allele is equal to the \"\n                    f\"missing data string '{missing_data_string}'\"\n                )\n            alleles = alleles[:-1] + (missing_data_string,)\n        return np.array(alleles)[self.genotypes]\n\n    def counts(self) -> typing.Counter[str | None]:\n        \"\"\"\n        Returns a :class:`python:collections.Counter` object providing counts for each\n        possible :attr:`allele <Variant.alleles>` at this site: 
i.e. the number of\n        samples possessing that allele among the set of samples specified when creating\n        this Variant (by default, this is all the sample nodes in the tree sequence).\n        Missing data is represented by an allelic state of ``None``.\n\n        :return: A counter of the number of samples associated with each allele.\n        \"\"\"\n        counts = collections.Counter()\n        if self.alleles[-1] is None:\n            # we have to treat the last element of the genotypes array as special\n            counts[None] = np.sum(self.genotypes == tskit.MISSING_DATA)\n            for i, allele in enumerate(self.alleles[:-1]):\n                counts[allele] = np.sum(self.genotypes == i)\n        else:\n            bincounts = np.bincount(self.genotypes, minlength=self.num_alleles)\n            for i, allele in enumerate(self.alleles):\n                counts[allele] = bincounts[i]\n        return counts\n\n    def frequencies(self, remove_missing=None) -> dict[str, float]:\n        \"\"\"\n        Return a dictionary mapping each possible :attr:`allele <Variant.alleles>`\n        at this site to the frequency of that allele: i.e. the number of samples\n        with that allele divided by the total number of samples, among the set of\n        samples specified when creating this Variant (by default, this is all the\n        sample nodes in the tree sequence). Note, therefore, that if a restricted set\n        of samples was specified on creation, the allele frequencies returned here\n        will *not* be the global allele frequencies in the whole tree sequence.\n\n        :param bool remove_missing: If True, only samples with non-missing data will\n            be counted in the total number of samples used to calculate the frequency,\n            and no information on the frequency of missing data is returned. 
Otherwise\n            (default), samples with missing data are included when calculating\n            frequencies.\n        :return: A dictionary mapping allelic states to the frequency of each allele\n            among the samples\n        \"\"\"\n        if remove_missing is None:\n            remove_missing = False\n        total = len(self.samples)\n        if remove_missing:\n            total -= self.num_missing\n        if total == 0:\n            logging.warning(\"No non-missing samples at this site, frequencies undefined\")\n        return {\n            allele: count / total if total > 0 else np.nan\n            for allele, count in self.counts().items()\n            if not (allele is None and remove_missing)\n        }\n\n    def __str__(self) -> str:\n        \"\"\"\n        Return a plain text summary of the contents of a variant.\n        \"\"\"\n        try:\n            site_id = util.format_number(self.site.id, sep=\",\")\n            site_position = util.format_number(self.site.position, sep=\",\")\n            counts = self.counts()\n            freqs = self.frequencies()\n            samples = util.format_number(len(self.samples), sep=\",\")\n            num_alleles = util.format_number(self.num_alleles, sep=\",\")\n            rows = (\n                [\n                    [\"Site id\", f\"{site_id}\"],\n                    [\"Site position\", f\"{site_position}\"],\n                    [\"Number of samples\", f\"{samples}\"],\n                    [\"Number of alleles\", f\"{num_alleles}\"],\n                ]\n                + [\n                    [\n                        f\"Samples with allele \"\n                        f\"\"\"{\"missing\" if k is None else \"'\" + k + \"'\"}\"\"\",\n                        f\"{util.format_number(counts[k], sep=',')} \"\n                        f\"({util.format_number(freqs[k] * 100, 2, sep=',')}%)\",\n                    ]\n                    for k in self.alleles\n                ]\n               
 + [\n                    [\"Has missing data\", str(self.has_missing_data)],\n                    [\"Isolated as missing\", str(bool(self.isolated_as_missing))],\n                ]\n            )\n        except ValueError as err:\n            rows = [[str(err), \"\"]]\n        return util.unicode_table(rows, title=\"Variant\")\n\n    def _repr_html_(self) -> str:\n        \"\"\"\n        Return an html summary of a variant. Called by Jupyter notebooks\n        to render a Variant.\n        \"\"\"\n        return util.variant_html(self)\n\n    def __repr__(self):\n        d = {\n            \"site\": self.site,\n            \"samples\": self.samples,\n            \"alleles\": self.alleles,\n            \"genotypes\": self.genotypes,\n            \"has_missing_data\": self.has_missing_data,\n            \"isolated_as_missing\": self.isolated_as_missing,\n        }\n        return f\"Variant({repr(d)})\"\n\n\n#\n# Miscellaneous auxiliary methods.\n#\ndef allele_remap(alleles_from, alleles_to):\n    # Returns an index map from the elements in one list (alleles_from)\n    # to the elements of another list (alleles_to).\n    #\n    # If some elements in alleles_from are not in alleles_to,\n    # then indices outside of alleles_to are used.\n    alleles_to = np.array(alleles_to, dtype=\"U\")\n    alleles_from = np.array(alleles_from, dtype=\"U\")\n    allele_map = np.empty_like(alleles_from, dtype=\"uint32\")\n    overflow = len(alleles_to)\n    for i, allele in enumerate(alleles_from):\n        try:\n            # Use the index of the first matching element.\n            allele_map[i] = np.where(alleles_to == allele)[0][0]\n        except IndexError:\n            allele_map[i] = overflow\n            overflow += 1\n    return allele_map\n"
  },
  {
    "path": "python/tskit/intervals.py",
    "content": "# MIT License\n#\n# Copyright (c) 2023-2025 Tskit Developers\n# Copyright (C) 2020-2021 University of Oxford\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n#\n\"\"\"\nUtilities for working with intervals and interval maps.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport collections.abc\nimport numbers\n\nimport numpy as np\n\nimport tskit\nimport tskit.util as util\n\n\nclass RateMap(collections.abc.Mapping):\n    \"\"\"\n    A class mapping a non-negative rate value to a set of non-overlapping intervals\n    along the genome. Intervals for which the rate is unknown (i.e., missing data)\n    are encoded by NaN values in the ``rate`` array.\n\n    :param list position: A list of :math:`n+1` positions, starting at 0, and ending\n        in the sequence length over which the RateMap will apply.\n    :param list rate: A list of :math:`n` positive rates that apply between each\n        position. 
Intervals with missing data are encoded by NaN values.\n    \"\"\"\n\n    # The args are marked keyword only to give us some flexibility in how we\n    # create this class in the future.\n    def __init__(\n        self,\n        *,\n        position,\n        rate,\n    ):\n        # Making the arrays read-only guarantees rate and cumulative mass stay in sync\n        # We prevent the arrays themselves being overwritten by making self.position,\n        # etc properties.\n\n        # TODO we always coerce the position type to float here, but we may not\n        # want to do this. int32 is a perfectly good choice a lot of the time.\n        self._position = np.array(position, dtype=float)\n        self._position.flags.writeable = False\n        self._rate = np.array(rate, dtype=float)\n        self._rate.flags.writeable = False\n        size = len(self._position)\n        if size < 2:\n            raise ValueError(\"Must have at least two positions\")\n        if len(self._rate) != size - 1:\n            raise ValueError(\n                \"Rate array must have one less entry than the position array\"\n            )\n        if self._position[0] != 0:\n            raise ValueError(\"First position must be zero\")\n\n        span = self.span\n        if np.any(span <= 0):\n            bad_pos = np.where(span <= 0)[0] + 1\n            raise ValueError(\n                f\"Position values not strictly increasing at indexes {bad_pos}\"\n            )\n        if np.any(self._rate < 0):\n            bad_rates = np.where(self._rate < 0)[0]\n            raise ValueError(f\"Rate values negative at indexes {bad_rates}\")\n        self._missing = np.isnan(self.rate)\n        self._num_missing_intervals = np.sum(self._missing)\n        if self._num_missing_intervals == len(self.rate):\n            raise ValueError(\"All intervals are missing data\")\n        # We don't expose the cumulative mass array as a part of the array\n        # API as it's not quite as obvious how it 
lines up for each interval.\n        # It's really the sum of the mass up to but not including the current\n        # interval, which is a bit confusing. Probably best to just leave\n        # it as a function, so that people can sample at regular positions\n        # along the genome anyway, emphasising that it's a continuous function,\n        # not a step function like the other interval attributes.\n        self._cumulative_mass = np.insert(np.nancumsum(self.mass), 0, 0)\n        assert self._cumulative_mass[0] == 0\n        self._cumulative_mass.flags.writeable = False\n\n    @property\n    def left(self):\n        \"\"\"\n        The left position of each interval (inclusive).\n        \"\"\"\n        return self._position[:-1]\n\n    @property\n    def right(self):\n        \"\"\"\n        The right position of each interval (exclusive).\n        \"\"\"\n        return self._position[1:]\n\n    @property\n    def mid(self):\n        \"\"\"\n        Returns the midpoint of each interval.\n        \"\"\"\n        mid = self.left + self.span / 2\n        mid.flags.writeable = False\n        return mid\n\n    @property\n    def span(self):\n        \"\"\"\n        Returns the span (i.e., ``right - left``) of each of the intervals.\n        \"\"\"\n        span = self.right - self.left\n        span.flags.writeable = False\n        return span\n\n    @property\n    def position(self):\n        \"\"\"\n        The breakpoint positions between intervals. This is equal to the\n        :attr:`~.RateMap.left` array with the :attr:`sequence_length`\n        appended.\n        \"\"\"\n        return self._position\n\n    @property\n    def rate(self):\n        \"\"\"\n        The rate associated with each interval. 
Missing data is encoded\n        by NaN values.\n        \"\"\"\n        return self._rate\n\n    @property\n    def mass(self):\n        r\"\"\"\n        The \"mass\" of each interval, defined as the :attr:`~.RateMap.rate`\n        :math:`\\times` :attr:`~.RateMap.span`. This is NaN for intervals\n        containing missing data.\n        \"\"\"\n        return self._rate * self.span\n\n    @property\n    def missing(self):\n        \"\"\"\n        A boolean array encoding whether each interval contains missing data.\n        Equivalent to ``np.isnan(rate_map.rate)``\n        \"\"\"\n        return self._missing\n\n    @property\n    def non_missing(self):\n        \"\"\"\n        A boolean array encoding whether each interval contains non-missing data.\n        Equivalent to ``np.logical_not(np.isnan(rate_map.rate))``\n        \"\"\"\n        return ~self._missing\n\n    #\n    # Interval counts\n    #\n\n    @property\n    def num_intervals(self) -> int:\n        \"\"\"\n        The total number of intervals in this map. 
Equal to\n        :attr:`~.RateMap.num_missing_intervals` +\n        :attr:`~.RateMap.num_non_missing_intervals`.\n        \"\"\"\n        return len(self._rate)\n\n    @property\n    def num_missing_intervals(self) -> int:\n        \"\"\"\n        Returns the number of missing intervals, i.e., those in which the\n        :attr:`~.RateMap.rate` value is a NaN.\n        \"\"\"\n        return self._num_missing_intervals\n\n    @property\n    def num_non_missing_intervals(self) -> int:\n        \"\"\"\n        The number of non missing intervals, i.e., those in which the\n        :attr:`~.RateMap.rate` value is not a NaN.\n        \"\"\"\n        return self.num_intervals - self.num_missing_intervals\n\n    @property\n    def sequence_length(self):\n        \"\"\"\n        The sequence length covered by this map\n        \"\"\"\n        return self.position[-1]\n\n    @property\n    def total_mass(self):\n        \"\"\"\n        The cumulative total mass over the entire map.\n        \"\"\"\n        return self._cumulative_mass[-1]\n\n    @property\n    def mean_rate(self):\n        \"\"\"\n        The mean rate over this map weighted by the span covered by each rate.\n        Unknown intervals are excluded.\n        \"\"\"\n        total_span = np.sum(self.span[self.non_missing])\n        return self.total_mass / total_span\n\n    def get_rate(self, x):\n        \"\"\"\n        Return the rate at the specified list of positions.\n\n        .. 
note:: This function will return a NaN value for any positions\n            that contain missing data.\n\n        :param numpy.ndarray x: The positions for which to return values.\n        :return: An array of rates, the same length as ``x``.\n        :rtype: numpy.ndarray\n        \"\"\"\n        loc = np.searchsorted(self.position, x, side=\"right\") - 1\n        if np.any(loc < 0) or np.any(loc >= len(self.rate)):\n            raise ValueError(\"position out of bounds\")\n        return self.rate[loc]\n\n    def get_cumulative_mass(self, x):\n        \"\"\"\n        Return the cumulative mass of the map up to (but not including) a\n        given point for a list of positions along the map. This is equal to\n        the integral of the rate from 0 to the point.\n\n        :param numpy.ndarray x: The positions for which to return values.\n\n        :return: An array of cumulative mass values, the same length as ``x``\n        :rtype: numpy.ndarray\n        \"\"\"\n        x = np.array(x)\n        if np.any(x < 0) or np.any(x > self.sequence_length):\n            raise ValueError(f\"Cannot have positions < 0 or > {self.sequence_length}\")\n        return np.interp(x, self.position, self._cumulative_mass)\n\n    def find_index(self, x: float) -> int:\n        \"\"\"\n        Returns the index of the interval that the specified position falls within,\n        such that ``rate_map.left[index] <= x < self.rate_map.right[index]``.\n\n        :param float x: The position to search.\n        :return: The index of the interval containing this point.\n        :rtype: int\n        :raises KeyError: if the position is not contained in any of the intervals.\n        \"\"\"\n        if x < 0 or x >= self.sequence_length:\n            raise KeyError(f\"Position {x} out of bounds\")\n        index = np.searchsorted(self.position, x, side=\"left\")\n        if x < self.position[index]:\n            index -= 1\n        assert self.left[index] <= x < self.right[index]\n        
return index\n\n    def missing_intervals(self):\n        \"\"\"\n        Returns the left and right coordinates of the intervals containing\n        missing data in this map as a 2D numpy array\n        with shape (:attr:`~.RateMap.num_missing_intervals`, 2). Each row\n        of this returned array is therefore a ``left``, ``right`` tuple\n        corresponding to the coordinates of the missing intervals.\n\n        :return: A numpy array of the coordinates of intervals containing\n            missing data.\n        :rtype: numpy.ndarray\n        \"\"\"\n        out = np.empty((self.num_missing_intervals, 2))\n        out[:, 0] = self.left[self.missing]\n        out[:, 1] = self.right[self.missing]\n        return out\n\n    def asdict(self):\n        return {\"position\": self.position, \"rate\": self.rate}\n\n    #\n    # Dunder methods. We implement the Mapping protocol via __iter__, __len__\n    # and __getitem__. We have some extra semantics for __getitem__, providing\n    # slice notation.\n    #\n\n    def __iter__(self):\n        # The clinching argument for using mid here is that if we used\n        # left instead we would have\n        #   RateMap([0, 1], [0.1]) == RateMap([0, 100], [0.1])\n        # by the inherited definition of equality since the dictionary items\n        # would be equal.\n        # Similarly, we only return the midpoints of known intervals\n        # because NaN values are not equal, and we would need to do\n        # something to work around this. 
It seems reasonable that\n        # this high-level operation returns the *known* values only\n        # anyway.\n        yield from self.mid[self.non_missing]\n\n    def __len__(self):\n        return np.sum(self.non_missing)\n\n    def __getitem__(self, key):\n        if isinstance(key, slice):\n            if key.step is not None:\n                raise TypeError(\"Only interval slicing is supported\")\n            return self.slice(key.start, key.stop)\n        if isinstance(key, numbers.Number):\n            index = self.find_index(key)\n            if np.isnan(self.rate[index]):\n                # To be consistent with the __iter__ definition above we\n                # don't consider these missing positions to be \"in\" the map.\n                raise KeyError(f\"Position {key} is within a missing interval\")\n            return self.rate[index]\n        # TODO we could implement numpy array indexing here and call\n        # to get_rate. Note we'd need to take care that we return a keyerror\n        # if the returned array contains any nans though.\n        raise KeyError(\"Key {key} not in map\")\n\n    def _text_header_and_rows(self, limit=None):\n        headers = (\"left\", \"right\", \"mid\", \"span\", \"rate\")\n        num_rows = len(self.left)\n        rows = []\n        row_indexes = util.truncate_rows(num_rows, limit)\n        for j in row_indexes:\n            if j == -1:\n                rows.append(f\"__skipped__{num_rows - limit}\")\n            else:\n                rows.append(\n                    [\n                        f\"{self.left[j]:.10g}\",\n                        f\"{self.right[j]:.10g}\",\n                        f\"{self.mid[j]:.10g}\",\n                        f\"{self.span[j]:.10g}\",\n                        f\"{self.rate[j]:.2g}\",\n                    ]\n                )\n        return headers, rows\n\n    def __str__(self):\n        header, rows = self._text_header_and_rows(\n            
limit=tskit._print_options[\"max_lines\"]\n        )\n        table = util.unicode_table(\n            rows=rows,\n            header=header,\n            column_alignments=\"<<>>>\",\n        )\n        return table\n\n    def _repr_html_(self):\n        header, rows = self._text_header_and_rows(\n            limit=tskit._print_options[\"max_lines\"]\n        )\n        return util.html_table(rows, header=header)\n\n    def __repr__(self):\n        return f\"RateMap(position={repr(self.position)}, rate={repr(self.rate)})\"\n\n    #\n    # Methods for building rate maps.\n    #\n\n    def copy(self) -> RateMap:\n        \"\"\"\n        Returns a deep copy of this RateMap.\n        \"\"\"\n        # We take read-only copies of the arrays in the constructor anyway, so\n        # no need for copying.\n        return RateMap(position=self.position, rate=self.rate)\n\n    def slice(self, left=None, right=None, *, trim=False) -> RateMap:  # noqa: A003\n        \"\"\"\n        Returns a subset of this rate map in the specified interval.\n\n        :param float left: The left coordinate (inclusive) of the region to keep.\n            If ``None``, defaults to 0.\n        :param float right: The right coordinate (exclusive) of the region to keep.\n            If ``None``, defaults to the sequence length.\n        :param bool trim: If True, remove the flanking regions such that the\n            sequence length of the new rate map is ``right`` - ``left``. 
If ``False``\n            (default), do not change the coordinate system and mark the flanking\n            regions as \"unknown\".\n        :return: A new RateMap instance\n        :rtype: RateMap\n        \"\"\"\n        left = 0 if left is None else left\n        right = self.sequence_length if right is None else right\n        if not (0 <= left < right <= self.sequence_length):\n            raise KeyError(f\"Invalid slice: left={left}, right={right}\")\n\n        i = self.find_index(left)\n        j = i + np.searchsorted(self.position[i:], right, side=\"right\")\n        if right > self.position[j - 1]:\n            j += 1\n\n        position = self.position[i:j].copy()\n        rate = self.rate[i : j - 1].copy()\n        position[0] = left\n        position[-1] = right\n\n        if trim:\n            # Return trimmed map with changed coords\n            return RateMap(position=position - left, rate=rate)\n\n        # Need to check regions before & after sliced region are filled out:\n        if left != 0:\n            if np.isnan(rate[0]):\n                position[0] = 0  # Extend\n            else:\n                rate = np.insert(rate, 0, np.nan)  # Prepend\n                position = np.insert(position, 0, 0)\n        if right != self.position[-1]:\n            if np.isnan(rate[-1]):\n                position[-1] = self.sequence_length  # Extend\n            else:\n                rate = np.append(rate, np.nan)  # Append\n                position = np.append(position, self.position[-1])\n        return RateMap(position=position, rate=rate)\n\n    @staticmethod\n    def uniform(sequence_length, rate) -> RateMap:\n        \"\"\"\n        Create a uniform rate map\n        \"\"\"\n        return RateMap(position=[0, sequence_length], rate=[rate])\n\n    @staticmethod\n    def read_hapmap(\n        fileobj,\n        sequence_length=None,\n        *,\n        has_header=True,\n        position_col=None,\n        rate_col=None,\n        map_col=None,\n    ):\n 
       # Black barfs with an INTERNAL_ERROR trying to reformat this docstring,\n        # so we explicitly disable reformatting here.\n        # fmt: off\n        \"\"\"\n        Parses the specified file in HapMap format and returns a :class:`.RateMap`.\n        HapMap files must white-space-delimited, and by default are assumed to\n        contain a single header line (which is ignored). Each subsequent line\n        then contains a physical position (in base pairs) and either a genetic\n        map position (in centiMorgans) or a recombination rate (in centiMorgans\n        per megabase). The value in the rate column in a given line gives the\n        constant rate between the physical position in that line (inclusive) and the\n        physical position on the next line (exclusive).\n        By default, the second column of the file is taken\n        as the physical position and the fourth column is taken as the genetic\n        position, as seen in the following sample of the format::\n\n            Chromosome\tPosition(bp)  Rate(cM/Mb)  Map(cM)\n            chr10       48232         0.1614       0.002664\n            chr10       48486         0.1589       0.002705\n            chr10       50009         0.159        0.002947\n            chr10       52147         0.1574       0.003287\n            ...\n            chr10\t133762002     3.358        181.129345\n            chr10\t133766368     0.000        181.144008\n\n        In the example above, the first row has a nonzero genetic map position\n        (last column, cM), implying a nonzero recombination rate before that\n        position, that is assumed to extend to the start of the chromosome\n        (at position 0 bp). However, if the first line has a nonzero bp position\n        (second column) and a zero genetic map position (last column, cM),\n        then the recombination rate before that position is *unknown*, producing\n        :ref:`missing data <sec_rate_maps_missing>`.\n\n        .. 
note::\n            The rows are all assumed to come from the same contig, and the\n            first column is currently ignored. Therefore if you have a single\n            file containing several contigs or chromosomes, you must split\n            it up into multiple files, and pass each one separately to this\n            function.\n\n        :param str fileobj: Filename or file to read. This is passed directly\n            to :func:`numpy.loadtxt`, so if the filename extension is .gz or .bz2,\n            the file is decompressed first\n        :param float sequence_length: The total length of the map. If ``None``,\n            then assume it is the last physical position listed in the file.\n            Otherwise it must be greater than or equal to the last physical\n            position in the file, and the region between the last physical position\n            and the sequence_length is marked as unknown (missing data).\n        :param bool has_header: If True (default), assume the file has a header row\n            and ignore the first line of the file.\n        :param int position_col: The zero-based index of the column in the file\n            specifying the physical position in base pairs. If ``None`` (default)\n            assume an index of 1 (i.e. the second column).\n        :param int rate_col: The zero-based index of the column in the file\n            specifying the rate in cM/Mb. If ``None`` (default) do not use the rate\n            column, but calculate rates using the genetic map positions, as\n            specified in ``map_col``. If the rate column is used, the\n            interval from 0 to the first physical position in the file is marked as\n            unknown, and the last value in the rate column must be zero.\n        :param int map_col: The zero-based index of the column in the file\n            specifying the genetic map position in centiMorgans. If ``None``\n            (default), assume an index of 3 (i.e. the fourth column). 
If the first\n            genetic position is 0 the interval from position 0 to the first\n            physical position in the file is marked as unknown. Otherwise, act\n            as if an additional row, specifying physical position 0 and genetic\n            position 0, exists at the start of the file.\n        :return: A RateMap object.\n        :rtype: RateMap\n        \"\"\"\n        # fmt: on\n        column_defs = {}  # column definitions passed to np.loadtxt\n        if rate_col is None and map_col is None:\n            # Default to map_col\n            map_col = 3\n        elif rate_col is not None and map_col is not None:\n            raise ValueError(\"Cannot specify both rate_col and map_col\")\n        if map_col is not None:\n            column_defs[map_col] = (\"map\", float)\n        else:\n            column_defs[rate_col] = (\"rate\", float)\n        position_col = 1 if position_col is None else position_col\n        if position_col in column_defs:\n            raise ValueError(\n                \"Cannot specify the same columns for position_col and \"\n                \"rate_col or map_col\"\n            )\n        column_defs[position_col] = (\"pos\", int)\n\n        column_names = [c[0] for c in column_defs.values()]\n        column_data = np.loadtxt(\n            fileobj,\n            skiprows=1 if has_header else 0,\n            dtype=list(column_defs.values()),\n            usecols=list(column_defs.keys()),\n            unpack=True,\n        )\n        data = dict(zip(column_names, column_data))\n\n        if \"map\" not in data:\n            assert \"rate\" in data\n            if data[\"rate\"][-1] != 0:\n                raise ValueError(\"The last entry in the 'rate' column must be zero\")\n            pos_Mb = data[\"pos\"] / 1e6\n            map_pos = np.cumsum(data[\"rate\"][:-1] * np.diff(pos_Mb))\n            data[\"map\"] = np.insert(map_pos, 0, 0) / 100\n        else:\n            data[\"map\"] /= 100  # Convert centiMorgans to 
Morgans\n        if len(data[\"map\"]) == 0:\n            raise ValueError(\"Empty hapmap file\")\n\n        # TO DO: read in chrom name from col 0 and poss set as .name\n        # attribute on the RateMap\n\n        physical_positions = data[\"pos\"]\n        genetic_positions = data[\"map\"]\n        start = physical_positions[0]\n        end = physical_positions[-1]\n\n        if genetic_positions[0] > 0 and start == 0:\n            raise ValueError(\n                \"The map distance at the start of the chromosome must be zero\"\n            )\n        if start > 0:\n            physical_positions = np.insert(physical_positions, 0, 0)\n            if genetic_positions[0] > 0:\n                # Exception for a map that starts > 0cM: include the start rate\n                # in the mean\n                start = 0\n            genetic_positions = np.insert(genetic_positions, 0, 0)\n\n        if sequence_length is not None:\n            if sequence_length < end:\n                raise ValueError(\n                    \"The sequence_length cannot be less than the last physical position \"\n                    f\" ({physical_positions[-1]})\"\n                )\n            if sequence_length > end:\n                physical_positions = np.append(physical_positions, sequence_length)\n                genetic_positions = np.append(genetic_positions, genetic_positions[-1])\n\n        assert genetic_positions[0] == 0\n        rate = np.diff(genetic_positions) / np.diff(physical_positions)\n        if start != 0:\n            rate[0] = np.nan\n        if end != physical_positions[-1]:\n            rate[-1] = np.nan\n        return RateMap(position=physical_positions, rate=rate)\n"
  },
  {
    "path": "python/tskit/jit/__init__.py",
    "content": ""
  },
  {
    "path": "python/tskit/jit/numba.py",
    "content": "import functools\nimport os\n\nimport numpy as np\n\nimport tskit\n\ntry:\n    import numba\n\nexcept ImportError:\n    raise ImportError(\n        \"Numba is not installed. Please install it with `pip install numba` \"\n        \"or `conda install numba` to use the tskit.jit.numba module.\"\n    )\n\n\nFORWARD = 1  #: Direction constant for forward tree traversal\nREVERSE = -1  #: Direction constant for reverse tree traversal\n\n# Retrieve these here to avoid lookups in tight loops\nNODE_IS_SAMPLE = tskit.NODE_IS_SAMPLE\nNULL = tskit.NULL\n\nedge_range_spec = [\n    (\"start\", numba.int32),\n    (\"stop\", numba.int32),\n    (\"order\", numba.int32[:]),\n]\n\nparent_index_spec = [\n    (\"edge_index\", numba.int32[:]),\n    (\"index_range\", numba.int32[:, :]),\n]\n\n\n@numba.experimental.jitclass(edge_range_spec)\nclass EdgeRange:\n    \"\"\"\n    Represents a range of edges during tree traversal.\n\n    This class encapsulates information about a contiguous range of edges\n    that are either being removed or added to step from one tree to another.\n    The ``start`` and ``stop`` indices, when applied to the order array,\n    define the ids of edges to process.\n\n    Attributes\n    ----------\n    start : int\n        Starting index of the edge range (inclusive).\n    stop : int\n        Stopping index of the edge range (exclusive).\n    order : numpy.ndarray\n        Array (dtype=np.int32) containing edge IDs in the order they should be processed.\n        The edge ids in this range are order[start:stop].\n    \"\"\"\n\n    def __init__(self, start, stop, order):\n        self.start = start\n        self.stop = stop\n        self.order = order\n\n\n@numba.experimental.jitclass(parent_index_spec)\nclass ParentIndex:\n    \"\"\"\n    Simple data container for parent index information.\n\n    This class provides access to all edges where a given node is the child.\n    Since edges are not sorted by child in the tskit edge table, a custom index\n 
   (edge_index) is built that sorts edge IDs by child node. `index_range`\n    then contains the [start, stop) range of edges for each child node in `edge_index`.\n\n    Attributes\n    ----------\n    edge_index : numpy.ndarray\n        Array (dtype=np.int32) of edge IDs sorted by child node and left coordinate.\n    index_range : numpy.ndarray\n        Array (dtype=np.int32, shape=(num_nodes, 2)) where each row contains the\n        [start, stop) range in edge_index where this node is the child.\n    \"\"\"\n\n    def __init__(self, edge_index, index_range):\n        self.edge_index = edge_index\n        self.index_range = index_range\n\n\nclass TreeIndex:\n    \"\"\"\n    Traverse trees in a numba compatible tree sequence.\n\n    This class provides efficient forward and backward iteration through\n    the trees in a tree sequence. It provides the tree interval,\n    edge changes to create the current tree, along with its sites and mutations.\n    A full pass over the trees using repeated `next` or `prev` requires O(E + M + S) time\n    complexity.\n\n    It should not be instantiated directly, but is returned by the `tree_index` method\n    of `NumbaTreeSequence`.\n\n\n    Attributes\n    ----------\n    ts : NumbaTreeSequence\n        Reference to the tree sequence being traversed.\n    index : int\n        Current tree index. -1 indicates no current tree (null state).\n    direction : int\n        Traversal direction: tskit.FORWARD or tskit.REVERSE. 
tskit.NULL\n        if uninitialised.\n    interval : tuple\n        Genomic interval (left, right) covered by the current tree.\n    in_range : EdgeRange\n        Edges being added to form this current tree, relative to the last state\n    out_range : EdgeRange\n        Edges being removed to form this current tree, relative to the last state\n    site_range : tuple\n        Range of sites in the current tree (start, stop).\n    mutation_range : tuple\n        Range of mutations in the current tree (start, stop).\n\n    Example\n    --------\n    >>> tree_index = numba_ts.tree_index()\n    >>> num_edges = 0\n    >>> while tree_index.next():\n            num_edges += (tree_index.in_range.stop - tree_index.in_range.start)\n            num_edges -= (tree_index.out_range.stop - tree_index.out_range.start)\n            print(f\"Tree {tree_index.index}: {num_edges} edges\")\n    \"\"\"\n\n    def __init__(self, ts):\n        self.ts = ts\n        self.index = -1\n        self.direction = NULL\n        self.interval = (0, 0)\n        self.in_range = EdgeRange(0, 0, np.zeros(0, dtype=np.int32))\n        self.out_range = EdgeRange(0, 0, np.zeros(0, dtype=np.int32))\n        self.site_range = (0, 0)\n        self.mutation_range = (0, 0)\n\n    def set_null(self):\n        \"\"\"\n        Reset the tree index to null state.\n        \"\"\"\n        self.index = -1\n        self.interval = (0, 0)\n        self.site_range = (0, 0)\n        self.mutation_range = (0, 0)\n\n    def next(self):  # noqa: A003\n        \"\"\"\n        Move to the next tree in forward direction.\n\n        Updates the tree index to the next tree in the sequence,\n        computing the edges that need to be added and removed to\n        transform from the previous tree to the current tree.\n        On the first call, this initializes the iterator and moves to tree 0.\n\n        :return: True if successfully moved to next tree, False if the end\n            of the tree sequence is reached. 
When False is returned, the iterator\n            is in null state (index=-1).\n        :rtype: bool\n        \"\"\"\n        M = self.ts.num_edges\n        NS = self.ts.num_sites\n        NM = self.ts.num_mutations\n        breakpoints = self.ts.breakpoints\n        left_coords = self.ts.edges_left\n        left_order = self.ts.indexes_edge_insertion_order\n        right_coords = self.ts.edges_right\n        right_order = self.ts.indexes_edge_removal_order\n        sites_position = self.ts.sites_position\n        mutations_site = self.ts.mutations_site\n\n        if self.index == -1:\n            self.interval = (self.interval[0], 0)\n            self.out_range.stop = 0\n            self.in_range.stop = 0\n            self.direction = FORWARD\n            self.site_range = (0, 0)\n            self.mutation_range = (0, 0)\n\n        if self.direction == FORWARD:\n            left_current_index = self.in_range.stop\n            right_current_index = self.out_range.stop\n        else:\n            left_current_index = self.out_range.stop + 1\n            right_current_index = self.in_range.stop + 1\n\n        left = self.interval[1]\n\n        j = right_current_index\n        self.out_range.start = j\n        while j < M and right_coords[right_order[j]] == left:\n            j += 1\n        self.out_range.stop = j\n        self.out_range.order = right_order\n\n        j = left_current_index\n        self.in_range.start = j\n        while j < M and left_coords[left_order[j]] == left:\n            j += 1\n        self.in_range.stop = j\n        self.in_range.order = left_order\n\n        self.direction = FORWARD\n        self.index += 1\n        if self.index == self.ts.num_trees:\n            self.set_null()\n        else:\n            right = breakpoints[self.index + 1]\n            self.interval = (left, right)\n\n            # Find sites in current tree interval [left, right)\n            old_site_left, old_site_right = self.site_range\n            j = 
old_site_right\n            while j < NS and sites_position[j] < right:\n                j += 1\n            self.site_range = (old_site_right, j)\n\n            # Find mutations for sites in this interval\n            old_mutation_left, old_mutation_right = self.mutation_range\n            k = old_mutation_right\n            while k < NM and mutations_site[k] < j:\n                k += 1\n            self.mutation_range = (old_mutation_right, k)\n\n        return self.index != -1\n\n    def prev(self):\n        \"\"\"\n        Move to the previous tree in reverse direction.\n\n        Updates the tree index to the previous tree in the sequence,\n        computing the edges that need to be added and removed to\n        transform from the next tree to the current tree.\n        On the first call, this initializes the iterator and moves to the most\n        rightward tree.\n\n        :return: True if successfully moved to previous tree, False if the beginning\n            of the tree sequence is reached. 
When False is returned, the iterator\n            is in null state (index=-1).\n        :rtype: bool\n        \"\"\"\n        M = self.ts.num_edges\n        NS = self.ts.num_sites\n        NM = self.ts.num_mutations\n        breakpoints = self.ts.breakpoints\n        right_coords = self.ts.edges_right\n        right_order = self.ts.indexes_edge_removal_order\n        left_coords = self.ts.edges_left\n        left_order = self.ts.indexes_edge_insertion_order\n        sites_position = self.ts.sites_position\n        mutations_site = self.ts.mutations_site\n\n        if self.index == -1:\n            self.index = self.ts.num_trees\n            self.interval = (self.ts.sequence_length, self.interval[1])\n            self.in_range.stop = M - 1\n            self.out_range.stop = M - 1\n            self.direction = REVERSE\n            self.site_range = (NS, NS)\n            self.mutation_range = (NM, NM)\n\n        if self.direction == REVERSE:\n            left_current_index = self.out_range.stop\n            right_current_index = self.in_range.stop\n        else:\n            left_current_index = self.in_range.stop - 1\n            right_current_index = self.out_range.stop - 1\n\n        right = self.interval[0]\n\n        j = left_current_index\n        self.out_range.start = j\n        while j >= 0 and left_coords[left_order[j]] == right:\n            j -= 1\n        self.out_range.stop = j\n        self.out_range.order = left_order\n\n        j = right_current_index\n        self.in_range.start = j\n        while j >= 0 and right_coords[right_order[j]] == right:\n            j -= 1\n        self.in_range.stop = j\n        self.in_range.order = right_order\n\n        self.direction = REVERSE\n        self.index -= 1\n        if self.index == -1:\n            self.set_null()\n        else:\n            left = breakpoints[self.index]\n            self.interval = (left, right)\n\n            # Find sites in current tree interval [left, right) going backward\n            
old_site_left, old_site_right = self.site_range\n            j = old_site_left - 1\n            while j >= 0 and sites_position[j] >= left:\n                j -= 1\n            self.site_range = (j + 1, old_site_left)\n\n            # Find mutations for sites in this interval going backward\n            old_mutation_left, old_mutation_right = self.mutation_range\n            k = old_mutation_left - 1\n            while k >= 0 and mutations_site[k] >= self.site_range[0]:\n                k -= 1\n            self.mutation_range = (k + 1, old_mutation_left)\n\n        return self.index != -1\n\n\nclass NumbaTreeSequence:\n    \"\"\"\n    A Numba-compatible representation of a tree sequence.\n\n    This class provides access to tree sequence data in a form that can be used\n    from within Numba \"njit\" compiled functions. :meth:`jitwrap` should\n    be used to JIT compile this class from a :class:`tskit.TreeSequence` object,\n    before it is passed to a Numba function.\n\n    Attributes\n    ----------\n    num_trees : int\n        Number of trees in the tree sequence.\n    num_nodes : int\n        Number of nodes in the tree sequence.\n    num_samples : int\n        Number of samples in the tree sequence.\n    num_edges : int\n        Number of edges in the tree sequence.\n    num_sites : int\n        Number of sites in the tree sequence.\n    num_mutations : int\n        Number of mutations in the tree sequence.\n    sequence_length : float\n        Total sequence length of the tree sequence.\n    edges_left : numpy.ndarray\n        Array (dtype=np.float64) of left coordinates of edges.\n    edges_right : numpy.ndarray\n        Array (dtype=np.float64) of right coordinates of edges.\n    edges_parent : numpy.ndarray\n        Array (dtype=np.int32) of parent node IDs for each edge.\n    edges_child : numpy.ndarray\n        Array (dtype=np.int32) of child node IDs for each edge.\n    nodes_time : numpy.ndarray\n        Array (dtype=np.float64) of time values for each node.\n  
  nodes_flags : numpy.ndarray\n        Array (dtype=np.uint32) of flag values for each node.\n    nodes_population : numpy.ndarray\n        Array (dtype=np.int32) of population IDs for each node.\n    nodes_individual : numpy.ndarray\n        Array (dtype=np.int32) of individual IDs for each node.\n    individuals_flags : numpy.ndarray\n        Array (dtype=np.uint32) of flag values for each individual.\n    sites_position : numpy.ndarray\n        Array (dtype=np.float64) of positions of sites along the sequence.\n    mutations_site : numpy.ndarray\n        Array (dtype=np.int32) of site IDs for each mutation.\n    mutations_node : numpy.ndarray\n        Array (dtype=np.int32) of node IDs for each mutation.\n    mutations_parent : numpy.ndarray\n        Array (dtype=np.int32) of parent mutation IDs.\n    mutations_time : numpy.ndarray\n        Array (dtype=np.float64) of time values for each mutation.\n    breakpoints : numpy.ndarray\n        Array (dtype=np.float64) of genomic positions where trees change.\n    indexes_edge_insertion_order : numpy.ndarray\n        Array (dtype=np.int32) specifying the order in which edges are inserted\n        during tree building.\n    indexes_edge_removal_order : numpy.ndarray\n        Array (dtype=np.int32) specifying the order in which edges are removed\n        during tree building.\n\n    \"\"\"\n\n    def __init__(\n        self,\n        num_trees,\n        num_nodes,\n        num_samples,\n        num_edges,\n        num_sites,\n        num_mutations,\n        sequence_length,\n        edges_left,\n        edges_right,\n        indexes_edge_insertion_order,\n        indexes_edge_removal_order,\n        individuals_flags,\n        nodes_time,\n        nodes_flags,\n        nodes_population,\n        nodes_individual,\n        edges_parent,\n        edges_child,\n        sites_position,\n        sites_ancestral_state,\n        mutations_site,\n        mutations_node,\n        mutations_parent,\n        mutations_time,\n     
   mutations_derived_state,\n        mutations_inherited_state,\n        breakpoints,\n        max_ancestral_length,\n        max_derived_length,\n        max_inherited_length,\n    ):\n        self.num_trees = num_trees\n        self.num_nodes = num_nodes\n        self.num_samples = num_samples\n        self.num_edges = num_edges\n        self.num_sites = num_sites\n        self.num_mutations = num_mutations\n        self.sequence_length = sequence_length\n        self.edges_left = edges_left\n        self.edges_right = edges_right\n        self.indexes_edge_insertion_order = indexes_edge_insertion_order\n        self.indexes_edge_removal_order = indexes_edge_removal_order\n        self.individuals_flags = individuals_flags\n        self.nodes_time = nodes_time\n        self.nodes_flags = nodes_flags\n        self.nodes_population = nodes_population\n        self.nodes_individual = nodes_individual\n        self.edges_parent = edges_parent\n        self.edges_child = edges_child\n        self.sites_position = sites_position\n        self.sites_ancestral_state = sites_ancestral_state\n        self.mutations_site = mutations_site\n        self.mutations_node = mutations_node\n        self.mutations_parent = mutations_parent\n        self.mutations_time = mutations_time\n        self.mutations_derived_state = mutations_derived_state\n        self.mutations_inherited_state = mutations_inherited_state\n        self.breakpoints = breakpoints\n        self.max_ancestral_length = max_ancestral_length\n        self.max_derived_length = max_derived_length\n        self.max_inherited_length = max_inherited_length\n\n    def tree_index(self):\n        \"\"\"\n        Create a :class:`TreeIndex` for traversing this tree sequence.\n\n        :return: A new tree index initialized to the null tree.\n            Use next() or prev() to move to an actual tree.\n        :rtype: TreeIndex\n        \"\"\"\n        # This method will be overridden when the concrete JIT class TreeIndex\n 
       # is defined in `jitwrap`.\n        return TreeIndex(self)  # pragma: no cover\n\n    def child_index(self):\n        \"\"\"\n        Create child index array for finding child edges of nodes. This operation\n        requires a linear pass over the edge table and therefore has a time\n        complexity of O(E).\n\n        :return: A numpy array (dtype=np.int32, shape=(num_nodes, 2)) where each row\n            contains the [start, stop) range of edges where this node is the parent.\n        :rtype: numpy.ndarray\n        \"\"\"\n        child_range = np.full((self.num_nodes, 2), -1, dtype=np.int32)\n        edges_parent = self.edges_parent\n        if self.num_edges == 0:\n            return child_range\n\n        # Find ranges in tskit edge ordering\n        last_parent = -1\n        for edge_id in range(self.num_edges):\n            parent = edges_parent[edge_id]\n            if parent != last_parent:\n                child_range[parent, 0] = edge_id\n            if last_parent != -1:\n                child_range[last_parent, 1] = edge_id\n            last_parent = parent\n\n        if last_parent != -1:\n            child_range[last_parent, 1] = self.num_edges\n\n        return child_range\n\n    def parent_index(self):\n        \"\"\"\n        Create a :class:`ParentIndex` for finding parent edges of nodes.\n\n        Edges within each child's group are not guaranteed to be in any\n        specific order. 
This operation uses a two-pass algorithm with\n        O(N + E) time complexity and O(N) auxiliary space.\n\n        :return: A new parent index container that can be used to\n            efficiently find all edges where a given node is the child.\n        :rtype: ParentIndex\n        \"\"\"\n        num_nodes = self.num_nodes\n        num_edges = self.num_edges\n        edges_child = self.edges_child\n\n        child_counts = np.zeros(num_nodes, dtype=np.int32)\n        edge_index = np.zeros(num_edges, dtype=np.int32)\n        index_range = np.zeros((num_nodes, 2), dtype=np.int32)\n\n        if num_edges == 0:\n            return ParentIndex(edge_index, index_range)\n\n        # Count how many children each node has\n        for child_node in edges_child:\n            child_counts[child_node] += 1\n\n        # From the counts build the index ranges, we set both the start and the\n        # end index to the start - this lets us use the end index as a tracker\n        # for where we should insert the next edge for that node - when all\n        # edges are done these values will be the correct end values!\n        current_start = 0\n        for i in range(num_nodes):\n            index_range[i, :] = current_start\n            current_start += child_counts[i]\n\n        # Now go over the edges, inserting them at the index pointed to\n        # by the node's current end value, then increment.\n        for edge_id in range(num_edges):\n            child = edges_child[edge_id]\n            pos = index_range[child, 1]\n            edge_index[pos] = edge_id\n            index_range[child, 1] += 1\n\n        return ParentIndex(edge_index, index_range)\n\n\n# We cache these classes to avoid repeated JIT compilation\n@functools.lru_cache(None)\ndef _jitwrap(max_ancestral_length, max_derived_length, max_inherited_length):\n    # We have a circular dependency in JIT compilation between NumbaTreeSequence\n    # and NumbaTreeIndex so we used a deferred type to break it\n    
tree_sequence_type = numba.deferred_type()\n\n    # We run this code on CI with this env var set so we can get coverage\n    # of the jitted functions. EdgeRange doesn't have a class_type\n    # in this case, so we skip the spec entirely.\n    if os.environ.get(\"NUMBA_DISABLE_JIT\") == \"1\":\n        tree_index_spec = []\n    else:\n        tree_index_spec = [\n            (\"ts\", tree_sequence_type),\n            (\"index\", numba.int32),\n            (\"direction\", numba.int32),\n            (\"interval\", numba.types.UniTuple(numba.float64, 2)),\n            (\"in_range\", EdgeRange.class_type.instance_type),\n            (\"out_range\", EdgeRange.class_type.instance_type),\n            (\"site_range\", numba.types.UniTuple(numba.int32, 2)),\n            (\"mutation_range\", numba.types.UniTuple(numba.int32, 2)),\n        ]\n\n    JittedTreeIndex = numba.experimental.jitclass(tree_index_spec)(TreeIndex)\n\n    tree_sequence_spec = [\n        (\"num_trees\", numba.int32),\n        (\"num_nodes\", numba.int32),\n        (\"num_samples\", numba.int32),\n        (\"num_edges\", numba.int32),\n        (\"num_sites\", numba.int32),\n        (\"num_mutations\", numba.int32),\n        (\"sequence_length\", numba.float64),\n        (\"edges_left\", numba.float64[:]),\n        (\"edges_right\", numba.float64[:]),\n        (\"indexes_edge_insertion_order\", numba.int32[:]),\n        (\"indexes_edge_removal_order\", numba.int32[:]),\n        (\"individuals_flags\", numba.uint32[:]),\n        (\"nodes_time\", numba.float64[:]),\n        (\"nodes_flags\", numba.uint32[:]),\n        (\"nodes_population\", numba.int32[:]),\n        (\"nodes_individual\", numba.int32[:]),\n        (\"edges_parent\", numba.int32[:]),\n        (\"edges_child\", numba.int32[:]),\n        (\"sites_position\", numba.float64[:]),\n        (\"sites_ancestral_state\", numba.types.UnicodeCharSeq(max_ancestral_length)[:]),\n        (\"mutations_site\", numba.int32[:]),\n        (\"mutations_node\", 
numba.int32[:]),\n        (\"mutations_parent\", numba.int32[:]),\n        (\"mutations_time\", numba.float64[:]),\n        (\"mutations_derived_state\", numba.types.UnicodeCharSeq(max_derived_length)[:]),\n        (\n            \"mutations_inherited_state\",\n            numba.types.UnicodeCharSeq(max_inherited_length)[:],\n        ),\n        (\"breakpoints\", numba.float64[:]),\n        (\"max_ancestral_length\", numba.int32),\n        (\"max_derived_length\", numba.int32),\n        (\"max_inherited_length\", numba.int32),\n    ]\n\n    # The `tree_index` method on NumbaTreeSequence uses NumbaTreeIndex\n    # which is the uncompiled version of the class. The compiled version isn't\n    # known till now, so replace the method with this definition.\n\n    class _NumbaTreeSequence(NumbaTreeSequence):\n        def tree_index(self):\n            return JittedTreeIndex(self)\n\n    JittedTreeSequence = numba.experimental.jitclass(tree_sequence_spec)(\n        _NumbaTreeSequence\n    )\n\n    # Now both classes are setup we can resolve the deferred type\n    if os.environ.get(\"NUMBA_DISABLE_JIT\") != \"1\":\n        tree_sequence_type.define(JittedTreeSequence.class_type.instance_type)\n\n    return JittedTreeSequence\n\n\ndef jitwrap(ts):\n    \"\"\"\n    Convert a TreeSequence to a Numba-compatible format.\n\n    Creates a NumbaTreeSequence object that can be used within\n    Numba-compiled functions.\n\n    :param tskit.TreeSequence ts: The tree sequence to convert.\n    :return: A Numba-compatible representation of the input tree sequence.\n        Contains all necessary data arrays and metadata for tree traversal.\n    :rtype: NumbaTreeSequence\n    \"\"\"\n    max_ancestral_length = max(1, max(map(len, ts.sites_ancestral_state), default=1))\n    max_derived_length = max(1, max(map(len, ts.mutations_derived_state), default=1))\n    max_inherited_length = max(1, max(map(len, ts.mutations_inherited_state), default=1))\n\n    JittedTreeSequence = _jitwrap(\n        
max_ancestral_length, max_derived_length, max_inherited_length\n    )\n\n    # Create the tree sequence instance\n    numba_ts = JittedTreeSequence(\n        num_trees=ts.num_trees,\n        num_nodes=ts.num_nodes,\n        num_samples=ts.num_samples,\n        num_edges=ts.num_edges,\n        num_sites=ts.num_sites,\n        num_mutations=ts.num_mutations,\n        sequence_length=ts.sequence_length,\n        edges_left=ts.edges_left,\n        edges_right=ts.edges_right,\n        indexes_edge_insertion_order=ts.indexes_edge_insertion_order,\n        indexes_edge_removal_order=ts.indexes_edge_removal_order,\n        individuals_flags=ts.individuals_flags,\n        nodes_time=ts.nodes_time,\n        nodes_flags=ts.nodes_flags,\n        nodes_population=ts.nodes_population,\n        nodes_individual=ts.nodes_individual,\n        edges_parent=ts.edges_parent,\n        edges_child=ts.edges_child,\n        sites_position=ts.sites_position,\n        sites_ancestral_state=ts.sites_ancestral_state.astype(\n            f\"U{max_ancestral_length}\"\n        ),\n        mutations_site=ts.mutations_site,\n        mutations_node=ts.mutations_node,\n        mutations_parent=ts.mutations_parent,\n        mutations_time=ts.mutations_time,\n        mutations_derived_state=ts.mutations_derived_state.astype(\n            f\"U{max_derived_length}\"\n        ),\n        mutations_inherited_state=ts.mutations_inherited_state.astype(\n            f\"U{max_inherited_length}\"\n        ),\n        breakpoints=ts.breakpoints(as_array=True),\n        max_ancestral_length=max_ancestral_length,\n        max_derived_length=max_derived_length,\n        max_inherited_length=max_inherited_length,\n    )\n\n    return numba_ts\n"
  },
  {
    "path": "python/tskit/metadata.py",
    "content": "# MIT License\n#\n# Copyright (c) 2020-2025 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nClasses for metadata decoding, encoding and validation\n\"\"\"\n\nfrom __future__ import annotations\n\nimport abc\nimport builtins\nimport collections\nimport copy\nimport functools\nimport json\nimport pprint\nimport struct\nimport types\nfrom collections.abc import Mapping\nfrom itertools import islice\nfrom typing import Any\n\nimport jsonschema\nimport numpy as np\n\nimport tskit\nimport tskit.exceptions as exceptions\nimport tskit.util as util\n\n__builtins__object__setattr__ = builtins.object.__setattr__\n\n\ndef replace_root_refs(obj):\n    if type(obj) is list:\n        return [replace_root_refs(j) for j in obj]\n    elif type(obj) is dict:\n        ret = {k: replace_root_refs(v) for k, v in obj.items()}\n        if ret.get(\"$ref\") == \"#\":\n            ret[\"$ref\"] = \"#/definitions/root\"\n        return 
ret\n    else:\n        return obj\n\n\n# Our schema is the Draft7Validator schema with added codec information.\nTSKITMetadataSchemaValidator = jsonschema.validators.extend(\n    jsonschema.validators.Draft7Validator\n)\nderef_meta_schema: Mapping[str, Any] = copy.deepcopy(\n    TSKITMetadataSchemaValidator.META_SCHEMA\n)\n# We need a top-level only required property so we need to rewrite any reference\n# to the top-level schema to a copy in a definition.\nderef_meta_schema = replace_root_refs(deref_meta_schema)\nderef_meta_schema[\"definitions\"][\"root\"] = copy.deepcopy(deref_meta_schema)\nderef_meta_schema[\"codec\"] = {\"type\": \"string\"}\nderef_meta_schema[\"required\"] = [\"codec\"]\n# For interoperability reasons, force the top-level to be an object or union\n# of object and null\nderef_meta_schema[\"properties\"][\"type\"] = {\"enum\": [\"object\", [\"object\", \"null\"]]}\n# Change the schema URL to avoid jsonschema's cache\nderef_meta_schema[\"$schema\"] = \"http://json-schema.org/draft-o=07/schema#tskit\"\nTSKITMetadataSchemaValidator.META_SCHEMA = deref_meta_schema\n\n\nclass AbstractMetadataCodec(metaclass=abc.ABCMeta):\n    \"\"\"\n    Superclass of all MetadataCodecs.\n    \"\"\"\n\n    def __init__(self, schema: Mapping[str, Any]) -> None:\n        raise NotImplementedError  # pragma: no cover\n\n    @classmethod\n    def modify_schema(cls, schema: Mapping) -> Mapping:\n        return schema\n\n    @classmethod\n    def is_schema_trivial(cls, schema: Mapping) -> bool:\n        return False\n\n    @abc.abstractmethod\n    def encode(self, obj: Any) -> bytes:\n        raise NotImplementedError  # pragma: no cover\n\n    @abc.abstractmethod\n    def decode(self, encoded: bytes) -> Any:\n        raise NotImplementedError  # pragma: no cover\n\n    def numpy_dtype(self, schema) -> Any:\n        raise NotImplementedError\n\n\ncodec_registry = {}\n\n\ndef register_metadata_codec(\n    codec_cls: type[AbstractMetadataCodec], codec_id: str\n) -> None:\n  
  \"\"\"\n    Register a metadata codec class.\n    This function maintains a mapping from metadata codec identifiers used in schemas\n    to codec classes. When a codec class is registered, it will replace any class\n    previously registered under the same codec identifier, if present.\n\n    :param str codec_id: String to use to refer to the codec in the schema.\n    \"\"\"\n    codec_registry[codec_id] = codec_cls\n\n\nclass JSONCodec(AbstractMetadataCodec):\n    @staticmethod\n    def default_validator(validator, types, instance, schema):\n        # For json codec defaults must be at the top level\n        if validator.is_type(instance, \"object\"):\n            for v in instance.get(\"properties\", {}).values():\n                for v2 in v.get(\"properties\", {}).values():\n                    if \"default\" in v2:\n                        yield jsonschema.ValidationError(\n                            \"Defaults can only be specified at the top level\"\n                            \" for JSON codec\"\n                        )\n\n    schema_validator = jsonschema.validators.extend(\n        TSKITMetadataSchemaValidator, {\"default\": default_validator}\n    )\n\n    @classmethod\n    def is_schema_trivial(cls, schema: Mapping) -> bool:\n        return len(schema.get(\"properties\", {})) == 0\n\n    def __init__(self, schema: Mapping[str, Any]) -> None:\n        try:\n            self.schema_validator.check_schema(schema)\n        except jsonschema.exceptions.SchemaError as ve:\n            raise exceptions.MetadataSchemaValidationError(str(ve)) from ve\n\n        # Find default values to fill in on decode, top level only\n        self.defaults = {\n            key: prop[\"default\"]\n            for key, prop in schema.get(\"properties\", {}).items()\n            if \"default\" in prop\n        }\n\n    def encode(self, obj: Any) -> bytes:\n        try:\n            return tskit.canonical_json(obj).encode()\n        except TypeError as e:\n            raise 
exceptions.MetadataEncodingError(\n                f\"Could not encode metadata of type {str(e).split()[3]}\"\n            )\n\n    def decode(self, encoded: bytes) -> Any:\n        if len(encoded) == 0:\n            result = {}\n        else:\n            result = json.loads(encoded.decode())\n\n        # Assign default values\n        if isinstance(result, dict):\n            return dict(self.defaults, **result)\n        else:\n            return result\n\n\nregister_metadata_codec(JSONCodec, \"json\")\n\n\nclass NOOPCodec(AbstractMetadataCodec):\n    def __init__(self, schema: Mapping[str, Any]) -> None:\n        pass\n\n    def encode(self, data: bytes) -> bytes:\n        return data\n\n    def decode(self, data: bytes) -> bytes:\n        return data\n\n\nclass JSONStructCodec(AbstractMetadataCodec):\n    \"\"\"\n    Pack canonical JSON metadata together with a struct-encoded binary payload.\n    The codec expects a metadata schema with separate ``json`` and ``struct``\n    subschemas and produces a single dict containing the union of the keys from\n    those subschemas after decoding.\n\n    The structure of the encoded metadata is as follows: first, a fixed-size\n    header that contains the number of bytes for both ``json`` and ``struct``\n    portions; next, the json; then a variable number of zeroed padding bytes\n    that brings the length to a multiple of 8 for alignment; and finally\n    the struct-encoded binary portion.\n    \"\"\"\n\n    MAGIC = b\"JBLB\"\n    VERSION = 1\n    _HDR = struct.Struct(\"<4sBQQ\")  # magic, version, json_len, blob_len\n\n    @classmethod\n    def is_schema_trivial(cls, schema: Mapping) -> bool:\n        return False\n\n    def __init__(self, schema: Mapping[str, Any]) -> None:\n        json_schema = schema.get(\"json\")\n        struct_schema = schema.get(\"struct\")\n        if not isinstance(json_schema, Mapping) or not isinstance(\n            struct_schema, Mapping\n        ):\n            raise 
exceptions.MetadataSchemaValidationError(\n                \"json+struct requires 'json' and 'struct' schema mappings\"\n            )\n\n        json_schema = copy.deepcopy(dict(json_schema, codec=\"json\"))\n        struct_schema = copy.deepcopy(dict(struct_schema, codec=\"struct\"))\n\n        try:\n            json_schema = JSONCodec.modify_schema(json_schema)\n            JSONCodec.schema_validator.check_schema(json_schema)\n        except jsonschema.exceptions.SchemaError as ve:\n            raise exceptions.MetadataSchemaValidationError(str(ve)) from ve\n        try:\n            struct_schema = StructCodec.modify_schema(struct_schema)\n            StructCodecSchemaValidator.check_schema(struct_schema)\n        except jsonschema.exceptions.SchemaError as ve:\n            raise exceptions.MetadataSchemaValidationError(str(ve)) from ve\n\n        self.json_schema = json_schema\n        self.struct_schema = struct_schema\n        json_props = self.json_schema.get(\"properties\", {})\n        struct_props = self.struct_schema.get(\"properties\", {})\n        overlap = set(json_props).intersection(struct_props)\n        if overlap:\n            raise exceptions.MetadataSchemaValidationError(\n                \"json and struct schemas must not share property names: \"\n                + \", \".join(sorted(overlap))\n            )\n        for name, sub_schema in (\n            (\"json\", self.json_schema),\n            (\"struct\", self.struct_schema),\n        ):\n            sub_type = sub_schema.get(\"type\")\n            if sub_type is None:\n                continue\n            if isinstance(sub_type, list):\n                is_object = \"object\" in sub_type\n            else:\n                is_object = sub_type == \"object\"\n            if not is_object:\n                raise exceptions.MetadataSchemaValidationError(\n                    f\"{name} subschema must describe an object for json+struct codec\"\n                )\n\n        self.json_codec = 
JSONCodec(self.json_schema)\n        self.struct_codec = StructCodec(self.struct_schema)\n        self._struct_keys = set(struct_props.keys())\n        self._validate_json = TSKITMetadataSchemaValidator(self.json_schema).validate\n        self._validate_struct = TSKITMetadataSchemaValidator(self.struct_schema).validate\n\n    def validate_row(self, row: Any) -> None:\n        if not isinstance(row, dict):\n            raise exceptions.MetadataValidationError(\n                \"json+struct metadata must be a mapping\"\n            )\n        struct_data = {k: v for k, v in row.items() if k in self._struct_keys}\n        json_data = {k: v for k, v in row.items() if k not in self._struct_keys}\n        try:\n            self._validate_json(json_data)\n            self._validate_struct(struct_data)\n        except jsonschema.exceptions.ValidationError as ve:\n            raise exceptions.MetadataValidationError(str(ve)) from ve\n\n    def encode(self, obj: Any) -> bytes:\n        if not isinstance(obj, dict):\n            raise exceptions.MetadataEncodingError(\n                \"json+struct metadata must be a mapping\"\n            )\n        json_bytes = self.json_codec.encode(\n            {k: v for k, v in obj.items() if k not in self._struct_keys}\n        )\n        blob_bytes = self.struct_codec.encode(\n            {k: v for k, v in obj.items() if k in self._struct_keys}\n        )\n        header = self._HDR.pack(\n            self.MAGIC, self.VERSION, len(json_bytes), len(blob_bytes)\n        )\n        padding_bytes = bytes((8 - ((len(header) + len(json_bytes)) % 8)) % 8)\n        return header + json_bytes + padding_bytes + blob_bytes\n\n    def decode(self, encoded: bytes) -> Any:\n        if len(encoded) >= self._HDR.size and encoded[:4] == self.MAGIC:\n            _, version, jlen, blen = self._HDR.unpack_from(encoded)\n            if version != self.VERSION:\n                raise ValueError(\"Unsupported json+struct version\")\n            start = 
self._HDR.size\n            # the second mod 8 is here because if start+jlen % 8 is 0 we get 8, not 0\n            padding_length = (8 - ((start + jlen) % 8)) % 8\n            if (\n                jlen > len(encoded) - start\n                or blen > len(encoded) - start - jlen - padding_length\n            ):\n                raise ValueError(\n                    \"Invalid json+struct payload: declared lengths exceed buffer size\"\n                )\n            json_bytes = encoded[start : start + jlen]\n            blob_bytes = encoded[\n                start + jlen + padding_length : start + jlen + padding_length + blen\n            ]\n            json_data = self.json_codec.decode(json_bytes)\n            struct_data = self.struct_codec.decode(blob_bytes)\n            overlap = set(json_data).intersection(struct_data)\n            if overlap:\n                raise ValueError(\n                    \"json+struct decoded duplicate keys: \" + \", \".join(sorted(overlap))\n                )\n            combined = dict(json_data)\n            combined.update(struct_data)\n            return combined\n        raise ValueError(\"Invalid json+struct payload: missing magic header\")\n\n\nregister_metadata_codec(JSONStructCodec, \"json+struct\")\n\n\ndef binary_format_validator(validator, types, instance, schema):\n    # We're hooking into jsonschemas validation code here, which works by creating\n    # generators of exceptions, hence the yielding\n\n    # Make sure the normal type validation gets done\n    try:\n        yield from jsonschema._validators.type(validator, types, instance, schema)\n    except AttributeError:\n        # Needed since jsonschema==4.19.1\n        yield from jsonschema._keywords.type(validator, types, instance, schema)\n\n    # Non-composite types must have a binaryFormat\n    if validator.is_type(instance, \"object\"):\n        for v in instance.values():\n            if (\n                isinstance(v, dict)\n                and 
v.get(\"type\")\n                not in (None, \"object\", \"array\", \"null\", [\"object\", \"null\"])\n                and \"binaryFormat\" not in v\n            ):\n                yield jsonschema.ValidationError(\n                    f\"{v['type']} type must have binaryFormat set\"\n                )\n    # null type must be padding\n    if (\n        validator.is_type(instance, \"object\")\n        and \"null\" in instance\n        and instance[\"null\"].get(\"type\") == \"null\"\n        and \"binaryFormat\" in instance[\"null\"]\n        and instance[\"null\"][\"binaryFormat\"][-1] != \"x\"\n    ):\n        yield jsonschema.ValidationError(\n            'null type binaryFormat must be padding (\"x\") if set'\n        )\n\n\ndef array_length_validator(validator, types, instance, schema):\n    # Validate that array schema doesn't have both length and\n    # noLengthEncodingExhaustBuffer set. Also ensure that arrayLengthFormat\n    # is not set when length is set.\n\n    # Call the normal properties validator first\n    try:\n        yield from jsonschema._validators.properties(validator, types, instance, schema)\n    except AttributeError:\n        # Needed since jsonschema==4.19.1\n        yield from jsonschema._keywords.properties(validator, types, instance, schema)\n    for prop, sub_schema in instance[\"properties\"].items():\n        if sub_schema.get(\"type\") == \"array\":\n            has_length = \"length\" in sub_schema\n            has_exhaust = sub_schema.get(\"noLengthEncodingExhaustBuffer\", False)\n\n            if has_length and has_exhaust:\n                yield jsonschema.ValidationError(\n                    f\"{prop} array cannot have both 'length' and \"\n                    \"'noLengthEncodingExhaustBuffer' set\"\n                )\n\n            if has_length and \"arrayLengthFormat\" in sub_schema:\n                yield jsonschema.ValidationError(\n                    f\"{prop} fixed-length array should not specify 
'arrayLengthFormat'\"\n                )\n\n            if has_length and sub_schema[\"length\"] < 0:\n                yield jsonschema.ValidationError(\n                    f\"{prop} array length must be non-negative, got \"\n                    f\"{sub_schema['length']}\"\n                )\n\n\ndef required_validator(validator, required, instance, schema):\n    # Do the normal validation\n    try:\n        yield from jsonschema._validators.required(validator, required, instance, schema)\n    except AttributeError:\n        # Needed since jsonschema==4.19.1\n        yield from jsonschema._keywords.required(validator, required, instance, schema)\n\n    # For struct codec if a property is not required, then it must have a default\n    for prop, sub_schema in instance[\"properties\"].items():\n        if prop not in instance[\"required\"] and \"default\" not in sub_schema:\n            yield jsonschema.ValidationError(\n                f\"Optional property '{prop}' must have a default value\"\n            )\n\n\nStructCodecSchemaValidator = jsonschema.validators.extend(\n    TSKITMetadataSchemaValidator,\n    {\n        \"type\": binary_format_validator,\n        \"required\": required_validator,\n        \"properties\": array_length_validator,\n    },\n)\nstruct_meta_schema: Mapping[str, Any] = copy.deepcopy(\n    StructCodecSchemaValidator.META_SCHEMA\n)\n# No union types\nstruct_meta_schema[\"definitions\"][\"root\"][\"properties\"][\"type\"] = {\n    \"$ref\": \"#/definitions/simpleTypes\"\n}\n# No hetrogeneous arrays\nstruct_meta_schema[\"properties\"][\"items\"] = {\"$ref\": \"#/definitions/root\"}\nstruct_meta_schema[\"definitions\"][\"root\"][\"properties\"][\"items\"] = struct_meta_schema[\n    \"properties\"\n][\"items\"]\n# binaryFormat matches regex\nstruct_meta_schema[\"properties\"][\"binaryFormat\"] = {\n    \"type\": \"string\",\n    \"pattern\": 
r\"^([cbB\\?hHiIlLqQfd]|\\d*[spx])$\",\n}\nstruct_meta_schema[\"definitions\"][\"root\"][\"properties\"][\"binaryFormat\"] = (\n    struct_meta_schema[\"properties\"][\"binaryFormat\"]\n)\n# arrayLengthFormat matches regex and has default\nstruct_meta_schema[\"properties\"][\"arrayLengthFormat\"] = {\n    \"type\": \"string\",\n    \"pattern\": r\"^[BHILQ]$\",\n    \"default\": \"L\",\n}\nstruct_meta_schema[\"definitions\"][\"root\"][\"properties\"][\"arrayLengthFormat\"] = (\n    struct_meta_schema[\"properties\"][\"arrayLengthFormat\"]\n)\n# index is numeric\nstruct_meta_schema[\"properties\"][\"index\"] = {\"type\": \"number\"}\nstruct_meta_schema[\"definitions\"][\"root\"][\"properties\"][\"index\"] = struct_meta_schema[\n    \"properties\"\n][\"index\"]\n# stringEncoding is string and has default\nstruct_meta_schema[\"properties\"][\"stringEncoding\"] = {\n    \"type\": \"string\",\n    \"default\": \"utf-8\",\n}\nstruct_meta_schema[\"definitions\"][\"root\"][\"properties\"][\"stringEncoding\"] = (\n    struct_meta_schema[\"properties\"][\"stringEncoding\"]\n)\n# nullTerminated is a boolean\nstruct_meta_schema[\"properties\"][\"nullTerminated\"] = {\"type\": \"boolean\"}\nstruct_meta_schema[\"definitions\"][\"root\"][\"properties\"][\"nullTerminated\"] = (\n    struct_meta_schema[\"properties\"][\"nullTerminated\"]\n)\n# noLengthEncodingExhaustBuffer is a boolean\nstruct_meta_schema[\"properties\"][\"noLengthEncodingExhaustBuffer\"] = {\"type\": \"boolean\"}\nstruct_meta_schema[\"definitions\"][\"root\"][\"properties\"][\n    \"noLengthEncodingExhaustBuffer\"\n] = struct_meta_schema[\"properties\"][\"noLengthEncodingExhaustBuffer\"]\n\n# length is numeric (for fixed-length arrays)\nstruct_meta_schema[\"properties\"][\"length\"] = {\"type\": \"integer\"}\nstruct_meta_schema[\"definitions\"][\"root\"][\"properties\"][\"length\"] = struct_meta_schema[\n    \"properties\"\n][\"length\"]\n\nStructCodecSchemaValidator.META_SCHEMA = struct_meta_schema\n\n\nclass 
StructCodec(AbstractMetadataCodec):\n    \"\"\"\n    Codec that encodes data using struct. Note that this codec has extra restrictions\n    Namely that object keys must be fixed (all present and no extra); each entry should\n    have a binaryFormat; that arrays are homogeneous and that types are not unions.\n    \"\"\"\n\n    @classmethod\n    def order_by_index(cls, obj, do_sort=False):\n        \"\"\"\n        Take a schema and recursively convert any dict that is under the key\n        name ``properties`` to an OrderedDict.\n        \"\"\"\n        if isinstance(obj, collections.abc.Mapping):\n            items = obj.items()\n            if do_sort:\n                # Python sort is stable so we can do the sorts in reverse priority\n                items = sorted(items, key=lambda k_v: k_v[0])\n                items = sorted(items, key=lambda k_v: k_v[1].get(\"index\", 0))\n            items = ((k, cls.order_by_index(v, k == \"properties\")) for k, v in items)\n            if do_sort:\n                return collections.OrderedDict(items)\n            else:\n                return dict(items)\n        elif isinstance(obj, list) or isinstance(obj, tuple):\n            return [cls.order_by_index(v, False) for v in obj]\n        else:\n            return obj\n\n    @classmethod\n    def make_decode(cls, sub_schema):\n        \"\"\"\n        Create a function that can decode objects of this schema\n        \"\"\"\n        if set(sub_schema[\"type\"]) == {\"object\", \"null\"}:\n            return StructCodec.make_object_or_null_decode(sub_schema)\n        else:\n            return {\n                \"array\": StructCodec.make_array_decode,\n                \"object\": StructCodec.make_object_decode,\n                \"string\": StructCodec.make_string_decode,\n                \"null\": StructCodec.make_null_decode,\n                \"number\": StructCodec.make_numeric_decode,\n                \"integer\": StructCodec.make_numeric_decode,\n                
\"boolean\": StructCodec.make_numeric_decode,\n            }[sub_schema[\"type\"]](sub_schema)\n\n    @classmethod\n    def make_array_decode(cls, sub_schema):\n        element_decoder = StructCodec.make_decode(sub_schema[\"items\"])\n        fixed_length = sub_schema.get(\"length\")\n        array_length_f = \"<\" + sub_schema.get(\"arrayLengthFormat\", \"L\")\n        array_length_size = struct.calcsize(array_length_f)\n        exhaust_buffer = sub_schema.get(\"noLengthEncodingExhaustBuffer\", False)\n\n        def array_decode(buffer):\n            array_length = struct.unpack(\n                array_length_f, bytes(islice(buffer, array_length_size))\n            )[0]\n            return [element_decoder(buffer) for _ in range(array_length)]\n\n        def array_decode_exhaust(buffer):\n            ret = []\n            while True:\n                try:\n                    ret.append(element_decoder(buffer))\n                except struct.error as e:\n                    if \"unpack requires a buffer\" in str(e):\n                        break\n                    else:\n                        raise e\n            return ret\n\n        def array_decode_fixed_length(buffer):\n            return [element_decoder(buffer) for _ in range(fixed_length)]\n\n        if fixed_length is not None:\n            return array_decode_fixed_length\n        elif exhaust_buffer:\n            return array_decode_exhaust\n        else:\n            return array_decode\n\n    @classmethod\n    def make_object_decode(cls, sub_schema):\n        sub_decoders = {\n            key: StructCodec.make_decode(prop)\n            for key, prop in sub_schema[\"properties\"].items()\n        }\n        return lambda buffer: {\n            key: sub_decoder(buffer) for key, sub_decoder in sub_decoders.items()\n        }\n\n    @classmethod\n    def make_object_or_null_decode(cls, sub_schema):\n        sub_decoders = {\n            key: StructCodec.make_decode(prop)\n            for key, prop in 
sub_schema[\"properties\"].items()\n        }\n\n        def decode_object_or_null(buffer):\n            # We have to check the buffer length for null, as the islices in\n            # sub-decoders won't raise StopIteration\n            buffer = list(buffer)\n            if len(buffer) == 0:\n                return None\n            else:\n                buffer = iter(buffer)\n                return {\n                    key: sub_decoder(buffer) for key, sub_decoder in sub_decoders.items()\n                }\n\n        return decode_object_or_null\n\n    @classmethod\n    def make_string_decode(cls, sub_schema):\n        f = \"<\" + sub_schema[\"binaryFormat\"]\n        size = struct.calcsize(f)\n        encoding = sub_schema.get(\"stringEncoding\", \"utf-8\")\n        null_terminated = sub_schema.get(\"nullTerminated\", False)\n        if not null_terminated:\n            return lambda buffer: struct.unpack(f, bytes(islice(buffer, size)))[\n                0\n            ].decode(encoding)\n        else:\n\n            def decode_string(buffer):\n                s = struct.unpack(f, bytes(islice(buffer, size)))[0].decode(encoding)\n                i = s.find(\"\\x00\")\n                if i == -1:\n                    return s\n                return s[:i]\n\n            return decode_string\n\n    @classmethod\n    def make_null_decode(cls, sub_schema):\n        if sub_schema.get(\"binaryFormat\") is not None:\n            f = sub_schema[\"binaryFormat\"]\n            size = struct.calcsize(f)\n\n            def padding_decode(buffer):\n                struct.unpack(f, bytes(islice(buffer, size)))\n\n            return padding_decode\n        else:\n            return lambda _: None\n\n    @classmethod\n    def make_numeric_decode(cls, sub_schema):\n        f = \"<\" + sub_schema[\"binaryFormat\"]\n        size = struct.calcsize(f)\n        return lambda buffer: struct.unpack(f, bytes(islice(buffer, size)))[0]\n\n    @classmethod\n    def make_encode(cls, 
sub_schema):\n        \"\"\"\n        Create a function that can encode objects of this schema\n        \"\"\"\n        if set(sub_schema[\"type\"]) == {\"object\", \"null\"}:\n            return StructCodec.make_object_or_null_encode(sub_schema)\n        else:\n            return {\n                \"array\": StructCodec.make_array_encode,\n                \"object\": StructCodec.make_object_encode,\n                \"string\": StructCodec.make_string_encode,\n                \"null\": StructCodec.make_null_encode,\n                \"number\": StructCodec.make_numeric_encode,\n                \"integer\": StructCodec.make_numeric_encode,\n                \"boolean\": StructCodec.make_numeric_encode,\n            }[sub_schema[\"type\"]](sub_schema)\n\n    @classmethod\n    def make_array_encode(cls, sub_schema):\n        element_encoder = StructCodec.make_encode(sub_schema[\"items\"])\n        fixed_length = sub_schema.get(\"length\")\n        array_length_f = \"<\" + sub_schema.get(\"arrayLengthFormat\", \"L\")\n        exhaust_buffer = sub_schema.get(\"noLengthEncodingExhaustBuffer\", False)\n\n        def array_encode_fixed_length(array):\n            if len(array) != fixed_length:\n                raise ValueError(\n                    f\"Array length {len(array)} does not match schema\"\n                    f\" fixed length {fixed_length}\"\n                )\n            return b\"\".join(element_encoder(ele) for ele in array)\n\n        def array_encode_exhaust(array):\n            return b\"\".join(element_encoder(ele) for ele in array)\n\n        def array_encode_with_length(array):\n            try:\n                packed_length = struct.pack(array_length_f, len(array))\n            except struct.error:\n                raise ValueError(\n                    \"Couldn't pack array size - it is likely too long\"\n                    \" for the specified arrayLengthFormat\"\n                )\n            return packed_length + 
b\"\".join(element_encoder(ele) for ele in array)\n\n        if fixed_length is not None:\n            return array_encode_fixed_length\n        elif exhaust_buffer:\n            return array_encode_exhaust\n        else:\n            return array_encode_with_length\n\n    @classmethod\n    def make_object_encode(cls, sub_schema):\n        sub_encoders = {\n            key: StructCodec.make_encode(prop)\n            for key, prop in sub_schema[\"properties\"].items()\n        }\n        defaults = {\n            key: prop[\"default\"]\n            for key, prop in sub_schema[\"properties\"].items()\n            if \"default\" in prop\n        }\n\n        def object_encode(obj):\n            values = []\n            for key, sub_encoder in sub_encoders.items():\n                try:\n                    values.append(sub_encoder(obj[key]))\n                except KeyError:\n                    values.append(sub_encoder(defaults[key]))\n            return b\"\".join(values)\n\n        return object_encode\n\n    @classmethod\n    def make_object_or_null_encode(cls, sub_schema):\n        sub_encoders = {\n            key: StructCodec.make_encode(prop)\n            for key, prop in sub_schema[\"properties\"].items()\n        }\n        defaults = {\n            key: prop[\"default\"]\n            for key, prop in sub_schema[\"properties\"].items()\n            if \"default\" in prop\n        }\n\n        def object_encode(obj):\n            values = []\n            if obj is not None:\n                for key, sub_encoder in sub_encoders.items():\n                    try:\n                        values.append(sub_encoder(obj[key]))\n                    except KeyError:\n                        values.append(sub_encoder(defaults[key]))\n            return b\"\".join(values)\n\n        return object_encode\n\n    @classmethod\n    def make_string_encode(cls, sub_schema):\n        encoding = sub_schema.get(\"stringEncoding\", \"utf-8\")\n        return lambda string: 
struct.pack(\n            \"<\" + sub_schema[\"binaryFormat\"], string.encode(encoding)\n        )\n\n    @classmethod\n    def make_null_encode(cls, sub_schema):\n        return lambda _: struct.pack(sub_schema.get(\"binaryFormat\", \"0x\"))\n\n    @classmethod\n    def make_numeric_encode(cls, sub_schema):\n        return struct.Struct(\"<\" + sub_schema[\"binaryFormat\"]).pack\n\n    @classmethod\n    def modify_schema(cls, schema: Mapping) -> Mapping:\n        # This codec requires that additional properties are\n        # not allowed. Rather than get schema authors to repeat that everywhere\n        # we add it here, sadly we can't do this in the metaschema as \"default\" isn't\n        # used by the validator.\n        def enforce_fixed_properties(obj):\n            if type(obj) is list:\n                return [enforce_fixed_properties(j) for j in obj]\n            elif type(obj) is dict:\n                ret = {k: enforce_fixed_properties(v) for k, v in obj.items()}\n                if \"object\" in ret.get(\"type\", []):\n                    if ret.get(\"additional_properties\"):\n                        raise ValueError(\n                            \"Struct codec does not support additional_properties\"\n                        )\n                    # To prevent authors having to list required properties the default\n                    # is that all without a default are required.\n                    if \"required\" not in ret:\n                        ret[\"required\"] = [\n                            prop\n                            for prop, sub_schema in ret.get(\"properties\", {}).items()\n                            if \"default\" not in sub_schema\n                        ]\n                    ret[\"additionalProperties\"] = False\n                return ret\n            else:\n                return obj\n\n        schema = enforce_fixed_properties(schema)\n\n        # We also give the schema an explicit ordering\n        return 
StructCodec.order_by_index(schema)\n\n    def __init__(self, schema: Mapping[str, Any]) -> None:\n        try:\n            StructCodecSchemaValidator.check_schema(schema)\n        except jsonschema.exceptions.SchemaError as ve:\n            raise exceptions.MetadataSchemaValidationError(str(ve)) from ve\n\n        self.encode = StructCodec.make_encode(schema)\n        decoder = StructCodec.make_decode(schema)\n        self.decode = lambda buffer: decoder(iter(buffer))\n\n    def encode(self, obj: Any) -> bytes:\n        # Set by __init__\n        pass  # pragma: nocover\n\n    def decode(self, encoded: bytes) -> Any:\n        # Set by __init__\n        pass  # pragma: nocover\n\n    def numpy_dtype(self, schema):\n        # Mapping from struct format characters to NumPy dtype strings\n        # Note: All are little-endian as enforced by the struct codec\n        # This means they will be the standard size across platforms\n        FORMAT_TO_DTYPE = {\n            # Boolean\n            \"?\": \"?\",\n            # Integers\n            \"b\": \"i1\",\n            \"B\": \"u1\",\n            \"h\": \"i2\",\n            \"H\": \"u2\",\n            \"i\": \"i4\",\n            \"I\": \"u4\",\n            \"l\": \"i4\",\n            \"L\": \"u4\",\n            \"q\": \"i8\",\n            \"Q\": \"u8\",\n            # Floats\n            \"f\": \"f4\",\n            \"d\": \"f8\",\n            # Single character\n            \"c\": \"S1\",\n        }\n\n        def _convert_binary_format(fmt):\n            if fmt.endswith(\"x\"):\n                if fmt == \"x\":\n                    return \"V1\"\n                n = int(fmt[:-1])\n                return f\"V{n}\"\n\n            if fmt.endswith(\"s\"):\n                if fmt == \"s\":\n                    return \"S1\"\n                n = int(fmt[:-1])\n                return f\"S{n}\"\n\n            if fmt.endswith(\"p\"):\n                raise ValueError(\n                    \"Pascal string format ('p') is not 
supported by NumPy dtypes.\"\n                )\n\n            if fmt in FORMAT_TO_DTYPE:\n                return FORMAT_TO_DTYPE[fmt]\n\n            # As schemas are validated on __init__ this should never happen\n            raise ValueError(f\"Unsupported binary format: {fmt}\")  # pragma: no cover\n\n        def _process_schema_node(node):\n            # The null type with union can only occur at the top-level\n            if set(node.get(\"type\", [])) == {\"object\", \"null\"}:\n                raise ValueError(\"Top level object/null union not supported\")\n            elif node.get(\"type\") == \"object\":\n                fields = []\n                for prop_name, prop_schema in node.get(\"properties\", {}).items():\n                    fields.append((prop_name, _process_schema_node(prop_schema)))\n                return fields\n\n            elif node.get(\"type\") == \"array\":\n                if \"length\" not in node:\n                    raise ValueError(\n                        \"Only fixed-length arrays are supported for NumPy dtype\"\n                        \" conversion. 
Variable-length arrays cannot be represented\"\n                        \" in a structured dtype.\"\n                    )\n\n                length = node[\"length\"]\n                item_dtype = _process_schema_node(node[\"items\"])\n\n                # Return the item dtype with shape information\n                return (item_dtype, (length,))\n\n            elif node.get(\"type\") in (\"number\", \"integer\", \"boolean\", \"string\", \"null\"):\n                fmt = node[\"binaryFormat\"]\n                dtype_str = _convert_binary_format(fmt)\n\n                if dtype_str[0] not in \"VSU?\":\n                    # Don't add endianness to void, string, unicode or bool types\n                    dtype_str = \"<\" + dtype_str\n\n                return dtype_str\n\n        dtype_spec = _process_schema_node(schema)\n        return np.dtype(dtype_spec)\n\n\nregister_metadata_codec(StructCodec, \"struct\")\n\n\ndef validate_bytes(data: bytes | None) -> None:\n    if data is not None and not isinstance(data, bytes):\n        raise TypeError(\n            f\"If no encoding is set metadata should be bytes, found {type(data)}\"\n        )\n\n\nclass MetadataSchema:\n    \"\"\"\n    Class for validating, encoding and decoding metadata.\n\n    :param dict schema: A dict containing a valid JSONSchema object.\n    \"\"\"\n\n    def __init__(self, schema: Mapping[str, Any] | None) -> None:\n        self._schema = schema\n        self._unmodified_schema = schema\n        self._bypass_validation = False\n\n        if schema is None:\n            self._string = \"\"\n            self._validate_row = validate_bytes\n            self.encode_row = NOOPCodec({}).encode\n            self.decode_row = NOOPCodec({}).decode\n            self.empty_value = b\"\"\n            self.codec_instance = NOOPCodec({})\n        else:\n            try:\n                TSKITMetadataSchemaValidator.check_schema(schema)\n            except jsonschema.exceptions.SchemaError as ve:\n              
  raise exceptions.MetadataSchemaValidationError(str(ve)) from ve\n            try:\n                codec_cls = codec_registry[schema[\"codec\"]]\n            except KeyError:\n                raise exceptions.MetadataSchemaValidationError(\n                    f\"Unrecognised metadata codec '{schema['codec']}'. \"\n                    f\"Valid options are {str(list(codec_registry.keys()))}.\"\n                )\n            # Codecs can modify the schema, for example to set defaults as the validator\n            # does not.\n            self._schema = codec_cls.modify_schema(schema)\n            self.codec_instance = codec_cls(self._schema)\n            self._string = tskit.canonical_json(self._schema)\n            self._validate_row = getattr(\n                self.codec_instance,\n                \"validate_row\",\n                TSKITMetadataSchemaValidator(self._schema).validate,\n            )\n            self._bypass_validation = codec_cls.is_schema_trivial(schema)\n            self.encode_row = self.codec_instance.encode\n            self.decode_row = self.codec_instance.decode\n\n            # If None is allowed by the schema as the top-level type, it gets used even\n            # in the presence of default and required values.\n            if \"type\" in self._schema and \"null\" in self._schema[\"type\"]:\n                self.empty_value = None\n            else:\n                self.empty_value = {}\n\n    def __repr__(self) -> str:\n        return self._string\n\n    def __str__(self) -> str:\n        if isinstance(self._schema, collections.OrderedDict):\n            s = pprint.pformat(dict(self._schema))\n        else:\n            s = pprint.pformat(self._schema)\n        if \"\\n\" in s:\n            return f\"tskit.MetadataSchema(\\n{s}\\n)\"\n        else:\n            return f\"tskit.MetadataSchema({s})\"\n\n    def __eq__(self, other) -> bool:\n        return self._string == other._string\n\n    @property\n    def schema(self) -> 
Mapping[str, Any] | None:\n        # Return a copy to avoid unintentional mutation\n        return copy.deepcopy(self._unmodified_schema)\n\n    def asdict(self) -> Mapping[str, Any] | None:\n        \"\"\"\n        Returns a dict representation of this schema. One possible use of this is to\n        modify this dict and then pass it to the ``MetadataSchema`` constructor to create\n        a similar schema.\n        \"\"\"\n        return self.schema\n\n    def validate_and_encode_row(self, row: Any) -> bytes:\n        \"\"\"\n        Validate a row (dict) of metadata against this schema and return the encoded\n        representation (bytes) using the codec specified in the schema.\n        \"\"\"\n        # If the schema is permissive then validation can't fail\n        if not self._bypass_validation:\n            try:\n                self._validate_row(row)\n            except jsonschema.exceptions.ValidationError as ve:\n                raise exceptions.MetadataValidationError(str(ve)) from ve\n        return self.encode_row(row)\n\n    def decode_row(self, row: bytes) -> Any:\n        \"\"\"\n        Decode an encoded row (bytes) of metadata, using the codec specified in the\n        schema and return a python dict. Note that no validation of the metadata against\n        the schema is performed.\n        \"\"\"\n        # Set by __init__\n        pass  # pragma: no cover\n\n    def encode_row(self, row: Any) -> bytes:\n        \"\"\"\n        Encode a row (dict) of metadata to its binary representation (bytes)\n        using the codec specified in the schema. 
Note that unlike\n        :meth:`validate_and_encode_row` no validation against the schema is performed.\n        This should only be used for performance if a validation check is not needed.\n        \"\"\"\n        # Set by __init__\n        pass  # pragma: no cover\n\n    def numpy_dtype(self) -> Any:\n        return self.codec_instance.numpy_dtype(self._schema)\n\n    def structured_array_from_buffer(self, buffer: Any) -> Any:\n        \"\"\"\n        Convert a buffer of metadata into a structured NumPy array.\n        \"\"\"\n        dtype = self.numpy_dtype()\n        return np.frombuffer(buffer, dtype=dtype)\n\n    @staticmethod\n    def permissive_json():\n        \"\"\"\n        The simplest, permissive JSON schema. Only specifies the JSON codec and has\n        no constraints on the properties.\n        \"\"\"\n        return MetadataSchema({\"codec\": \"json\"})\n\n    @staticmethod\n    def null():\n        \"\"\"\n        The null schema which defines no properties and results in raw bytes\n        being returned on accessing metadata column.\n        \"\"\"\n        return MetadataSchema(None)\n\n\n# Often many replicate tree sequences are processed with identical schemas, so cache them\n@functools.lru_cache(maxsize=128)\ndef parse_metadata_schema(encoded_schema: str) -> MetadataSchema:\n    \"\"\"\n    Create a schema object from its string encoding. 
The codec used is\n    determined by the ``codec`` entry of the decoded schema.\n\n    :param str encoded_schema: The string encoded schema.\n    :return: A :class:`MetadataSchema` instance.\n    \"\"\"\n    if encoded_schema == \"\":\n        return MetadataSchema.null()\n    else:\n        try:\n            decoded = json.loads(\n                encoded_schema, object_pairs_hook=collections.OrderedDict\n            )\n        except json.decoder.JSONDecodeError:\n            raise ValueError(f\"Metadata schema is not JSON, found {encoded_schema}\")\n        return MetadataSchema(decoded)\n\n\nclass _CachedMetadata:\n    \"\"\"\n    Descriptor for lazy decoding of metadata on attribute access.\n    \"\"\"\n\n    def __get__(self, row, owner):\n        if row._metadata_decoder is not None:\n            # Some classes that use this are frozen so we need to directly setattr.\n            __builtins__object__setattr__(\n                row, \"_metadata\", row._metadata_decoder(row._metadata)\n            )\n            # Decoder being None indicates that metadata is decoded\n            __builtins__object__setattr__(row, \"_metadata_decoder\", None)\n        return row._metadata\n\n    def __set__(self, row, value):\n        __builtins__object__setattr__(row, \"_metadata\", value)\n\n\ndef lazy_decode(own_init=False):\n    def _lazy_decode(cls):\n        \"\"\"\n        Modifies a dataclass such that it lazily decodes metadata, if it is encoded.\n        If the metadata passed to the constructor is encoded a `metadata_decoder`\n        parameter must also be passed.\n        \"\"\"\n        if not own_init:\n            wrapped_init = cls.__init__\n\n            # Intercept the init to record the decoder\n            def new_init(self, *args, metadata_decoder=None, **kwargs):\n                __builtins__object__setattr__(\n                    self, \"_metadata_decoder\", metadata_decoder\n                )\n                wrapped_init(self, *args, 
**kwargs)\n\n            cls.__init__ = new_init\n\n        # Add a descriptor to the class to decode and cache metadata\n        cls.metadata = _CachedMetadata()\n\n        # Add slots needed to the class\n        slots = cls.__slots__\n        slots.extend([\"_metadata\", \"_metadata_decoder\"])\n        dict_ = dict()\n        sloted_members = dict()\n        for k, v in cls.__dict__.items():\n            if k not in slots:\n                dict_[k] = v\n            elif not isinstance(v, types.MemberDescriptorType):\n                sloted_members[k] = v\n        new_cls = type(cls.__name__, cls.__bases__, dict_)\n        for k, v in sloted_members.items():\n            setattr(new_cls, k, v)\n        return new_cls\n\n    return _lazy_decode\n\n\nclass MetadataProvider:\n    \"\"\"\n    Abstract superclass of container objects that provide metadata.\n    \"\"\"\n\n    def __init__(self, ll_object):\n        self._ll_object = ll_object\n\n    @property\n    def metadata_schema(self) -> MetadataSchema:\n        \"\"\"\n        The :class:`tskit.MetadataSchema` for this object.\n        \"\"\"\n        return parse_metadata_schema(self._ll_object.metadata_schema)\n\n    @metadata_schema.setter\n    def metadata_schema(self, schema: MetadataSchema) -> None:\n        # Check the schema is a valid schema instance by roundtripping it.\n        text_version = repr(schema)\n        parse_metadata_schema(text_version)\n        self._ll_object.metadata_schema = text_version\n\n    @property\n    def metadata(self) -> Any:\n        \"\"\"\n        The decoded metadata for this object.\n        \"\"\"\n        return self.metadata_schema.decode_row(self.metadata_bytes)\n\n    @metadata.setter\n    def metadata(self, metadata: bytes | dict | None) -> None:\n        encoded = self.metadata_schema.validate_and_encode_row(metadata)\n        self._ll_object.metadata = encoded\n\n    @property\n    def metadata_bytes(self) -> Any:\n        \"\"\"\n        The raw bytes of 
metadata for this TableCollection\n        \"\"\"\n        return self._ll_object.metadata\n\n    @property\n    def nbytes(self) -> int:\n        return len(self._ll_object.metadata) + len(self._ll_object.metadata_schema)\n\n    def assert_equals(self, other: MetadataProvider):\n        if self.metadata_schema != other.metadata_schema:\n            raise AssertionError(\n                f\"Metadata schemas differ: self={self.metadata_schema} \"\n                f\"other={other.metadata_schema}\"\n            )\n        if self.metadata != other.metadata:\n            raise AssertionError(\n                f\"Metadata differs: self={self.metadata} other={other.metadata}\"\n            )\n\n\nNOTSET = object()  # Sentinel for unset default values\n\n\nclass TableMetadataReader:\n    # Mixin for table classes that expose decoded metadata\n\n    @property\n    def metadata_schema(self) -> MetadataSchema:\n        \"\"\"\n        The :class:`tskit.MetadataSchema` for this table.\n        \"\"\"\n        # This isn't as inefficient as it looks because we're using an LRU cache on\n        # the parse_metadata_schema function. Thus, we're really only incurring the\n        # cost of creating the unicode string from the low-level schema and looking\n        # up the functools cache.\n        return parse_metadata_schema(self.ll_table.metadata_schema)\n\n    def metadata_vector(self, key, *, dtype=None, default_value=NOTSET):\n        \"\"\"\n        Returns a numpy array of metadata values obtained by extracting ``key``\n        from each metadata entry, and using ``default_value`` if the key is\n        not present. 
``key`` may be a list, in which case nested values are returned.\n        For instance, ``key = [\"a\", \"x\"]`` will return an array of\n        ``row.metadata[\"a\"][\"x\"]`` values, iterated over rows in this table.\n\n        :param str key: The name, or a list of names, of metadata entries.\n        :param str dtype: The dtype of the result (can usually be omitted).\n        :param object default_value: The value to be inserted if the metadata key\n            is not present. Note that for numeric columns, a default value of None\n            will result in a non-numeric array. The default behaviour is to raise\n            ``KeyError`` on missing entries.\n        \"\"\"\n        from collections.abc import Mapping\n\n        if default_value == NOTSET:\n\n            def getter(d, k):\n                return d[k]\n\n        else:\n\n            def getter(d, k):\n                return (\n                    d.get(k, default_value) if isinstance(d, Mapping) else default_value\n                )\n\n        if isinstance(key, list):\n            out = np.array(\n                [functools.reduce(getter, key, row.metadata) for row in self],\n                dtype=dtype,\n            )\n        else:\n            out = np.array(\n                [getter(row.metadata, key) for row in self],\n                dtype=dtype,\n            )\n        return out\n\n    def _make_row(self, *args):\n        return self.row_class(*args, metadata_decoder=self.metadata_schema.decode_row)\n\n\nclass TableMetadataWriter(TableMetadataReader):\n    # Mixin for tables writing metadata\n\n    @TableMetadataReader.metadata_schema.setter\n    def metadata_schema(self, schema: MetadataSchema) -> None:\n        if not isinstance(schema, MetadataSchema):\n            raise TypeError(\n                \"Only instances of tskit.MetadataSchema can be assigned to \"\n                f\"metadata_schema, not {type(schema)}\"\n            )\n        self.ll_table.metadata_schema = 
repr(schema)\n\n    def packset_metadata(self, metadatas):\n        \"\"\"\n        Packs the specified list of metadata values and updates the ``metadata``\n        and ``metadata_offset`` columns. The length of the metadatas array\n        must be equal to the number of rows in the table.\n\n        :param list metadatas: A list of metadata bytes values.\n        \"\"\"\n        packed, offset = util.pack_bytes(metadatas)\n        data = self.asdict()\n        data[\"metadata\"] = packed\n        data[\"metadata_offset\"] = offset\n        self.set_columns(**data)\n\n    def drop_metadata(self, *, keep_schema=False):\n        \"\"\"\n        Drops all metadata in this table. By default, the schema is also cleared,\n        except if ``keep_schema`` is True.\n\n        :param bool keep_schema: True if the current schema should be kept intact.\n        \"\"\"\n        data = self.asdict()\n        data[\"metadata\"] = []\n        data[\"metadata_offset\"][:] = 0\n        self.set_columns(**data)\n        if not keep_schema:\n            self.metadata_schema = MetadataSchema.null()\n"
  },
  {
    "path": "python/tskit/metadata_schema.schema.json",
    "content": "{\n  \"$id\": \"http://json-schema.org/draft-07/schema#\",\n  \"$schema\": \"http://json-schema.org/draft-07/schema#\",\n  \"codec\": {\"type\": \"string\"},\n  \"default\": true,\n  \"definitions\": {\n    \"nonNegativeInteger\": {\"minimum\": 0, \"type\": \"integer\"},\n    \"nonNegativeIntegerDefault0\": {\n      \"allOf\": [{\"$ref\": \"#/definitions/nonNegativeInteger\"}, {\"default\": 0}]\n    },\n    \"root\": {\n      \"$id\": \"http://json-schema.org/draft-07/schema#\",\n      \"$schema\": \"http://json-schema.org/draft-07/schema#\",\n      \"default\": true,\n      \"definitions\": {\n        \"nonNegativeInteger\": {\"minimum\": 0, \"type\": \"integer\"},\n        \"nonNegativeIntegerDefault0\": {\n          \"allOf\": [\n            {\"$ref\": \"#/definitions/nonNegativeInteger\"},\n            {\"default\": 0},\n          ]\n        },\n        \"schemaArray\": {\n          \"items\": {\"$ref\": \"#/definitions/root\"},\n          \"minItems\": 1,\n          \"type\": \"array\",\n        },\n        \"simpleTypes\": {\n          \"enum\": [\"array\", \"boolean\", \"integer\", \"null\", \"number\", \"object\", \"string\",]\n        },\n        \"stringArray\": {\n          \"default\": [],\n          \"items\": {\"type\": \"string\"},\n          \"type\": \"array\",\n          \"uniqueItems\": true,\n        },\n      },\n      \"properties\": {\n        \"$comment\": {\"type\": \"string\"},\n        \"$id\": {\"format\": \"uri-reference\", \"type\": \"string\"},\n        \"$ref\": {\"format\": \"uri-reference\", \"type\": \"string\"},\n        \"$schema\": {\"format\": \"uri\", \"type\": \"string\"},\n        \"additionalItems\": {\"$ref\": \"#/definitions/root\"},\n        \"additionalProperties\": {\"$ref\": \"#/definitions/root\"},\n        \"allOf\": {\"$ref\": \"#/definitions/schemaArray\"},\n        \"anyOf\": {\"$ref\": \"#/definitions/schemaArray\"},\n        \"const\": true,\n        \"contains\": {\"$ref\": 
\"#/definitions/root\"},\n        \"contentEncoding\": {\"type\": \"string\"},\n        \"contentMediaType\": {\"type\": \"string\"},\n        \"default\": true,\n        \"definitions\": {\n          \"additionalProperties\": {\"$ref\": \"#/definitions/root\"},\n          \"default\": {},\n          \"type\": \"object\",\n        },\n        \"dependencies\": {\n          \"additionalProperties\": {\n            \"anyOf\": [{\"$ref\": \"#/definitions/root\"},\n              {\"$ref\": \"#/definitions/stringArray\"},\n            ]\n          },\n          \"type\": \"object\",\n        },\n        \"description\": {\"type\": \"string\"},\n        \"else\": {\"$ref\": \"#/definitions/root\"},\n        \"enum\": {\"items\": true, \"type\": \"array\"},\n        \"examples\": {\"items\": true, \"type\": \"array\"},\n        \"exclusiveMaximum\": {\"type\": \"number\"},\n        \"exclusiveMinimum\": {\"type\": \"number\"},\n        \"format\": {\"type\": \"string\"},\n        \"if\": {\"$ref\": \"#/definitions/root\"},\n        \"items\": {\n          \"anyOf\": [\n            {\"$ref\": \"#/definitions/root\"},\n            {\"$ref\": \"#/definitions/schemaArray\"},\n          ],\n          \"default\": true,\n        },\n        \"maxItems\": {\"$ref\": \"#/definitions/nonNegativeInteger\"},\n        \"maxLength\": {\"$ref\": \"#/definitions/nonNegativeInteger\"},\n        \"maxProperties\": {\"$ref\": \"#/definitions/nonNegativeInteger\"},\n        \"maximum\": {\"type\": \"number\"},\n        \"minItems\": {\"$ref\": \"#/definitions/nonNegativeIntegerDefault0\"},\n        \"minLength\": {\"$ref\": \"#/definitions/nonNegativeIntegerDefault0\"},\n        \"minProperties\": {\"$ref\": \"#/definitions/nonNegativeIntegerDefault0\"},\n        \"minimum\": {\"type\": \"number\"},\n        \"multipleOf\": {\"exclusiveMinimum\": 0, \"type\": \"number\"},\n        \"not\": {\"$ref\": \"#/definitions/root\"},\n        \"oneOf\": {\"$ref\": \"#/definitions/schemaArray\"},\n   
     \"pattern\": {\"format\": \"regex\", \"type\": \"string\"},\n        \"patternProperties\": {\n          \"additionalProperties\": {\"$ref\": \"#/definitions/root\"},\n          \"default\": {},\n          \"propertyNames\": {\"format\": \"regex\"},\n          \"type\": \"object\",\n        },\n        \"properties\": {\n          \"additionalProperties\": {\"$ref\": \"#/definitions/root\"},\n          \"default\": {},\n          \"type\": \"object\",\n        },\n        \"propertyNames\": {\"$ref\": \"#/definitions/root\"},\n        \"readOnly\": {\"default\": false, \"type\": \"boolean\"},\n        \"required\": {\"$ref\": \"#/definitions/stringArray\"},\n        \"then\": {\"$ref\": \"#/definitions/root\"},\n        \"title\": {\"type\": \"string\"},\n        \"type\": {\"enum\": [\"object\"]},\n        \"uniqueItems\": {\"default\": false, \"type\": \"boolean\"},\n      },\n      \"title\": \"Core schema meta-schema\",\n      \"type\": [\"object\", \"boolean\"],\n    },\n    \"schemaArray\": {\n      \"items\": {\"$ref\": \"#/definitions/root\"},\n      \"minItems\": 1,\n      \"type\": \"array\",\n    },\n    \"simpleTypes\": {\n      \"enum\": [\"array\", \"boolean\", \"integer\", \"null\", \"number\", \"object\", \"string\",]\n    },\n    \"stringArray\": {\n      \"default\": [],\n      \"items\": {\"type\": \"string\"},\n      \"type\": \"array\",\n      \"uniqueItems\": true,\n    },\n  },\n  \"properties\": {\n    \"$comment\": {\"type\": \"string\"},\n    \"$id\": {\"format\": \"uri-reference\", \"type\": \"string\"},\n    \"$ref\": {\"format\": \"uri-reference\", \"type\": \"string\"},\n    \"$schema\": {\"format\": \"uri\", \"type\": \"string\"},\n    \"additionalItems\": {\"$ref\": \"#/definitions/root\"},\n    \"additionalProperties\": {\"$ref\": \"#/definitions/root\"},\n    \"allOf\": {\"$ref\": \"#/definitions/schemaArray\"},\n    \"anyOf\": {\"$ref\": \"#/definitions/schemaArray\"},\n    \"const\": true,\n    \"contains\": {\"$ref\": 
\"#/definitions/root\"},\n    \"contentEncoding\": {\"type\": \"string\"},\n    \"contentMediaType\": {\"type\": \"string\"},\n    \"default\": true,\n    \"definitions\": {\n      \"additionalProperties\": {\"$ref\": \"#/definitions/root\"},\n      \"default\": {},\n      \"type\": \"object\",\n    },\n    \"dependencies\": {\n      \"additionalProperties\": {\n        \"anyOf\": [\n          {\"$ref\": \"#/definitions/root\"},\n          {\"$ref\": \"#/definitions/stringArray\"},\n        ]\n      },\n      \"type\": \"object\",\n    },\n    \"description\": {\"type\": \"string\"},\n    \"else\": {\"$ref\": \"#/definitions/root\"},\n    \"enum\": {\"items\": true, \"type\": \"array\"},\n    \"examples\": {\"items\": true, \"type\": \"array\"},\n    \"exclusiveMaximum\": {\"type\": \"number\"},\n    \"exclusiveMinimum\": {\"type\": \"number\"},\n    \"format\": {\"type\": \"string\"},\n    \"if\": {\"$ref\": \"#/definitions/root\"},\n    \"items\": {\n      \"anyOf\": [\n        {\"$ref\": \"#/definitions/root\"},\n        {\"$ref\": \"#/definitions/schemaArray\"},\n      ],\n      \"default\": true,\n    },\n    \"maxItems\": {\"$ref\": \"#/definitions/nonNegativeInteger\"},\n    \"maxLength\": {\"$ref\": \"#/definitions/nonNegativeInteger\"},\n    \"maxProperties\": {\"$ref\": \"#/definitions/nonNegativeInteger\"},\n    \"maximum\": {\"type\": \"number\"},\n    \"minItems\": {\"$ref\": \"#/definitions/nonNegativeIntegerDefault0\"},\n    \"minLength\": {\"$ref\": \"#/definitions/nonNegativeIntegerDefault0\"},\n    \"minProperties\": {\"$ref\": \"#/definitions/nonNegativeIntegerDefault0\"},\n    \"minimum\": {\"type\": \"number\"},\n    \"multipleOf\": {\"exclusiveMinimum\": 0, \"type\": \"number\"},\n    \"not\": {\"$ref\": \"#/definitions/root\"},\n    \"oneOf\": {\"$ref\": \"#/definitions/schemaArray\"},\n    \"pattern\": {\"format\": \"regex\", \"type\": \"string\"},\n    \"patternProperties\": {\n      \"additionalProperties\": {\"$ref\": 
\"#/definitions/root\"},\n      \"default\": {},\n      \"propertyNames\": {\"format\": \"regex\"},\n      \"type\": \"object\",\n    },\n    \"properties\": {\n      \"additionalProperties\": {\"$ref\": \"#/definitions/root\"},\n      \"default\": {},\n      \"type\": {\"enum\": [\"object\", [\"object\", \"null\"]]},\n    },\n    \"propertyNames\": {\"$ref\": \"#/definitions/root\"},\n    \"readOnly\": {\"default\": false, \"type\": \"boolean\"},\n    \"required\": {\"$ref\": \"#/definitions/stringArray\"},\n    \"then\": {\"$ref\": \"#/definitions/root\"},\n    \"title\": {\"type\": \"string\"},\n    \"type\": {\n      \"anyOf\": [\n        {\"$ref\": \"#/definitions/simpleTypes\"},\n        {\n          \"items\": {\"$ref\": \"#/definitions/simpleTypes\"},\n          \"minItems\": 1,\n          \"type\": \"array\",\n          \"uniqueItems\": true,\n        },\n      ]\n    },\n    \"uniqueItems\": {\"default\": false, \"type\": \"boolean\"},\n  },\n  \"required\": [\"codec\"],\n  \"title\": \"Core schema meta-schema\",\n  \"type\": [\"object\", \"boolean\"],\n}\n"
  },
  {
    "path": "python/tskit/provenance.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2024 Tskit Developers\n# Copyright (c) 2016-2017 University of Oxford\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nCommon provenance methods used to determine the state and versions\nof various dependencies and the OS.\n\"\"\"\n\nimport json\nimport os.path\nimport platform\nimport sys\nimport time\n\ntry:\n    import resource\nexcept ImportError:\n    resource = None  # resource.getrusage absent on windows\n\nimport jsonschema\n\nimport _tskit\nimport tskit.exceptions as exceptions\n\nfrom . import _version\n\n__version__ = _version.tskit_version\n\n\n# NOTE: the APIs here are all preliminary. We should have a class that encapsulates\n# all of the required functionality, including parsing and printing out provenance\n# records. 
This will replace the current functions.\n\n\ndef get_environment(extra_libs=None, include_tskit=True):\n    \"\"\"\n    Returns a dictionary describing the environment in which tskit\n    is currently running.\n\n    This API is tentative and will change in the future when a more\n    comprehensive provenance API is implemented.\n    \"\"\"\n    env = {\n        \"os\": {\n            \"system\": platform.system(),\n            \"node\": platform.node(),\n            \"release\": platform.release(),\n            \"version\": platform.version(),\n            \"machine\": platform.machine(),\n        },\n        \"python\": {\n            \"implementation\": platform.python_implementation(),\n            \"version\": platform.python_version(),\n        },\n    }\n    libs = {\"kastore\": {\"version\": \".\".join(map(str, _tskit.get_kastore_version()))}}\n    if include_tskit:\n        libs[\"tskit\"] = {\"version\": __version__}\n    if extra_libs is not None:\n        libs.update(extra_libs)\n    env[\"libraries\"] = libs\n    return env\n\n\ndef get_resources(start_time):\n    # Returns a dict describing the resources used by the current process\n    times = os.times()\n    ret = {\n        \"elapsed_time\": time.time() - start_time,\n        \"user_time\": times.user + times.children_user,\n        \"sys_time\": times.system + times.children_system,\n    }\n    if resource is not None:\n        # Don't report max memory on Windows, we would need an external dep like psutil\n        ret[\"max_memory\"] = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss\n        if sys.platform != \"darwin\":\n            ret[\"max_memory\"] *= 1024  # Linux, freeBSD et al reports in KiB, not bytes\n\n    return ret\n\n\ndef get_provenance_dict(parameters=None):\n    \"\"\"\n    Returns a dictionary encoding an execution of tskit conforming to the\n    provenance schema.\n    \"\"\"\n    document = {\n        \"schema_version\": \"1.0.0\",\n        \"software\": {\"name\": 
\"tskit\", \"version\": __version__},\n        \"parameters\": parameters,\n        \"environment\": get_environment(include_tskit=False),\n    }\n    return document\n\n\n# Cache the schema\n_schema = None\n\n\ndef get_schema():\n    \"\"\"\n    Returns the tskit :ref:`provenance schema <sec_provenance>` as\n    a dict.\n\n    :return: The provenance schema.\n    :rtype: dict\n    \"\"\"\n    global _schema\n    if _schema is None:\n        base = os.path.dirname(__file__)\n        schema_file = os.path.join(base, \"provenance.schema.json\")\n        with open(schema_file) as f:\n            _schema = json.load(f)\n    # Return a copy to avoid issues with modifying the cached schema\n    return dict(_schema)\n\n\ndef validate_provenance(provenance):\n    \"\"\"\n    Validates the specified dict-like object against the tskit\n    :ref:`provenance schema <sec_provenance>`. If the input does\n    not represent a valid instance of the schema an exception is\n    raised.\n\n    :param dict provenance: The dictionary representing a JSON document\n        to be validated against the schema.\n    :raises ProvenanceValidationError: if the provenance does not conform to\n        the schema.\n    \"\"\"\n    schema = get_schema()\n    try:\n        jsonschema.validate(provenance, schema)\n    except jsonschema.exceptions.ValidationError as ve:\n        raise exceptions.ProvenanceValidationError from ve\n"
  },
  {
    "path": "python/tskit/provenance.schema.json",
    "content": "{\n  \"schema\": \"http://json-schema.org/draft-07/schema#\",\n  \"version\": \"1.1.0\",\n  \"title\": \"tskit provenance\",\n  \"description\": \"The combination of software, parameters and environment that produced a tree sequence\",\n  \"type\": \"object\",\n  \"required\": [\"schema_version\", \"software\", \"parameters\", \"environment\"],\n  \"properties\": {\n    \"schema_version\": {\n      \"description\": \"The version of this schema used.\",\n      \"type\": \"string\",\n      \"minLength\": 1\n    },\n    \"software\": {\n      \"description\": \"The primary software used to produce the tree sequence.\",\n      \"type\": \"object\",\n      \"required\": [\"name\", \"version\"],\n      \"properties\": {\n        \"name\": {\n          \"description\": \"The name of the primary software.\",\n          \"type\": \"string\",\n          \"minLength\": 1\n        },\n        \"version\": {\n          \"description\": \"The version of primary software.\",\n          \"type\": \"string\",\n          \"minLength\": 1\n        }\n      }\n    },\n    \"parameters\": {\n      \"description\": \"The parameters used to produce the tree sequence.\",\n      \"type\": \"object\"\n    },\n    \"environment\": {\n      \"description\": \"The computational environment within which the primary software ran.\",\n      \"type\": \"object\",\n      \"properties\": {\n        \"os\": {\n          \"description\": \"Operating system.\",\n          \"type\": \"object\"\n        },\n        \"libraries\": {\n          \"description\": \"Details of libraries the primary software linked against.\",\n          \"type\": \"object\"\n        }\n      }\n    },\n    \"resources\": {\n      \"description\": \"Resources used by this operation.\",\n      \"type\": \"object\",\n      \"properties\": {\n        \"elapsed_time\": {\n          \"description\": \"Wall clock time used in seconds.\",\n          \"type\": \"number\"\n        },\n        \"user_time\": {\n       
   \"description\": \"User time used in seconds.\",\n          \"type\": \"number\"\n        },\n        \"sys_time\": {\n          \"description\": \"System time used in seconds.\",\n          \"type\": \"number\"\n        },\n        \"max_memory\": {\n          \"description\": \"Maximum memory used in bytes.\",\n          \"type\": \"number\"\n        }\n      }\n    }\n  }\n}\n"
  },
  {
    "path": "python/tskit/stats.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2024 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nModule responsible for computing various statistics on tree sequences.\n\"\"\"\n\nimport sys\nimport threading\n\nimport numpy as np\n\nimport _tskit\n\n\nclass LdCalculator:\n    \"\"\"\n    Class for calculating `linkage disequilibrium\n    <https://en.wikipedia.org/wiki/Linkage_disequilibrium>`_ coefficients\n    between pairs of sites in a :class:`TreeSequence`.\n\n    .. note:: This interface is deprecated and a replacement is planned.\n        Please see https://github.com/tskit-dev/tskit/issues/1900 for\n        more information. Note also that the current implementation is\n        quite limited (see warning below).\n\n    .. warning:: This class does not currently support sites that have more than one\n        mutation. 
Using it on such a tree sequence will raise a LibraryError with\n        an \"Only infinite sites mutations supported\" message.\n\n        Silent mutations are also not supported and will result in a LibraryError.\n\n    :param TreeSequence tree_sequence: The tree sequence of interest.\n    \"\"\"\n\n    def __init__(self, tree_sequence):\n        self._tree_sequence = tree_sequence\n        self._ll_ld_calculator = _tskit.LdCalculator(\n            tree_sequence.get_ll_tree_sequence()\n        )\n        # To protect low-level C code, only one method may execute on the\n        # low-level objects at one time.\n        self._instance_lock = threading.Lock()\n\n    def get_r2(self, a, b):\n        # Deprecated alias for r2(a, b)\n        return self.r2(a, b)\n\n    def r2(self, a, b):\n        \"\"\"\n        Returns the value of the :math:`r^2` statistic between the pair of\n        sites at the specified indexes. This method is *not* an efficient\n        method for computing large numbers of pairwise LD values; please use either\n        :meth:`.r2_array` or :meth:`.r2_matrix` for this purpose.\n\n        :param int a: The index of the first site.\n        :param int b: The index of the second site.\n        :return: The value of :math:`r^2` between the sites at indexes\n            ``a`` and ``b``.\n        :rtype: float\n        \"\"\"\n        with self._instance_lock:\n            return self._ll_ld_calculator.get_r2(a, b)\n\n    def get_r2_array(self, a, direction=1, max_mutations=None, max_distance=None):\n        # Deprecated alias for r2_array\n        return self.r2_array(\n            a,\n            direction=direction,\n            max_mutations=max_mutations,\n            max_distance=max_distance,\n        )\n\n    def r2_array(\n        self, a, direction=1, max_mutations=None, max_distance=None, max_sites=None\n    ):\n        \"\"\"\n        Returns the value of the :math:`r^2` statistic between the focal\n        site at index :math:`a` and a 
set of other sites. The method\n        operates by starting at the focal site and iterating over adjacent\n        sites (in either the forward or backwards direction) until either a\n        maximum number of other sites have been considered (using the\n        ``max_sites`` parameter), a maximum distance in sequence\n        coordinates has been reached (using the ``max_distance`` parameter) or\n        the start/end of the sequence has been reached. For every site\n        :math:`b` considered, we then insert the value of :math:`r^2` between\n        :math:`a` and :math:`b` at the corresponding index in an array, and\n        return the entire array. If the returned array is :math:`x` and\n        ``direction`` is :data:`tskit.FORWARD` then :math:`x[0]` is the\n        value of the statistic for :math:`a` and :math:`a + 1`, :math:`x[1]`\n        the value for :math:`a` and :math:`a + 2`, etc. Similarly, if\n        ``direction`` is :data:`tskit.REVERSE` then :math:`x[0]` is the\n        value of the statistic for :math:`a` and :math:`a - 1`, :math:`x[1]`\n        the value for :math:`a` and :math:`a - 2`, etc.\n\n        :param int a: The index of the focal site.\n        :param int direction: The direction in which to travel when\n            examining other sites. Must be either\n            :data:`tskit.FORWARD` or :data:`tskit.REVERSE`. Defaults\n            to :data:`tskit.FORWARD`.\n        :param int max_sites: The maximum number of sites to return\n            :math:`r^2` values for. 
Defaults to as many sites as\n            possible.\n        :param int max_mutations: Deprecated synonym for max_sites.\n        :param float max_distance: The maximum absolute distance between\n            the focal site and those for which :math:`r^2` values\n            are returned.\n        :return: An array of double precision floating point values\n            representing the :math:`r^2` values for sites in the\n            specified direction.\n        :rtype: numpy.ndarray\n        \"\"\"\n        if max_mutations is not None and max_sites is not None:\n            raise ValueError(\"max_mutations is a deprecated synonym for max_sites\")\n        if max_mutations is not None:\n            max_sites = max_mutations\n        max_sites = -1 if max_sites is None else max_sites\n        if max_distance is None:\n            max_distance = sys.float_info.max\n        with self._instance_lock:\n            return self._ll_ld_calculator.get_r2_array(\n                a,\n                direction=direction,\n                max_sites=max_sites,\n                max_distance=max_distance,\n            )\n\n    def get_r2_matrix(self):\n        # Deprecated alias for r2_matrix\n        return self.r2_matrix()\n\n    def r2_matrix(self):\n        \"\"\"\n        Returns the complete :math:`m \\\\times m` matrix of pairwise\n        :math:`r^2` values in a tree sequence with :math:`m` sites.\n\n        :return: A 2-dimensional square array of double precision\n            floating point values representing the :math:`r^2` values for\n            all pairs of sites.\n        :rtype: numpy.ndarray\n        \"\"\"\n        m = self._tree_sequence.num_sites\n        A = np.ones((m, m), dtype=float)\n        for j in range(m - 1):\n            a = self.get_r2_array(j)\n            A[j, j + 1 :] = a\n            A[j + 1 :, j] = a\n        return A\n"
  },
  {
    "path": "python/tskit/tables.py",
    "content": "#\n# MIT License\n#\n# Copyright (c) 2018-2024 Tskit Developers\n# Copyright (c) 2017 University of Oxford\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nTree sequence IO via the tables API.\n\"\"\"\n\nimport collections\nimport dataclasses\nimport datetime\nimport json\nimport numbers\nimport operator\nimport warnings\nfrom dataclasses import dataclass\n\nimport numpy as np\n\nimport _tskit\nimport tskit\nimport tskit.metadata as metadata\nimport tskit.provenance as provenance\nimport tskit.util as util\nfrom tskit import UNKNOWN_TIME\nfrom tskit.exceptions import ImmutableTableError\n\ndataclass_options = {\"frozen\": True}\n\n\ndef _ragged_selection_indices(indexed_offsets, lengths64):\n    \"\"\"\n    Return absolute indices into a ragged column for the provided row selection.\n    \"\"\"\n    total = int(lengths64.sum())\n    if total == 0:\n        return np.empty(0, dtype=np.int64)\n    row_ids = 
np.repeat(np.arange(lengths64.size, dtype=np.int64), lengths64)\n    start_offsets = indexed_offsets.astype(np.int64, copy=False)[row_ids]\n    within_row = np.arange(total, dtype=np.int64) - np.repeat(\n        np.cumsum(lengths64, dtype=np.int64) - lengths64, lengths64\n    )\n    return start_offsets + within_row\n\n\n@metadata.lazy_decode()\n@dataclass(**dataclass_options)\nclass IndividualTableRow(util.Dataclass):\n    \"\"\"\n    A row in an :class:`IndividualTable`.\n    \"\"\"\n\n    __slots__ = [\"flags\", \"location\", \"parents\", \"metadata\"]\n    flags: int\n    \"\"\"\n    See :attr:`Individual.flags`\n    \"\"\"\n    location: np.ndarray\n    \"\"\"\n    See :attr:`Individual.location`\n    \"\"\"\n    parents: np.ndarray\n    \"\"\"\n    See :attr:`Individual.parents`\n    \"\"\"\n    metadata: bytes | dict | None\n    \"\"\"\n    See :attr:`Individual.metadata`\n    \"\"\"\n\n    # We need a custom eq for the numpy arrays\n    def __eq__(self, other):\n        return (\n            isinstance(other, IndividualTableRow)\n            and self.flags == other.flags\n            and np.array_equal(self.location, other.location)\n            and np.array_equal(self.parents, other.parents)\n            and self.metadata == other.metadata\n        )\n\n\n@metadata.lazy_decode()\n@dataclass(**dataclass_options)\nclass NodeTableRow(util.Dataclass):\n    \"\"\"\n    A row in a :class:`NodeTable`.\n    \"\"\"\n\n    __slots__ = [\"flags\", \"time\", \"population\", \"individual\", \"metadata\"]\n    flags: int\n    \"\"\"\n    See :attr:`Node.flags`\n    \"\"\"\n    time: float\n    \"\"\"\n    See :attr:`Node.time`\n    \"\"\"\n    population: int\n    \"\"\"\n    See :attr:`Node.population`\n    \"\"\"\n    individual: int\n    \"\"\"\n    See :attr:`Node.individual`\n    \"\"\"\n    metadata: bytes | dict | None\n    \"\"\"\n    See :attr:`Node.metadata`\n    \"\"\"\n\n\n@metadata.lazy_decode()\n@dataclass(**dataclass_options)\nclass 
EdgeTableRow(util.Dataclass):\n    \"\"\"\n    A row in an :class:`EdgeTable`.\n    \"\"\"\n\n    __slots__ = [\"left\", \"right\", \"parent\", \"child\", \"metadata\"]\n    left: float\n    \"\"\"\n    See :attr:`Edge.left`\n    \"\"\"\n    right: float\n    \"\"\"\n    See :attr:`Edge.right`\n    \"\"\"\n    parent: int\n    \"\"\"\n    See :attr:`Edge.parent`\n    \"\"\"\n    child: int\n    \"\"\"\n    See :attr:`Edge.child`\n    \"\"\"\n    metadata: bytes | dict | None\n    \"\"\"\n    See :attr:`Edge.metadata`\n    \"\"\"\n\n\n@metadata.lazy_decode()\n@dataclass(**dataclass_options)\nclass MigrationTableRow(util.Dataclass):\n    \"\"\"\n    A row in a :class:`MigrationTable`.\n    \"\"\"\n\n    __slots__ = [\"left\", \"right\", \"node\", \"source\", \"dest\", \"time\", \"metadata\"]\n    left: float\n    \"\"\"\n    See :attr:`Migration.left`\n    \"\"\"\n    right: float\n    \"\"\"\n    See :attr:`Migration.right`\n    \"\"\"\n    node: int\n    \"\"\"\n    See :attr:`Migration.node`\n    \"\"\"\n    source: int\n    \"\"\"\n    See :attr:`Migration.source`\n    \"\"\"\n    dest: int\n    \"\"\"\n    See :attr:`Migration.dest`\n    \"\"\"\n    time: float\n    \"\"\"\n    See :attr:`Migration.time`\n    \"\"\"\n    metadata: bytes | dict | None\n    \"\"\"\n    See :attr:`Migration.metadata`\n    \"\"\"\n\n\n@metadata.lazy_decode()\n@dataclass(**dataclass_options)\nclass SiteTableRow(util.Dataclass):\n    \"\"\"\n    A row in a :class:`SiteTable`.\n    \"\"\"\n\n    __slots__ = [\"position\", \"ancestral_state\", \"metadata\"]\n    position: float\n    \"\"\"\n    See :attr:`Site.position`\n    \"\"\"\n    ancestral_state: str\n    \"\"\"\n    See :attr:`Site.ancestral_state`\n    \"\"\"\n    metadata: bytes | dict | None\n    \"\"\"\n    See :attr:`Site.metadata`\n    \"\"\"\n\n\n@metadata.lazy_decode()\n@dataclass(**dataclass_options)\nclass MutationTableRow(util.Dataclass):\n    \"\"\"\n    A row in a :class:`MutationTable`.\n    \"\"\"\n\n    __slots__ 
= [\"site\", \"node\", \"derived_state\", \"parent\", \"metadata\", \"time\"]\n    site: int\n    \"\"\"\n    See :attr:`Mutation.site`\n    \"\"\"\n    node: int\n    \"\"\"\n    See :attr:`Mutation.node`\n    \"\"\"\n    derived_state: str\n    \"\"\"\n    See :attr:`Mutation.derived_state`\n    \"\"\"\n    parent: int\n    \"\"\"\n    See :attr:`Mutation.parent`\n    \"\"\"\n    metadata: bytes | dict | None\n    \"\"\"\n    See :attr:`Mutation.metadata`\n    \"\"\"\n    time: float\n    \"\"\"\n    See :attr:`Mutation.time`\n    \"\"\"\n\n    # We need a custom eq here as we have unknown times (nans) to check\n    def __eq__(self, other):\n        return (\n            isinstance(other, MutationTableRow)\n            and self.site == other.site\n            and self.node == other.node\n            and self.derived_state == other.derived_state\n            and self.parent == other.parent\n            and self.metadata == other.metadata\n            and (\n                self.time == other.time\n                or (util.is_unknown_time(self.time) and util.is_unknown_time(other.time))\n            )\n        )\n\n\n@metadata.lazy_decode()\n@dataclass(**dataclass_options)\nclass PopulationTableRow(util.Dataclass):\n    \"\"\"\n    A row in a :class:`PopulationTable`.\n    \"\"\"\n\n    __slots__ = [\"metadata\"]\n    metadata: bytes | dict | None\n    \"\"\"\n    See :attr:`Population.metadata`\n    \"\"\"\n\n\n@dataclass(**dataclass_options)\nclass ProvenanceTableRow(util.Dataclass):\n    \"\"\"\n    A row in a :class:`ProvenanceTable`.\n    \"\"\"\n\n    __slots__ = [\"timestamp\", \"record\"]\n    timestamp: str\n    \"\"\"\n    See :attr:`Provenance.timestamp`\n    \"\"\"\n    record: str\n    \"\"\"\n    See :attr:`Provenance.record`\n    \"\"\"\n\n\n@dataclass(**dataclass_options)\nclass TableCollectionIndexes(util.Dataclass):\n    \"\"\"\n    A class encapsulating the indexes of a :class:`TableCollection`\n    \"\"\"\n\n    edge_insertion_order: np.ndarray 
= None\n    edge_removal_order: np.ndarray = None\n\n    def asdict(self):\n        return {k: v for k, v in dataclasses.asdict(self).items() if v is not None}\n\n    @property\n    def nbytes(self) -> int:\n        \"\"\"\n        The number of bytes taken by the indexes\n        \"\"\"\n        total = 0\n        if self.edge_removal_order is not None:\n            total += self.edge_removal_order.nbytes\n        if self.edge_insertion_order is not None:\n            total += self.edge_insertion_order.nbytes\n        return total\n\n\ndef keep_with_offset(keep, data, offset):\n    \"\"\"\n    Used when filtering _offset columns in tables\n    \"\"\"\n    # We need the astype here for 32 bit machines\n    lens = np.diff(offset).astype(np.int32)\n    return (\n        data[np.repeat(keep, lens)],\n        np.concatenate(\n            [\n                np.array([0], dtype=offset.dtype),\n                np.cumsum(lens[keep], dtype=offset.dtype),\n            ]\n        ),\n    )\n\n\nclass BaseTable:\n    # Base class for all tables, with only immutable methods,\n    # or those that don't use separate low-level table implementations.\n\n    # The list of columns in the table. Must be set by subclasses.\n    column_names = []\n    mutable = None\n\n    def _check_required_args(self, **kwargs):\n        for k, v in kwargs.items():\n            if v is None:\n                raise TypeError(f\"{k} is required\")\n\n    @property\n    def nbytes(self) -> int:\n        \"\"\"\n        Returns the total number of bytes required to store the data\n        in this table. Note that this may not be equal to\n        the actual memory footprint.\n        \"\"\"\n        # It's not ideal that we run asdict() here to do this as we're\n        # currently creating copies of the column arrays, so it would\n        # be more efficient to have dedicated low-level methods. 
However,\n        # if we do have read-only views on the underlying memory for the\n        # column arrays then this will be a perfectly good way of\n        # computing the nbytes values and the overhead minimal.\n        d = self.asdict()\n        nbytes = 0\n        # Some tables don't have a metadata_schema\n        metadata_schema = d.pop(\"metadata_schema\", None)\n        if metadata_schema is not None:\n            nbytes += len(metadata_schema.encode())\n        nbytes += sum(col.nbytes for col in d.values())\n        return nbytes\n\n    def _equals_internal(self, other, ignore_metadata=False, *, ignore_timestamps=False):\n\n        if self is other:\n            return True\n\n        if not isinstance(other, BaseTable) or self.table_name != other.table_name:\n            return False\n\n        # Can only use mutable fast path if both tables are mutable\n        base = self\n        if self.mutable and not other.mutable:\n            base = other\n            other = self\n        return base._fast_equals(\n            other, ignore_metadata=ignore_metadata, ignore_timestamps=ignore_timestamps\n        )\n\n    def equals(self, other, ignore_metadata=False):\n        \"\"\"\n        Returns True if  `self` and `other` are equal. 
By default, two tables\n        are considered equal if their columns and metadata schemas are\n        byte-for-byte identical.\n\n        :param other: Another table instance\n        :param bool ignore_metadata: If True exclude metadata and metadata schemas\n            from the comparison.\n        :return: True if other is equal to this table; False otherwise.\n        :rtype: bool\n        \"\"\"\n        return self._equals_internal(\n            other, ignore_metadata=ignore_metadata, ignore_timestamps=False\n        )\n\n    def _assert_equals_internal(\n        self, other, *, ignore_metadata=False, ignore_timestamps=False\n    ):\n        if self is other:\n            return\n        if not isinstance(other, BaseTable) or self.table_name != other.table_name:\n            raise AssertionError(f\"Types differ: self={type(self)} other={type(other)}\")\n\n        if not self._equals_internal(\n            other, ignore_metadata=ignore_metadata, ignore_timestamps=ignore_timestamps\n        ):\n            self._assert_equals(\n                other,\n                ignore_metadata=ignore_metadata,\n                ignore_timestamps=ignore_timestamps,\n            )\n\n    def assert_equals(self, other, *, ignore_metadata=False):\n        \"\"\"\n        Raise an AssertionError for the first found difference between\n        this and another table of the same type.\n\n        :param other: Another table instance\n        :param bool ignore_metadata: If True exclude metadata and metadata schemas\n            from the comparison.\n        \"\"\"\n        self._assert_equals_internal(\n            other, ignore_metadata=ignore_metadata, ignore_timestamps=False\n        )\n\n    def _assert_equals(self, other, *, ignore_metadata=False, ignore_timestamps=False):\n        if (\n            not ignore_metadata\n            and hasattr(self, \"metadata_schema\")\n            and hasattr(other, \"metadata_schema\")\n            and self.metadata_schema != 
other.metadata_schema\n        ):\n            raise AssertionError(\n                f\"{type(self).__name__} metadata schemas differ: \"\n                f\"self={self.metadata_schema} \"\n                f\"other={other.metadata_schema}\"\n            )\n\n        for n, (row_self, row_other) in enumerate(zip(self, other)):\n            if ignore_metadata:\n                row_self = dataclasses.replace(row_self, metadata=None)\n                row_other = dataclasses.replace(row_other, metadata=None)\n            if ignore_timestamps:\n                row_self = dataclasses.replace(row_self, timestamp=None)\n                row_other = dataclasses.replace(row_other, timestamp=None)\n            if row_self != row_other:\n                self_dict = dataclasses.asdict(self[n])\n                other_dict = dataclasses.asdict(other[n])\n                diff_string = []\n                for col in self_dict.keys():\n                    if ignore_timestamps and col == \"timestamp\":\n                        continue\n                    if isinstance(self_dict[col], np.ndarray):\n                        equal = np.array_equal(self_dict[col], other_dict[col])\n                    else:\n                        equal = self_dict[col] == other_dict[col]\n                    if not equal:\n                        diff_string.append(\n                            f\"self.{col}={self_dict[col]} other.{col}={other_dict[col]}\"\n                        )\n                diff_string = \"\\n\".join(diff_string)\n                raise AssertionError(\n                    f\"{type(self).__name__} row {n} differs:\\n{diff_string}\"\n                )\n\n        if self.num_rows != other.num_rows:\n            raise AssertionError(\n                f\"{type(self).__name__} number of rows differ: self={self.num_rows} \"\n                f\"other={other.num_rows}\"\n            )\n\n        # We can reach this point if the metadata schemas byte representations\n        # differ 
when the decoded schema is the same\n        if (\n            not ignore_metadata\n            and hasattr(self, \"ll_table\")\n            and hasattr(other, \"ll_table\")\n            and self.ll_table.metadata_schema != other.ll_table.metadata_schema\n            and self.metadata_schema == other.metadata_schema\n        ):\n            return\n\n        raise AssertionError(\n            \"Tables differ in an undetected way - \"\n            \"this is a bug, please report an issue on github\"\n        )  # pragma: no cover\n\n    def __eq__(self, other):\n        return self.equals(other)\n\n    def __len__(self):\n        return self.num_rows\n\n    def asdict(self):\n        \"\"\"\n        Returns a dictionary mapping the names of the columns in this table\n        to the corresponding numpy arrays.\n        \"\"\"\n        ret = {col: getattr(self, col) for col in self.column_names}\n        # Not all tables have metadata\n        try:\n            ret[\"metadata_schema\"] = repr(self.metadata_schema)\n        except AttributeError:\n            pass\n        return ret\n\n    def __str__(self):\n        headers, rows = self._text_header_and_rows(\n            limit=tskit._print_options[\"max_lines\"]\n        )\n        return util.unicode_table(rows, header=headers, row_separator=False)\n\n    def _repr_html_(self):\n        \"\"\"\n        Called e.g. 
by jupyter notebooks to render tables\n        \"\"\"\n        headers, rows = self._text_header_and_rows(\n            limit=tskit._print_options[\"max_lines\"]\n        )\n        return util.html_table(rows, header=headers)\n\n    def _columns_all_integer(self, *colnames):\n        # For displaying floating point values without loads of decimal places\n        return all(\n            np.all(getattr(self, col) == np.floor(getattr(self, col)))\n            for col in colnames\n        )\n\n    def _text_header_and_rows(self, limit=None):\n        \"\"\"\n        Returns headers and rows for table display.\n        \"\"\"\n        # Generate headers: \"id\" + column names (excluding offset columns)\n        display_columns = [\n            col for col in self.column_names if not col.endswith(\"_offset\")\n        ]\n        headers = (\"id\",) + tuple(display_columns)\n\n        rows = []\n        row_indexes = util.truncate_rows(self.num_rows, limit)\n\n        float_columns = {}\n        for col in display_columns:\n            arr = getattr(self, col)\n            if np.issubdtype(arr.dtype, np.floating):\n                float_columns[col] = 0 if self._columns_all_integer(col) else 8\n\n        for j in row_indexes:\n            if j == -1:\n                rows.append(f\"__skipped__{self.num_rows - limit}\")\n            else:\n                row = self[j]\n                formatted_values = [f\"{j:,}\"]  # ID column\n                for col in display_columns:\n                    value = getattr(row, col)\n                    if col == \"metadata\":\n                        formatted_values.append(util.render_metadata(value))\n                    elif col in [\"location\", \"parents\"]:\n                        # Array columns - join with commas\n                        if col == \"parents\":\n                            formatted_values.append(\", \".join([f\"{p:,}\" for p in value]))\n                        else:\n                            
formatted_values.append(\", \".join(map(str, value)))\n                    elif col in float_columns:\n                        dp = float_columns[col]\n                        formatted_values.append(f\"{value:,.{dp}f}\")\n                    elif isinstance(value, (int, np.integer)):\n                        formatted_values.append(f\"{value:,}\")\n                    else:\n                        formatted_values.append(str(value))\n                rows.append(formatted_values)\n        return headers, rows\n\n\ndef _assert_table_collections_equal(\n    tc1,\n    tc2,\n    *,\n    ignore_metadata=False,\n    ignore_ts_metadata=False,\n    ignore_provenance=False,\n    ignore_timestamps=False,\n    ignore_reference_sequence=False,\n    ignore_tables=False,\n):\n    # This is shared between TableCollection and ImmutableTableCollection,\n    # could go in a base class, but there's not much else in common\n\n    if not (ignore_metadata or ignore_ts_metadata):\n        if tc1.metadata_schema != tc2.metadata_schema:\n            raise AssertionError(\n                f\"Metadata schemas differ: self={tc1.metadata_schema} \"\n                f\"other={tc2.metadata_schema}\"\n            )\n        if tc1.metadata != tc2.metadata:\n            raise AssertionError(\n                f\"Metadata differs: self={tc1.metadata} other={tc2.metadata}\"\n            )\n\n    if not ignore_reference_sequence:\n        tc1.reference_sequence.assert_equals(\n            tc2.reference_sequence, ignore_metadata=ignore_metadata\n        )\n\n    if tc1.time_units != tc2.time_units:\n        raise AssertionError(\n            f\"Time units differs: self={tc1.time_units} other={tc2.time_units}\"\n        )\n\n    if tc1.sequence_length != tc2.sequence_length:\n        raise AssertionError(\n            f\"Sequence Length differs: self={tc1.sequence_length} \"\n            f\"other={tc2.sequence_length}\"\n        )\n\n    if not ignore_tables:\n        for table_name, table in 
tc1.table_name_map.items():\n            if table_name == \"provenances\":\n                continue\n            other_table = getattr(tc2, table_name)\n            if isinstance(table, ImmutableBaseTable):\n                table.assert_equals(other_table, ignore_metadata=ignore_metadata)\n            elif isinstance(other_table, ImmutableBaseTable):\n                other_table.assert_equals(table, ignore_metadata=ignore_metadata)\n            else:\n                table.assert_equals(other_table, ignore_metadata=ignore_metadata)\n\n    if not ignore_provenance and not ignore_tables:\n        prov1 = tc1.provenances\n        prov2 = tc2.provenances\n        if isinstance(prov1, ImmutableProvenanceTable):\n            prov1.assert_equals(prov2, ignore_timestamps=ignore_timestamps)\n        elif isinstance(prov2, ImmutableProvenanceTable):\n            prov2.assert_equals(prov1, ignore_timestamps=ignore_timestamps)\n        else:\n            prov1.assert_equals(prov2, ignore_timestamps=ignore_timestamps)\n\n    if (\n        not ignore_metadata\n        and hasattr(tc1, \"_ll_object\")\n        and hasattr(tc2, \"_ll_object\")\n        and hasattr(tc1._ll_object, \"metadata_schema\")\n        and hasattr(tc2._ll_object, \"metadata_schema\")\n        and tc1._ll_object.metadata_schema != tc2._ll_object.metadata_schema\n        and tc1.metadata_schema == tc2.metadata_schema\n    ):\n        # Schemas differ in byte representation but are equivalent when decoded\n        return\n\n    # If we reach here, all comparisons matched; treat collections as equal.\n    return\n\n\nclass MutableBaseTable(BaseTable):\n    # Abstract base class for mutable tables that use the low-level table implementation.\n\n    mutable = True\n\n    def __init__(self, ll_table, row_class):\n        self.ll_table = ll_table\n        self.row_class = row_class\n\n    def _fast_equals(self, other, **kwargs):\n        return self.ll_table.equals(\n            other.ll_table, **{k: v for k, v in 
kwargs.items() if v is True}\n        )\n\n    @property\n    def num_rows(self) -> int:\n        return self.ll_table.num_rows\n\n    @property\n    def max_rows(self) -> int:\n        return self.ll_table.max_rows\n\n    @property\n    def max_rows_increment(self) -> int:\n        return self.ll_table.max_rows_increment\n\n    def __getattr__(self, name):\n        if name in self.column_names:\n            return getattr(self.ll_table, name)\n        else:\n            raise AttributeError(\n                f\"{self.__class__.__name__} object has no attribute {name}\"\n            )\n\n    def __setattr__(self, name, value):\n        if name in self.column_names:\n            d = self.asdict()\n            d[name] = value\n            self.set_columns(**d)\n        else:\n            object.__setattr__(self, name, value)\n\n    def _make_row(self, *args):\n        try:\n            return self.row_class(\n                *args, metadata_decoder=self.metadata_schema.decode_row\n            )\n        except AttributeError:\n            return self.row_class(*args)\n\n    def __getitem__(self, index):\n        \"\"\"\n        If passed an integer, return the specified row of this table, decoding metadata\n        if it is present. Supports negative indexing, e.g. ``table[-5]``.\n        If passed a slice, iterable or array return a new table containing the specified\n        rows. Similar to numpy fancy indexing, if the array or iterables contains\n        booleans then the index acts as a mask, returning those rows for which the mask\n        is True. 
Note that as the result is a new table, the row ids will change as tskit\n        row ids are row indexes.\n\n        :param index: the index of a desired row, a slice of the desired rows, an\n            iterable or array of the desired row numbers, or a boolean array to use as\n            a mask.\n        \"\"\"\n\n        if isinstance(index, numbers.Integral):\n            # Single row by integer\n            if index < 0:\n                index += len(self)\n            if index < 0 or index >= len(self):\n                raise IndexError(\"Index out of bounds\")\n            return self._make_row(*self.ll_table.get_row(index))\n        elif isinstance(index, numbers.Number):\n            raise TypeError(\"Index must be integer, slice or iterable\")\n        elif isinstance(index, slice):\n            index = range(*index.indices(len(self)))\n        else:\n            index = np.asarray(index)\n            if index.dtype == np.bool_:\n                if len(index) != len(self):\n                    raise IndexError(\"Boolean index must be same length as table\")\n                index = np.flatnonzero(index)\n            index = util.safe_np_int_cast(index, np.int32)\n\n        ret = self.__class__()\n        # Not all tables have metadata schemas; guard access\n        try:\n            ret.metadata_schema = self.metadata_schema\n        except AttributeError:\n            pass\n        ret.ll_table.extend(self.ll_table, row_indexes=index)\n\n        return ret\n\n    def __setitem__(self, index, new_row):\n        \"\"\"\n        Replaces a row of this table at the specified index with information from a\n        row-like object. Metadata will be validated and encoded according to the table's\n        :attr:`metadata_schema<tskit.IndividualTable.metadata_schema>`.\n\n        :param index: the index of the row to change\n        :param row-like new_row: An object that has attributes corresponding to the\n            properties of the new row. 
Both the objects returned from ``table[i]`` and\n            e.g. ``ts.individual(i)`` work for this purpose, along with any other\n            object with the correct attributes.\n        \"\"\"\n        if isinstance(index, numbers.Integral):\n            # Single row by integer\n            if index < 0:\n                index += len(self)\n            if index < 0 or index >= len(self):\n                raise IndexError(\"Index out of bounds\")\n        else:\n            raise TypeError(\"Index must be integer\")\n\n        row_data = {\n            column: getattr(new_row, column)\n            for column in self.column_names\n            if \"_offset\" not in column\n        }\n\n        # Encode the metadata - note that if this becomes a perf bottleneck it is\n        # possible to use the cached, encoded metadata in the row object, rather than\n        # decode and reencode\n        if \"metadata\" in row_data:\n            row_data[\"metadata\"] = self.metadata_schema.validate_and_encode_row(\n                row_data[\"metadata\"]\n            )\n\n        self.ll_table.update_row(row_index=index, **row_data)\n\n    def append(self, row):\n        \"\"\"\n        Adds a new row to this table and returns the ID of the new row. Metadata, if\n        specified, will be validated and encoded according to the table's\n        :attr:`metadata_schema<tskit.IndividualTable.metadata_schema>`.\n\n        :param row-like row: An object that has attributes corresponding to the\n            properties of the new row. Both the objects returned from ``table[i]`` and\n            e.g. 
``ts.individual(i)`` work for this purpose, along with any other\n            object with the correct attributes.\n        :return: The index of the newly added row.\n        :rtype: int\n        \"\"\"\n        return self.add_row(\n            **{\n                column: getattr(row, column)\n                for column in self.column_names\n                if \"_offset\" not in column\n            }\n        )\n\n    def replace_with(self, other):\n        # Overwrite the contents of this table with a copy of the other table\n        self.set_columns(**other.asdict())\n\n    def clear(self):\n        \"\"\"\n        Deletes all rows in this table.\n        \"\"\"\n        self.ll_table.clear()\n\n    def reset(self):\n        # Deprecated alias for clear\n        self.clear()\n\n    def truncate(self, num_rows):\n        \"\"\"\n        Truncates this table so that the only the first ``num_rows`` are retained.\n\n        :param int num_rows: The number of rows to retain in this table.\n        \"\"\"\n        return self.ll_table.truncate(num_rows)\n\n    def keep_rows(self, keep):\n        \"\"\"\n        .. include:: substitutions/table_keep_rows_main.rst\n\n        :param array-like keep: The rows to keep as a boolean array. Must\n            be the same length as the table, and convertible to a numpy\n            array of dtype bool.\n        :return: The mapping between old and new row IDs as a numpy\n            array (dtype int32).\n        :rtype: numpy.ndarray (dtype=np.int32)\n        \"\"\"\n        # We do this check here rather than in the C code because calling\n        # len() on the input will cause a more readable exception to be\n        # raised than the inscrutable errors we get from numpy when\n        # converting arguments of the wrong type.\n        if len(keep) != len(self):\n            msg = (\n                \"Argument for keep_rows must be a boolean array of \"\n                \"the same length as the table. 
\"\n                f\"(need:{len(self)}, got:{len(keep)})\"\n            )\n            raise ValueError(msg)\n        return self.ll_table.keep_rows(keep)\n\n    # Pickle support\n    def __getstate__(self):\n        return self.asdict()\n\n    # Unpickle support\n    def __setstate__(self, state):\n        self.__init__()\n        self.set_columns(**state)\n\n    def copy(self):\n        \"\"\"\n        Returns a deep copy of this table\n        \"\"\"\n        copy = self.__class__()\n        copy.set_columns(**self.asdict())\n        return copy\n\n    def set_columns(self, **kwargs):\n        \"\"\"\n        Sets the values for each column in this :class:`Table` using values\n        provided in numpy arrays. Overwrites existing data in all the table columns.\n        \"\"\"\n        raise NotImplementedError()\n\n\nclass ImmutableBaseTable(BaseTable):\n    # List of all mutation methods that should give a nice error\n    _MUTATION_METHODS = {\n        \"add_row\",\n        \"clear\",\n        \"set_columns\",\n        \"truncate\",\n        \"replace_with\",\n        \"append_columns\",\n        \"keep_rows\",\n        \"append\",\n        \"reset\",\n        \"drop_metadata\",\n        \"packset_metadata\",\n        \"packset_location\",\n        \"packset_parents\",\n        \"packset_ancestral_state\",\n        \"packset_derived_state\",\n        \"packset_record\",\n        \"packset_timestamp\",\n        \"squash\",\n    }\n\n    mutable = False\n    # These are set by subclasses.\n    _row_field_indices = None\n    table_name = None\n    mutable_class = None\n\n    def __init__(self, ll_tree_sequence, row_indices=None, row_slice=None):\n        object.__setattr__(self, \"_initialised\", False)\n        self._llts = ll_tree_sequence\n        singular_name = self.table_name.rstrip(\"s\")\n        self.row_class = globals()[f\"{singular_name.capitalize()}TableRow\"]\n        self._ll_row_getter = f\"get_{singular_name}\"\n        self._set_column_names = 
set(self.column_names)\n\n        self._row_indices = row_indices\n        self._row_slice = row_slice\n        if row_indices is None:\n            if row_slice is None:\n                self.num_rows = getattr(self._llts, f\"get_num_{self.table_name}\")()\n            else:\n                self.num_rows = max(0, row_slice.stop - row_slice.start)\n                self._row_slice = row_slice\n        else:\n            self.num_rows = len(row_indices)\n            self._row_slice = None\n        object.__setattr__(self, \"_initialised\", True)\n\n    def copy(self):\n        \"\"\"\n        Returns a mutable deep copy of this ImmutableTableCollection.\n\n        :return: A deep copy of this ImmutableTableCollection.\n        :rtype: tskit.TableCollection\n        \"\"\"\n        mutable_table = self.mutable_class()\n        column_data = self.asdict()\n        mutable_table.set_columns(**column_data)\n        return mutable_table\n\n    def __len__(self):\n        return self.num_rows\n\n    def __iter__(self):\n        row_factory = self._create_row_object\n        if self._row_indices is not None:\n            for ll_index in self._row_indices:\n                yield row_factory(ll_index)\n            return\n        if self._row_slice is None:\n            start = 0\n            stop = self.num_rows\n        else:\n            start = self._row_slice.start\n            stop = self._row_slice.stop\n        for ll_index in range(start, stop):\n            yield row_factory(ll_index)\n\n    def _fast_equals(self, other, *, ignore_metadata=False, ignore_timestamps=False):\n        if self.num_rows != other.num_rows:\n            return False\n        if (\n            not ignore_metadata\n            and hasattr(self, \"metadata_schema\")\n            and hasattr(other, \"metadata_schema\")\n            and self.metadata_schema != other.metadata_schema\n        ):\n            return False\n        for column_name in self.column_names:\n            if 
ignore_metadata and column_name.startswith(\"metadata\"):\n                continue\n            if (\n                ignore_timestamps\n                and getattr(self, \"table_name\", None) == \"provenances\"\n                and column_name in (\"timestamp\", \"timestamp_offset\")\n            ):\n                continue\n            if not np.array_equal(\n                getattr(self, column_name), getattr(other, column_name), equal_nan=True\n            ):\n                return False\n        return True\n\n    def __getattr__(self, name):\n        # Handle attribute access. This method is only called when an attribute\n        # is not found through normal lookup, so we can lazily calculate column\n        # contents.\n        if name in self._set_column_names:\n            full_array = getattr(self._llts, f\"{self.table_name}_{name}\")\n            # TableCollection methods use the LWT code, which is stuck returning\n            # int8 for compatibility see https://github.com/tskit-dev/tskit/issues/3284\n            if name == \"metadata\":\n                full_array = full_array.view(np.int8)\n            if not (self._row_indices is None and self._row_slice is None):\n                is_offset = name.endswith(\"_offset\")\n                is_ragged = f\"{name}_offset\" in self._set_column_names\n                if self._row_indices is None:\n                    subset_array = self._slice_column(\n                        full_array, name, is_offset, is_ragged\n                    )\n                else:\n                    subset_array = self._select_column(\n                        full_array, name, is_offset, is_ragged\n                    )\n            else:\n                subset_array = full_array\n            # Store the result, so on the next access we don't need to calculate it again\n            object.__setattr__(self, name, subset_array)\n            return subset_array\n\n        if name in self._MUTATION_METHODS:\n            raise 
ImmutableTableError(\n                f\"Cannot call {name}() on immutable {self.table_name} table. \"\n                f\"Use TreeSequence.dump_tables() for mutable copy.\"\n            )\n\n        # If it's not a blocked method or column, delegate to parent classes\n        # This allows metadata mixins to handle metadata_schema and other attributes\n        raise AttributeError(\n            f\"'{self.__class__.__name__}' object has no attribute '{name}'\"\n        )\n\n    def _slice_column(self, full_array, name, is_offset, is_ragged):\n        row_slice = self._row_slice\n        start = row_slice.start\n        stop = row_slice.stop\n        if is_offset:\n            return full_array[start : stop + 1]\n        elif is_ragged:\n            offset_array = getattr(self._llts, f\"{self.table_name}_{name}_offset\")\n            return full_array[offset_array[start] : offset_array[stop]]\n        else:\n            return full_array[row_slice]\n\n    def _select_column(self, full_array, name, is_offset, is_ragged):\n        indices = self._row_indices\n        if is_ragged:\n            ragged, offsets = self._select_column_ragged(full_array, name, indices)\n            # We calculated _offset, so might as well store it so it doesn't\n            # need to be recalculated if accessed\n            object.__setattr__(self, f\"{name}_offset\", offsets)\n            return ragged\n        elif is_offset:\n            return self._select_column_offset(full_array, indices)\n        else:\n            return full_array[indices]\n\n    def _select_column_offset(self, offset_array, indices):\n        lengths = offset_array[indices + 1] - offset_array[indices]\n        result = np.empty(lengths.size + 1, dtype=offset_array.dtype)\n        result[0] = 0\n        if lengths.size > 0:\n            np.cumsum(lengths, dtype=offset_array.dtype, out=result[1:])\n        return result\n\n    def _select_column_ragged(self, full_array, name, indices):\n        offset_array = 
getattr(self._llts, f\"{self.table_name}_{name}_offset\")\n        indexed_offsets = offset_array[indices]\n        lengths64 = (offset_array[indices + 1] - indexed_offsets).astype(\n            np.int64, copy=False\n        )\n        gather_indices = _ragged_selection_indices(indexed_offsets, lengths64)\n        result = full_array[gather_indices]\n        offsets_result = self._select_column_offset(offset_array, indices)\n        return result, offsets_result\n\n    def __getitem__(self, index):\n        try:\n            row_index = operator.index(index)\n        except TypeError:\n            selector = self._resolve_selector(index)\n            if isinstance(selector, slice):\n                return self.__class__(self._llts, row_slice=selector)\n            return self.__class__(self._llts, row_indices=selector)\n\n        if row_index < 0:\n            row_index += self.num_rows\n        if row_index < 0 or row_index >= self.num_rows:\n            raise IndexError(\"Index out of bounds\")\n        ll_index = self._resolve_single_index(row_index)\n        return self._create_row_object(ll_index)\n\n    def _current_ll_indices(self):\n        if self._row_indices is None:\n            if self._row_slice is None:\n                start = 0\n                stop = self.num_rows\n            else:\n                start = self._row_slice.start\n                stop = self._row_slice.stop\n            return np.arange(start, stop, dtype=np.int64)\n        return np.asarray(self._row_indices)\n\n    def _resolve_single_index(self, row_index):\n        if self._row_indices is None:\n            base_start = 0 if self._row_slice is None else self._row_slice.start\n            return int(base_start + row_index)\n        return int(self._row_indices[row_index])\n\n    def _resolve_selector(self, selector):\n        if isinstance(selector, slice):\n            step = selector.step or 1\n            if step == 1 and self._row_indices is None:\n                start, 
stop, _ = selector.indices(self.num_rows)\n                base_start = 0 if self._row_slice is None else self._row_slice.start\n                return slice(base_start + start, base_start + stop)\n            indices = np.arange(self.num_rows, dtype=np.int64)\n            selector = indices[selector]\n\n        selector = np.asarray(selector)\n        if selector.dtype == np.bool_:\n            if len(selector) != self.num_rows:\n                raise IndexError(\"Boolean index must be same length as table\")\n            selector = np.flatnonzero(selector)\n        else:\n            selector = util.safe_np_int_cast(selector, np.int64)\n\n        ll_indices = self._current_ll_indices()\n        resolved = ll_indices[selector]\n        if resolved.dtype != np.int32:\n            resolved = util.safe_np_int_cast(resolved, np.int32)\n        return resolved\n\n    def _create_row_object(self, ll_index):\n        raw_row = getattr(self._llts, self._ll_row_getter)(int(ll_index))\n        spec = self._row_field_indices\n        if spec is None:\n            values = list(raw_row)\n        else:\n            values = [raw_row[i] for i in spec]\n        try:\n            return self.row_class(\n                *values, metadata_decoder=self.metadata_schema.decode_row\n            )\n        except AttributeError:\n            return self.row_class(*values)\n\n    def __setattr__(self, name, value):\n        # Allow all assignments during initialization\n        if not self._initialised:\n            object.__setattr__(self, name, value)\n            return\n        # Allow internal/private attributes\n        if name.startswith(\"_\"):\n            object.__setattr__(self, name, value)\n            return\n        raise ImmutableTableError(\n            f\"Cannot set attribute '{name}' on immutable {self.table_name} table. 
\"\n            f\"Use TreeSequence.dump_tables() for mutable copy.\"\n        )\n\n\nclass MutableMetadataTable(MutableBaseTable, metadata.TableMetadataWriter):\n    pass\n\n\nclass ImmutableMetadataTable(ImmutableBaseTable, metadata.TableMetadataReader):\n    @property\n    def metadata_schema(self):\n        \"\"\"\n        The :class:`tskit.MetadataSchema` for this table.\n        Overrides the base implementation to access schema from tree sequence.\n        \"\"\"\n        try:\n            return self._metadata_schema\n        except AttributeError:\n            self._metadata_schema = metadata.parse_metadata_schema(\n                getattr(\n                    self._llts.get_table_metadata_schemas(),\n                    # Use singular form for table name\n                    self.table_name.rstrip(\"s\"),\n                )\n            )\n            return self._metadata_schema\n\n\nclass IndividualTable(MutableMetadataTable):\n    \"\"\"\n    A table defining the individuals in a tree sequence. Note that although\n    each Individual has associated nodes, reference to these is not stored in\n    the individual table, but rather reference to the individual is stored for\n    each node in the :class:`NodeTable`.  This is similar to the way in which\n    the relationship between sites and mutations is modelled.\n\n    .. include:: substitutions/table_edit_warning.rst\n\n    :ivar flags: The array of flags values.\n    :vartype flags: numpy.ndarray, dtype=np.uint32\n    :ivar location: The flattened array of floating point location values. See\n        :ref:`sec_encoding_ragged_columns` for more details.\n    :vartype location: numpy.ndarray, dtype=np.float64\n    :ivar location_offset: The array of offsets into the location column. See\n        :ref:`sec_encoding_ragged_columns` for more details.\n    :vartype location_offset: numpy.ndarray, dtype=np.uint32\n    :ivar parents: The flattened array of parent individual ids. 
See\n        :ref:`sec_encoding_ragged_columns` for more details.\n    :vartype parents: numpy.ndarray, dtype=np.int32\n    :ivar parents_offset: The array of offsets into the parents column. See\n        :ref:`sec_encoding_ragged_columns` for more details.\n    :vartype parents_offset: numpy.ndarray, dtype=np.uint32\n    :ivar metadata: The flattened array of binary metadata values. See\n        :ref:`sec_tables_api_binary_columns` for more details.\n    :vartype metadata: numpy.ndarray, dtype=np.int8\n    :ivar metadata_offset: The array of offsets into the metadata column. See\n        :ref:`sec_tables_api_binary_columns` for more details.\n    :vartype metadata_offset: numpy.ndarray, dtype=np.uint32\n    :ivar metadata_schema: The metadata schema for this table's metadata column\n    :vartype metadata_schema: tskit.MetadataSchema\n    \"\"\"\n\n    table_name = \"individuals\"\n    column_names = [\n        \"flags\",\n        \"location\",\n        \"location_offset\",\n        \"parents\",\n        \"parents_offset\",\n        \"metadata\",\n        \"metadata_offset\",\n    ]\n\n    def __init__(self, max_rows_increment=0, ll_table=None):\n        if ll_table is None:\n            ll_table = _tskit.IndividualTable(max_rows_increment=max_rows_increment)\n        super().__init__(ll_table, IndividualTableRow)\n\n    def add_row(self, flags=0, location=None, parents=None, metadata=None):\n        \"\"\"\n        Adds a new row to this :class:`IndividualTable` and returns the ID of the\n        corresponding individual. Metadata, if specified, will be validated and encoded\n        according to the table's\n        :attr:`metadata_schema<tskit.IndividualTable.metadata_schema>`.\n\n        :param int flags: The bitwise flags for the new individual.\n        :param array-like location: A list of numeric values or one-dimensional numpy\n            array describing the location of this individual. 
If not specified\n            or None, a zero-dimensional location is stored.\n        :param array-like parents: A list or array of ids of parent individuals. If not\n            specified an empty array is stored.\n        :param object metadata: Any object that is valid metadata for the table's schema.\n            Defaults to the default metadata value for the table's schema. This is\n            typically ``{}``. For no schema, ``None``.\n        :return: The ID of the newly added individual.\n        :rtype: int\n        \"\"\"\n        if metadata is None:\n            metadata = self.metadata_schema.empty_value\n        metadata = self.metadata_schema.validate_and_encode_row(metadata)\n        return self.ll_table.add_row(\n            flags=flags, location=location, parents=parents, metadata=metadata\n        )\n\n    def set_columns(\n        self,\n        flags=None,\n        location=None,\n        location_offset=None,\n        parents=None,\n        parents_offset=None,\n        metadata=None,\n        metadata_offset=None,\n        metadata_schema=None,\n    ):\n        \"\"\"\n        Sets the values for each column in this :class:`IndividualTable` using the\n        values in the specified arrays. 
Overwrites existing data in all the table\n        columns.\n\n        The ``flags`` array is mandatory and defines the number of individuals\n        the table will contain.\n        The ``location`` and ``location_offset`` parameters must be supplied\n        together, and meet the requirements for :ref:`sec_encoding_ragged_columns`.\n        The ``parents`` and ``parents_offset`` parameters must be supplied\n        together, and meet the requirements for :ref:`sec_encoding_ragged_columns`.\n        The ``metadata`` and ``metadata_offset`` parameters must be supplied\n        together, and meet the requirements for :ref:`sec_encoding_ragged_columns`.\n        See :ref:`sec_tables_api_binary_columns` for more information and\n        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.\n\n        :param flags: The bitwise flags for each individual. Required.\n        :type flags: numpy.ndarray, dtype=np.uint32\n        :param location: The flattened location array. Must be specified along\n            with ``location_offset``. If not specified or None, an empty location\n            value is stored for each individual.\n        :type location: numpy.ndarray, dtype=np.float64\n        :param location_offset: The offsets into the ``location`` array.\n        :type location_offset: numpy.ndarray, dtype=np.uint32.\n        :param parents: The flattened parents array. Must be specified along\n            with ``parents_offset``. If not specified or None, an empty parents array\n            is stored for each individual.\n        :type parents: numpy.ndarray, dtype=np.int32\n        :param parents_offset: The offsets into the ``parents`` array.\n        :type parents_offset: numpy.ndarray, dtype=np.uint32.\n        :param metadata: The flattened metadata array. Must be specified along\n            with ``metadata_offset``. 
If not specified or None, an empty metadata\n            value is stored for each individual.\n        :type metadata: numpy.ndarray, dtype=np.int8\n        :param metadata_offset: The offsets into the ``metadata`` array.\n        :type metadata_offset: numpy.ndarray, dtype=np.uint32.\n        :param metadata_schema: The encoded metadata schema. If None (default)\n            do not overwrite the existing schema. Note that a schema will need to be\n            encoded as a string, e.g. via ``repr(new_metadata_schema)``.\n        :type metadata_schema: str\n\n        \"\"\"\n        self._check_required_args(flags=flags)\n        self.ll_table.set_columns(\n            dict(\n                flags=flags,\n                location=location,\n                location_offset=location_offset,\n                parents=parents,\n                parents_offset=parents_offset,\n                metadata=metadata,\n                metadata_offset=metadata_offset,\n                metadata_schema=metadata_schema,\n            )\n        )\n\n    def append_columns(\n        self,\n        flags=None,\n        location=None,\n        location_offset=None,\n        parents=None,\n        parents_offset=None,\n        metadata=None,\n        metadata_offset=None,\n    ):\n        \"\"\"\n        Appends the specified arrays to the end of the columns in this\n        :class:`IndividualTable`. 
This allows many new rows to be added at once.\n\n        The ``flags`` array is mandatory and defines the number of\n        extra individuals to add to the table.\n        The ``parents`` and ``parents_offset`` parameters must be supplied\n        together, and meet the requirements for :ref:`sec_encoding_ragged_columns`.\n        The ``location`` and ``location_offset`` parameters must be supplied\n        together, and meet the requirements for :ref:`sec_encoding_ragged_columns`.\n        The ``metadata`` and ``metadata_offset`` parameters must be supplied\n        together, and meet the requirements for :ref:`sec_encoding_ragged_columns`.\n        See :ref:`sec_tables_api_binary_columns` for more information and\n        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.\n\n        :param flags: The bitwise flags for each individual. Required.\n        :type flags: numpy.ndarray, dtype=np.uint32\n        :param location: The flattened location array. Must be specified along\n            with ``location_offset``. If not specified or None, an empty location\n            value is stored for each individual.\n        :type location: numpy.ndarray, dtype=np.float64\n        :param location_offset: The offsets into the ``location`` array.\n        :type location_offset: numpy.ndarray, dtype=np.uint32.\n        :param metadata: The flattened metadata array. Must be specified along\n            with ``metadata_offset``. If not specified or None, an empty metadata\n            value is stored for each individual.\n        :param parents: The flattened parents array. Must be specified along\n            with ``parents_offset``. 
If not specified or None, an empty parents array\n            is stored for each individual.\n        :type parents: numpy.ndarray, dtype=np.int32\n        :param parents_offset: The offsets into the ``parents`` array.\n        :type parents_offset: numpy.ndarray, dtype=np.uint32.\n        :type metadata: numpy.ndarray, dtype=np.int8\n        :param metadata_offset: The offsets into the ``metadata`` array.\n        :type metadata_offset: numpy.ndarray, dtype=np.uint32.\n        \"\"\"\n        self._check_required_args(flags=flags)\n        self.ll_table.append_columns(\n            dict(\n                flags=flags,\n                location=location,\n                location_offset=location_offset,\n                parents=parents,\n                parents_offset=parents_offset,\n                metadata=metadata,\n                metadata_offset=metadata_offset,\n            )\n        )\n\n    def packset_location(self, locations):\n        \"\"\"\n        Packs the specified list of location values and updates the ``location``\n        and ``location_offset`` columns. The length of the locations array\n        must be equal to the number of rows in the table.\n\n        :param list locations: A list of locations interpreted as numpy float64\n            arrays.\n        \"\"\"\n        packed, offset = util.pack_arrays(locations)\n        d = self.asdict()\n        d[\"location\"] = packed\n        d[\"location_offset\"] = offset\n        self.set_columns(**d)\n\n    def packset_parents(self, parents):\n        \"\"\"\n        Packs the specified list of parent values and updates the ``parents``\n        and ``parents_offset`` columns. 
The length of the parents array\n        must be equal to the number of rows in the table.\n\n        :param list parents: A list of list of parent ids, interpreted as numpy int32\n            arrays.\n        \"\"\"\n        packed, offset = util.pack_arrays(parents, np.int32)\n        d = self.asdict()\n        d[\"parents\"] = packed\n        d[\"parents_offset\"] = offset\n        self.set_columns(**d)\n\n    def keep_rows(self, keep):\n        \"\"\"\n        .. include:: substitutions/table_keep_rows_main.rst\n\n        The values in the ``parents`` column are updated according to this\n        map, so that reference integrity within the table is maintained.\n        As a consequence of this, the values in the ``parents`` column\n        for kept rows are bounds-checked and an error raised if they\n        are not valid. Rows that are deleted are not checked for\n        parent ID integrity.\n\n        If an attempt is made to delete rows that are referred to by\n        the ``parents`` column of rows that are retained, an error\n        is raised.\n\n        These error conditions are checked before any alterations to\n        the table are made.\n\n        :param array-like keep: The rows to keep as a boolean array. Must\n            be the same length as the table, and convertible to a numpy\n            array of dtype bool.\n        :return: The mapping between old and new row IDs as a numpy\n            array (dtype int32).\n        :rtype: numpy.ndarray (dtype=np.int32)\n        \"\"\"\n        return super().keep_rows(keep)\n\n\nclass NodeTable(MutableMetadataTable):\n    \"\"\"\n    A table defining the nodes in a tree sequence. See the\n    :ref:`definitions <sec_node_table_definition>` for details on the columns\n    in this table and the\n    :ref:`tree sequence requirements <sec_valid_tree_sequence_requirements>` section\n    for the properties needed for a node table to be a part of a valid tree sequence.\n\n    .. 
include:: substitutions/table_edit_warning.rst\n\n    :ivar time: The array of time values.\n    :vartype time: numpy.ndarray, dtype=np.float64\n    :ivar flags: The array of flags values.\n    :vartype flags: numpy.ndarray, dtype=np.uint32\n    :ivar population: The array of population IDs.\n    :vartype population: numpy.ndarray, dtype=np.int32\n    :ivar individual: The array of individual IDs that each node belongs to.\n    :vartype individual: numpy.ndarray, dtype=np.int32\n    :ivar metadata: The flattened array of binary metadata values. See\n        :ref:`sec_tables_api_binary_columns` for more details.\n    :vartype metadata: numpy.ndarray, dtype=np.int8\n    :ivar metadata_offset: The array of offsets into the metadata column. See\n        :ref:`sec_tables_api_binary_columns` for more details.\n    :vartype metadata_offset: numpy.ndarray, dtype=np.uint32\n    :ivar metadata_schema: The metadata schema for this table's metadata column\n    :vartype metadata_schema: tskit.MetadataSchema\n    \"\"\"\n\n    table_name = \"nodes\"\n    column_names = [\n        \"time\",\n        \"flags\",\n        \"population\",\n        \"individual\",\n        \"metadata\",\n        \"metadata_offset\",\n    ]\n\n    def __init__(self, max_rows_increment=0, ll_table=None):\n        if ll_table is None:\n            ll_table = _tskit.NodeTable(max_rows_increment=max_rows_increment)\n        super().__init__(ll_table, NodeTableRow)\n\n    def add_row(self, flags=0, time=0, population=-1, individual=-1, metadata=None):\n        \"\"\"\n        Adds a new row to this :class:`NodeTable` and returns the ID of the\n        corresponding node. 
Metadata, if specified, will be validated and encoded\n        according to the table's\n        :attr:`metadata_schema<tskit.NodeTable.metadata_schema>`.\n\n        :param int flags: The bitwise flags for the new node.\n        :param float time: The birth time for the new node.\n        :param int population: The ID of the population in which the new node was born.\n            Defaults to :data:`tskit.NULL`.\n        :param int individual: The ID of the individual in which the new node was born.\n            Defaults to :data:`tskit.NULL`.\n        :param object metadata: Any object that is valid metadata for the table's schema.\n            Defaults to the default metadata value for the table's schema. This is\n            typically ``{}``. For no schema, ``None``.\n        :return: The ID of the newly added node.\n        :rtype: int\n        \"\"\"\n        if metadata is None:\n            metadata = self.metadata_schema.empty_value\n        metadata = self.metadata_schema.validate_and_encode_row(metadata)\n        return self.ll_table.add_row(flags, time, population, individual, metadata)\n\n    def set_columns(\n        self,\n        flags=None,\n        time=None,\n        population=None,\n        individual=None,\n        metadata=None,\n        metadata_offset=None,\n        metadata_schema=None,\n    ):\n        \"\"\"\n        Sets the values for each column in this :class:`NodeTable` using the values in\n        the specified arrays. Overwrites existing data in all the table columns.\n\n        The ``flags``, ``time`` and ``population`` arrays must all be of the same length,\n        which is equal to the number of nodes the table will contain. 
The\n        ``metadata`` and ``metadata_offset`` parameters must be supplied together, and\n        meet the requirements for :ref:`sec_encoding_ragged_columns`.\n        See :ref:`sec_tables_api_binary_columns` for more information and\n        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.\n\n        :param flags: The bitwise flags for each node. Required.\n        :type flags: numpy.ndarray, dtype=np.uint32\n        :param time: The time values for each node. Required.\n        :type time: numpy.ndarray, dtype=np.float64\n        :param population: The population values for each node. If not specified\n            or None, the :data:`tskit.NULL` value is stored for each node.\n        :type population: numpy.ndarray, dtype=np.int32\n        :param individual: The individual values for each node. If not specified\n            or None, the :data:`tskit.NULL` value is stored for each node.\n        :type individual: numpy.ndarray, dtype=np.int32\n        :param metadata: The flattened metadata array. Must be specified along\n            with ``metadata_offset``. If not specified or None, an empty metadata\n            value is stored for each node.\n        :type metadata: numpy.ndarray, dtype=np.int8\n        :param metadata_offset: The offsets into the ``metadata`` array.\n        :type metadata_offset: numpy.ndarray, dtype=np.uint32.\n        :param metadata_schema: The encoded metadata schema. If None (default)\n            do not overwrite the existing schema. Note that a schema will need to be\n            encoded as a string, e.g. 
via ``repr(new_metadata_schema)``.\n        :type metadata_schema: str\n        \"\"\"\n        self._check_required_args(flags=flags, time=time)\n        self.ll_table.set_columns(\n            dict(\n                flags=flags,\n                time=time,\n                population=population,\n                individual=individual,\n                metadata=metadata,\n                metadata_offset=metadata_offset,\n                metadata_schema=metadata_schema,\n            )\n        )\n\n    def append_columns(\n        self,\n        flags=None,\n        time=None,\n        population=None,\n        individual=None,\n        metadata=None,\n        metadata_offset=None,\n    ):\n        \"\"\"\n        Appends the specified arrays to the end of the columns in this\n        :class:`NodeTable`. This allows many new rows to be added at once.\n\n        The ``flags``, ``time`` and ``population`` arrays must all be of the same length,\n        which is equal to the number of nodes that will be added to the table. The\n        ``metadata`` and ``metadata_offset`` parameters must be supplied together, and\n        meet the requirements for :ref:`sec_encoding_ragged_columns`.\n        See :ref:`sec_tables_api_binary_columns` for more information and\n        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.\n\n        :param flags: The bitwise flags for each node. Required.\n        :type flags: numpy.ndarray, dtype=np.uint32\n        :param time: The time values for each node. Required.\n        :type time: numpy.ndarray, dtype=np.float64\n        :param population: The population values for each node. If not specified\n            or None, the :data:`tskit.NULL` value is stored for each node.\n        :type population: numpy.ndarray, dtype=np.int32\n        :param individual: The individual values for each node. 
If not specified\n            or None, the :data:`tskit.NULL` value is stored for each node.\n        :type individual: numpy.ndarray, dtype=np.int32\n        :param metadata: The flattened metadata array. Must be specified along\n            with ``metadata_offset``. If not specified or None, an empty metadata\n            value is stored for each node.\n        :type metadata: numpy.ndarray, dtype=np.int8\n        :param metadata_offset: The offsets into the ``metadata`` array.\n        :type metadata_offset: numpy.ndarray, dtype=np.uint32.\n        \"\"\"\n        self._check_required_args(flags=flags, time=time)\n        self.ll_table.append_columns(\n            dict(\n                flags=flags,\n                time=time,\n                population=population,\n                individual=individual,\n                metadata=metadata,\n                metadata_offset=metadata_offset,\n            )\n        )\n\n\nclass EdgeTable(MutableMetadataTable):\n    \"\"\"\n    A table defining the edges in a tree sequence. See the\n    :ref:`definitions <sec_edge_table_definition>` for details on the columns\n    in this table and the\n    :ref:`tree sequence requirements <sec_valid_tree_sequence_requirements>` section\n    for the properties needed for an edge table to be a part of a valid tree sequence.\n\n    .. include:: substitutions/table_edit_warning.rst\n\n    :ivar left: The array of left coordinates.\n    :vartype left: numpy.ndarray, dtype=np.float64\n    :ivar right: The array of right coordinates.\n    :vartype right: numpy.ndarray, dtype=np.float64\n    :ivar parent: The array of parent node IDs.\n    :vartype parent: numpy.ndarray, dtype=np.int32\n    :ivar child: The array of child node IDs.\n    :vartype child: numpy.ndarray, dtype=np.int32\n    :ivar metadata: The flattened array of binary metadata values. 
See\n        :ref:`sec_tables_api_binary_columns` for more details.\n    :vartype metadata: numpy.ndarray, dtype=np.int8\n    :ivar metadata_offset: The array of offsets into the metadata column. See\n        :ref:`sec_tables_api_binary_columns` for more details.\n    :vartype metadata_offset: numpy.ndarray, dtype=np.uint32\n    :ivar metadata_schema: The metadata schema for this table's metadata column\n    :vartype metadata_schema: tskit.MetadataSchema\n    \"\"\"\n\n    table_name = \"edges\"\n    column_names = [\n        \"left\",\n        \"right\",\n        \"parent\",\n        \"child\",\n        \"metadata\",\n        \"metadata_offset\",\n    ]\n\n    def __init__(self, max_rows_increment=0, ll_table=None):\n        if ll_table is None:\n            ll_table = _tskit.EdgeTable(max_rows_increment=max_rows_increment)\n        super().__init__(ll_table, EdgeTableRow)\n\n    def add_row(self, left, right, parent, child, metadata=None):\n        \"\"\"\n        Adds a new row to this :class:`EdgeTable` and returns the ID of the\n        corresponding edge. Metadata, if specified, will be validated and encoded\n        according to the table's\n        :attr:`metadata_schema<tskit.EdgeTable.metadata_schema>`.\n\n        :param float left: The left coordinate (inclusive).\n        :param float right: The right coordinate (exclusive).\n        :param int parent: The ID of parent node.\n        :param int child: The ID of child node.\n        :param object metadata: Any object that is valid metadata for the table's schema.\n            Defaults to the default metadata value for the table's schema. This is\n            typically ``{}``. 
For no schema, ``None``.\n        :return: The ID of the newly added edge.\n        :rtype: int\n        \"\"\"\n        if metadata is None:\n            metadata = self.metadata_schema.empty_value\n        metadata = self.metadata_schema.validate_and_encode_row(metadata)\n        return self.ll_table.add_row(left, right, parent, child, metadata)\n\n    def set_columns(\n        self,\n        left=None,\n        right=None,\n        parent=None,\n        child=None,\n        metadata=None,\n        metadata_offset=None,\n        metadata_schema=None,\n    ):\n        \"\"\"\n        Sets the values for each column in this :class:`EdgeTable` using the values\n        in the specified arrays. Overwrites existing data in all the table columns.\n\n        The ``left``, ``right``, ``parent`` and ``child`` parameters are mandatory,\n        and must be numpy arrays of the same length (which is equal to the number of\n        edges the table will contain).\n        The ``metadata`` and ``metadata_offset`` parameters must be supplied together,\n        and meet the requirements for :ref:`sec_encoding_ragged_columns`.\n        See :ref:`sec_tables_api_binary_columns` for more information and\n        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.\n\n\n        :param left: The left coordinates (inclusive).\n        :type left: numpy.ndarray, dtype=np.float64\n        :param right: The right coordinates (exclusive).\n        :type right: numpy.ndarray, dtype=np.float64\n        :param parent: The parent node IDs.\n        :type parent: numpy.ndarray, dtype=np.int32\n        :param child: The child node IDs.\n        :type child: numpy.ndarray, dtype=np.int32\n        :param metadata: The flattened metadata array. Must be specified along\n            with ``metadata_offset``. 
If not specified or None, an empty metadata\n            value is stored for each edge.\n        :type metadata: numpy.ndarray, dtype=np.int8\n        :param metadata_offset: The offsets into the ``metadata`` array.\n        :type metadata_offset: numpy.ndarray, dtype=np.uint32.\n        :param metadata_schema: The encoded metadata schema. If None (default)\n            do not overwrite the existing schema. Note that a schema will need to be\n            encoded as a string, e.g. via ``repr(new_metadata_schema)``.\n        :type metadata_schema: str\n        \"\"\"\n        self._check_required_args(left=left, right=right, parent=parent, child=child)\n        self.ll_table.set_columns(\n            dict(\n                left=left,\n                right=right,\n                parent=parent,\n                child=child,\n                metadata=metadata,\n                metadata_offset=metadata_offset,\n                metadata_schema=metadata_schema,\n            )\n        )\n\n    def append_columns(\n        self, left, right, parent, child, metadata=None, metadata_offset=None\n    ):\n        \"\"\"\n        Appends the specified arrays to the end of the columns of this\n        :class:`EdgeTable`. This allows many new rows to be added at once.\n\n        The ``left``, ``right``, ``parent`` and ``child`` parameters are mandatory,\n        and must be numpy arrays of the same length (which is equal to the number of\n        additional edges to add to the table). 
The ``metadata`` and\n        ``metadata_offset`` parameters must be supplied together, and\n        meet the requirements for :ref:`sec_encoding_ragged_columns`.\n        See :ref:`sec_tables_api_binary_columns` for more information and\n        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.\n\n\n        :param left: The left coordinates (inclusive).\n        :type left: numpy.ndarray, dtype=np.float64\n        :param right: The right coordinates (exclusive).\n        :type right: numpy.ndarray, dtype=np.float64\n        :param parent: The parent node IDs.\n        :type parent: numpy.ndarray, dtype=np.int32\n        :param child: The child node IDs.\n        :type child: numpy.ndarray, dtype=np.int32\n        :param metadata: The flattened metadata array. Must be specified along\n            with ``metadata_offset``. If not specified or None, an empty metadata\n            value is stored for each node.\n        :type metadata: numpy.ndarray, dtype=np.int8\n        :param metadata_offset: The offsets into the ``metadata`` array.\n        :type metadata_offset: numpy.ndarray, dtype=np.uint32.\n        \"\"\"\n        self.ll_table.append_columns(\n            dict(\n                left=left,\n                right=right,\n                parent=parent,\n                child=child,\n                metadata=metadata,\n                metadata_offset=metadata_offset,\n            )\n        )\n\n    def squash(self):\n        \"\"\"\n        Sorts, then condenses the table into the smallest possible number of rows by\n        combining any adjacent edges.\n        A pair of edges is said to be `adjacent` if they have the same parent and child\n        nodes, and if the left coordinate of one of the edges is equal to the right\n        coordinate of the other edge.\n        The ``squash`` method modifies an :class:`EdgeTable` in place so that any set of\n        adjacent edges is replaced by a single edge.\n        The new edge will 
have the same parent and child node, a left coordinate\n        equal to the smallest left coordinate in the set, and a right coordinate\n        equal to the largest right coordinate in the set.\n        The new edge table will be sorted in the order (P, C, L, R): if the node table\n        is ordered by increasing node time, as is common, this order will meet the\n        :ref:`sec_edge_requirements` for a valid tree sequence, otherwise you will need\n        to call :meth:`.sort` on the entire :class:`TableCollection`.\n\n        .. note::\n            Note that this method will fail if any edges have non-empty metadata.\n\n        \"\"\"\n        self.ll_table.squash()\n\n\nclass MigrationTable(MutableMetadataTable):\n    \"\"\"\n    A table defining the migrations in a tree sequence. See the\n    :ref:`definitions <sec_migration_table_definition>` for details on the columns\n    in this table and the\n    :ref:`tree sequence requirements <sec_valid_tree_sequence_requirements>` section\n    for the properties needed for a migration table to be a part of a valid tree\n    sequence.\n\n    .. include:: substitutions/table_edit_warning.rst\n\n    :ivar left: The array of left coordinates.\n    :vartype left: numpy.ndarray, dtype=np.float64\n    :ivar right: The array of right coordinates.\n    :vartype right: numpy.ndarray, dtype=np.float64\n    :ivar node: The array of node IDs.\n    :vartype node: numpy.ndarray, dtype=np.int32\n    :ivar source: The array of source population IDs.\n    :vartype source: numpy.ndarray, dtype=np.int32\n    :ivar dest: The array of destination population IDs.\n    :vartype dest: numpy.ndarray, dtype=np.int32\n    :ivar time: The array of time values.\n    :vartype time: numpy.ndarray, dtype=np.float64\n    :ivar metadata: The flattened array of binary metadata values. 
See\n        :ref:`sec_tables_api_binary_columns` for more details.\n    :vartype metadata: numpy.ndarray, dtype=np.int8\n    :ivar metadata_offset: The array of offsets into the metadata column. See\n        :ref:`sec_tables_api_binary_columns` for more details.\n    :vartype metadata_offset: numpy.ndarray, dtype=np.uint32\n    :ivar metadata_schema: The metadata schema for this table's metadata column\n    :vartype metadata_schema: tskit.MetadataSchema\n    \"\"\"\n\n    table_name = \"migrations\"\n    column_names = [\n        \"left\",\n        \"right\",\n        \"node\",\n        \"source\",\n        \"dest\",\n        \"time\",\n        \"metadata\",\n        \"metadata_offset\",\n    ]\n\n    def __init__(self, max_rows_increment=0, ll_table=None):\n        if ll_table is None:\n            ll_table = _tskit.MigrationTable(max_rows_increment=max_rows_increment)\n        super().__init__(ll_table, MigrationTableRow)\n\n    def add_row(self, left, right, node, source, dest, time, metadata=None):\n        \"\"\"\n        Adds a new row to this :class:`MigrationTable` and returns the ID of the\n        corresponding migration. Metadata, if specified, will be validated and encoded\n        according to the table's\n        :attr:`metadata_schema<tskit.MigrationTable.metadata_schema>`.\n\n        :param float left: The left coordinate (inclusive).\n        :param float right: The right coordinate (exclusive).\n        :param int node: The node ID.\n        :param int source: The ID of the source population.\n        :param int dest: The ID of the destination population.\n        :param float time: The time of the migration event.\n        :param object metadata: Any object that is valid metadata for the table's schema.\n            Defaults to the default metadata value for the table's schema. This is\n            typically ``{}``. 
For no schema, ``None``.\n        :return: The ID of the newly added migration.\n        :rtype: int\n        \"\"\"\n        if metadata is None:\n            metadata = self.metadata_schema.empty_value\n        metadata = self.metadata_schema.validate_and_encode_row(metadata)\n        return self.ll_table.add_row(left, right, node, source, dest, time, metadata)\n\n    def set_columns(\n        self,\n        left=None,\n        right=None,\n        node=None,\n        source=None,\n        dest=None,\n        time=None,\n        metadata=None,\n        metadata_offset=None,\n        metadata_schema=None,\n    ):\n        \"\"\"\n        Sets the values for each column in this :class:`MigrationTable` using the values\n        in the specified arrays. Overwrites existing data in all the table columns.\n\n        All parameters except ``metadata`` and ``metadata_offset`` are mandatory,\n        and must be numpy arrays of the same length (which is equal to the number of\n        migrations the table will contain).\n        The ``metadata`` and ``metadata_offset`` parameters must be supplied together,\n        and meet the requirements for :ref:`sec_encoding_ragged_columns`.\n        See :ref:`sec_tables_api_binary_columns` for more information and\n        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.\n\n        :param left: The left coordinates (inclusive).\n        :type left: numpy.ndarray, dtype=np.float64\n        :param right: The right coordinates (exclusive).\n        :type right: numpy.ndarray, dtype=np.float64\n        :param node: The node IDs.\n        :type node: numpy.ndarray, dtype=np.int32\n        :param source: The source population IDs.\n        :type source: numpy.ndarray, dtype=np.int32\n        :param dest: The destination population IDs.\n        :type dest: numpy.ndarray, dtype=np.int32\n        :param time: The time of each migration.\n        :type time: numpy.ndarray, dtype=np.float64\n        :param 
metadata: The flattened metadata array. Must be specified along\n            with ``metadata_offset``. If not specified or None, an empty metadata\n            value is stored for each migration.\n        :type metadata: numpy.ndarray, dtype=np.int8\n        :param metadata_offset: The offsets into the ``metadata`` array.\n        :type metadata_offset: numpy.ndarray, dtype=np.uint32.\n        :param metadata_schema: The encoded metadata schema. If None (default)\n            do not overwrite the existing schema. Note that a schema will need to be\n            encoded as a string, e.g. via ``repr(new_metadata_schema)``.\n        :type metadata_schema: str\n        \"\"\"\n        self._check_required_args(\n            left=left, right=right, node=node, source=source, dest=dest, time=time\n        )\n        self.ll_table.set_columns(\n            dict(\n                left=left,\n                right=right,\n                node=node,\n                source=source,\n                dest=dest,\n                time=time,\n                metadata=metadata,\n                metadata_offset=metadata_offset,\n                metadata_schema=metadata_schema,\n            )\n        )\n\n    def append_columns(\n        self,\n        left,\n        right,\n        node,\n        source,\n        dest,\n        time,\n        metadata=None,\n        metadata_offset=None,\n    ):\n        \"\"\"\n        Appends the specified arrays to the end of the columns of this\n        :class:`MigrationTable`. This allows many new rows to be added at once.\n\n        All parameters except ``metadata`` and ``metadata_offset`` are mandatory,\n        and must be numpy arrays of the same length (which is equal to the number of\n        additional migrations to add to the table). 
The ``metadata`` and\n        ``metadata_offset`` parameters must be supplied together, and\n        meet the requirements for :ref:`sec_encoding_ragged_columns`.\n        See :ref:`sec_tables_api_binary_columns` for more information and\n        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.\n\n        :param left: The left coordinates (inclusive).\n        :type left: numpy.ndarray, dtype=np.float64\n        :param right: The right coordinates (exclusive).\n        :type right: numpy.ndarray, dtype=np.float64\n        :param node: The node IDs.\n        :type node: numpy.ndarray, dtype=np.int32\n        :param source: The source population IDs.\n        :type source: numpy.ndarray, dtype=np.int32\n        :param dest: The destination population IDs.\n        :type dest: numpy.ndarray, dtype=np.int32\n        :param time: The time of each migration.\n        :type time: numpy.ndarray, dtype=np.float64\n        :param metadata: The flattened metadata array. Must be specified along\n            with ``metadata_offset``. If not specified or None, an empty metadata\n            value is stored for each migration.\n        :type metadata: numpy.ndarray, dtype=np.int8\n        :param metadata_offset: The offsets into the ``metadata`` array.\n        :type metadata_offset: numpy.ndarray, dtype=np.uint32.\n        \"\"\"\n        self.ll_table.append_columns(\n            dict(\n                left=left,\n                right=right,\n                node=node,\n                source=source,\n                dest=dest,\n                time=time,\n                metadata=metadata,\n                metadata_offset=metadata_offset,\n            )\n        )\n\n\nclass SiteTable(MutableMetadataTable):\n    \"\"\"\n    A table defining the sites in a tree sequence. 
See the\n    :ref:`definitions <sec_site_table_definition>` for details on the columns\n    in this table and the\n    :ref:`tree sequence requirements <sec_valid_tree_sequence_requirements>` section\n    for the properties needed for a site table to be a part of a valid tree\n    sequence.\n\n    .. include:: substitutions/table_edit_warning.rst\n\n    :ivar position: The array of site position coordinates.\n    :vartype position: numpy.ndarray, dtype=np.float64\n    :ivar ancestral_state: The flattened array of ancestral state strings.\n        See :ref:`sec_tables_api_text_columns` for more details.\n    :vartype ancestral_state: numpy.ndarray, dtype=np.int8\n    :ivar ancestral_state_offset: The offsets of rows in the ancestral_state\n        array. See :ref:`sec_tables_api_text_columns` for more details.\n    :vartype ancestral_state_offset: numpy.ndarray, dtype=np.uint32\n    :ivar metadata: The flattened array of binary metadata values. See\n        :ref:`sec_tables_api_binary_columns` for more details.\n    :vartype metadata: numpy.ndarray, dtype=np.int8\n    :ivar metadata_offset: The array of offsets into the metadata column. 
See\n        :ref:`sec_tables_api_binary_columns` for more details.\n    :vartype metadata_offset: numpy.ndarray, dtype=np.uint32\n    :ivar metadata_schema: The metadata schema for this table's metadata column\n    :vartype metadata_schema: tskit.MetadataSchema\n    \"\"\"\n\n    table_name = \"sites\"\n    column_names = [\n        \"position\",\n        \"ancestral_state\",\n        \"ancestral_state_offset\",\n        \"metadata\",\n        \"metadata_offset\",\n    ]\n\n    def __init__(self, max_rows_increment=0, ll_table=None):\n        if ll_table is None:\n            ll_table = _tskit.SiteTable(max_rows_increment=max_rows_increment)\n        super().__init__(ll_table, SiteTableRow)\n\n    def add_row(self, position, ancestral_state, metadata=None):\n        \"\"\"\n        Adds a new row to this :class:`SiteTable` and returns the ID of the\n        corresponding site. Metadata, if specified, will be validated and encoded\n        according to the table's\n        :attr:`metadata_schema<tskit.SiteTable.metadata_schema>`.\n\n        :param float position: The position of this site in genome coordinates.\n        :param str ancestral_state: The state of this site at the root of the tree.\n        :param object metadata: Any object that is valid metadata for the table's schema.\n            Defaults to the default metadata value for the table's schema. This is\n            typically ``{}``. 
For no schema, ``None``.\n        :return: The ID of the newly added site.\n        :rtype: int\n        \"\"\"\n        if metadata is None:\n            metadata = self.metadata_schema.empty_value\n        metadata = self.metadata_schema.validate_and_encode_row(metadata)\n        return self.ll_table.add_row(position, ancestral_state, metadata)\n\n    def set_columns(\n        self,\n        position=None,\n        ancestral_state=None,\n        ancestral_state_offset=None,\n        metadata=None,\n        metadata_offset=None,\n        metadata_schema=None,\n    ):\n        \"\"\"\n        Sets the values for each column in this :class:`SiteTable` using the values\n        in the specified arrays. Overwrites existing data in all the table columns.\n\n        The ``position``, ``ancestral_state`` and ``ancestral_state_offset``\n        parameters are mandatory, and must be 1D numpy arrays. The length\n        of the ``position`` array determines the number of rows in table.\n        The ``ancestral_state`` and ``ancestral_state_offset`` parameters must\n        be supplied together, and meet the requirements for\n        :ref:`sec_encoding_ragged_columns` (see\n        :ref:`sec_tables_api_text_columns` for more information). The\n        ``metadata`` and ``metadata_offset`` parameters must be supplied\n        together, and meet the requirements for\n        :ref:`sec_encoding_ragged_columns` (see\n        :ref:`sec_tables_api_binary_columns` for more information) and\n        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.\n\n        :param position: The position of each site in genome coordinates.\n        :type position: numpy.ndarray, dtype=np.float64\n        :param ancestral_state: The flattened ancestral_state array. 
Required.\n        :type ancestral_state: numpy.ndarray, dtype=np.int8\n        :param ancestral_state_offset: The offsets into the ``ancestral_state`` array.\n        :type ancestral_state_offset: numpy.ndarray, dtype=np.uint32.\n        :param metadata: The flattened metadata array. Must be specified along\n            with ``metadata_offset``. If not specified or None, an empty metadata\n            value is stored for each site.\n        :type metadata: numpy.ndarray, dtype=np.int8\n        :param metadata_offset: The offsets into the ``metadata`` array.\n        :type metadata_offset: numpy.ndarray, dtype=np.uint32.\n        :param metadata_schema: The encoded metadata schema. If None (default)\n            do not overwrite the existing schema. Note that a schema will need to be\n            encoded as a string, e.g. via ``repr(new_metadata_schema)``.\n        :type metadata_schema: str\n        \"\"\"\n        self._check_required_args(\n            position=position,\n            ancestral_state=ancestral_state,\n            ancestral_state_offset=ancestral_state_offset,\n        )\n        self.ll_table.set_columns(\n            dict(\n                position=position,\n                ancestral_state=ancestral_state,\n                ancestral_state_offset=ancestral_state_offset,\n                metadata=metadata,\n                metadata_offset=metadata_offset,\n                metadata_schema=metadata_schema,\n            )\n        )\n\n    def append_columns(\n        self,\n        position,\n        ancestral_state,\n        ancestral_state_offset,\n        metadata=None,\n        metadata_offset=None,\n    ):\n        \"\"\"\n        Appends the specified arrays to the end of the columns of this\n        :class:`SiteTable`. This allows many new rows to be added at once.\n\n        The ``position``, ``ancestral_state`` and ``ancestral_state_offset``\n        parameters are mandatory, and must be 1D numpy arrays. 
The length\n        of the ``position`` array determines the number of additional rows\n        to add to the table.\n        The ``ancestral_state`` and ``ancestral_state_offset`` parameters must\n        be supplied together, and meet the requirements for\n        :ref:`sec_encoding_ragged_columns` (see\n        :ref:`sec_tables_api_text_columns` for more information). The\n        ``metadata`` and ``metadata_offset`` parameters must be supplied\n        together, and meet the requirements for\n        :ref:`sec_encoding_ragged_columns` (see\n        :ref:`sec_tables_api_binary_columns` for more information) and\n        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.\n\n        :param position: The position of each site in genome coordinates.\n        :type position: numpy.ndarray, dtype=np.float64\n        :param ancestral_state: The flattened ancestral_state array. Required.\n        :type ancestral_state: numpy.ndarray, dtype=np.int8\n        :param ancestral_state_offset: The offsets into the ``ancestral_state`` array.\n        :type ancestral_state_offset: numpy.ndarray, dtype=np.uint32.\n        :param metadata: The flattened metadata array. Must be specified along\n            with ``metadata_offset``. 
If not specified or None, an empty metadata\n            value is stored for each node.\n        :type metadata: numpy.ndarray, dtype=np.int8\n        :param metadata_offset: The offsets into the ``metadata`` array.\n        :type metadata_offset: numpy.ndarray, dtype=np.uint32.\n        \"\"\"\n        self.ll_table.append_columns(\n            dict(\n                position=position,\n                ancestral_state=ancestral_state,\n                ancestral_state_offset=ancestral_state_offset,\n                metadata=metadata,\n                metadata_offset=metadata_offset,\n            )\n        )\n\n    def packset_ancestral_state(self, ancestral_states):\n        \"\"\"\n        Packs the specified list of ancestral_state values and updates the\n        ``ancestral_state`` and ``ancestral_state_offset`` columns. The length\n        of the ancestral_states array must be equal to the number of rows in\n        the table.\n\n        :param list(str) ancestral_states: A list of string ancestral state values.\n        \"\"\"\n        packed, offset = util.pack_strings(ancestral_states)\n        d = self.asdict()\n        d[\"ancestral_state\"] = packed\n        d[\"ancestral_state_offset\"] = offset\n        self.set_columns(**d)\n\n\nclass MutationTable(MutableMetadataTable):\n    \"\"\"\n    A table defining the mutations in a tree sequence. See the\n    :ref:`definitions <sec_mutation_table_definition>` for details on the columns\n    in this table and the\n    :ref:`tree sequence requirements <sec_valid_tree_sequence_requirements>` section\n    for the properties needed for a mutation table to be a part of a valid tree\n    sequence.\n\n    .. 
include:: substitutions/table_edit_warning.rst\n\n    :ivar site: The array of site IDs.\n    :vartype site: numpy.ndarray, dtype=np.int32\n    :ivar node: The array of node IDs.\n    :vartype node: numpy.ndarray, dtype=np.int32\n    :ivar time: The array of time values.\n    :vartype time: numpy.ndarray, dtype=np.float64\n    :ivar derived_state: The flattened array of derived state strings.\n        See :ref:`sec_tables_api_text_columns` for more details.\n    :vartype derived_state: numpy.ndarray, dtype=np.int8\n    :ivar derived_state_offset: The offsets of rows in the derived_state\n        array. See :ref:`sec_tables_api_text_columns` for more details.\n    :vartype derived_state_offset: numpy.ndarray, dtype=np.uint32\n    :ivar parent: The array of parent mutation IDs.\n    :vartype parent: numpy.ndarray, dtype=np.int32\n    :ivar metadata: The flattened array of binary metadata values. See\n        :ref:`sec_tables_api_binary_columns` for more details.\n    :vartype metadata: numpy.ndarray, dtype=np.int8\n    :ivar metadata_offset: The array of offsets into the metadata column. 
See\n        :ref:`sec_tables_api_binary_columns` for more details.\n    :vartype metadata_offset: numpy.ndarray, dtype=np.uint32\n    :ivar metadata_schema: The metadata schema for this table's metadata column\n    :vartype metadata_schema: tskit.MetadataSchema\n    \"\"\"\n\n    table_name = \"mutations\"\n    column_names = [\n        \"site\",\n        \"node\",\n        \"time\",\n        \"derived_state\",\n        \"derived_state_offset\",\n        \"parent\",\n        \"metadata\",\n        \"metadata_offset\",\n    ]\n\n    def __init__(self, max_rows_increment=0, ll_table=None):\n        if ll_table is None:\n            ll_table = _tskit.MutationTable(max_rows_increment=max_rows_increment)\n        super().__init__(ll_table, MutationTableRow)\n\n    def add_row(self, site, node, derived_state, parent=-1, metadata=None, time=None):\n        \"\"\"\n        Adds a new row to this :class:`MutationTable` and returns the ID of the\n        corresponding mutation. Metadata, if specified, will be validated and encoded\n        according to the table's\n        :attr:`metadata_schema<tskit.MutationTable.metadata_schema>`.\n\n        :param int site: The ID of the site that this mutation occurs at.\n        :param int node: The ID of the first node inheriting this mutation.\n        :param str derived_state: The state of the site at this mutation's node.\n        :param int parent: The ID of the parent mutation. If not specified,\n            defaults to :attr:`NULL`.\n        :param object metadata: Any object that is valid metadata for the table's schema.\n            Defaults to the default metadata value for the table's schema. This is\n            typically ``{}``. For no schema, ``None``.\n        :return: The ID of the newly added mutation.\n        :param float time: The occurrence time for the new mutation. 
If not specified,\n            defaults to ``UNKNOWN_TIME``, indicating the time is unknown.\n        :rtype: int\n        \"\"\"\n        if metadata is None:\n            metadata = self.metadata_schema.empty_value\n        metadata = self.metadata_schema.validate_and_encode_row(metadata)\n        return self.ll_table.add_row(\n            site,\n            node,\n            derived_state,\n            parent,\n            metadata,\n            UNKNOWN_TIME if time is None else time,\n        )\n\n    def set_columns(\n        self,\n        site=None,\n        node=None,\n        time=None,\n        derived_state=None,\n        derived_state_offset=None,\n        parent=None,\n        metadata=None,\n        metadata_offset=None,\n        metadata_schema=None,\n    ):\n        \"\"\"\n        Sets the values for each column in this :class:`MutationTable` using the values\n        in the specified arrays. Overwrites existing data in all the table columns.\n\n        The ``site``, ``node``, ``derived_state`` and ``derived_state_offset``\n        parameters are mandatory, and must be 1D numpy arrays. The\n        ``site`` and ``node`` (also ``parent`` and ``time``, if supplied) arrays\n        must be of equal length, and determine the number of rows in the table.\n        The ``derived_state`` and ``derived_state_offset`` parameters must\n        be supplied together, and meet the requirements for\n        :ref:`sec_encoding_ragged_columns` (see\n        :ref:`sec_tables_api_text_columns` for more information). 
The\n        ``metadata`` and ``metadata_offset`` parameters must be supplied\n        together, and meet the requirements for\n        :ref:`sec_encoding_ragged_columns` (see\n        :ref:`sec_tables_api_binary_columns` for more information) and\n        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.\n\n        :param site: The ID of the site each mutation occurs at.\n        :type site: numpy.ndarray, dtype=np.int32\n        :param node: The ID of the node each mutation is associated with.\n        :type node: numpy.ndarray, dtype=np.int32\n        :param time: The time values for each mutation.\n        :type time: numpy.ndarray, dtype=np.float64\n        :param derived_state: The flattened derived_state array. Required.\n        :type derived_state: numpy.ndarray, dtype=np.int8\n        :param derived_state_offset: The offsets into the ``derived_state`` array.\n        :type derived_state_offset: numpy.ndarray, dtype=np.uint32.\n        :param parent: The ID of the parent mutation for each mutation.\n        :type parent: numpy.ndarray, dtype=np.int32\n        :param metadata: The flattened metadata array. Must be specified along\n            with ``metadata_offset``. If not specified or None, an empty metadata\n            value is stored for each mutation.\n        :type metadata: numpy.ndarray, dtype=np.int8\n        :param metadata_offset: The offsets into the ``metadata`` array.\n        :type metadata_offset: numpy.ndarray, dtype=np.uint32.\n        :param metadata_schema: The encoded metadata schema. If None (default)\n            do not overwrite the existing schema. Note that a schema will need to be\n            encoded as a string, e.g. 
via ``repr(new_metadata_schema)``.\n        :type metadata_schema: str\n        \"\"\"\n        self._check_required_args(\n            site=site,\n            node=node,\n            derived_state=derived_state,\n            derived_state_offset=derived_state_offset,\n        )\n        self.ll_table.set_columns(\n            dict(\n                site=site,\n                node=node,\n                parent=parent,\n                time=time,\n                derived_state=derived_state,\n                derived_state_offset=derived_state_offset,\n                metadata=metadata,\n                metadata_offset=metadata_offset,\n                metadata_schema=metadata_schema,\n            )\n        )\n\n    def append_columns(\n        self,\n        site,\n        node,\n        derived_state,\n        derived_state_offset,\n        parent=None,\n        time=None,\n        metadata=None,\n        metadata_offset=None,\n    ):\n        \"\"\"\n        Appends the specified arrays to the end of the columns of this\n        :class:`MutationTable`. This allows many new rows to be added at once.\n\n        The ``site``, ``node``, ``derived_state`` and ``derived_state_offset``\n        parameters are mandatory, and must be 1D numpy arrays. The\n        ``site`` and ``node`` (also ``time`` and ``parent``, if supplied) arrays\n        must be of equal length, and determine the number of additional\n        rows to add to the table.\n        The ``derived_state`` and ``derived_state_offset`` parameters must\n        be supplied together, and meet the requirements for\n        :ref:`sec_encoding_ragged_columns` (see\n        :ref:`sec_tables_api_text_columns` for more information). 
The\n        ``metadata`` and ``metadata_offset`` parameters must be supplied\n        together, and meet the requirements for\n        :ref:`sec_encoding_ragged_columns` (see\n        :ref:`sec_tables_api_binary_columns` for more information) and\n        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.\n\n        :param site: The ID of the site each mutation occurs at.\n        :type site: numpy.ndarray, dtype=np.int32\n        :param node: The ID of the node each mutation is associated with.\n        :type node: numpy.ndarray, dtype=np.int32\n        :param time: The time values for each mutation.\n        :type time: numpy.ndarray, dtype=np.float64\n        :param derived_state: The flattened derived_state array. Required.\n        :type derived_state: numpy.ndarray, dtype=np.int8\n        :param derived_state_offset: The offsets into the ``derived_state`` array.\n        :type derived_state_offset: numpy.ndarray, dtype=np.uint32.\n        :param parent: The ID of the parent mutation for each mutation.\n        :type parent: numpy.ndarray, dtype=np.int32\n        :param metadata: The flattened metadata array. Must be specified along\n            with ``metadata_offset``. 
If not specified or None, an empty metadata\n            value is stored for each mutation.\n        :type metadata: numpy.ndarray, dtype=np.int8\n        :param metadata_offset: The offsets into the ``metadata`` array.\n        :type metadata_offset: numpy.ndarray, dtype=np.uint32.\n        \"\"\"\n        self.ll_table.append_columns(\n            dict(\n                site=site,\n                node=node,\n                time=time,\n                parent=parent,\n                derived_state=derived_state,\n                derived_state_offset=derived_state_offset,\n                metadata=metadata,\n                metadata_offset=metadata_offset,\n            )\n        )\n\n    def packset_derived_state(self, derived_states):\n        \"\"\"\n        Packs the specified list of derived_state values and updates the\n        ``derived_state`` and ``derived_state_offset`` columns. The length\n        of the derived_states array must be equal to the number of rows in\n        the table.\n\n        :param list(str) derived_states: A list of string derived state values.\n        \"\"\"\n        packed, offset = util.pack_strings(derived_states)\n        d = self.asdict()\n        d[\"derived_state\"] = packed\n        d[\"derived_state_offset\"] = offset\n        self.set_columns(**d)\n\n    def keep_rows(self, keep):\n        \"\"\"\n        .. include:: substitutions/table_keep_rows_main.rst\n\n        The values in the ``parent`` column are updated according to this\n        map, so that reference integrity within the table is maintained.\n        As a consequence of this, the values in the ``parent`` column\n        for kept rows are bounds-checked and an error raised if they\n        are not valid. 
Rows that are deleted are not checked for\n        parent ID integrity.\n\n        If an attempt is made to delete rows that are referred to by\n        the ``parent`` column of rows that are retained, an error\n        is raised.\n\n        These error conditions are checked before any alterations to\n        the table are made.\n\n        :param array-like keep: The rows to keep as a boolean array. Must\n            be the same length as the table, and convertible to a numpy\n            array of dtype bool.\n        :return: The mapping between old and new row IDs as a numpy\n            array (dtype int32).\n        :rtype: numpy.ndarray (dtype=np.int32)\n        \"\"\"\n        return super().keep_rows(keep)\n\n\nclass PopulationTable(MutableMetadataTable):\n    \"\"\"\n    A table defining the populations referred to in a tree sequence.\n    The PopulationTable stores metadata for populations that may be referred to\n    in the NodeTable and MigrationTable.  Note that although nodes\n    may be associated with populations, this association is stored in\n    the :class:`NodeTable`: only metadata on each population is stored\n    in the population table.\n\n    .. include:: substitutions/table_edit_warning.rst\n\n    :ivar metadata: The flattened array of binary metadata values. See\n        :ref:`sec_tables_api_binary_columns` for more details.\n    :vartype metadata: numpy.ndarray, dtype=np.int8\n    :ivar metadata_offset: The array of offsets into the metadata column. 
See\n        :ref:`sec_tables_api_binary_columns` for more details.\n    :vartype metadata_offset: numpy.ndarray, dtype=np.uint32\n    :ivar metadata_schema: The metadata schema for this table's metadata column\n    :vartype metadata_schema: tskit.MetadataSchema\n    \"\"\"\n\n    table_name = \"populations\"\n    column_names = [\"metadata\", \"metadata_offset\"]\n\n    def __init__(self, max_rows_increment=0, ll_table=None):\n        if ll_table is None:\n            ll_table = _tskit.PopulationTable(max_rows_increment=max_rows_increment)\n        super().__init__(ll_table, PopulationTableRow)\n\n    def add_row(self, metadata=None):\n        \"\"\"\n        Adds a new row to this :class:`PopulationTable` and returns the ID of the\n        corresponding population. Metadata, if specified, will be validated and encoded\n        according to the table's\n        :attr:`metadata_schema<tskit.PopulationTable.metadata_schema>`.\n\n        :param object metadata: Any object that is valid metadata for the table's schema.\n            Defaults to the default metadata value for the table's schema. This is\n            typically ``{}``. For no schema, ``None``.\n        :return: The ID of the newly added population.\n        :rtype: int\n        \"\"\"\n        if metadata is None:\n            metadata = self.metadata_schema.empty_value\n        metadata = self.metadata_schema.validate_and_encode_row(metadata)\n        return self.ll_table.add_row(metadata=metadata)\n\n    def set_columns(self, metadata=None, metadata_offset=None, metadata_schema=None):\n        \"\"\"\n        Sets the values for each column in this :class:`PopulationTable` using the\n        values in the specified arrays. 
Overwrites existing data in all the table\n        columns.\n\n        The ``metadata`` and ``metadata_offset`` parameters must be supplied\n        together, and meet the requirements for\n        :ref:`sec_encoding_ragged_columns` (see\n        :ref:`sec_tables_api_binary_columns` for more information) and\n        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.\n\n        :param metadata: The flattened metadata array. Must be specified along\n            with ``metadata_offset``. If not specified or None, an empty metadata\n            value is stored for each population.\n        :type metadata: numpy.ndarray, dtype=np.int8\n        :param metadata_offset: The offsets into the ``metadata`` array.\n        :type metadata_offset: numpy.ndarray, dtype=np.uint32.\n        :param metadata_schema: The encoded metadata schema. If None (default)\n            do not overwrite the existing schema. Note that a schema will need to be\n            encoded as a string, e.g. via ``repr(new_metadata_schema)``.\n        :type metadata_schema: str\n        \"\"\"\n        self.ll_table.set_columns(\n            dict(\n                metadata=metadata,\n                metadata_offset=metadata_offset,\n                metadata_schema=metadata_schema,\n            )\n        )\n\n    def append_columns(self, metadata=None, metadata_offset=None):\n        \"\"\"\n        Appends the specified arrays to the end of the columns of this\n        :class:`PopulationTable`. This allows many new rows to be added at once.\n\n        The ``metadata`` and ``metadata_offset`` parameters must be supplied\n        together, and meet the requirements for\n        :ref:`sec_encoding_ragged_columns` (see\n        :ref:`sec_tables_api_binary_columns` for more information) and\n        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.\n\n        :param metadata: The flattened metadata array. 
Must be specified along\n            with ``metadata_offset``. If not specified or None, an empty metadata\n            value is stored for each node.\n        :type metadata: numpy.ndarray, dtype=np.int8\n        :param metadata_offset: The offsets into the ``metadata`` array.\n        :type metadata_offset: numpy.ndarray, dtype=np.uint32.\n        \"\"\"\n        self.ll_table.append_columns(\n            dict(metadata=metadata, metadata_offset=metadata_offset)\n        )\n\n\nclass ProvenanceTable(MutableBaseTable):\n    \"\"\"\n    A table recording the provenance (i.e., history) of this table, so that the\n    origin of the underlying data and sequence of subsequent operations can be\n    traced. Each row contains a \"record\" string (recommended format: JSON) and\n    a timestamp.\n\n    .. todo::\n        The format of the `record` field will be more precisely specified in\n        the future.\n\n    :ivar record: The flattened array containing the record strings.\n        :ref:`sec_tables_api_text_columns` for more details.\n    :vartype record: numpy.ndarray, dtype=np.int8\n    :ivar record_offset: The array of offsets into the record column. See\n        :ref:`sec_tables_api_text_columns` for more details.\n    :vartype record_offset: numpy.ndarray, dtype=np.uint32\n    :ivar timestamp: The flattened array containing the timestamp strings.\n        :ref:`sec_tables_api_text_columns` for more details.\n    :vartype timestamp: numpy.ndarray, dtype=np.int8\n    :ivar timestamp_offset: The array of offsets into the timestamp column. 
See\n        :ref:`sec_tables_api_text_columns` for more details.\n    :vartype timestamp_offset: numpy.ndarray, dtype=np.uint32\n    \"\"\"\n\n    table_name = \"provenances\"\n    column_names = [\"record\", \"record_offset\", \"timestamp\", \"timestamp_offset\"]\n\n    def __init__(self, max_rows_increment=0, ll_table=None):\n        if ll_table is None:\n            ll_table = _tskit.ProvenanceTable(max_rows_increment=max_rows_increment)\n        super().__init__(ll_table, ProvenanceTableRow)\n\n    def add_row(self, record, timestamp=None):\n        \"\"\"\n        Adds a new row to this ProvenanceTable consisting of the specified record and\n        timestamp. If timestamp is not specified, it is automatically generated from\n        the current time.\n\n        :param str record: A provenance record, describing the parameters and\n            environment used to generate the current set of tables.\n        :param str timestamp: A string timestamp. This should be in ISO8601 form.\n        \"\"\"\n        if timestamp is None:\n            timestamp = datetime.datetime.now().isoformat()\n        # Note that the order of the positional arguments has been reversed\n        # from the low-level module, which is a bit confusing. However, we\n        # want the default behaviour here to be to add a row to the table at\n        # the current time as simply as possible.\n        return self.ll_table.add_row(record=record, timestamp=timestamp)\n\n    def set_columns(\n        self, timestamp=None, timestamp_offset=None, record=None, record_offset=None\n    ):\n        \"\"\"\n        Sets the values for each column in this :class:`ProvenanceTable` using the\n        values in the specified arrays. 
Overwrites existing data in all the table\n        columns.\n\n        The ``timestamp`` and ``timestamp_offset`` parameters must be supplied\n        together, and meet the requirements for\n        :ref:`sec_encoding_ragged_columns` (see\n        :ref:`sec_tables_api_binary_columns` for more information). Likewise\n        for the ``record`` and ``record_offset`` columns\n\n        :param timestamp: The flattened timestamp array. Must be specified along\n            with ``timestamp_offset``. If not specified or None, an empty timestamp\n            value is stored for each node.\n        :type timestamp: numpy.ndarray, dtype=np.int8\n        :param timestamp_offset: The offsets into the ``timestamp`` array.\n        :type timestamp_offset: numpy.ndarray, dtype=np.uint32.\n        :param record: The flattened record array. Must be specified along\n            with ``record_offset``. If not specified or None, an empty record\n            value is stored for each node.\n        :type record: numpy.ndarray, dtype=np.int8\n        :param record_offset: The offsets into the ``record`` array.\n        :type record_offset: numpy.ndarray, dtype=np.uint32.\n        \"\"\"\n        self.ll_table.set_columns(\n            dict(\n                timestamp=timestamp,\n                timestamp_offset=timestamp_offset,\n                record=record,\n                record_offset=record_offset,\n            )\n        )\n\n    def append_columns(\n        self, timestamp=None, timestamp_offset=None, record=None, record_offset=None\n    ):\n        \"\"\"\n        Appends the specified arrays to the end of the columns of this\n        :class:`ProvenanceTable`. This allows many new rows to be added at once.\n\n        The ``timestamp`` and ``timestamp_offset`` parameters must be supplied\n        together, and meet the requirements for\n        :ref:`sec_encoding_ragged_columns` (see\n        :ref:`sec_tables_api_binary_columns` for more information). 
Likewise\n        for the ``record`` and ``record_offset`` columns\n\n        :param timestamp: The flattened timestamp array. Must be specified along\n            with ``timestamp_offset``. If not specified or None, an empty timestamp\n            value is stored for each node.\n        :type timestamp: numpy.ndarray, dtype=np.int8\n        :param timestamp_offset: The offsets into the ``timestamp`` array.\n        :type timestamp_offset: numpy.ndarray, dtype=np.uint32.\n        :param record: The flattened record array. Must be specified along\n            with ``record_offset``. If not specified or None, an empty record\n            value is stored for each node.\n        :type record: numpy.ndarray, dtype=np.int8\n        :param record_offset: The offsets into the ``record`` array.\n        :type record_offset: numpy.ndarray, dtype=np.uint32.\n        \"\"\"\n        self.ll_table.append_columns(\n            dict(\n                timestamp=timestamp,\n                timestamp_offset=timestamp_offset,\n                record=record,\n                record_offset=record_offset,\n            )\n        )\n\n    def packset_record(self, records):\n        \"\"\"\n        Packs the specified list of record values and updates the\n        ``record`` and ``record_offset`` columns. The length\n        of the records array must be equal to the number of rows in\n        the table.\n\n        :param list(str) records: A list of string record values.\n        \"\"\"\n        packed, offset = util.pack_strings(records)\n        d = self.asdict()\n        d[\"record\"] = packed\n        d[\"record_offset\"] = offset\n        self.set_columns(**d)\n\n    def packset_timestamp(self, timestamps):\n        \"\"\"\n        Packs the specified list of timestamp values and updates the\n        ``timestamp`` and ``timestamp_offset`` columns. 
The length\n        of the timestamps array must be equal to the number of rows in\n        the table.\n\n        :param list(str) timestamps: A list of string timestamp values.\n        \"\"\"\n        packed, offset = util.pack_strings(timestamps)\n        d = self.asdict()\n        d[\"timestamp\"] = packed\n        d[\"timestamp_offset\"] = offset\n        self.set_columns(**d)\n\n    def equals(self, other, ignore_timestamps=False):\n        \"\"\"\n        Returns True if  `self` and `other` are equal. By default, two provenance\n        tables are considered equal if their columns are byte-for-byte identical.\n\n        :param other: Another provenance table instance\n        :param bool ignore_timestamps: If True exclude the timestamp column\n            from the comparison.\n        :return: True if other is equal to this provenance table; False otherwise.\n        :rtype: bool\n        \"\"\"\n        return self._equals_internal(other, ignore_timestamps=ignore_timestamps)\n\n    def assert_equals(self, other, *, ignore_timestamps=False):\n        \"\"\"\n        Raise an AssertionError for the first found difference between\n        this and another provenance table.\n\n        :param other: Another provenance table instance\n        :param bool ignore_timestamps: If True exclude the timestamp column\n            from the comparison.\n        \"\"\"\n        self._assert_equals_internal(other, ignore_timestamps=ignore_timestamps)\n\n\n# We define segment ordering by (left, right, node) tuples\n@dataclasses.dataclass(eq=True, order=True)\nclass IdentitySegment:\n    \"\"\"\n    A single segment of identity by descent spanning a genomic interval\n    for a specific ancestor node.\n    \"\"\"\n\n    left: float\n    \"\"\"The left genomic coordinate (inclusive).\"\"\"\n    right: float\n    \"\"\"The right genomic coordinate (exclusive).\"\"\"\n    node: int\n    \"\"\"The ID of the most recent common ancestor node.\"\"\"\n\n    @property\n    def 
span(self) -> float:\n        \"\"\"\n        The length of the genomic region spanned by this identity segment.\n        \"\"\"\n        return self.right - self.left\n\n\nclass IdentitySegmentList(collections.abc.Iterable, collections.abc.Sized):\n    \"\"\"\n    A summary of identity-by-descent segments for some pair of samples in a\n    :class:`.IdentitySegments` result. If the ``store_segments`` argument\n    has been specified to :meth:`.TreeSequence.ibd_segments`, this class\n    can be treated as a sequence of :class:`.IdentitySegment` objects.\n\n    Access to the segment data via numpy arrays is also available via\n    the :attr:`.IdentitySegmentList.left`, :attr:`.IdentitySegmentList.right`\n    and :attr:`.IdentitySegmentList.node` attributes.\n\n    If ``store_segments`` is False, only the overall summary values\n    such as :attr:`.IdentitySegmentList.total_span` and ``len()`` are\n    available. Attempting to iterate over the list or access per-segment\n    arrays (``left``, ``right``, or ``node``) in this case will raise an\n    ``IdentitySegmentsNotStoredError``.\n\n    .. 
warning:: The order of segments within an IdentitySegmentList is\n        arbitrary and may change in the future\n\n    \"\"\"\n\n    def __init__(self, ll_segment_list):\n        self._ll_segment_list = ll_segment_list\n\n    def __iter__(self):\n        for left, right, node in zip(self.left, self.right, self.node):\n            yield IdentitySegment(float(left), float(right), int(node))\n\n    def __len__(self):\n        return self._ll_segment_list.num_segments\n\n    def __str__(self):\n        return (\n            f\"IdentitySegmentList(num_segments={len(self)}, \"\n            f\"total_span={self.total_span})\"\n        )\n\n    def __repr__(self):\n        return f\"IdentitySegmentList({repr(list(self))})\"\n\n    def __eq__(self, other):\n        if not isinstance(other, IdentitySegmentList):\n            return False\n        return list(self) == list(other)\n\n    @property\n    def total_span(self):\n        \"\"\"\n        The total genomic span covered by segments in this list. Equal to\n        ``sum(seg.span for seg in seglst)``.\n        \"\"\"\n        return self._ll_segment_list.total_span\n\n    @property\n    def left(self):\n        \"\"\"\n        A numpy array (dtype=np.float64) of the ``left`` coordinates of segments.\n        \"\"\"\n        return self._ll_segment_list.left\n\n    @property\n    def right(self):\n        \"\"\"\n        A numpy array (dtype=np.float64) of the ``right`` coordinates of segments.\n        \"\"\"\n        return self._ll_segment_list.right\n\n    @property\n    def node(self):\n        \"\"\"\n        A numpy array (dtype=np.int32) of the MRCA node IDs in segments.\n        \"\"\"\n        return self._ll_segment_list.node\n\n\nclass IdentitySegments(collections.abc.Mapping):\n    \"\"\"\n    A class summarising and optionally storing the segments of identity\n    by descent returned by :meth:`.TreeSequence.ibd_segments`. 
See the\n    :ref:`sec_identity` for more information and examples.\n\n    Along with the documented methods and attributes, the class supports\n    the Python mapping protocol, and can be regarded as a dictionary\n    mapping sample node pair tuples to the corresponding\n    :class:`.IdentitySegmentList`.\n\n    .. note:: It is important to note that the facilities available\n       for a given instance of this class are determined by the\n       ``store_pairs`` and ``store_segments`` arguments provided to\n       :meth:`.TreeSequence.ibd_segments`. For example, attempting\n       to access per-sample pair information (such as indexing with\n       ``[(a, b)]``, iterating over the mapping, or accessing\n       :attr:`.IdentitySegments.pairs`) if ``store_pairs`` is False will\n       result in an ``IdentityPairsNotStoredError`` being raised.\n\n    .. warning:: This class should not be instantiated directly.\n    \"\"\"\n\n    def __init__(self, ll_result, *, max_time, min_span, store_segments, store_pairs):\n        self._ll_identity_segments = ll_result\n        self.max_time = max_time\n        self.min_span = min_span\n        self.store_segments = store_segments\n        self.store_pairs = store_pairs\n\n    @property\n    def num_segments(self):\n        \"\"\"\n        The total number of identity segments found.\n        \"\"\"\n        return self._ll_identity_segments.num_segments\n\n    @property\n    def num_pairs(self):\n        \"\"\"\n        The total number of distinct sample pairs for which identity\n        segments were found. 
(Only available when ``store_pairs`` or\n        ``store_segments`` is specified).\n        \"\"\"\n        return self._ll_identity_segments.num_pairs\n\n    @property\n    def total_span(self):\n        \"\"\"\n        The total genomic sequence length spanned by all identity\n        segments that were found.\n        \"\"\"\n        return self._ll_identity_segments.total_span\n\n    @property\n    def pairs(self):\n        \"\"\"\n        A numpy array with shape ``(segs.num_pairs, 2)`` and dtype=np.int32\n        containing the sample pairs for which IBD segments were found.\n        \"\"\"\n        return self._ll_identity_segments.get_keys()\n\n    # We have two different versions of repr - one where we list out the segments\n    # for debugging, and the other that just shows the standard representation.\n    # We could have repr fail if store_segments isn't true, but then printing,\n    # e.g., a list of IdentitySegments objects would fail unexpectedly.\n    def __repr__(self):\n        if self.store_segments:\n            return f\"IdentitySegments({dict(self)})\"\n        return super().__repr__()\n\n    def __str__(self):\n        # TODO it would be nice to add horizontal lines as\n        # table separators to distinguish the two parts of the\n        # table like suggested here:\n        # https://github.com/tskit-dev/tskit/pull/1902#issuecomment-989943424\n        rows = [\n            [\"Parameters:\", \"\"],\n            [\"max_time\", str(self.max_time)],\n            [\"min_span\", str(self.min_span)],\n            [\"store_pairs\", str(self.store_pairs)],\n            [\"store_segments\", str(self.store_segments)],\n            [\"Results:\", \"\"],\n            [\"num_segments\", str(self.num_segments)],\n            [\"total_span\", str(self.total_span)],\n        ]\n        if self.store_pairs:\n            rows.append([\"num_pairs\", str(len(self))])\n        return util.unicode_table(rows, title=\"IdentitySegments\", 
row_separator=False)\n\n    def __getitem__(self, key):\n        sample_a, sample_b = key\n        return IdentitySegmentList(self._ll_identity_segments.get(sample_a, sample_b))\n\n    def __iter__(self):\n        return map(tuple, self._ll_identity_segments.get_keys())\n\n    def __len__(self):\n        return self.num_pairs\n\n\n# TODO move to reference_sequence.py when we start adding more functionality.\nclass ReferenceSequence(metadata.MetadataProvider):\n    \"\"\"\n    The :ref:`reference sequence<sec_data_model_reference_sequence>` associated\n    with a given :class:`.TableCollection` or :class:`.TreeSequence`.\n\n    Metadata concerning reference sequences can be described using the\n    :attr:`.metadata_schema` and stored in the :attr:`.metadata` attribute.\n    See the :ref:`examples<sec_metadata_examples_reference_sequence>` for\n    idiomatic usage.\n\n    .. warning:: This API is preliminary and currently only supports accessing\n       reference sequence information via the ``.data`` attribute. 
Future versions\n       will also enable transparent fetching of known reference sequences\n       from a URL (see https://github.com/tskit-dev/tskit/issues/2022).\n    \"\"\"\n\n    def __init__(self, ll_reference_sequence):\n        super().__init__(ll_reference_sequence)\n        self._ll_reference_sequence = ll_reference_sequence\n\n    def is_null(self) -> bool:\n        \"\"\"\n        Returns True if this :class:`.ReferenceSequence` is null, i.e.,\n        all fields are empty.\n        \"\"\"\n        return bool(self._ll_reference_sequence.is_null())\n\n    def clear(self):\n        self.data = \"\"\n        self.url = \"\"\n        self.metadata_schema = tskit.MetadataSchema(None)\n        self.metadata = b\"\"\n\n    # https://github.com/tskit-dev/tskit/issues/1984\n    # TODO add a __str__ method\n    # TODO add a _repr_html_\n    # FIXME This is a shortcut, we want to put the values in explicitly\n    # here to get more control over how they are displayed.\n    def __repr__(self):\n        return f\"ReferenceSequence({repr(self.asdict())})\"\n\n    @property\n    def data(self) -> str:\n        \"\"\"\n        The string encoding of the reference sequence such that ``data[j]``\n        represents the reference nucleotide at base ``j``. 
If this reference\n        sequence is writable, the value can be assigned, e.g.\n        ``tables.reference_sequence.data = \"ACGT\"``\n        \"\"\"\n        return self._ll_reference_sequence.data\n\n    @data.setter\n    def data(self, value):\n        self._ll_reference_sequence.data = value\n\n    @property\n    def url(self) -> str:\n        return self._ll_reference_sequence.url\n\n    @url.setter\n    def url(self, value):\n        self._ll_reference_sequence.url = value\n\n    def asdict(self) -> dict:\n        return {\n            \"metadata_schema\": repr(self.metadata_schema),\n            \"metadata\": self.metadata_bytes,\n            \"data\": self.data,\n            \"url\": self.url,\n        }\n\n    def __eq__(self, other):\n        return self.equals(other)\n\n    def equals(self, other, ignore_metadata=False):\n        try:\n            self.assert_equals(other, ignore_metadata)\n            return True\n        except AssertionError:\n            return False\n\n    def assert_equals(self, other, ignore_metadata=False):\n        if not ignore_metadata:\n            super().assert_equals(other)\n\n        if self.data != other.data:\n            raise AssertionError(\n                f\"Reference sequence data differs: self={self.data} other={other.data}\"\n            )\n        if self.url != other.url:\n            raise AssertionError(\n                f\"Reference sequence url differs: self={self.url} other={other.url}\"\n            )\n\n    @property\n    def nbytes(self):\n        # TODO this will be inefficient when we work with large references.\n        # Make a dedicated low-level method for getting the length of data.\n        return super().nbytes + len(self.url) + len(self.data)\n\n\nclass TableCollection(metadata.MetadataProvider):\n    \"\"\"\n    A collection of mutable tables defining a tree sequence. 
See the\n    :ref:`sec_data_model` section for definition on the various tables\n    and how they together define a :class:`TreeSequence`. Arbitrary\n    data can be stored in a TableCollection, but there are certain\n    :ref:`requirements <sec_valid_tree_sequence_requirements>` that must be\n    satisfied for these tables to be interpreted as a tree sequence.\n\n    To obtain an immutable :class:`TreeSequence` instance corresponding to the\n    current state of a ``TableCollection``, please use the :meth:`.tree_sequence`\n    method.\n    \"\"\"\n\n    set_err_text = (\n        \"Cannot set tables in a table collection: use table.replace_with() instead.\"\n    )\n\n    def __init__(self, sequence_length=0, *, ll_tables=None):\n        self._ll_tables = ll_tables\n        if ll_tables is None:\n            self._ll_tables = _tskit.TableCollection(sequence_length)\n        super().__init__(self._ll_tables)\n        self._individuals = IndividualTable(ll_table=self._ll_tables.individuals)\n        self._nodes = NodeTable(ll_table=self._ll_tables.nodes)\n        self._edges = EdgeTable(ll_table=self._ll_tables.edges)\n        self._migrations = MigrationTable(ll_table=self._ll_tables.migrations)\n        self._sites = SiteTable(ll_table=self._ll_tables.sites)\n        self._mutations = MutationTable(ll_table=self._ll_tables.mutations)\n        self._populations = PopulationTable(ll_table=self._ll_tables.populations)\n        self._provenances = ProvenanceTable(ll_table=self._ll_tables.provenances)\n\n    @property\n    def individuals(self) -> IndividualTable:\n        \"\"\"\n        The :ref:`sec_individual_table_definition` in this collection.\n        \"\"\"\n        return self._individuals\n\n    @individuals.setter\n    def individuals(self, value):\n        raise AttributeError(self.set_err_text)\n\n    @property\n    def nodes(self) -> NodeTable:\n        \"\"\"\n        The :ref:`sec_node_table_definition` in this collection.\n        \"\"\"\n        return 
self._nodes\n\n    @nodes.setter\n    def nodes(self, value):\n        raise AttributeError(self.set_err_text)\n\n    @property\n    def edges(self) -> EdgeTable:\n        \"\"\"\n        The :ref:`sec_edge_table_definition` in this collection.\n        \"\"\"\n        return self._edges\n\n    @edges.setter\n    def edges(self, value):\n        raise AttributeError(self.set_err_text)\n\n    @property\n    def migrations(self) -> MigrationTable:\n        \"\"\"\n        The :ref:`sec_migration_table_definition` in this collection\n        \"\"\"\n        return self._migrations\n\n    @migrations.setter\n    def migrations(self, value):\n        raise AttributeError(self.set_err_text)\n\n    @property\n    def sites(self) -> SiteTable:\n        \"\"\"\n        The :ref:`sec_site_table_definition` in this collection.\n        \"\"\"\n        return self._sites\n\n    @sites.setter\n    def sites(self, value):\n        raise AttributeError(self.set_err_text)\n\n    @property\n    def mutations(self) -> MutationTable:\n        \"\"\"\n        The :ref:`sec_mutation_table_definition` in this collection.\n        \"\"\"\n        return self._mutations\n\n    @mutations.setter\n    def mutations(self, value):\n        raise AttributeError(self.set_err_text)\n\n    @property\n    def populations(self) -> PopulationTable:\n        \"\"\"\n        The :ref:`sec_population_table_definition` in this collection.\n        \"\"\"\n        return self._populations\n\n    @populations.setter\n    def populations(self, value):\n        raise AttributeError(self.set_err_text)\n\n    @property\n    def provenances(self) -> ProvenanceTable:\n        \"\"\"\n        The :ref:`sec_provenance_table_definition` in this collection.\n        \"\"\"\n        return self._provenances\n\n    @provenances.setter\n    def provenances(self, value):\n        raise AttributeError(self.set_err_text)\n\n    @property\n    def indexes(self) -> TableCollectionIndexes:\n        \"\"\"\n        The edge 
insertion and removal indexes.\n        \"\"\"\n        indexes = self._ll_tables.indexes\n        return TableCollectionIndexes(**indexes)\n\n    @indexes.setter\n    def indexes(self, indexes):\n        self._ll_tables.indexes = indexes.asdict()\n\n    @property\n    def sequence_length(self) -> float:\n        \"\"\"\n        The sequence length defining the coordinate space.\n        \"\"\"\n        return self._ll_tables.sequence_length\n\n    @sequence_length.setter\n    def sequence_length(self, sequence_length):\n        self._ll_tables.sequence_length = sequence_length\n\n    @property\n    def file_uuid(self) -> str:\n        \"\"\"\n        The UUID for the file this TableCollection is derived\n        from, or None if not derived from a file.\n        \"\"\"\n        return self._ll_tables.file_uuid\n\n    @property\n    def time_units(self) -> str:\n        \"\"\"\n        The units used for the time dimension of this TableCollection\n        \"\"\"\n        return self._ll_tables.time_units\n\n    @time_units.setter\n    def time_units(self, time_units: str) -> None:\n        self._ll_tables.time_units = time_units\n\n    def has_reference_sequence(self):\n        \"\"\"\n        Returns True if this :class:`.TableCollection` has an associated\n        :ref:`reference sequence<sec_data_model_reference_sequence>`.\n        \"\"\"\n        return bool(self._ll_tables.has_reference_sequence())\n\n    @property\n    def reference_sequence(self):\n        \"\"\"\n        The :class:`.ReferenceSequence` associated with this :class:`.TableCollection`.\n\n        .. note:: Note that the behaviour of this attribute differs from\n            :attr:`.TreeSequence.reference_sequence` in that we return a valid\n            instance of :class:`.ReferenceSequence` even when\n            :attr:`.TableCollection.has_reference_sequence` is False. 
This is\n            to allow us to update the state of the reference sequence.\n        \"\"\"\n        # NOTE: arguably we should cache the reference to this object\n        # during init, rather than creating a new instance each time.\n        # However, following the pattern of the Table classes for now\n        # for consistency.\n        return ReferenceSequence(self._ll_tables.reference_sequence)\n\n    @reference_sequence.setter\n    def reference_sequence(self, value: ReferenceSequence):\n        self.reference_sequence.metadata_schema = value.metadata_schema\n        self.reference_sequence.metadata = value.metadata\n        self.reference_sequence.data = value.data\n        self.reference_sequence.url = value.url\n\n    def asdict(self, force_offset_64=False):\n        \"\"\"\n        Returns the nested dictionary representation of this TableCollection\n        used for interchange.\n\n        Note: the semantics of this method changed at tskit 0.1.0. Previously a\n        map of table names to the tables themselves was returned.\n\n        :param bool force_offset_64: If True, all offset columns will have dtype\n            np.uint64. If False (the default) the offset array columns will have\n            a dtype of either np.uint32 or np.uint64, depending on the size of the\n            corresponding data array.\n        :return: The dictionary representation of this table collection.\n        :rtype: dict\n        \"\"\"\n        return self._ll_tables.asdict(force_offset_64)\n\n    @property\n    def table_name_map(self) -> dict:\n        \"\"\"\n        Returns a dictionary mapping table names to the corresponding\n        table instances. 
For example, the returned dictionary will contain the\n        key \"edges\" that maps to an :class:`.EdgeTable` instance.\n        \"\"\"\n        return {\n            \"edges\": self.edges,\n            \"individuals\": self.individuals,\n            \"migrations\": self.migrations,\n            \"mutations\": self.mutations,\n            \"nodes\": self.nodes,\n            \"populations\": self.populations,\n            \"provenances\": self.provenances,\n            \"sites\": self.sites,\n        }\n\n    @property\n    def name_map(self) -> dict:\n        # Deprecated in 0.4.1\n        warnings.warn(\n            \"name_map is deprecated; use table_name_map instead\",\n            FutureWarning,\n            stacklevel=4,\n        )\n        return self.table_name_map\n\n    @property\n    def nbytes(self) -> int:\n        \"\"\"\n        Returns the total number of bytes required to store the data\n        in this table collection. Note that this may not be equal to\n        the actual memory footprint.\n        \"\"\"\n        return sum(\n            (\n                8,  # sequence_length takes 8 bytes\n                super().nbytes,  # metadata\n                len(self.time_units.encode()),\n                self.indexes.nbytes,\n                self.reference_sequence.nbytes,\n                sum(table.nbytes for table in self.table_name_map.values()),\n            )\n        )\n\n    def __str__(self):\n        \"\"\"\n        Return a plain text summary of this TableCollection\n        \"\"\"\n        return \"\\n\".join(\n            [\n                \"TableCollection\",\n                \"\",\n                f\"Sequence Length: {self.sequence_length}\",\n                f\"Time units: {self.time_units}\",\n                f\"Metadata: {self.metadata}\",\n                \"\",\n                \"Individuals\",\n                str(self.individuals),\n                \"Nodes\",\n                str(self.nodes),\n                \"Edges\",\n      
          str(self.edges),\n                \"Sites\",\n                str(self.sites),\n                \"Mutations\",\n                str(self.mutations),\n                \"Migrations\",\n                str(self.migrations),\n                \"Populations\",\n                str(self.populations),\n                \"Provenances\",\n                str(self.provenances),\n            ]\n        )\n\n    def equals(\n        self,\n        other,\n        *,\n        ignore_metadata=False,\n        ignore_ts_metadata=False,\n        ignore_provenance=False,\n        ignore_timestamps=False,\n        ignore_tables=False,\n        ignore_reference_sequence=False,\n    ):\n        \"\"\"\n        Returns True if  `self` and `other` are equal. By default, two table\n        collections are considered equal if their\n\n        - ``sequence_length`` properties are identical;\n        - top-level tree sequence metadata and metadata schemas are\n          byte-wise identical;\n        - constituent tables are byte-wise identical.\n\n        Some of the requirements in this definition can be relaxed using the\n        parameters, which can be used to remove certain parts of the data model\n        from the comparison.\n\n        Table indexes are not considered in the equality comparison.\n\n        :param TableCollection other: Another table collection.\n        :param bool ignore_metadata: If True *all* metadata and metadata schemas\n            will be excluded from the comparison. 
This includes the top-level\n            tree sequence and constituent table metadata (default=False).\n        :param bool ignore_ts_metadata: If True the top-level tree sequence\n            metadata and metadata schemas will be excluded from the comparison.\n            If ``ignore_metadata`` is True, this parameter has no effect.\n        :param bool ignore_provenance: If True the provenance tables are\n            not included in the comparison.\n        :param bool ignore_timestamps: If True the provenance timestamp column\n            is ignored in the comparison. If ``ignore_provenance`` is True, this\n            parameter has no effect.\n        :param bool ignore_tables: If True no tables are included in the\n            comparison, thus comparing only the top-level information.\n        :param bool ignore_reference_sequence: If True the reference sequence\n            is not included in the comparison.\n        :return: True if other is equal to this table collection; False otherwise.\n        :rtype: bool\n        \"\"\"\n        if self is other:\n            return True\n\n        ret = False\n        if type(other) is type(self):\n            ret = bool(\n                self._ll_tables.equals(\n                    other._ll_tables,\n                    ignore_metadata=bool(ignore_metadata),\n                    ignore_ts_metadata=bool(ignore_ts_metadata),\n                    ignore_provenance=bool(ignore_provenance),\n                    ignore_timestamps=bool(ignore_timestamps),\n                    ignore_tables=bool(ignore_tables),\n                    ignore_reference_sequence=bool(ignore_reference_sequence),\n                )\n            )\n        elif hasattr(other, \"_llts\") and not hasattr(other, \"_ll_tables\"):\n            ret = other.equals(\n                self,\n                ignore_metadata=ignore_metadata,\n                ignore_ts_metadata=ignore_ts_metadata,\n                ignore_provenance=ignore_provenance,\n          
      ignore_timestamps=ignore_timestamps,\n                ignore_tables=ignore_tables,\n                ignore_reference_sequence=ignore_reference_sequence,\n            )\n        return ret\n\n    def assert_equals(\n        self,\n        other,\n        *,\n        ignore_metadata=False,\n        ignore_ts_metadata=False,\n        ignore_provenance=False,\n        ignore_timestamps=False,\n        ignore_tables=False,\n        ignore_reference_sequence=False,\n    ):\n        \"\"\"\n        Raise an AssertionError for the first found difference between\n        this and another table collection. Note that table indexes are not checked.\n\n        :param other: Another table collection (TableCollection or\n            ImmutableTableCollection).\n        :param bool ignore_metadata: If True *all* metadata and metadata schemas\n            will be excluded from the comparison. This includes the top-level\n            tree sequence and constituent table metadata (default=False).\n        :param bool ignore_ts_metadata: If True the top-level tree sequence\n            metadata and metadata schemas will be excluded from the comparison.\n            If ``ignore_metadata`` is True, this parameter has no effect.\n        :param bool ignore_provenance: If True the provenance tables are\n            not included in the comparison.\n        :param bool ignore_timestamps: If True the provenance timestamp column\n            is ignored in the comparison. 
If ``ignore_provenance`` is True, this\n            parameter has no effect.\n        :param bool ignore_tables: If True no tables are included in the\n            comparison, thus comparing only the top-level information.\n        :param bool ignore_reference_sequence: If True the reference sequence\n            is not included in the comparison.\n        \"\"\"\n        # Check using the low-level method to avoid slowly going through everything\n        if type(other) is type(self) and self.equals(\n            other,\n            ignore_metadata=ignore_metadata,\n            ignore_ts_metadata=ignore_ts_metadata,\n            ignore_provenance=ignore_provenance,\n            ignore_timestamps=ignore_timestamps,\n            ignore_tables=ignore_tables,\n            ignore_reference_sequence=ignore_reference_sequence,\n        ):\n            return\n\n        valid_types = (TableCollection, ImmutableTableCollection)\n        if not isinstance(other, valid_types):\n            raise AssertionError(f\"Types differ: self={type(self)} other={type(other)}\")\n\n        _assert_table_collections_equal(\n            self,\n            other,\n            ignore_metadata=ignore_metadata,\n            ignore_ts_metadata=ignore_ts_metadata,\n            ignore_provenance=ignore_provenance,\n            ignore_timestamps=ignore_timestamps,\n            ignore_tables=ignore_tables,\n            ignore_reference_sequence=ignore_reference_sequence,\n        )\n\n    def __eq__(self, other):\n        return self.equals(other)\n\n    def __getstate__(self):\n        return self.asdict()\n\n    @classmethod\n    def load(cls, file_or_path, *, skip_tables=False, skip_reference_sequence=False):\n        \"\"\"\n        Load a :class:`TableCollection` from a file or path, saved in the format defined\n        by :meth:`.dump`. 
Although the file must be in the correct format, unlike\n        :func:`tskit.load` further validation is not performed to check that the tables\n        meet the :ref:`sec_valid_tree_sequence_requirements`. For instance,\n        unsorted tables can be loaded without error and then sorted before\n        :meth:`creating <TableCollection.tree_sequence>` a :class:`TreeSequence`.\n\n        :param file_or_path: The file object or path from which to load the\n            TableCollection.\n        :param bool skip_tables: If True, no tables are read from the file and\n            only the top-level information is populated in the returned TableCollection.\n        :param bool skip_reference_sequence: If True, tables are read\n            without loading any reference sequence.\n        :return: A TableCollection instance\n        :rtype: TableCollection\n        :raises: **tskit.FileFormatError** -- If the file is not in a valid format.\n        \"\"\"\n        file, local_file = util.convert_file_like_to_open_file(file_or_path, \"rb\")\n        ll_tc = _tskit.TableCollection()\n        try:\n            ll_tc.load(\n                file,\n                skip_tables=skip_tables,\n                skip_reference_sequence=skip_reference_sequence,\n            )\n            return TableCollection(ll_tables=ll_tc)\n        except tskit.FileFormatError as e:\n            util.raise_known_file_format_errors(file, e)\n        finally:\n            if local_file:\n                file.close()\n\n    def dump(self, file_or_path):\n        \"\"\"\n        Writes the table collection to the specified path or file object.\n\n        :param str file_or_path: The file object or path to which to write this\n            TableCollection.\n        \"\"\"\n        file, local_file = util.convert_file_like_to_open_file(file_or_path, \"wb\")\n        try:\n            self._ll_tables.dump(file)\n        finally:\n            if local_file:\n                file.close()\n\n    # Unpickle 
support\n    def __setstate__(self, state):\n        self.__init__()\n        self._ll_tables.fromdict(state)\n\n    @staticmethod\n    def fromdict(tables_dict):\n        ll_tc = _tskit.TableCollection()\n        ll_tc.fromdict(tables_dict)\n        return TableCollection(ll_tables=ll_tc)\n\n    def copy(self):\n        \"\"\"\n        Returns a deep copy of this TableCollection.\n\n        :return: A deep copy of this TableCollection.\n        :rtype: tskit.TableCollection\n        \"\"\"\n        return TableCollection.fromdict(self.asdict())\n\n    def tree_sequence(self):\n        \"\"\"\n        Returns a :class:`TreeSequence` instance from the tables defined in this\n        :class:`TableCollection`, building the required indexes if they have not yet\n        been created by :meth:`.build_index`. If the table collection does not meet\n        the :ref:`sec_valid_tree_sequence_requirements`, for example if the tables\n        are not correctly sorted or if they cannot be interpreted as a tree sequence,\n        an exception is raised. 
Note that in the former case, the :meth:`.sort`\n        method may be used to ensure that sorting requirements are met.\n\n        :return: A :class:`TreeSequence` instance reflecting the structures\n            defined in this set of tables.\n        :rtype: tskit.TreeSequence\n        \"\"\"\n        if not self.has_index():\n            self.build_index()\n        return tskit.TreeSequence.load_tables(self)\n\n    def simplify(\n        self,\n        samples=None,\n        *,\n        reduce_to_site_topology=False,\n        filter_populations=None,\n        filter_individuals=None,\n        filter_sites=None,\n        filter_nodes=None,\n        update_sample_flags=None,\n        keep_unary=False,\n        keep_unary_in_individuals=None,\n        keep_input_roots=False,\n        record_provenance=True,\n        filter_zero_mutation_sites=None,  # Deprecated alias for filter_sites\n    ):\n        \"\"\"\n        Simplifies the tables in place to retain only the information necessary\n        to reconstruct the tree sequence describing the given ``samples``.\n        If ``filter_nodes`` is True (the default), this can change the ID of\n        the nodes, so that the node ``samples[k]`` will have ID ``k`` in the\n        result, resulting in a NodeTable where only the first ``len(samples)``\n        nodes are marked as samples. The mapping from node IDs in the current\n        set of tables to their equivalent values in the simplified tables is\n        returned as a numpy array. If an array ``a`` is returned by this\n        function and ``u`` is the ID of a node in the input table, then\n        ``a[u]`` is the ID of this node in the output table. 
For any node ``u``\n        that is not mapped into the output tables, this mapping will equal\n        ``tskit.NULL`` (``-1``).\n\n        Tables operated on by this function must: be sorted (see\n        :meth:`TableCollection.sort`), have children be born strictly after their\n        parents, and the intervals on which any node is a child must be\n        disjoint. Other than this the tables need not satisfy remaining\n        requirements to specify a valid tree sequence (but the resulting tables\n        will).\n\n        .. note::\n            To invert the returned ``node_map``, that is, to obtain a reverse\n            mapping from the node ID in the output table to the node ID in\n            the input table, you can use::\n\n                rev_map = np.zeros_like(node_map, shape=simplified_ts.num_nodes)\n                kept = node_map != tskit.NULL\n                rev_map[node_map[kept]] = np.arange(len(node_map))[kept]\n\n            In this case, no elements of the ``rev_map`` array will be set to\n            ``tskit.NULL``.\n\n        .. seealso::\n            This is identical to :meth:`TreeSequence.simplify` but acts *in place* to\n            alter the data in this :class:`TableCollection`. Please see the\n            :meth:`TreeSequence.simplify` method for a description of the remaining\n            parameters.\n\n        :param list[int] samples: A list of node IDs to retain as samples. They\n            need not be nodes marked as samples in the original tree sequence, but\n            will constitute the entire set of samples in the returned tree sequence.\n            If not specified or None, use all nodes marked with the IS_SAMPLE flag.\n            The list may be provided as a numpy array (or array-like) object\n            (dtype=np.int32).\n        :param bool reduce_to_site_topology: Whether to reduce the topology down\n            to the trees that are present at sites. 
(Default: False).\n        :param bool filter_populations: If True, remove any populations that are\n            not referenced by nodes after simplification; new population IDs are\n            allocated sequentially from zero. If False, the population table will\n            not be altered in any way. (Default: None, treated as True)\n        :param bool filter_individuals: If True, remove any individuals that are\n            not referenced by nodes after simplification; new individual IDs are\n            allocated sequentially from zero. If False, the individual table will\n            not be altered in any way. (Default: None, treated as True)\n        :param bool filter_sites: If True, remove any sites that are\n            not referenced by mutations after simplification; new site IDs are\n            allocated sequentially from zero. If False, the site table will not\n            be altered in any way. (Default: None, treated as True)\n        :param bool filter_nodes: If True, remove any nodes that are\n            not referenced by edges after simplification. If False, the only\n            potential change to the node table may be to change the node flags\n            (if ``samples`` is specified and different from the existing samples).\n            (Default: None, treated as True)\n        :param bool update_sample_flags: If True, update node flags so that\n            nodes in the specified list of samples have the NODE_IS_SAMPLE\n            flag after simplification, and nodes that are not in this list\n            do not. (Default: None, treated as True)\n        :param bool keep_unary: If True, preserve unary nodes (i.e. 
nodes with\n            exactly one child) that exist on the path from samples to root.\n            (Default: False)\n        :param bool keep_unary_in_individuals: If True, preserve unary nodes\n            that exist on the path from samples to root, but only if they are\n            associated with an individual in the individuals table. Cannot be\n            specified at the same time as ``keep_unary``. (Default: ``None``,\n            equivalent to False)\n        :param bool keep_input_roots: Whether to retain history ancestral to the\n            MRCA of the samples. If ``False``, no topology older than the MRCAs of the\n            samples will be included. If ``True`` the roots of all trees in the returned\n            tree sequence will be the same roots as in the original tree sequence.\n            (Default: False)\n        :param bool record_provenance: If True, record details of this call to\n            simplify in the returned tree sequence's provenance information\n            (Default: True).\n        :param bool filter_zero_mutation_sites: Deprecated alias for ``filter_sites``.\n        :return: A numpy array mapping node IDs in the input tables to their\n            corresponding node IDs in the output tables.\n        :rtype: numpy.ndarray (dtype=np.int32)\n        \"\"\"\n        if filter_zero_mutation_sites is not None:\n            # Deprecated in msprime 0.6.1.\n            warnings.warn(\n                \"filter_zero_mutation_sites is deprecated; use filter_sites instead\",\n                FutureWarning,\n                stacklevel=4,\n            )\n            filter_sites = filter_zero_mutation_sites\n        if samples is None:\n            flags = self.nodes.flags\n            samples = np.where(np.bitwise_and(flags, _tskit.NODE_IS_SAMPLE) != 0)[\n                0\n            ].astype(np.int32)\n        else:\n            samples = util.safe_np_int_cast(samples, np.int32)\n        if filter_populations is None:\n            
filter_populations = True\n        if filter_individuals is None:\n            filter_individuals = True\n        if filter_sites is None:\n            filter_sites = True\n        if filter_nodes is None:\n            filter_nodes = True\n        if update_sample_flags is None:\n            update_sample_flags = True\n        if keep_unary_in_individuals is None:\n            keep_unary_in_individuals = False\n\n        node_map = self._ll_tables.simplify(\n            samples,\n            filter_sites=filter_sites,\n            filter_individuals=filter_individuals,\n            filter_populations=filter_populations,\n            filter_nodes=filter_nodes,\n            update_sample_flags=update_sample_flags,\n            reduce_to_site_topology=reduce_to_site_topology,\n            keep_unary=keep_unary,\n            keep_unary_in_individuals=keep_unary_in_individuals,\n            keep_input_roots=keep_input_roots,\n        )\n        if record_provenance:\n            # TODO replace with a version of https://github.com/tskit-dev/tskit/pull/243\n            # TODO also make sure we convert all the arguments so that they are\n            # definitely JSON encodable.\n            parameters = {\"command\": \"simplify\", \"TODO\": \"add simplify parameters\"}\n            self.provenances.add_row(\n                record=json.dumps(provenance.get_provenance_dict(parameters))\n            )\n        return node_map\n\n    def link_ancestors(self, samples, ancestors):\n        \"\"\"\n        Returns an :class:`EdgeTable` instance describing a subset of the genealogical\n        relationships between the nodes in ``samples`` and ``ancestors``.\n\n        Each row ``parent, child, left, right`` in the output table indicates that\n        ``child`` has inherited the segment ``[left, right)`` from ``parent`` more\n        recently than from any other node in these lists.\n\n        In particular, suppose ``samples`` is a list of nodes such that ``time`` is 0\n        
for each node, and ``ancestors`` is a list of nodes such that ``time`` is\n        greater than 0.0 for each node. Then each row of the output table will show\n        an interval ``[left, right)`` over which a node in ``samples`` has inherited\n        most recently from a node in ``ancestors``, or an interval over which one of\n        these ``ancestors`` has inherited most recently from another node in\n        ``ancestors``.\n\n        The following table shows which ``parent->child`` pairs will be shown in the\n        output of ``link_ancestors``.\n        A node is a relevant descendant on a given interval if it also appears somewhere\n        in the ``parent`` column of the outputted table.\n\n        ========================  ===============================================\n        Type of relationship      Shown in output of ``link_ancestors``\n        ------------------------  -----------------------------------------------\n        ``ancestor->sample``      Always\n        ``ancestor1->ancestor2``  Only if ``ancestor2`` has a relevant descendant\n        ``sample1->sample2``      Always\n        ``sample->ancestor``      Only if ``ancestor`` has a relevant descendant\n        ========================  ===============================================\n\n        The difference between ``samples`` and ``ancestors`` is that information about\n        the ancestors of a node in ``ancestors`` will only be retained if it also has a\n        relevant descendant, while information about the ancestors of a node in\n        ``samples`` will always be retained.\n        The node IDs in ``parent`` and ``child`` refer to the IDs in the node table\n        of the inputted tree sequence.\n\n        The supplied nodes must be non-empty lists of the node IDs in the tree sequence:\n        in particular, they do not have to be *samples* of the tree sequence. 
The lists\n        of ``samples`` and ``ancestors`` may overlap, although adding a node from\n        ``samples`` to ``ancestors`` will not change the output. So, setting ``samples``\n        and ``ancestors`` to the same list of nodes will find all genealogical\n        relationships within this list.\n\n        If none of the nodes in ``ancestors`` or ``samples`` are ancestral to ``samples``\n        anywhere in the tree sequence, an empty table will be returned.\n\n        :param list[int] samples: A list of node IDs to retain as samples.\n        :param list[int] ancestors: A list of node IDs to use as ancestors.\n        :return: An :class:`EdgeTable` instance displaying relationships between\n            the `samples` and `ancestors`.\n        \"\"\"\n        samples = util.safe_np_int_cast(samples, np.int32)\n        ancestors = util.safe_np_int_cast(ancestors, np.int32)\n        ll_edge_table = self._ll_tables.link_ancestors(samples, ancestors)\n        return EdgeTable(ll_table=ll_edge_table)\n\n    def map_ancestors(self, *args, **kwargs):\n        # A deprecated alias for link_ancestors()\n        return self.link_ancestors(*args, **kwargs)\n\n    def sort(self, edge_start=0, *, site_start=0, mutation_start=0):\n        \"\"\"\n        Sorts the tables in place. This ensures that all tree sequence ordering\n        requirements listed in the\n        :ref:`sec_valid_tree_sequence_requirements` section are met, as long\n        as each site has at most one mutation (see below).\n\n        If the ``edge_start`` parameter is provided, this specifies the index\n        in the edge table where sorting should start. Only rows with index\n        greater than or equal to ``edge_start`` are sorted; rows before this index\n        are not affected. 
This parameter is provided to allow for efficient sorting\n        when the user knows that the edges up to a given index are already sorted.\n\n        If both ``site_start`` and ``mutation_start`` are equal to the number of rows\n        in their respective tables then neither is sorted. Note that a partial\n        non-sorting is not possible, and both or neither must be skipped.\n\n        The node, individual, population and provenance tables are not affected\n        by this method.\n\n        Edges are sorted as follows:\n\n        - time of parent, then\n        - parent node ID, then\n        - child node ID, then\n        - left endpoint.\n\n        Note that this sorting order exceeds the\n        :ref:`edge sorting requirements <sec_edge_requirements>` for a valid\n        tree sequence. For a valid tree sequence, we require that all edges for a\n        given parent ID are adjacent, but we do not require that they be listed in\n        sorted order.\n\n        Sites are sorted by position, and sites with the same position retain\n        their relative ordering.\n\n        Mutations are sorted by site, then time (if known), then the mutation's\n        node's time, then number of descendant mutations (ensuring that parent\n        mutations occur before children), then node, then original order in the\n        tables.\n\n        Migrations are sorted by ``time``, ``source``, ``dest``, ``left`` and\n        ``node`` values. 
This defines a total sort order, such that any permutation\n        of a valid migration table will be sorted into the same output order.\n        Note that this sorting order exceeds the\n        :ref:`migration sorting requirements <sec_migration_requirements>` for a\n        valid tree sequence, which only requires that migrations are sorted by\n        time value.\n\n        :param int edge_start: The index in the edge table where sorting starts\n            (default=0; must be <= len(edges)).\n        :param int site_start: The index in the site table where sorting starts\n            (default=0; must be one of [0, len(sites)]).\n        :param int mutation_start: The index in the mutation table where sorting starts\n            (default=0; must be one of [0, len(mutations)]).\n        \"\"\"\n        self._ll_tables.sort(edge_start, site_start, mutation_start)\n        # TODO add provenance\n\n    def sort_individuals(self):\n        \"\"\"\n        Sorts the individual table in place, so that parents come before children,\n        and the parent column is remapped as required. Node references to individuals\n        are also updated. This is a stricter order than is required for a valid tree\n        sequence.\n        \"\"\"\n        self._ll_tables.sort_individuals()\n        # TODO add provenance\n\n    def canonicalise(self, remove_unreferenced=None):\n        \"\"\"\n        This puts the tables in *canonical* form, imposing a stricter order on the\n        tables than :ref:`required <sec_valid_tree_sequence_requirements>` for\n        a valid tree sequence. In particular, the population table is sorted to\n        place populations with the lowest node IDs first, and the individual table\n        is sorted firstly as in :meth:`.sort_individuals` and secondarily\n        by the lowest ID of the nodes that refer to each individual\n        (see :meth:`TreeSequence.subset`). 
The remaining tables are sorted\n        as in :meth:`.sort`, with the modification that mutations are sorted by\n        site, then time (if known), then the mutation's node's time, then number\n        of descendant mutations (ensuring that parent mutations occur before\n        children), then node, then original order in the tables. This ensures\n        that any two tables with the same information\n        and node order should be identical after canonical sorting (note\n        that no canonical order exists for the node table).\n\n        By default, the method removes sites, individuals, and populations that\n        are not referenced (by mutations and nodes, respectively). If you wish\n        to keep these, pass ``remove_unreferenced=False``, but note that\n        unreferenced individuals and populations are put at the end of the tables\n        in their original order.\n\n        .. seealso::\n\n            :meth:`.sort` for sorting edges, mutations, and sites, and\n            :meth:`.subset` for reordering nodes, individuals, and populations.\n\n        :param bool remove_unreferenced: Whether to remove unreferenced sites,\n            individuals, and populations (default=True).\n        \"\"\"\n        remove_unreferenced = (\n            True if remove_unreferenced is None else remove_unreferenced\n        )\n        self._ll_tables.canonicalise(remove_unreferenced=remove_unreferenced)\n        # TODO add provenance\n\n    def compute_mutation_parents(self):\n        \"\"\"\n        Modifies the tables in place, computing the ``parent`` column of the\n        mutation table. For this to work, the node and edge tables must be\n        valid, and the site and mutation tables must be sorted (see\n        :meth:`TableCollection.sort`).  
This will produce an error if mutations\n        are not sorted (i.e., if a mutation appears before its mutation parent)\n        *unless* the two mutations occur on the same branch, and have unknown times\n        in which case there is no way to detect the error.\n\n        The ``parent`` of a given mutation is the ID of the next mutation\n        encountered traversing the tree upwards from that mutation, or\n        ``NULL`` if there is no such mutation.\n        \"\"\"\n        self._ll_tables.compute_mutation_parents()\n        # TODO add provenance\n\n    def compute_mutation_times(self):\n        \"\"\"\n        Modifies the tables in place, computing valid values for the ``time`` column of\n        the mutation table. For this to work, the node and edge tables must be\n        valid, and the site and mutation tables must be sorted and indexed (see\n        :meth:`TableCollection.sort` and :meth:`TableCollection.build_index`).\n\n        For a single mutation on an edge at a site, the ``time`` assigned to a mutation\n        by this method is the mid-point between the times of the nodes above and below\n        the mutation. In the case where there is more than one mutation on an edge for\n        a site, the times are evenly spread along the edge. For mutations that are\n        above a root node, the time of the root node is assigned.\n\n        The mutation table will be sorted if the new times mean that the original order\n        is no longer valid.\n\n        \"\"\"\n        self._ll_tables.compute_mutation_times()\n        # TODO add provenance\n\n    def deduplicate_sites(self):\n        \"\"\"\n        Modifies the tables in place, removing entries in the site table with\n        duplicate ``position`` (and keeping only the *first* entry for each\n        site), and renumbering the ``site`` column of the mutation table\n        appropriately.  This requires the site table to be sorted by position.\n\n        .. 
warning:: This method does not sort the tables afterwards, so\n            mutations may no longer be sorted by time.\n        \"\"\"\n        self._ll_tables.deduplicate_sites()\n        # TODO add provenance\n\n    def delete_sites(self, site_ids, record_provenance=True):\n        \"\"\"\n        Remove the specified sites entirely from the sites and mutations tables in this\n        collection. This is identical to :meth:`TreeSequence.delete_sites` but acts\n        *in place* to alter the data in this :class:`TableCollection`.\n\n        :param list[int] site_ids: A list of site IDs specifying the sites to remove.\n        :param bool record_provenance: If ``True``, add details of this operation\n            to the provenance table in this TableCollection. (Default: ``True``).\n        \"\"\"\n        keep_sites = np.ones(len(self.sites), dtype=bool)\n        site_ids = util.safe_np_int_cast(site_ids, np.int32)\n        if np.any(site_ids < 0) or np.any(site_ids >= len(self.sites)):\n            raise ValueError(\"Site ID out of bounds\")\n        keep_sites[site_ids] = 0\n        new_as, new_as_offset = keep_with_offset(\n            keep_sites, self.sites.ancestral_state, self.sites.ancestral_state_offset\n        )\n        new_md, new_md_offset = keep_with_offset(\n            keep_sites, self.sites.metadata, self.sites.metadata_offset\n        )\n        self.sites.set_columns(\n            position=self.sites.position[keep_sites],\n            ancestral_state=new_as,\n            ancestral_state_offset=new_as_offset,\n            metadata=new_md,\n            metadata_offset=new_md_offset,\n        )\n        # We also need to adjust the mutations table, as it references into sites\n        keep_mutations = keep_sites[self.mutations.site]\n        new_ds, new_ds_offset = keep_with_offset(\n            keep_mutations,\n            self.mutations.derived_state,\n            self.mutations.derived_state_offset,\n        )\n        new_md, new_md_offset = 
keep_with_offset(\n            keep_mutations, self.mutations.metadata, self.mutations.metadata_offset\n        )\n        # Site numbers will have changed\n        site_map = np.cumsum(keep_sites, dtype=self.mutations.site.dtype) - 1\n        # Mutation numbers will change, so the parent references need altering\n        mutation_map = np.cumsum(keep_mutations, dtype=self.mutations.parent.dtype) - 1\n        # Map parent == -1 to -1, and check this has worked (assumes tskit.NULL == -1)\n        mutation_map = np.append(mutation_map, -1).astype(self.mutations.parent.dtype)\n        assert mutation_map[tskit.NULL] == tskit.NULL\n        self.mutations.set_columns(\n            site=site_map[self.mutations.site[keep_mutations]],\n            node=self.mutations.node[keep_mutations],\n            time=self.mutations.time[keep_mutations],\n            derived_state=new_ds,\n            derived_state_offset=new_ds_offset,\n            parent=mutation_map[self.mutations.parent[keep_mutations]],\n            metadata=new_md,\n            metadata_offset=new_md_offset,\n        )\n        if record_provenance:\n            # TODO replace with a version of https://github.com/tskit-dev/tskit/pull/243\n            parameters = {\"command\": \"delete_sites\", \"TODO\": \"add parameters\"}\n            self.provenances.add_row(\n                record=json.dumps(provenance.get_provenance_dict(parameters))\n            )\n\n    def delete_intervals(self, intervals, simplify=True, record_provenance=True):\n        \"\"\"\n        Delete all information from this set of tables which lies *within* the\n        specified list of genomic intervals. This is identical to\n        :meth:`TreeSequence.delete_intervals` but acts *in place* to alter\n        the data in this :class:`TableCollection`.\n\n        :param array_like intervals: A list of (start, end) pairs describing the\n            genomic intervals to delete. 
Intervals must be non-overlapping and\n            in increasing order. The list of intervals must be interpretable as a\n            2D numpy array with shape (N, 2), where N is the number of intervals.\n        :param bool simplify: If True, run simplify on the tables so that nodes\n            no longer used are discarded. (Default: True).\n        :param bool record_provenance: If ``True``, add details of this operation\n            to the provenance table in this TableCollection. (Default: ``True``).\n        \"\"\"\n        self.keep_intervals(\n            util.negate_intervals(intervals, 0, self.sequence_length),\n            simplify=simplify,\n            record_provenance=False,\n        )\n        if record_provenance:\n            parameters = {\"command\": \"delete_intervals\", \"TODO\": \"add parameters\"}\n            self.provenances.add_row(\n                record=json.dumps(provenance.get_provenance_dict(parameters))\n            )\n\n    def keep_intervals(self, intervals, simplify=True, record_provenance=True):\n        \"\"\"\n        Delete all information from this set of tables which lies *outside* the\n        specified list of genomic intervals. This is identical to\n        :meth:`TreeSequence.keep_intervals` but acts *in place* to alter\n        the data in this :class:`TableCollection`.\n\n        :param array_like intervals: A list of (start, end) pairs describing the\n            genomic intervals to keep. Intervals must be non-overlapping and\n            in increasing order. The list of intervals must be interpretable as a\n            2D numpy array with shape (N, 2), where N is the number of intervals.\n        :param bool simplify: If True, run simplify on the tables so that nodes\n            no longer used are discarded. Must be ``False`` if input tree sequence\n            includes migrations. 
(Default: True).\n        :param bool record_provenance: If ``True``, add details of this operation\n            to the provenance table in this TableCollection. (Default: ``True``).\n        \"\"\"\n        intervals = util.intervals_to_np_array(intervals, 0, self.sequence_length)\n\n        edges = self.edges.copy()\n        self.edges.clear()\n        migrations = self.migrations.copy()\n        self.migrations.clear()\n        keep_sites = np.repeat(False, self.sites.num_rows)\n        for s, e in intervals:\n            curr_keep_sites = np.logical_and(\n                self.sites.position >= s, self.sites.position < e\n            )\n            keep_sites = np.logical_or(keep_sites, curr_keep_sites)\n            keep_edges = np.logical_not(np.logical_or(edges.right <= s, edges.left >= e))\n            metadata, metadata_offset = keep_with_offset(\n                keep_edges, edges.metadata, edges.metadata_offset\n            )\n            self.edges.append_columns(\n                left=np.fmax(s, edges.left[keep_edges]),\n                right=np.fmin(e, edges.right[keep_edges]),\n                parent=edges.parent[keep_edges],\n                child=edges.child[keep_edges],\n                metadata=metadata,\n                metadata_offset=metadata_offset,\n            )\n            keep_migrations = np.logical_not(\n                np.logical_or(migrations.right <= s, migrations.left >= e)\n            )\n            metadata, metadata_offset = keep_with_offset(\n                keep_migrations, migrations.metadata, migrations.metadata_offset\n            )\n            self.migrations.append_columns(\n                left=np.fmax(s, migrations.left[keep_migrations]),\n                right=np.fmin(e, migrations.right[keep_migrations]),\n                node=migrations.node[keep_migrations],\n                source=migrations.source[keep_migrations],\n                dest=migrations.dest[keep_migrations],\n                
time=migrations.time[keep_migrations],\n                metadata=metadata,\n                metadata_offset=metadata_offset,\n            )\n        self.delete_sites(\n            np.where(np.logical_not(keep_sites))[0], record_provenance=False\n        )\n\n        self.sort()\n        if simplify:\n            self.simplify(record_provenance=False)\n        if record_provenance:\n            parameters = {\"command\": \"keep_intervals\", \"TODO\": \"add parameters\"}\n            self.provenances.add_row(\n                record=json.dumps(provenance.get_provenance_dict(parameters))\n            )\n\n    def _check_trim_conditions(self):\n        if self.has_reference_sequence():\n            raise ValueError(\n                \"Cannot trim if there is a reference sequence. Please remove the \"\n                \"reference sequence by calling `.reference_sequence.clear()` first.\"\n            )\n        if self.migrations.num_rows > 0:\n            if (np.min(self.migrations.left) < np.min(self.edges.left)) and (\n                np.max(self.migrations.right) > np.max(self.edges.right)\n            ):\n                raise ValueError(\n                    \"Cannot trim a tree sequence with migrations which exist to the\"\n                    \"left of the leftmost edge or to the right of the rightmost edge.\"\n                )\n        if self.edges.num_rows == 0:\n            raise ValueError(\n                \"Trimming a tree sequence with no edges would reduce the sequence length\"\n                \" to zero, which is not allowed\"\n            )\n\n    def ltrim(self, record_provenance=True):\n        \"\"\"\n        Reset the coordinate system used in these tables, changing the left and right\n        genomic positions in the edge table such that the leftmost edge now starts at\n        position 0. 
This is identical to :meth:`TreeSequence.ltrim` but acts *in place*\n        to alter the data in this :class:`TableCollection`.\n\n        :param bool record_provenance: If ``True``, add details of this operation\n            to the provenance table in this TableCollection. (Default: ``True``).\n        \"\"\"\n        self._check_trim_conditions()\n        leftmost = np.min(self.edges.left)\n        self.delete_sites(\n            np.where(self.sites.position < leftmost), record_provenance=False\n        )\n        self.edges.set_columns(\n            left=self.edges.left - leftmost,\n            right=self.edges.right - leftmost,\n            parent=self.edges.parent,\n            child=self.edges.child,\n        )\n        self.sites.set_columns(\n            position=self.sites.position - leftmost,\n            ancestral_state=self.sites.ancestral_state,\n            ancestral_state_offset=self.sites.ancestral_state_offset,\n            metadata=self.sites.metadata,\n            metadata_offset=self.sites.metadata_offset,\n        )\n        self.migrations.set_columns(\n            left=self.migrations.left - leftmost,\n            right=self.migrations.right - leftmost,\n            time=self.migrations.time,\n            node=self.migrations.node,\n            source=self.migrations.source,\n            dest=self.migrations.dest,\n        )\n        self.sequence_length = self.sequence_length - leftmost\n        if record_provenance:\n            # TODO replace with a version of https://github.com/tskit-dev/tskit/pull/243\n            parameters = {\n                \"command\": \"ltrim\",\n            }\n            self.provenances.add_row(\n                record=json.dumps(provenance.get_provenance_dict(parameters))\n            )\n\n    def rtrim(self, record_provenance=True):\n        \"\"\"\n        Reset the ``sequence_length`` property so that the sequence ends at the end of\n        the last edge. 
This is identical to :meth:`TreeSequence.rtrim` but acts\n        *in place* to alter the data in this :class:`TableCollection`.\n\n        :param bool record_provenance: If ``True``, add details of this operation\n            to the provenance table in this TableCollection. (Default: ``True``).\n        \"\"\"\n        self._check_trim_conditions()\n        rightmost = np.max(self.edges.right)\n        self.delete_sites(\n            np.where(self.sites.position >= rightmost), record_provenance=False\n        )\n        self.sequence_length = rightmost\n        if record_provenance:\n            # TODO replace with a version of https://github.com/tskit-dev/tskit/pull/243\n            parameters = {\n                \"command\": \"rtrim\",\n            }\n            self.provenances.add_row(\n                record=json.dumps(provenance.get_provenance_dict(parameters))\n            )\n\n    def trim(self, record_provenance=True):\n        \"\"\"\n        Trim away any empty regions on the right and left of the tree sequence encoded by\n        these tables. This is identical to :meth:`TreeSequence.trim` but acts *in place*\n        to alter the data in this :class:`TableCollection`.\n\n        :param bool record_provenance: If ``True``, add details of this operation\n            to the provenance table in this TableCollection. 
(Default: ``True``).\n        \"\"\"\n        self.rtrim(record_provenance=False)\n        self.ltrim(record_provenance=False)\n        if record_provenance:\n            # TODO replace with a version of https://github.com/tskit-dev/tskit/pull/243\n            parameters = {\n                \"command\": \"trim\",\n            }\n            self.provenances.add_row(\n                record=json.dumps(provenance.get_provenance_dict(parameters))\n            )\n\n    def shift(self, value, *, sequence_length=None, record_provenance=True):\n        \"\"\"\n        Shift the coordinate system (used by edges, sites, and migrations) of this\n        TableCollection by a given value. This is identical to :meth:`TreeSequence.shift`\n        but acts *in place* to alter the data in this :class:`TableCollection`.\n\n        .. note::\n            No attempt is made to check that the new coordinate system or sequence length\n            is valid: if you wish to do this, use :meth:`TreeSequence.shift` instead.\n\n        :param value: The amount by which to shift the coordinate system.\n        :param sequence_length: The new sequence length of the tree sequence. If\n            ``None`` (default) add ``value`` to the sequence length.\n        :param bool record_provenance: If ``True``, add details of this operation\n            to the provenance table in this TableCollection. (Default: ``True``).\n        \"\"\"\n        if self.has_reference_sequence():\n            raise ValueError(\n                \"Cannot shift if there is a reference sequence. 
Please remove the \"\n                \"reference sequence by calling `.reference_sequence.clear()` first.\"\n            )\n        self.drop_index()\n        self.edges.left += value\n        self.edges.right += value\n        self.migrations.left += value\n        self.migrations.right += value\n        self.sites.position += value\n        if sequence_length is None:\n            self.sequence_length += value\n        else:\n            self.sequence_length = sequence_length\n        if record_provenance:\n            parameters = {\n                \"command\": \"shift\",\n                \"value\": value,\n                \"sequence_length\": sequence_length,\n            }\n            self.provenances.add_row(\n                record=json.dumps(provenance.get_provenance_dict(parameters))\n            )\n\n    def delete_older(self, time):\n        \"\"\"\n        Deletes edge, mutation and migration information at least as old as\n        the specified time.\n\n        .. seealso:: This method is similar to the higher-level\n            :meth:`TreeSequence.decapitate` method, which also splits\n            edges that intersect with the given time.\n            :meth:`TreeSequence.decapitate`\n            is more useful for most purposes, and may be what\n            you need instead of this method!\n\n        For the purposes of this method, an edge covers the times from the\n        child node up until the *parent* node, so that any edge with parent\n        node time > ``time`` will be removed.\n\n        Any mutation whose time is >= ``time`` will be removed. A mutation's time\n        is its associated ``time`` value, or the time of its node if the\n        mutation's time was marked as unknown (:data:`UNKNOWN_TIME`).\n\n        Any migration with time >= ``time`` will be removed.\n\n        The node table is not affected by this operation.\n\n        .. 
note:: This method does not have any specific sorting requirements\n            and will maintain mutation parent mappings.\n\n        :param float time: The cutoff time.\n        \"\"\"\n        self._ll_tables.delete_older(time)\n\n    def clear(\n        self,\n        clear_provenance=False,\n        clear_metadata_schemas=False,\n        clear_ts_metadata_and_schema=False,\n    ):\n        \"\"\"\n        Remove all rows of the data tables, optionally remove provenance, metadata\n        schemas and ts-level metadata.\n\n        :param bool clear_provenance: If ``True``, remove all rows of the provenance\n            table. (Default: ``False``).\n        :param bool clear_metadata_schemas: If ``True``, clear the table metadata\n            schemas. (Default: ``False``).\n        :param bool clear_ts_metadata_and_schema: If ``True``, clear the tree-sequence\n            level metadata and schema (Default: ``False``).\n        \"\"\"\n        self._ll_tables.clear(\n            clear_provenance=clear_provenance,\n            clear_metadata_schemas=clear_metadata_schemas,\n            clear_ts_metadata_and_schema=clear_ts_metadata_and_schema,\n        )\n\n    def has_index(self):\n        \"\"\"\n        Returns True if this TableCollection is indexed. See :ref:`sec_table_indexes`\n        for information on indexes.\n        \"\"\"\n        return bool(self._ll_tables.has_index())\n\n    def build_index(self):\n        \"\"\"\n        Builds an index on this TableCollection. Any existing indexes are automatically\n        dropped.  See :ref:`sec_table_indexes` for information on indexes.\n        \"\"\"\n        self._ll_tables.build_index()\n\n    def drop_index(self):\n        \"\"\"\n        Drops any indexes present on this table collection. If the tables are not\n        currently indexed this method has no effect.  
See :ref:`sec_table_indexes`\n        for information on indexes.\n        \"\"\"\n        self._ll_tables.drop_index()\n\n    def subset(\n        self,\n        nodes,\n        record_provenance=True,\n        *,\n        reorder_populations=None,\n        remove_unreferenced=None,\n    ):\n        \"\"\"\n        Modifies the tables in place to contain only the entries referring to\n        the provided list of node IDs, with nodes reordered according to the\n        order they appear in the list. Other tables are :meth:`sorted <sort>`\n        to conform to the :ref:`sec_valid_tree_sequence_requirements`, and\n        additionally sorted as described in the documentation for the equivalent\n        tree sequence method :meth:`TreeSequence.subset`: please see this for more\n        detail.\n\n        :param list nodes: The list of nodes for which to retain information. This\n            may be a numpy array (or array-like) object (dtype=np.int32).\n        :param bool record_provenance: Whether to record a provenance entry\n            in the provenance table for this operation.\n        :param bool reorder_populations: Whether to reorder the population table\n            (default: True).  If False, the population table will not be altered\n            in any way.\n        :param bool remove_unreferenced: Whether sites, individuals, and populations\n            that are not referred to by any retained entries in the tables should\n            be removed (default: True). 
See the description for details.\n        \"\"\"\n        reorder_populations = (\n            True if reorder_populations is None else reorder_populations\n        )\n        remove_unreferenced = (\n            True if remove_unreferenced is None else remove_unreferenced\n        )\n        nodes = util.safe_np_int_cast(nodes, np.int32)\n        self._ll_tables.subset(\n            nodes,\n            reorder_populations=reorder_populations,\n            remove_unreferenced=remove_unreferenced,\n        )\n        self.sort()\n        if record_provenance:\n            parameters = {\"command\": \"subset\", \"nodes\": nodes.tolist()}\n            self.provenances.add_row(\n                record=json.dumps(provenance.get_provenance_dict(parameters))\n            )\n\n    def union(\n        self,\n        other,\n        node_mapping,\n        check_shared_equality=True,\n        add_populations=True,\n        record_provenance=True,\n        *,\n        all_edges=False,\n        all_mutations=False,\n    ):\n        \"\"\"\n        Modifies the table collection in place by adding the non-shared\n        portions of ``other`` to itself. To perform the node-wise union,\n        the method relies on a ``node_mapping`` array, that maps nodes in\n        ``other`` to its equivalent node in ``self`` or ``tskit.NULL`` if\n        the node is exclusive to ``other``. 
See :meth:`TreeSequence.union` for a more\n        detailed description.\n\n        :param TableCollection other: Another table collection.\n        :param list node_mapping: An array of node IDs that relate nodes in\n            ``other`` to nodes in ``self``: the k-th element of ``node_mapping``\n            should be the index of the equivalent node in ``self``, or\n            ``tskit.NULL`` if the node is not present in ``self`` (in which case it\n            will be added to self).\n        :param bool check_shared_equality: If True, the shared portions of the\n            table collections will be checked for equality.\n        :param bool add_populations: If True, nodes new to ``self`` will be\n            assigned new population IDs.\n        :param bool record_provenance: Whether to record a provenance entry\n            in the provenance table for this operation.\n        :param bool all_edges: If True, then all edges in ``other`` are added\n            to ``self``.\n        :param bool all_mutations: If True, then all mutations in ``other`` are added\n            to ``self``.\n        \"\"\"\n        node_mapping = util.safe_np_int_cast(node_mapping, np.int32)\n        self._ll_tables.union(\n            other._ll_tables,\n            node_mapping,\n            check_shared_equality=check_shared_equality,\n            add_populations=add_populations,\n            all_edges=all_edges,\n            all_mutations=all_mutations,\n        )\n        if record_provenance:\n            other_records = [prov.record for prov in other.provenances]\n            other_timestamps = [prov.timestamp for prov in other.provenances]\n            parameters = {\n                \"command\": \"union\",\n                \"other\": {\"timestamp\": other_timestamps, \"record\": other_records},\n                \"node_mapping\": node_mapping.tolist(),\n            }\n            self.provenances.add_row(\n                
record=json.dumps(provenance.get_provenance_dict(parameters))\n            )\n\n    def ibd_segments(\n        self,\n        *,\n        within=None,\n        between=None,\n        max_time=None,\n        min_span=None,\n        store_pairs=None,\n        store_segments=None,\n    ):\n        \"\"\"\n        Equivalent to the :meth:`TreeSequence.ibd_segments` method; please see its\n        documentation for more details, and use this method only if you specifically need\n        to work with a :class:`TableCollection` object.\n\n        This method has the same data requirements as\n        :meth:`TableCollection.simplify`. In particular, the tables in the collection\n        have :ref:`required <sec_valid_tree_sequence_requirements>` sorting orders.\n        To enforce this, you can call :meth:`TableCollection.sort` before using this\n        method. If the edge table contains any edges with identical\n        parents and children over adjacent genomic intervals, any IBD intervals\n        underneath the edges will also be split across the breakpoint(s). 
To prevent this\n        behaviour in this situation, use :meth:`EdgeTable.squash` beforehand.\n\n        :param list within: As for the :meth:`TreeSequence.ibd_segments` method.\n        :param list[list] between: As for the :meth:`TreeSequence.ibd_segments` method.\n        :param float max_time: As for the :meth:`TreeSequence.ibd_segments` method.\n        :param float min_span: As for the :meth:`TreeSequence.ibd_segments` method.\n        :param bool store_pairs: As for the :meth:`TreeSequence.ibd_segments` method.\n        :param bool store_segments: As for the :meth:`TreeSequence.ibd_segments` method.\n        :return: An :class:`.IdentitySegments` object containing the recorded\n            IBD information.\n        :rtype: IdentitySegments\n        \"\"\"\n        max_time = np.inf if max_time is None else max_time\n        min_span = 0 if min_span is None else min_span\n        store_pairs = False if store_pairs is None else store_pairs\n        store_segments = False if store_segments is None else store_segments\n        if within is not None and between is not None:\n            raise ValueError(\n                \"The ``within`` and ``between`` arguments are mutually exclusive\"\n            )\n        if between is not None:\n            sample_set_sizes = np.array(\n                [len(sample_set) for sample_set in between], dtype=np.uint64\n            )\n            # hstack has some annoying quirks around its handling of empty\n            # lists which we need to work around. 
In a way it would be more\n            # convenient to detect these conditions as errors, but then we\n            # end up having to work around edge cases in the tests and it's\n            # mathematically neater this way.\n            pre_flattened = [lst for lst in between if len(lst) > 0]\n            if len(pre_flattened) == 0:\n                flattened = []\n            else:\n                flattened = util.safe_np_int_cast(np.hstack(pre_flattened), np.int32)\n            ll_result = self._ll_tables.ibd_segments_between(\n                sample_set_sizes=sample_set_sizes,\n                sample_sets=flattened,\n                max_time=max_time,\n                min_span=min_span,\n                store_pairs=store_pairs,\n                store_segments=store_segments,\n            )\n        else:\n            if within is not None:\n                within = util.safe_np_int_cast(within, np.int32)\n            ll_result = self._ll_tables.ibd_segments_within(\n                samples=within,\n                max_time=max_time,\n                min_span=min_span,\n                store_pairs=store_pairs,\n                store_segments=store_segments,\n            )\n        return IdentitySegments(\n            ll_result,\n            max_time=max_time,\n            min_span=min_span,\n            store_pairs=store_pairs,\n            store_segments=store_segments,\n        )\n\n\nclass ImmutableNodeTable(ImmutableMetadataTable):\n    table_name = \"nodes\"\n    mutable_class = NodeTable\n\n    column_names = [\n        \"time\",\n        \"flags\",\n        \"population\",\n        \"individual\",\n        \"metadata\",\n        \"metadata_offset\",\n    ]\n\n\nclass ImmutableIndividualTable(ImmutableMetadataTable):\n    table_name = \"individuals\"\n    mutable_class = IndividualTable\n\n    _row_field_indices = (0, 1, 2, 3)\n\n    column_names = [\n        \"flags\",\n        \"location\",\n        \"location_offset\",\n        \"parents\",\n        
\"parents_offset\",\n        \"metadata\",\n        \"metadata_offset\",\n    ]\n\n\nclass ImmutableEdgeTable(ImmutableMetadataTable):\n    table_name = \"edges\"\n    mutable_class = EdgeTable\n\n    column_names = [\n        \"left\",\n        \"right\",\n        \"parent\",\n        \"child\",\n        \"metadata\",\n        \"metadata_offset\",\n    ]\n\n\nclass ImmutableMigrationTable(ImmutableMetadataTable):\n    table_name = \"migrations\"\n    mutable_class = MigrationTable\n\n    column_names = [\n        \"left\",\n        \"right\",\n        \"node\",\n        \"source\",\n        \"dest\",\n        \"time\",\n        \"metadata\",\n        \"metadata_offset\",\n    ]\n\n\nclass ImmutableSiteTable(ImmutableMetadataTable):\n    table_name = \"sites\"\n    mutable_class = SiteTable\n\n    _row_field_indices = (0, 1, 4)\n\n    column_names = [\n        \"position\",\n        \"ancestral_state\",\n        \"ancestral_state_offset\",\n        \"metadata\",\n        \"metadata_offset\",\n    ]\n\n\nclass ImmutableMutationTable(ImmutableMetadataTable):\n    table_name = \"mutations\"\n    mutable_class = MutationTable\n\n    _row_field_indices = (0, 1, 2, 3, 4, 5)\n\n    column_names = [\n        \"site\",\n        \"node\",\n        \"time\",\n        \"derived_state\",\n        \"derived_state_offset\",\n        \"parent\",\n        \"metadata\",\n        \"metadata_offset\",\n    ]\n\n\nclass ImmutablePopulationTable(ImmutableMetadataTable):\n    table_name = \"populations\"\n    mutable_class = PopulationTable\n\n    column_names = [\"metadata\", \"metadata_offset\"]\n\n\nclass ImmutableProvenanceTable(ImmutableBaseTable):\n    table_name = \"provenances\"\n    mutable_class = ProvenanceTable\n\n    column_names = [\n        \"record\",\n        \"record_offset\",\n        \"timestamp\",\n        \"timestamp_offset\",\n    ]\n\n    def equals(self, other, ignore_timestamps=False):\n        return self._equals_internal(other, 
ignore_timestamps=bool(ignore_timestamps))\n\n    def assert_equals(self, other, *, ignore_timestamps=False):\n        if ignore_timestamps and getattr(self, \"table_name\", None) != \"provenances\":\n            raise ValueError(\"ignore_timestamps is only valid for Provenance tables\")\n        self._assert_equals_internal(other, ignore_timestamps=bool(ignore_timestamps))\n\n\nclass ImmutableTableCollection(metadata.MetadataProvider):\n    \"\"\"\n    An immutable view of a table collection backed by a :class:`TreeSequence`.\n    Provides zero-copy read access to all table data without allowing mutation.\n\n    This class is returned by :attr:`TreeSequence.tables` and provides efficient,\n    read-only access to the underlying table data. Since it's backed directly by\n    the low-level TreeSequence representation, no copying of data is required.\n\n    All methods from TableCollection that do not mutate the data are reflected here.\n\n    To obtain a mutable copy of this table collection, use the :meth:`.copy`\n    method which returns a :class:`TableCollection` instance that can be modified.\n    Alternatively, use :meth:`TreeSequence.dump_tables` to get a mutable copy\n    directly from the tree sequence.\n\n    All mutator methods present on :class:`TableCollection` (such as ``sort()``,\n    ``simplify()``, ``clear()``, etc.) 
will raise an :class:`ImmutableTableError`\n    if called on an immutable table collection.\n    \"\"\"\n\n    def __init__(self, ll_tree_sequence):\n        object.__setattr__(self, \"_initialised\", False)\n        self._llts = ll_tree_sequence\n        super().__init__(ll_tree_sequence)\n\n        # Create immutable table views - lazy initialization could be added later\n        self.individuals = ImmutableIndividualTable(ll_tree_sequence)\n        self.nodes = ImmutableNodeTable(ll_tree_sequence)\n        self.edges = ImmutableEdgeTable(ll_tree_sequence)\n        self.migrations = ImmutableMigrationTable(ll_tree_sequence)\n        self.sites = ImmutableSiteTable(ll_tree_sequence)\n        self.mutations = ImmutableMutationTable(ll_tree_sequence)\n        self.populations = ImmutablePopulationTable(ll_tree_sequence)\n        self.provenances = ImmutableProvenanceTable(ll_tree_sequence)\n        object.__setattr__(self, \"_initialised\", True)\n\n    @property\n    def sequence_length(self):\n        return self._llts.get_sequence_length()\n\n    @property\n    def file_uuid(self):\n        return self._llts.get_file_uuid()\n\n    @property\n    def time_units(self):\n        return self._llts.get_time_units()\n\n    @property\n    def reference_sequence(self):\n        return ReferenceSequence(self._llts.reference_sequence)\n\n    @property\n    def metadata_schema(self):\n        return metadata.parse_metadata_schema(self._llts.get_metadata_schema())\n\n    @property\n    def metadata(self):\n        return self.metadata_schema.decode_row(self.metadata_bytes)\n\n    @property\n    def metadata_bytes(self):\n        return self._llts.get_metadata()\n\n    @property\n    def table_name_map(self):\n        return {\n            \"edges\": self.edges,\n            \"individuals\": self.individuals,\n            \"migrations\": self.migrations,\n            \"mutations\": self.mutations,\n            \"nodes\": self.nodes,\n            \"populations\": 
self.populations,\n            \"provenances\": self.provenances,\n            \"sites\": self.sites,\n        }\n\n    @property\n    def indexes(self) -> TableCollectionIndexes:\n        return TableCollectionIndexes(\n            **{\n                \"edge_insertion_order\": self._llts.indexes_edge_insertion_order,\n                \"edge_removal_order\": self._llts.indexes_edge_removal_order,\n            }\n        )\n\n    def has_index(self):\n        return (\n            self._llts.indexes_edge_insertion_order is not None\n            and self._llts.indexes_edge_removal_order is not None\n        )\n\n    def asdict(self, force_offset_64=False):\n        # TODO Could avoid the copy here\n        return self.copy().asdict(force_offset_64=force_offset_64)\n\n    def equals(\n        self,\n        other,\n        *,\n        ignore_metadata=False,\n        ignore_ts_metadata=False,\n        ignore_provenance=False,\n        ignore_timestamps=False,\n        ignore_tables=False,\n        ignore_reference_sequence=False,\n    ):\n        if self is other:\n            return True\n        try:\n            self.assert_equals(\n                other,\n                ignore_metadata=ignore_metadata,\n                ignore_ts_metadata=ignore_ts_metadata,\n                ignore_provenance=ignore_provenance,\n                ignore_timestamps=ignore_timestamps,\n                ignore_tables=ignore_tables,\n                ignore_reference_sequence=ignore_reference_sequence,\n            )\n            return True\n        except AssertionError:\n            return False\n\n    def assert_equals(\n        self,\n        other,\n        *,\n        ignore_metadata=False,\n        ignore_ts_metadata=False,\n        ignore_provenance=False,\n        ignore_timestamps=False,\n        ignore_tables=False,\n        ignore_reference_sequence=False,\n    ):\n        _assert_table_collections_equal(\n            self,\n            other,\n            
ignore_metadata=ignore_metadata,\n            ignore_ts_metadata=ignore_ts_metadata,\n            ignore_provenance=ignore_provenance,\n            ignore_timestamps=ignore_timestamps,\n            ignore_tables=ignore_tables,\n            ignore_reference_sequence=ignore_reference_sequence,\n        )\n\n    @property\n    def nbytes(self):\n        return sum(\n            (\n                8,  # sequence length\n                len(self.metadata_bytes) + len(self._llts.get_metadata_schema()),\n                len(self.time_units.encode()),\n                self.indexes.nbytes,\n                self.reference_sequence.nbytes,\n                sum(table.nbytes for table in self.table_name_map.values()),\n            )\n        )\n\n    def __eq__(self, other):\n        return self.equals(other)\n\n    def __str__(self):\n        return \"\\n\".join(\n            [\n                \"ImmutableTableCollection\",\n                \"\",\n                f\"Sequence Length: {self.sequence_length}\",\n                f\"Time units: {self.time_units}\",\n                \"\",\n                \"Individuals\",\n                str(self.individuals),\n                \"Nodes\",\n                str(self.nodes),\n                \"Edges\",\n                str(self.edges),\n                \"Sites\",\n                str(self.sites),\n                \"Mutations\",\n                str(self.mutations),\n                \"Migrations\",\n                str(self.migrations),\n                \"Populations\",\n                str(self.populations),\n                \"Provenances\",\n                str(self.provenances),\n            ]\n        )\n\n    def link_ancestors(self, samples, ancestors):\n        \"\"\"\n        See :meth:`TableCollection.link_ancestors`.\n        \"\"\"\n        samples = util.safe_np_int_cast(samples, np.int32)\n        ancestors = util.safe_np_int_cast(ancestors, np.int32)\n        ll_edge_table = self._llts.link_ancestors(samples, ancestors)\n  
      return EdgeTable(ll_table=ll_edge_table)\n\n    def map_ancestors(self, *args, **kwargs):\n        \"\"\"\n        Deprecated alias for :meth:`link_ancestors`.\n        \"\"\"\n        return self.link_ancestors(*args, **kwargs)\n\n    _MUTATOR_METHODS = {\n        \"clear\",\n        \"sort\",\n        \"sort_individuals\",\n        \"canonicalise\",\n        \"compute_mutation_parents\",\n        \"compute_mutation_times\",\n        \"deduplicate_sites\",\n        \"delete_sites\",\n        \"delete_intervals\",\n        \"keep_intervals\",\n        \"ltrim\",\n        \"rtrim\",\n        \"trim\",\n        \"shift\",\n        \"delete_older\",\n        \"build_index\",\n        \"drop_index\",\n        \"subset\",\n        \"union\",\n        \"ibd_segments\",\n        \"fromdict\",\n        \"simplify\",\n    }\n\n    def copy(self):\n        ll_tables = _tskit.TableCollection(self.sequence_length)\n        self._llts.dump_tables(ll_tables)\n        return TableCollection(ll_tables=ll_tables)\n\n    def dump(self, file_or_path):\n        return self.copy().dump(file_or_path)\n\n    def tree_sequence(self):\n        return tskit.TreeSequence(self._llts)\n\n    def has_reference_sequence(self):\n        return self._llts.has_reference_sequence()\n\n    def __getattr__(self, name):\n        if name in self._MUTATOR_METHODS:\n            raise ImmutableTableError(\n                f\"Cannot call {name}() on immutable table collection. \"\n                f\"Use TreeSequence.dump_tables() for mutable copy.\"\n            )\n        raise AttributeError(\n            f\"'{self.__class__.__name__}' object has no attribute '{name}'\"\n        )\n\n    def __setattr__(self, name, value):\n        # Allow all assignments during initialization\n        if not self._initialised:\n            object.__setattr__(self, name, value)\n            return\n        raise ImmutableTableError(\n            f\"Cannot set attribute '{name}' on immutable table collection. 
\"\n            f\"Use TreeSequence.dump_tables() for mutable copy.\"\n        )\n"
  },
  {
    "path": "python/tskit/text_formats.py",
    "content": "# MIT License\n#\n# Copyright (c) 2021-2024 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nModule responsible for working with text format data.\n\"\"\"\n\nimport base64\n\nimport numpy as np\n\nimport tskit\nfrom tskit import util\n\n\ndef parse_fam(fam_file):\n    \"\"\"\n    Parse PLINK .fam file and convert to tskit IndividualTable.\n\n    Assumes fam file contains five columns: FID, IID, PAT, MAT, SEX\n\n    :param fam_file: PLINK .fam file object\n    :param tskit.TableCollection tc: TableCollection with IndividualTable to\n        which the individuals will be added\n    \"\"\"\n    individuals = np.loadtxt(\n        fname=fam_file,\n        dtype=str,\n        ndmin=2,  # read file as 2-D table\n        usecols=(0, 1, 2, 3, 4),  # only keep FID, IID, PAT, MAT, SEX columns\n    )  # requires same number of columns in each row, i.e. 
not ragged\n\n    id_map = {}  # dict for translating PLINK ID to tskit IndividualTable ID\n    for tskit_id, (plink_fid, plink_iid, _pat, _mat, _sex) in enumerate(individuals):\n        # include space between strings to ensure uniqueness\n        plink_id = f\"{plink_fid} {plink_iid}\"\n        if plink_id in id_map:\n            raise ValueError(f\"Duplicate PLINK ID: {plink_id}\")\n        id_map[plink_id] = tskit_id\n    id_map[\"0\"] = -1  # -1 is used in tskit to denote \"missing\"\n\n    tc = tskit.TableCollection(1)\n    tb = tc.individuals\n    tb.metadata_schema = tskit.MetadataSchema(\n        {\n            \"codec\": \"json\",\n            \"type\": \"object\",\n            \"properties\": {\n                \"plink_fid\": {\"type\": \"string\"},\n                \"plink_iid\": {\"type\": \"string\"},\n                \"sex\": {\"type\": \"integer\"},\n            },\n            \"required\": [\"plink_fid\", \"plink_iid\", \"sex\"],\n            \"additionalProperties\": True,\n        }\n    )\n    for plink_fid, plink_iid, pat, mat, sex in individuals:\n        sex = int(sex)\n        if sex not in range(3):\n            raise ValueError(\n                \"Sex must be one of the following: 0 (unknown), 1 (male), 2 (female)\"\n            )\n        metadata_dict = {\"plink_fid\": plink_fid, \"plink_iid\": plink_iid, \"sex\": sex}\n        pat_id = f\"{plink_fid} {pat}\" if pat != \"0\" else pat\n        mat_id = f\"{plink_fid} {mat}\" if mat != \"0\" else mat\n        tb.add_row(\n            parents=[\n                id_map[pat_id],\n                id_map[mat_id],\n            ],\n            metadata=metadata_dict,\n        )\n    tc.sort()\n\n    return tb\n\n\ndef flexible_file_output(ts_export_func):\n    \"\"\"\n    Decorator to support writing to either an open file-like object\n    or to a path. 
Assumes the second argument is the output.\n    \"\"\"\n\n    def f(ts, file_or_path, **kwargs):\n        file, local_file = util.convert_file_like_to_open_file(file_or_path, \"w\")\n        try:\n            ts_export_func(ts, file, **kwargs)\n        finally:\n            if local_file:\n                file.close()\n\n    return f\n\n\n@flexible_file_output\ndef write_nexus(\n    ts,\n    out,\n    *,\n    precision,\n    include_trees,\n    include_alignments,\n    reference_sequence,\n    missing_data_character,\n    node_labels,\n    isolated_as_missing=None,\n):\n    # See TreeSequence.write_nexus for documentation on parameters.\n    if precision is None:\n        pos_precision = 0 if ts.discrete_genome else 17\n        time_precision = None\n    else:\n        pos_precision = precision\n        time_precision = precision\n\n    indent = \"  \"\n    print(\"#NEXUS\", file=out)\n    print(\"BEGIN TAXA;\", file=out)\n    print(\"\", f\"DIMENSIONS NTAX={ts.num_samples};\", sep=indent, file=out)\n\n    if node_labels is not None:\n        taxlabels = \" \".join(\n            node_labels[u] if u in node_labels else f\"n{u}\" for u in ts.samples()\n        )\n    else:\n        taxlabels = \" \".join(f\"n{u}\" for u in ts.samples())\n    print(\"\", f\"TAXLABELS {taxlabels};\", sep=indent, file=out)\n    print(\"END;\", file=out)\n\n    if include_alignments is None:\n        include_alignments = ts.discrete_genome and ts.num_sites > 0\n    if include_alignments:\n        missing_data_character = (\n            \"?\" if missing_data_character is None else missing_data_character\n        )\n        print(\"BEGIN DATA;\", file=out)\n        print(\"\", f\"DIMENSIONS NCHAR={int(ts.sequence_length)};\", sep=indent, file=out)\n        print(\n            \"\",\n            f\"FORMAT DATATYPE=DNA MISSING={missing_data_character};\",\n            sep=indent,\n            file=out,\n        )\n        print(\"\", \"MATRIX\", file=out, sep=indent)\n        alignments = 
ts.alignments(\n            reference_sequence=reference_sequence,\n            missing_data_character=missing_data_character,\n            isolated_as_missing=isolated_as_missing,\n        )\n        for u, alignment in zip(ts.samples(), alignments):\n            print(2 * indent, f\"n{u}\", \" \", alignment, sep=\"\", file=out)\n        print(\"\", \";\", sep=indent, file=out)\n        print(\"END;\", file=out)\n\n    include_trees = True if include_trees is None else include_trees\n    if include_trees:\n        print(\"BEGIN TREES;\", file=out)\n\n        if node_labels is not None:\n            translations = \", \".join(f\"n{u} {name}\" for u, name in node_labels.items())\n            print(f\"  TRANSLATE {translations};\", file=out)\n\n        for tree in ts.trees():\n            start_interval = \"{0:.{1}f}\".format(tree.interval.left, pos_precision)\n            end_interval = \"{0:.{1}f}\".format(tree.interval.right, pos_precision)\n            tree_label = f\"t{start_interval}^{end_interval}\"\n            newick = tree.as_newick(precision=time_precision)\n            print(\"\", f\"TREE {tree_label} = [&R] {newick}\", sep=indent, file=out)\n        print(\"END;\", file=out)\n\n\ndef wrap_text(text, width):\n    \"\"\"\n    Return an iterator over the lines in the specified string of at most the\n    specified width. 
(We could use textwrap.wrap for this, but it uses a\n    more complicated algorithm appropriate for blocks of words.)\n    \"\"\"\n    width = len(text) if width == 0 else width\n    N = len(text) // width\n    offset = 0\n    for _ in range(N):\n        yield text[offset : offset + width]\n        offset += width\n    if offset != len(text):\n        yield text[offset:]\n\n\n@flexible_file_output\ndef write_fasta(\n    ts,\n    output,\n    *,\n    wrap_width,\n    reference_sequence,\n    missing_data_character,\n    isolated_as_missing=None,\n):\n    # See TreeSequence.write_fasta for documentation\n    if wrap_width < 0 or int(wrap_width) != wrap_width:\n        raise ValueError(\n            \"wrap_width must be a non-negative integer. \"\n            \"You may specify `wrap_width=0` \"\n            \"if you do not want any wrapping.\"\n        )\n    wrap_width = int(wrap_width)\n    alignments = ts.alignments(\n        reference_sequence=reference_sequence,\n        missing_data_character=missing_data_character,\n        isolated_as_missing=isolated_as_missing,\n    )\n    for u, alignment in zip(ts.samples(), alignments):\n        print(\">\", f\"n{u}\", sep=\"\", file=output)\n        for line in wrap_text(alignment, wrap_width):\n            print(line, file=output)\n\n\ndef _build_newick(tree, *, node, precision, node_labels, include_branch_lengths):\n    label = node_labels.get(node, \"\")\n    if tree.is_leaf(node):\n        s = f\"{label}\"\n    else:\n        s = \"(\"\n        for child in tree.children(node):\n            branch_length = tree.branch_length(child)\n            subtree = _build_newick(\n                tree,\n                node=child,\n                precision=precision,\n                node_labels=node_labels,\n                include_branch_lengths=include_branch_lengths,\n            )\n            if include_branch_lengths:\n                subtree += \":{0:.{1}f}\".format(branch_length, precision)\n            s += subtree + 
\",\"\n        s = s[:-1] + f\"){label}\"\n    return s\n\n\ndef build_newick(tree, *, root, precision, node_labels, include_branch_lengths):\n    \"\"\"\n    Simple recursive version of the newick generator used when non-default\n    node labels are needed, or when branch lengths are omitted\n    \"\"\"\n    s = _build_newick(\n        tree,\n        node=root,\n        precision=precision,\n        node_labels=node_labels,\n        include_branch_lengths=include_branch_lengths,\n    )\n    return s + \";\"\n\n\ndef dump_text(\n    ts,\n    *,\n    nodes,\n    edges,\n    sites,\n    mutations,\n    individuals,\n    populations,\n    migrations,\n    provenances,\n    precision,\n    encoding,\n    base64_metadata,\n):\n    if nodes is not None:\n        print(\n            \"id\",\n            \"is_sample\",\n            \"time\",\n            \"population\",\n            \"individual\",\n            \"metadata\",\n            sep=\"\\t\",\n            file=nodes,\n        )\n        for node in ts.nodes():\n            metadata = text_metadata(base64_metadata, encoding, node)\n            row = (\n                \"{id:d}\\t\"\n                \"{is_sample:d}\\t\"\n                \"{time:.{precision}f}\\t\"\n                \"{population:d}\\t\"\n                \"{individual:d}\\t\"\n                \"{metadata}\"\n            ).format(\n                precision=precision,\n                id=node.id,\n                is_sample=node.is_sample(),\n                time=node.time,\n                population=node.population,\n                individual=node.individual,\n                metadata=metadata,\n            )\n            print(row, file=nodes)\n\n    if edges is not None:\n        print(\"left\", \"right\", \"parent\", \"child\", \"metadata\", sep=\"\\t\", file=edges)\n        for edge in ts.edges():\n            metadata = text_metadata(base64_metadata, encoding, edge)\n            row = (\n                \"{left:.{precision}f}\\t\"\n               
 \"{right:.{precision}f}\\t\"\n                \"{parent:d}\\t\"\n                \"{child:d}\\t\"\n                \"{metadata}\"\n            ).format(\n                precision=precision,\n                left=edge.left,\n                right=edge.right,\n                parent=edge.parent,\n                child=edge.child,\n                metadata=metadata,\n            )\n            print(row, file=edges)\n\n    if sites is not None:\n        print(\"position\", \"ancestral_state\", \"metadata\", sep=\"\\t\", file=sites)\n        for site in ts.sites():\n            metadata = text_metadata(base64_metadata, encoding, site)\n            row = (\"{position:.{precision}f}\\t{ancestral_state}\\t{metadata}\").format(\n                precision=precision,\n                position=site.position,\n                ancestral_state=site.ancestral_state,\n                metadata=metadata,\n            )\n            print(row, file=sites)\n\n    if mutations is not None:\n        print(\n            \"site\",\n            \"node\",\n            \"time\",\n            \"derived_state\",\n            \"parent\",\n            \"metadata\",\n            sep=\"\\t\",\n            file=mutations,\n        )\n        for site in ts.sites():\n            for mutation in site.mutations:\n                metadata = text_metadata(base64_metadata, encoding, mutation)\n                row = (\n                    \"{site}\\t{node}\\t{time}\\t{derived_state}\\t{parent}\\t{metadata}\"\n                ).format(\n                    site=mutation.site,\n                    node=mutation.node,\n                    time=(\n                        \"unknown\"\n                        if util.is_unknown_time(mutation.time)\n                        else mutation.time\n                    ),\n                    derived_state=mutation.derived_state,\n                    parent=mutation.parent,\n                    metadata=metadata,\n                )\n                print(row, 
file=mutations)\n\n    if individuals is not None:\n        print(\n            \"id\",\n            \"flags\",\n            \"location\",\n            \"parents\",\n            \"metadata\",\n            sep=\"\\t\",\n            file=individuals,\n        )\n        for individual in ts.individuals():\n            metadata = text_metadata(base64_metadata, encoding, individual)\n            location = \",\".join(map(str, individual.location))\n            parents = \",\".join(map(str, individual.parents))\n            row = (\"{id}\\t{flags}\\t{location}\\t{parents}\\t{metadata}\").format(\n                id=individual.id,\n                flags=individual.flags,\n                location=location,\n                parents=parents,\n                metadata=metadata,\n            )\n            print(row, file=individuals)\n\n    if populations is not None:\n        print(\"id\", \"metadata\", sep=\"\\t\", file=populations)\n        for population in ts.populations():\n            metadata = text_metadata(base64_metadata, encoding, population)\n            row = (\"{id}\\t{metadata}\").format(id=population.id, metadata=metadata)\n            print(row, file=populations)\n\n    if migrations is not None:\n        print(\n            \"left\",\n            \"right\",\n            \"node\",\n            \"source\",\n            \"dest\",\n            \"time\",\n            \"metadata\",\n            sep=\"\\t\",\n            file=migrations,\n        )\n        for migration in ts.migrations():\n            metadata = text_metadata(base64_metadata, encoding, migration)\n            row = (\n                \"{left}\\t{right}\\t{node}\\t{source}\\t{dest}\\t{time}\\t{metadata}\\t\"\n            ).format(\n                left=migration.left,\n                right=migration.right,\n                node=migration.node,\n                source=migration.source,\n                dest=migration.dest,\n                time=migration.time,\n                
metadata=metadata,\n            )\n            print(row, file=migrations)\n\n    if provenances is not None:\n        print(\"id\", \"timestamp\", \"record\", sep=\"\\t\", file=provenances)\n        for provenance in ts.provenances():\n            row = (\"{id}\\t{timestamp}\\t{record}\\t\").format(\n                id=provenance.id,\n                timestamp=provenance.timestamp,\n                record=provenance.record,\n            )\n            print(row, file=provenances)\n\n\ndef text_metadata(base64_metadata, encoding, node):\n    metadata = node.metadata\n    if isinstance(metadata, bytes) and base64_metadata:\n        metadata = base64.b64encode(metadata).decode(encoding)\n    else:\n        metadata = repr(metadata)\n    return metadata\n"
  },
  {
    "path": "python/tskit/trees.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2025 Tskit Developers\n# Copyright (c) 2015-2018 University of Oxford\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nModule responsible for managing trees and tree sequences.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport base64\nimport builtins\nimport collections\nimport concurrent.futures\nimport functools\nimport io\nimport itertools\nimport json\nimport math\nimport numbers\nimport warnings\nfrom dataclasses import dataclass\nfrom typing import Any, NamedTuple\n\nimport numpy as np\n\nimport _tskit\nimport tskit\nimport tskit.combinatorics as combinatorics\nimport tskit.drawing as drawing\nimport tskit.metadata as metadata_module\nimport tskit.provenance as provenance\nimport tskit.tables as tables\nimport tskit.text_formats as text_formats\nimport tskit.util as util\nimport tskit.vcf as vcf\nfrom tskit import NODE_IS_SAMPLE, NULL, UNKNOWN_TIME\n\nLEGACY_MS_LABELS = 
\"legacy_ms\"\n\n\n@dataclass\nclass VcfModelMapping:\n    individuals_nodes: np.ndarray\n    individuals_name: np.ndarray\n    transformed_positions: np.ndarray\n    contig_length: int\n    contig_id: str\n    isolated_as_missing: bool\n\n\nclass CoalescenceRecord(NamedTuple):\n    left: float\n    right: float\n    node: int\n    children: np.ndarray\n    time: float\n    population: int\n\n\nclass Interval(NamedTuple):\n    \"\"\"\n    A tuple of 2 numbers, ``[left, right)``, defining an interval over the genome.\n    \"\"\"\n\n    left: float | int\n    \"\"\"\n    The left hand end of the interval. By convention this value is included\n    in the interval\n    \"\"\"\n    right: float | int\n    \"\"\"\n    The right hand end of the interval. By convention this value is *not*\n    included in the interval, i.e., the interval is half-open.\n    \"\"\"\n\n    @property\n    def span(self) -> float | int:\n        \"\"\"\n        The span of the genome covered by this interval, simply ``right-left``.\n        \"\"\"\n        return self.right - self.left\n\n    @property\n    def mid(self) -> float | int:\n        \"\"\"\n        The middle point of this interval, simply ``left+(right-left)/2``.\n        \"\"\"\n        return self.left + (self.right - self.left) / 2\n\n\nclass EdgeDiff(NamedTuple):\n    interval: Interval\n    edges_out: list\n    edges_in: list\n\n\ndef store_tree_sequence(cls):\n    wrapped_init = cls.__init__\n\n    # Intercept the init to record the tree_sequence\n    def new_init(self, *args, tree_sequence=None, **kwargs):\n        builtins.object.__setattr__(self, \"_tree_sequence\", tree_sequence)\n        wrapped_init(self, *args, **kwargs)\n\n    cls.__init__ = new_init\n    return cls\n\n\n@store_tree_sequence\n@metadata_module.lazy_decode()\n@dataclass\nclass Individual(util.Dataclass):\n    \"\"\"\n    An :ref:`individual <sec_individual_table_definition>` in a tree sequence.\n    Since nodes correspond to genomes, individuals are 
associated with a collection\n    of nodes (e.g., two nodes per diploid). See :ref:`sec_nodes_or_individuals`\n    for more discussion of this distinction.\n\n    Modifying the attributes in this class will have **no effect** on the\n    underlying tree sequence data.\n    \"\"\"\n\n    __slots__ = [\n        \"id\",\n        \"flags\",\n        \"location\",\n        \"parents\",\n        \"nodes\",\n        \"metadata\",\n        \"_tree_sequence\",\n    ]\n    id: int  # noqa A003\n    \"\"\"\n    The integer ID of this individual. Varies from 0 to\n    :attr:`TreeSequence.num_individuals` - 1.\"\"\"\n    flags: int\n    \"\"\"\n    The bitwise flags for this individual.\n    \"\"\"\n    location: np.ndarray\n    \"\"\"\n    The spatial location of this individual as a numpy array. The location is an empty\n    array if no spatial location is defined.\n    \"\"\"\n    parents: np.ndarray\n    \"\"\"\n    The parent individual ids of this individual as a numpy array. The parents is an\n    empty array if no parents are defined.\n    \"\"\"\n    nodes: np.ndarray\n    \"\"\"\n    The IDs of the nodes that are associated with this individual as\n    a numpy array (dtype=np.int32). 
If no nodes are associated with the\n    individual this array will be empty.\n    \"\"\"\n    metadata: bytes | dict | None\n    \"\"\"\n    The :ref:`metadata <sec_metadata_definition>`\n    for this individual, decoded if a schema applies.\n    \"\"\"\n\n    @property\n    def population(self) -> int:\n        populations = {self._tree_sequence.node(n).population for n in self.nodes}\n        if len(populations) > 1:\n            raise ValueError(\"Individual has nodes with mis-matched populations\")\n        if len(populations) == 0:\n            return tskit.NULL\n        return populations.pop()\n\n    @property\n    def time(self) -> int:\n        times = {self._tree_sequence.node(n).time for n in self.nodes}\n        if len(times) > 1:\n            raise ValueError(\"Individual has nodes with mis-matched times\")\n        if len(times) == 0:\n            return tskit.UNKNOWN_TIME\n        return times.pop()\n\n    # Custom eq for the numpy arrays\n    def __eq__(self, other):\n        return (\n            self.id == other.id\n            and self.flags == other.flags\n            and np.array_equal(self.location, other.location)\n            and np.array_equal(self.parents, other.parents)\n            and np.array_equal(self.nodes, other.nodes)\n            and self.metadata == other.metadata\n        )\n\n\n@metadata_module.lazy_decode()\n@dataclass\nclass Node(util.Dataclass):\n    \"\"\"\n    A :ref:`node <sec_node_table_definition>` in a tree sequence, corresponding\n    to a single genome. The ``time`` and ``population`` are attributes of the\n    ``Node``, rather than the ``Individual``, as discussed in\n    :ref:`sec_nodes_or_individuals`.\n\n    Modifying the attributes in this class will have **no effect** on the\n    underlying tree sequence data.\n    \"\"\"\n\n    __slots__ = [\"id\", \"flags\", \"time\", \"population\", \"individual\", \"metadata\"]\n    id: int  # noqa A003\n    \"\"\"\n    The integer ID of this node. 
Varies from 0 to :attr:`TreeSequence.num_nodes` - 1.\n    \"\"\"\n    flags: int\n    \"\"\"\n    The bitwise flags for this node.\n    \"\"\"\n    time: float\n    \"\"\"\n    The birth time of this node.\n    \"\"\"\n    population: int\n    \"\"\"\n    The integer ID of the population that this node was born in.\n    \"\"\"\n    individual: int\n    \"\"\"\n    The integer ID of the individual that this node was a part of.\n    \"\"\"\n    metadata: bytes | dict | None\n    \"\"\"\n    The :ref:`metadata <sec_metadata_definition>` for this node, decoded if a schema\n    applies.\n    \"\"\"\n\n    def is_sample(self):\n        \"\"\"\n        Returns True if this node is a sample. This value is derived from the\n        ``flags`` attribute.\n\n        :rtype: bool\n        \"\"\"\n        return self.flags & NODE_IS_SAMPLE\n\n\n@metadata_module.lazy_decode(own_init=True)\n@dataclass\nclass Edge(util.Dataclass):\n    \"\"\"\n    An :ref:`edge <sec_edge_table_definition>` in a tree sequence.\n\n    Modifying the attributes in this class will have **no effect** on the\n    underlying tree sequence data.\n    \"\"\"\n\n    __slots__ = [\"left\", \"right\", \"parent\", \"child\", \"metadata\", \"id\"]\n    left: float\n    \"\"\"\n    The left coordinate of this edge.\n    \"\"\"\n    right: float\n    \"\"\"\n    The right coordinate of this edge.\n    \"\"\"\n    parent: int\n    \"\"\"\n    The integer ID of the parent node for this edge.\n    To obtain further information about a node with a given ID, use\n    :meth:`TreeSequence.node`.\n    \"\"\"\n    child: int\n    \"\"\"\n    The integer ID of the child node for this edge.\n    To obtain further information about a node with a given ID, use\n    :meth:`TreeSequence.node`.\n    \"\"\"\n    metadata: bytes | dict | None\n    \"\"\"\n    The :ref:`metadata <sec_metadata_definition>` for this edge, decoded if a schema\n    applies.\n    \"\"\"\n    id: int  # noqa A003\n    \"\"\"\n    The integer ID of this edge. 
Varies from 0 to\n    :attr:`TreeSequence.num_edges` - 1.\n    \"\"\"\n\n    # Custom init to define default values with slots\n    def __init__(\n        self,\n        left,\n        right,\n        parent,\n        child,\n        metadata=b\"\",\n        id=None,  # noqa A002\n        metadata_decoder=None,\n    ):\n        self.id = id\n        self.left = left\n        self.right = right\n        self.parent = parent\n        self.child = child\n        self.metadata = metadata\n        self._metadata_decoder = metadata_decoder\n\n    @property\n    def span(self):\n        \"\"\"\n        Returns the span of this edge, i.e., the right position minus the left position\n\n        :return: The span of this edge.\n        :rtype: float\n        \"\"\"\n        return self.right - self.left\n\n    @property\n    def interval(self):\n        \"\"\"\n        Returns the left and right positions of this edge as an :class:`Interval`\n\n        :return: The interval covered by this edge.\n        :rtype: :class:`Interval`\n        \"\"\"\n        return Interval(self.left, self.right)\n\n\n@metadata_module.lazy_decode()\n@dataclass\nclass Site(util.Dataclass):\n    \"\"\"\n    A :ref:`site <sec_site_table_definition>` in a tree sequence.\n\n    Modifying the attributes in this class will have **no effect** on the\n    underlying tree sequence data.\n    \"\"\"\n\n    __slots__ = [\"id\", \"position\", \"ancestral_state\", \"mutations\", \"metadata\"]\n    id: int  # noqa A003\n    \"\"\"\n    The integer ID of this site. 
Varies from 0 to :attr:`TreeSequence.num_sites` - 1.\n    \"\"\"\n    position: float\n    \"\"\"\n    The floating point location of this site in genome coordinates.\n    Ranges from 0 (inclusive) to :attr:`TreeSequence.sequence_length` (exclusive).\n    \"\"\"\n    ancestral_state: str\n    \"\"\"\n    The ancestral state at this site (i.e., the state inherited by nodes, unless\n    mutations occur).\n    \"\"\"\n    mutations: np.ndarray\n    \"\"\"\n    The list of mutations at this site. Mutations within a site are returned in the\n    order they are specified in the underlying :class:`MutationTable`. For canonical\n    (i.e., valid) tables, this means ancestral mutations precede their descendants, so\n    older mutations (as defined by the canonical mutation ordering; see\n    :ref:`sec_mutation_requirements`) appear before younger ones.\n    \"\"\"\n    metadata: bytes | dict | None\n    \"\"\"\n    The :ref:`metadata <sec_metadata_definition>` for this site, decoded if a schema\n    applies.\n    \"\"\"\n\n    # We need a custom eq for the numpy arrays\n    def __eq__(self, other):\n        return (\n            isinstance(other, Site)\n            and self.id == other.id\n            and self.position == other.position\n            and self.ancestral_state == other.ancestral_state\n            and np.array_equal(self.mutations, other.mutations)\n            and self.metadata == other.metadata\n        )\n\n    @property\n    def alleles(self) -> set[str]:\n        \"\"\"\n        Return the set of all the alleles defined at this site\n\n        .. note::\n            This deliberately returns an (unordered) *set* of the possible allelic\n            states (as defined by the site's ancestral allele and its associated\n            mutations). 
If you wish to obtain an (ordered) *list* of alleles, for\n            example to translate the numeric genotypes at a site into allelic states,\n            you should instead use ``.alleles`` attribute of the :class:`Variant` class,\n            which unlike this attribute includes ``None`` as a state when there is\n            missing data at a site.\n        \"\"\"\n        return {self.ancestral_state} | {m.derived_state for m in self.mutations}\n\n\n@metadata_module.lazy_decode()\n@dataclass\nclass Mutation(util.Dataclass):\n    \"\"\"\n    A :ref:`mutation <sec_mutation_table_definition>` in a tree sequence.\n\n    Modifying the attributes in this class will have **no effect** on the\n    underlying tree sequence data.\n    \"\"\"\n\n    __slots__ = [\n        \"id\",\n        \"site\",\n        \"node\",\n        \"derived_state\",\n        \"parent\",\n        \"metadata\",\n        \"time\",\n        \"edge\",\n        \"inherited_state\",\n    ]\n    id: int  # noqa A003\n    \"\"\"\n    The integer ID of this mutation. Varies from 0 to\n    :attr:`TreeSequence.num_mutations` - 1.\n\n    Modifying the attributes in this class will have **no effect** on the\n    underlying tree sequence data.\n    \"\"\"\n    site: int\n    \"\"\"\n    The integer ID of the site that this mutation occurs at. To obtain\n    further information about a site with a given ID use :meth:`TreeSequence.site`.\n    \"\"\"\n    node: int\n    \"\"\"\n    The integer ID of the first node that inherits this mutation.\n    To obtain further information about a node with a given ID, use\n    :meth:`TreeSequence.node`.\n    \"\"\"\n    derived_state: str\n    \"\"\"\n    The derived state for this mutation. This is the state\n    inherited by nodes in the subtree rooted at this mutation's node, unless\n    another mutation occurs.\n    \"\"\"\n    parent: int\n    \"\"\"\n    The integer ID of this mutation's parent mutation. 
When multiple\n    mutations occur at a site along a path in the tree, mutations must\n    record the mutation that is immediately above them. If the mutation does\n    not have a parent, this is equal to the :data:`NULL` (-1).\n    To obtain further information about a mutation with a given ID, use\n    :meth:`TreeSequence.mutation`.\n    \"\"\"\n    metadata: bytes | dict | None\n    \"\"\"\n    The :ref:`metadata <sec_metadata_definition>` for this mutation, decoded if a schema\n    applies.\n    \"\"\"\n    time: float\n    \"\"\"\n    The occurrence time of this mutation.\n    \"\"\"\n    edge: int\n    \"\"\"\n    The ID of the edge that this mutation is on.\n    \"\"\"\n    inherited_state: str\n    \"\"\"\n    The inherited state for this mutation. This is the state that existed at the site\n    before this mutation occurred. This is either the ancestral state of the site\n    (if the mutation has no parent) or the derived state of the mutation's\n    parent mutation (if it has a parent).\n    \"\"\"\n\n    # To get default values on slots we define a custom init\n    def __init__(\n        self,\n        id=NULL,  # noqa A003\n        site=NULL,\n        node=NULL,\n        time=UNKNOWN_TIME,\n        derived_state=None,\n        parent=NULL,\n        metadata=b\"\",\n        edge=NULL,\n        inherited_state=None,\n    ):\n        self.id = id\n        self.site = site\n        self.node = node\n        self.time = time\n        self.derived_state = derived_state\n        self.parent = parent\n        self.metadata = metadata\n        self.edge = edge\n        self.inherited_state = inherited_state\n\n    # We need a custom eq to compare unknown times.\n    def __eq__(self, other):\n        return (\n            isinstance(other, Mutation)\n            and self.id == other.id\n            and self.site == other.site\n            and self.node == other.node\n            and self.derived_state == other.derived_state\n            and self.parent == 
other.parent\n            and self.edge == other.edge\n            and self.metadata == other.metadata\n            and (\n                self.time == other.time\n                or (util.is_unknown_time(self.time) and util.is_unknown_time(other.time))\n            )\n        )\n\n\n@metadata_module.lazy_decode()\n@dataclass\nclass Migration(util.Dataclass):\n    \"\"\"\n    A :ref:`migration <sec_migration_table_definition>` in a tree sequence.\n\n    Modifying the attributes in this class will have **no effect** on the\n    underlying tree sequence data.\n    \"\"\"\n\n    __slots__ = [\"left\", \"right\", \"node\", \"source\", \"dest\", \"time\", \"metadata\", \"id\"]\n    left: float\n    \"\"\"\n    The left end of the genomic interval covered by this\n    migration (inclusive).\n    \"\"\"\n    right: float\n    \"\"\"\n    The right end of the genomic interval covered by this migration\n    (exclusive).\n    \"\"\"\n    node: int\n    \"\"\"\n    The integer ID of the node involved in this migration event.\n    To obtain further information about a node with a given ID, use\n    :meth:`TreeSequence.node`.\n    \"\"\"\n    source: int\n    \"\"\"\n    The source population ID.\n    \"\"\"\n    dest: int\n    \"\"\"\n    The destination population ID.\n    \"\"\"\n    time: float\n    \"\"\"\n    The time at which this migration occurred.\n    \"\"\"\n    metadata: bytes | dict | None\n    \"\"\"\n    The :ref:`metadata <sec_metadata_definition>` for this migration, decoded if a schema\n    applies.\n    \"\"\"\n    id: int  # noqa A003\n    \"\"\"\n    The integer ID of this migration. 
Varies from 0 to\n    :attr:`TreeSequence.num_migrations` - 1.\n    \"\"\"\n\n\n@metadata_module.lazy_decode()\n@dataclass\nclass Population(util.Dataclass):\n    \"\"\"\n    A :ref:`population <sec_population_table_definition>` in a tree sequence.\n\n    Modifying the attributes in this class will have **no effect** on the\n    underlying tree sequence data.\n    \"\"\"\n\n    __slots__ = [\"id\", \"metadata\"]\n    id: int  # noqa A003\n    \"\"\"\n    The integer ID of this population. Varies from 0 to\n    :attr:`TreeSequence.num_populations` - 1.\n    \"\"\"\n    metadata: bytes | dict | None\n    \"\"\"\n    The :ref:`metadata <sec_metadata_definition>` for this population, decoded if a\n    schema applies.\n    \"\"\"\n\n\n@dataclass\nclass Edgeset(util.Dataclass):\n    __slots__ = [\"left\", \"right\", \"parent\", \"children\"]\n    left: int\n    right: int\n    parent: int\n    children: np.ndarray\n\n    # We need a custom eq for the numpy array\n    def __eq__(self, other):\n        return (\n            isinstance(other, Edgeset)\n            and self.left == other.left\n            and self.right == other.right\n            and self.parent == other.parent\n            and np.array_equal(self.children, other.children)\n        )\n\n\n@dataclass\nclass Provenance(util.Dataclass):\n    \"\"\"\n    A provenance entry in a tree sequence, detailing how this tree\n    sequence was generated, or subsequent operations on it (see :ref:`sec_provenance`).\n    \"\"\"\n\n    __slots__ = [\"id\", \"timestamp\", \"record\"]\n    id: int  # noqa A003\n    timestamp: str\n    \"\"\"\n    The time that this entry was made\n    \"\"\"\n    record: str\n    \"\"\"\n    A JSON string giving details of the provenance (see :ref:`sec_provenance_example`\n    for an example JSON string)\n    \"\"\"\n\n\nclass Tree:\n    \"\"\"\n    A single tree in a :class:`TreeSequence`. 
Please see the\n    :ref:`tutorials:sec_processing_trees` section for information\n    on how to efficiently access trees sequentially or obtain a list\n    of individual trees in a tree sequence.\n\n    The ``sample_lists`` parameter controls the features that are enabled\n    for this tree. If ``sample_lists`` is True a more efficient algorithm is\n    used in the :meth:`Tree.samples` method.\n\n    The ``tracked_samples`` parameter can be used to efficiently count the\n    number of samples in a given set that exist in a particular subtree\n    using the :meth:`Tree.num_tracked_samples` method.\n\n    The :class:`Tree` class is a state-machine which has a state\n    corresponding to each of the trees in the parent tree sequence. We\n    transition between these states by using the seek functions like\n    :meth:`Tree.first`, :meth:`Tree.last`, :meth:`Tree.seek` and\n    :meth:`Tree.seek_index`. There is one more state, the so-called \"null\"\n    or \"cleared\" state. This is the state that a :class:`Tree` is in\n    immediately after initialisation;  it has an index of -1, and no edges. We\n    can also enter the null state by calling :meth:`Tree.next` on the last\n    tree in a sequence, calling :meth:`Tree.prev` on the first tree in a\n    sequence or calling the :meth:`Tree.clear` method at any time.\n\n    The high-level TreeSequence seeking and iteration methods (e.g.,\n    :meth:`TreeSequence.trees`) are built on these low-level state-machine\n    seek operations. 
We recommend these higher level operations for most\n    users.\n\n    :param TreeSequence tree_sequence: The parent tree sequence.\n    :param list tracked_samples: The list of samples to be tracked and\n        counted using the :meth:`Tree.num_tracked_samples` method.\n    :param bool sample_lists: If True, provide more efficient access\n        to the samples beneath a given node using the\n        :meth:`Tree.samples` method.\n    :param int root_threshold: The minimum number of samples that a node\n        must be ancestral to for it to be in the list of roots. By default\n        this is 1, so that isolated samples (representing missing data)\n        are roots. To efficiently restrict the roots of the tree to\n        those subtending meaningful topology, set this to 2. This value\n        is only relevant when trees have multiple roots.\n    :param bool sample_counts: Deprecated since 0.2.4.\n    \"\"\"\n\n    def __init__(\n        self,\n        tree_sequence,\n        tracked_samples=None,\n        *,\n        sample_lists=False,\n        root_threshold=1,\n        sample_counts=None,\n    ):\n        options = 0\n        if sample_counts is not None:\n            warnings.warn(\n                \"The sample_counts option is not supported since 0.2.4 and is ignored\",\n                RuntimeWarning,\n                stacklevel=4,\n            )\n        if sample_lists:\n            options |= _tskit.SAMPLE_LISTS\n        kwargs = {\"options\": options}\n        if root_threshold <= 0:\n            raise ValueError(\"Root threshold must be greater than 0\")\n        if tracked_samples is not None:\n            # TODO remove this when we allow numpy arrays in the low-level API.\n            kwargs[\"tracked_samples\"] = list(tracked_samples)\n\n        self._tree_sequence = tree_sequence\n        self._ll_tree = _tskit.Tree(tree_sequence.ll_tree_sequence, **kwargs)\n        self._ll_tree.set_root_threshold(root_threshold)\n        
self._make_arrays()\n\n    def copy(self):\n        \"\"\"\n        Returns a deep copy of this tree. The returned tree will have identical state\n        to this tree.\n\n        :return: A copy of this tree.\n        :rtype: Tree\n        \"\"\"\n        copy = type(self).__new__(type(self))\n        copy._tree_sequence = self._tree_sequence\n        copy._ll_tree = self._ll_tree.copy()\n        copy._make_arrays()\n        return copy\n\n    # TODO make this method public and document it.\n    # Note that this probably does not cover all corner cases correctly\n    # https://github.com/tskit-dev/tskit/issues/1908\n    def _has_isolated_samples(self):\n        # TODO Is this definition correct for a single-node tree sequence?\n        for root in self.roots:\n            # If the root has no children then it must be a sample\n            if self.left_child(root) == NULL:\n                return True\n        return False\n\n    def _make_arrays(self):\n        # Store the low-level arrays for efficiency. 
There's no real overhead\n        # in this, because they refer to the same underlying memory as the\n        # tree object.\n        self._parent_array = self._ll_tree.parent_array\n        self._left_child_array = self._ll_tree.left_child_array\n        self._right_child_array = self._ll_tree.right_child_array\n        self._left_sib_array = self._ll_tree.left_sib_array\n        self._right_sib_array = self._ll_tree.right_sib_array\n        self._num_children_array = self._ll_tree.num_children_array\n        self._edge_array = self._ll_tree.edge_array\n\n    @property\n    def tree_sequence(self):\n        \"\"\"\n        Returns the tree sequence that this tree is from.\n\n        :return: The parent tree sequence for this tree.\n        :rtype: :class:`TreeSequence`\n        \"\"\"\n        return self._tree_sequence\n\n    @property\n    def root_threshold(self):\n        \"\"\"\n        Returns the minimum number of samples that a node must be an ancestor\n        of to be considered a potential root. This can be set, for example, when\n        calling the :meth:`TreeSequence.trees` iterator.\n\n        :return: The root threshold.\n        :rtype: int\n        \"\"\"\n        return self._ll_tree.get_root_threshold()\n\n    def __eq__(self, other):\n        ret = False\n        if type(other) is type(self):\n            ret = bool(self._ll_tree.equals(other._ll_tree))\n        return ret\n\n    def __ne__(self, other):\n        return not self.__eq__(other)\n\n    def first(self):\n        \"\"\"\n        Seeks to the first tree in the sequence. This can be called whether\n        the tree is in the null state or not.\n        \"\"\"\n        self._ll_tree.first()\n\n    def last(self):\n        \"\"\"\n        Seeks to the last tree in the sequence. 
This can be called whether\n        the tree is in the null state or not.\n        \"\"\"\n        self._ll_tree.last()\n\n    def next(self):  # noqa A002\n        \"\"\"\n        Seeks to the next tree in the sequence. If the tree is in the initial\n        null state we seek to the first tree (equivalent to calling :meth:`~Tree.first`).\n        Calling ``next`` on the last tree in the sequence results in the tree\n        being cleared back into the null initial state (equivalent to calling\n        :meth:`~Tree.clear`). The return value of the function indicates whether the\n        tree is in a non-null state, and can be used to loop over the trees::\n\n            # Iterate over the trees from left-to-right\n            tree = tskit.Tree(tree_sequence)\n            while tree.next():\n                # Do something with the tree.\n                print(tree.index)\n            # tree is now back in the null state.\n\n        :return: True if the tree has been transformed into one of the trees\n            in the sequence; False if the tree has been transformed into the\n            null state.\n        :rtype: bool\n        \"\"\"\n        return bool(self._ll_tree.next())\n\n    def prev(self):\n        \"\"\"\n        Seeks to the previous tree in the sequence. If the tree is in the initial\n        null state we seek to the last tree (equivalent to calling :meth:`~Tree.last`).\n        Calling ``prev`` on the first tree in the sequence results in the tree\n        being cleared back into the null initial state (equivalent to calling\n        :meth:`~Tree.clear`). 
The return value of the function indicates whether the\n        tree is in a non-null state, and can be used to loop over the trees::\n\n            # Iterate over the trees from right-to-left\n            tree = tskit.Tree(tree_sequence)\n            while tree.prev():\n                # Do something with the tree.\n                print(tree.index)\n            # tree is now back in the null state.\n\n        :return: True if the tree has been transformed into one of the trees\n            in the sequence; False if the tree has been transformed into the\n            null state.\n        :rtype: bool\n        \"\"\"\n        return bool(self._ll_tree.prev())\n\n    def clear(self):\n        \"\"\"\n        Resets this tree back to the initial null state. Calling this method\n        on a tree already in the null state has no effect.\n        \"\"\"\n        self._ll_tree.clear()\n\n    def seek_index(self, index, skip=None):\n        \"\"\"\n        Sets the state to represent the tree at the specified\n        index in the parent tree sequence. Negative indexes following the\n        standard Python conventions are allowed, i.e., ``index=-1`` will\n        seek to the last tree in the sequence.\n\n        .. include:: substitutions/linear_traversal_warning.rst\n\n        :param int index: The tree index to seek to.\n        :raises IndexError: If an index outside the acceptable range is provided.\n        \"\"\"\n        num_trees = self.tree_sequence.num_trees\n        if index < 0:\n            index += num_trees\n        if index < 0 or index >= num_trees:\n            raise IndexError(\"Index out of bounds\")\n        skip = False if skip is None else skip\n        self._ll_tree.seek_index(index, skip)\n\n    def seek(self, position, skip=None):\n        \"\"\"\n        Sets the state to represent the tree that covers the specified\n        position in the parent tree sequence. 
After a successful return\n        of this method we have ``tree.interval.left`` <= ``position``\n        < ``tree.interval.right``.\n\n        .. include:: substitutions/linear_traversal_warning.rst\n\n        :param float position: The position along the sequence length to\n            seek to.\n        :raises ValueError: If ``position`` is less than 0 or ``position`` is greater\n            than or equal to\n            :attr:`TreeSequence.sequence_length`.\n        \"\"\"\n        if position < 0 or position >= self.tree_sequence.sequence_length:\n            raise ValueError(\"Position out of bounds\")\n        skip = False if skip is None else skip\n        self._ll_tree.seek(position, skip)\n\n    def rank(self) -> tskit.Rank:\n        \"\"\"\n        Produce the rank of this tree in the enumeration of all leaf-labelled\n        trees of n leaves. See the :ref:`sec_tree_ranks` section for\n        details on ranking and unranking trees.\n\n        :raises ValueError: If the tree has multiple roots.\n        \"\"\"\n        return combinatorics.RankTree.from_tsk_tree(self).rank()\n\n    @staticmethod\n    def unrank(num_leaves, rank, *, span=1, branch_length=1) -> Tree:\n        \"\"\"\n        Reconstruct the tree of the given ``rank``\n        (see :meth:`tskit.Tree.rank`) with ``num_leaves`` leaves.\n        The labels and times of internal nodes are assigned by a postorder\n        traversal of the nodes, such that the time of each internal node\n        is the maximum time of its children plus the specified ``branch_length``.\n        The time of each leaf is 0.\n\n        See the :ref:`sec_tree_ranks` section for details on ranking and\n        unranking trees and what constitutes valid ranks.\n\n        :param int num_leaves: The number of leaves of the tree to generate.\n        :param tuple(int) rank: The rank of the tree to generate.\n        :param float span: The genomic span of the returned tree. 
The tree will cover\n            the interval :math:`[0, \\\\text{span})` and the :attr:`~Tree.tree_sequence`\n            from which the tree is taken will have its\n            :attr:`~tskit.TreeSequence.sequence_length` equal to ``span``.\n        :param float branch_length: The minimum length of a branch in this tree.\n        :raises ValueError: If the given rank is out of bounds for trees\n            with ``num_leaves`` leaves.\n        \"\"\"\n        rank_tree = combinatorics.RankTree.unrank(num_leaves, rank)\n        return rank_tree.to_tsk_tree(span=span, branch_length=branch_length)\n\n    def count_topologies(self, sample_sets=None) -> tskit.TopologyCounter:\n        \"\"\"\n        Calculates the distribution of embedded topologies for every combination\n        of the sample sets in ``sample_sets``. ``sample_sets`` defaults to all\n        samples in the tree grouped by population.\n\n        ``sample_sets`` need not include all samples but must be pairwise disjoint.\n\n        The returned object is a :class:`tskit.TopologyCounter` that contains\n        counts of topologies per combination of sample sets. For example::\n\n            topology_counter = tree.count_topologies()\n            rank, count = topology_counter[0, 1, 2].most_common(1)[0]\n\n        produces the most common tree topology, with populations 0, 1\n        and 2 as its tips, according to the genealogies of those\n        populations' samples in this tree.\n\n        The counts for each topology in the :class:`tskit.TopologyCounter`\n        are absolute counts that we would get if we were to select all\n        combinations of samples from the relevant sample sets.\n        For sample sets :math:`[s_0, s_1, ..., s_n]`, the total number of\n        topologies for those sample sets is equal to\n        :math:`|s_0| * |s_1| * ... * |s_n|`, so the counts in the counter\n        ``topology_counter[0, 1, ..., n]`` should sum to\n        :math:`|s_0| * |s_1| * ... 
* |s_n|`.\n\n        To convert the topology counts to probabilities, divide by the total\n        possible number of sample combinations from the sample sets in question::\n\n            set_sizes = [len(sample_set) for sample_set in sample_sets]\n            p = count / (set_sizes[0] * set_sizes[1] * set_sizes[2])\n\n        .. warning:: The interface for this method is preliminary and may be subject to\n            backwards incompatible changes in the near future.\n\n        :param list sample_sets: A list of lists of Node IDs, specifying the\n            groups of nodes to compute the statistic with.\n            Defaults to all samples grouped by population.\n        :raises ValueError: If nodes in ``sample_sets`` are invalid or are\n            internal samples.\n        \"\"\"\n        if sample_sets is None:\n            sample_sets = [\n                self.tree_sequence.samples(population=pop.id)\n                for pop in self.tree_sequence.populations()\n            ]\n\n        return combinatorics.tree_count_topologies(self, sample_sets)\n\n    def get_branch_length(self, u):\n        # Deprecated alias for branch_length\n        return self.branch_length(u)\n\n    def branch_length(self, u):\n        \"\"\"\n        Returns the length of the branch (in units of time) joining the\n        specified node to its parent. 
This is equivalent to::\n\n            tree.time(tree.parent(u)) - tree.time(u)\n\n        The branch length for a node that has no parent (e.g., a root) is\n        defined as zero.\n\n        Note that this is not related to the property `.length` which\n        is a deprecated alias for the genomic :attr:`~Tree.span` covered by a tree.\n\n        :param int u: The node of interest.\n        :return: The branch length from u to its parent.\n        :rtype: float\n        \"\"\"\n        ret = 0\n        parent = self.parent(u)\n        if parent != NULL:\n            ret = self.time(parent) - self.time(u)\n        return ret\n\n    def get_total_branch_length(self):\n        # Deprecated alias for total_branch_length\n        return self.total_branch_length\n\n    @property\n    def total_branch_length(self):\n        \"\"\"\n        Returns the sum of all the branch lengths in this tree (in\n        units of time). This is equivalent to::\n\n            sum(tree.branch_length(u) for u in tree.nodes())\n\n        Note that the branch lengths for root nodes are defined as zero.\n\n        As this is defined by a traversal of the tree, technically we\n        return the sum of all branch lengths that are reachable from\n        roots. Thus, this is the total length of all branches that are connected\n        to at least one sample. 
This distinction is only important\n        in tree sequences that contain 'dead branches', i.e., those\n        that define topology that is not connected to a tree root\n        (see :ref:`sec_data_model_tree_dead_leaves_and_branches`)\n\n        :return: The sum of lengths of branches in this tree.\n        :rtype: float\n        \"\"\"\n        return self._ll_tree.get_total_branch_length()\n\n    def get_mrca(self, u, v):\n        # Deprecated alias for mrca\n        return self.mrca(u, v)\n\n    def mrca(self, *args):\n        \"\"\"\n        Returns the most recent common ancestor of the specified nodes.\n\n        :param int `*args`: input node IDs, at least 2 arguments are required.\n        :return: The node ID of the most recent common ancestor of the\n            input nodes, or :data:`tskit.NULL` if the nodes do not share\n            a common ancestor in the tree.\n        :rtype: int\n        \"\"\"\n        if len(args) < 2:\n            raise ValueError(\"Must supply at least two arguments\")\n        mrca = args[0]\n        for node in args[1:]:\n            mrca = self._ll_tree.get_mrca(mrca, node)\n            if mrca == tskit.NULL:\n                break\n        return mrca\n\n    def get_tmrca(self, u, v):\n        # Deprecated alias for tmrca\n        return self.tmrca(u, v)\n\n    def tmrca(self, *args):\n        \"\"\"\n        Returns the time of the most recent common ancestor of the specified\n        nodes. This is equivalent to::\n\n            tree.time(tree.mrca(*args))\n\n        .. 
note::\n            If you are using this method to calculate average tmrca values along the\n            genome between pairs of sample nodes, for efficiency reasons you should\n            instead consider the ``mode=\"branch\"`` option of the\n            :meth:`TreeSequence.divergence` or :meth:`TreeSequence.diversity` methods.\n            Since these calculate the average branch length between pairs of sample\n            nodes, for samples at time 0 the resulting statistics will be exactly\n            twice the tmrca value.\n\n        :param `*args`: input node IDs, at least 2 arguments are required.\n        :return: The time of the most recent common ancestor of all the nodes.\n        :rtype: float\n        :raises ValueError: If the nodes do not share a single common ancestor in this\n            tree (i.e., if ``tree.mrca(*args) == tskit.NULL``)\n        \"\"\"\n        mrca = self.mrca(*args)\n        if mrca == tskit.NULL:\n            raise ValueError(f\"Nodes {args} do not share a common ancestor in the tree\")\n        return self.get_time(mrca)\n\n    def get_parent(self, u):\n        # Deprecated alias for parent\n        return self.parent(u)\n\n    def parent(self, u):\n        \"\"\"\n        Returns the parent of the specified node. Returns\n        :data:`tskit.NULL` if u is a root or is not a node in\n        the current tree.\n\n        :param int u: The node of interest.\n        :return: The parent of u.\n        :rtype: int\n        \"\"\"\n        return self._ll_tree.get_parent(u)\n\n    @property\n    def parent_array(self):\n        \"\"\"\n        A numpy array (dtype=np.int32) encoding the parent of each node\n        in this tree, such that ``tree.parent_array[u] == tree.parent(u)``\n        for all ``0 <= u <= ts.num_nodes``. 
See the :meth:`~.parent`\n        method for details on the semantics of tree parents and the\n        :ref:`sec_data_model_tree_structure` section for information on the\n        quintuply linked tree encoding.\n\n        .. include:: substitutions/virtual_root_array_note.rst\n\n        .. include:: substitutions/tree_array_warning.rst\n        \"\"\"\n        return self._parent_array\n\n    def ancestors(self, u):\n        \"\"\"\n        Returns an iterator over the ancestors of node ``u`` in this tree\n        (i.e. the chain of parents from ``u`` to the root).\n        \"\"\"\n        u = self.parent(u)\n        while u != -1:\n            yield u\n            u = self.parent(u)\n\n    # Quintuply linked tree structure.\n\n    def left_child(self, u):\n        \"\"\"\n        Returns the leftmost child of the specified node. Returns\n        :data:`tskit.NULL` if u is a leaf or is not a node in\n        the current tree. The left-to-right ordering of children\n        is arbitrary and should not be depended on; see the\n        :ref:`data model <sec_data_model_tree_structure>` section\n        for details.\n\n        This is a low-level method giving access to the quintuply linked\n        tree structure in memory; the :meth:`.children` method is a more\n        convenient way to obtain the children of a given node.\n\n        :param int u: The node of interest.\n        :return: The leftmost child of u.\n        :rtype: int\n        \"\"\"\n        return self._ll_tree.get_left_child(u)\n\n    @property\n    def left_child_array(self):\n        \"\"\"\n        A numpy array (dtype=np.int32) encoding the left child of each node\n        in this tree, such that ``tree.left_child_array[u] == tree.left_child(u)``\n        for all ``0 <= u <= ts.num_nodes``. 
See the :meth:`~.left_child`\n        method for details on the semantics of tree left_child and the\n        :ref:`sec_data_model_tree_structure` section for information on the\n        quintuply linked tree encoding.\n\n        .. include:: substitutions/virtual_root_array_note.rst\n\n        .. include:: substitutions/tree_array_warning.rst\n        \"\"\"\n        return self._left_child_array\n\n    def right_child(self, u):\n        \"\"\"\n        Returns the rightmost child of the specified node. Returns\n        :data:`tskit.NULL` if u is a leaf or is not a node in\n        the current tree. The left-to-right ordering of children\n        is arbitrary and should not be depended on; see the\n        :ref:`data model <sec_data_model_tree_structure>` section\n        for details.\n\n        This is a low-level method giving access to the quintuply linked\n        tree structure in memory; the :meth:`.children` method is a more\n        convenient way to obtain the children of a given node.\n\n        :param int u: The node of interest.\n        :return: The rightmost child of u.\n        :rtype: int\n        \"\"\"\n        return self._ll_tree.get_right_child(u)\n\n    @property\n    def right_child_array(self):\n        \"\"\"\n        A numpy array (dtype=np.int32) encoding the right child of each node\n        in this tree, such that ``tree.right_child_array[u] == tree.right_child(u)``\n        for all ``0 <= u <= ts.num_nodes``. See the :meth:`~.right_child`\n        method for details on the semantics of tree right_child and the\n        :ref:`sec_data_model_tree_structure` section for information on the\n        quintuply linked tree encoding.\n\n        .. include:: substitutions/virtual_root_array_note.rst\n\n        .. 
include:: substitutions/tree_array_warning.rst\n        \"\"\"\n        return self._right_child_array\n\n    def left_sib(self, u):\n        \"\"\"\n        Returns the sibling node to the left of u, or :data:`tskit.NULL`\n        if u does not have a left sibling.\n        The left-to-right ordering of children\n        is arbitrary and should not be depended on; see the\n        :ref:`data model <sec_data_model_tree_structure>` section\n        for details.\n\n        :param int u: The node of interest.\n        :return: The sibling node to the left of u.\n        :rtype: int\n        \"\"\"\n        return self._ll_tree.get_left_sib(u)\n\n    @property\n    def left_sib_array(self):\n        \"\"\"\n        A numpy array (dtype=np.int32) encoding the left sib of each node\n        in this tree, such that ``tree.left_sib_array[u] == tree.left_sib(u)``\n        for all ``0 <= u <= ts.num_nodes``. See the :meth:`~.left_sib`\n        method for details on the semantics of tree left_sib and the\n        :ref:`sec_data_model_tree_structure` section for information on the\n        quintuply linked tree encoding.\n\n        .. include:: substitutions/virtual_root_array_note.rst\n\n        .. 
include:: substitutions/tree_array_warning.rst\n        \"\"\"\n        return self._left_sib_array\n\n    def right_sib(self, u):\n        \"\"\"\n        Returns the sibling node to the right of u, or :data:`tskit.NULL`\n        if u does not have a right sibling.\n        The left-to-right ordering of children\n        is arbitrary and should not be depended on; see the\n        :ref:`data model <sec_data_model_tree_structure>` section\n        for details.\n\n        :param int u: The node of interest.\n        :return: The sibling node to the right of u.\n        :rtype: int\n        \"\"\"\n        return self._ll_tree.get_right_sib(u)\n\n    @property\n    def right_sib_array(self):\n        \"\"\"\n        A numpy array (dtype=np.int32) encoding the right sib of each node\n        in this tree, such that ``tree.right_sib_array[u] == tree.right_sib(u)``\n        for all ``0 <= u <= ts.num_nodes``. See the :meth:`~.right_sib`\n        method for details on the semantics of tree right_sib and the\n        :ref:`sec_data_model_tree_structure` section for information on the\n        quintuply linked tree encoding.\n\n        .. include:: substitutions/virtual_root_array_note.rst\n\n        .. include:: substitutions/tree_array_warning.rst\n        \"\"\"\n        return self._right_sib_array\n\n    def siblings(self, u):\n        \"\"\"\n        Returns the sibling(s) of the specified node ``u`` as a tuple of integer\n        node IDs. If ``u`` has no siblings or is not a node in the current tree,\n        returns an empty tuple. 
If ``u`` is the root of a single-root tree,\n        returns an empty tuple; if ``u`` is the root of a multi-root tree,\n        returns the other roots (note all the roots are related by the virtual root).\n        If ``u`` is the virtual root (which has no siblings), returns an empty tuple.\n        If ``u`` is an isolated node, whether it has siblings or not depends on\n        whether it is a sample or non-sample node; if it is a sample node,\n        returns the root(s) of the tree, otherwise, returns an empty tuple.\n        The ordering of siblings is arbitrary and should not be depended on;\n        see the :ref:`data model <sec_data_model_tree_structure>` section for details.\n\n        :param int u: The node of interest.\n        :return: The siblings of ``u``.\n        :rtype: tuple(int)\n        \"\"\"\n        if u == self.virtual_root:\n            return tuple()\n        parent = self.parent(u)\n        if self.is_root(u):\n            parent = self.virtual_root\n        if parent != tskit.NULL:\n            return tuple(v for v in self.children(parent) if u != v)\n        return tuple()\n\n    @property\n    def num_children_array(self):\n        \"\"\"\n        A numpy array (dtype=np.int32) encoding the number of children of\n        each node in this tree, such that\n        ``tree.num_children_array[u] == tree.num_children(u)`` for all\n        ``0 <= u <= ts.num_nodes``. See the :meth:`~.num_children`\n        method for details on the semantics of tree num_children and the\n        :ref:`sec_data_model_tree_structure` section for information on the\n        quintuply linked tree encoding.\n\n        .. include:: substitutions/virtual_root_array_note.rst\n\n        .. 
include:: substitutions/tree_array_warning.rst\n        \"\"\"\n        return self._num_children_array\n\n    def edge(self, u):\n        \"\"\"\n        Returns the id of the edge encoding the relationship between ``u``\n        and its parent, or :data:`tskit.NULL` if ``u`` is a root, virtual root\n        or is not a node in the current tree.\n\n        :param int u: The node of interest.\n        :return: Id of edge connecting u to its parent.\n        :rtype: int\n        \"\"\"\n        return self._ll_tree.get_edge(u)\n\n    @property\n    def edge_array(self):\n        \"\"\"\n        A numpy array (dtype=np.int32) of edge ids encoding the relationship\n        between the child node ``u`` and its parent, such that\n        ``tree.edge_array[u] == tree.edge(u)`` for all\n        ``0 <= u <= ts.num_nodes``. See the :meth:`~.edge`\n        method for details on the semantics of tree edge and the\n        :ref:`sec_data_model_tree_structure` section for information on the\n        quintuply linked tree encoding.\n\n        .. include:: substitutions/virtual_root_array_note.rst\n\n        .. include:: substitutions/tree_array_warning.rst\n        \"\"\"\n        return self._edge_array\n\n    # Sample list.\n\n    def left_sample(self, u):\n        return self._ll_tree.get_left_sample(u)\n\n    def right_sample(self, u):\n        return self._ll_tree.get_right_sample(u)\n\n    def next_sample(self, u):\n        return self._ll_tree.get_next_sample(u)\n\n    @property\n    def virtual_root(self):\n        \"\"\"\n        The ID of the virtual root in this tree. This is equal to\n        :attr:`TreeSequence.num_nodes`.\n\n        Please see the :ref:`tree roots <sec_data_model_tree_roots>`\n        section for more details.\n        \"\"\"\n        return self._ll_tree.get_virtual_root()\n\n    @property\n    def num_edges(self):\n        \"\"\"\n        The total number of edges in this tree. 
This is equal to the\n        number of tree sequence edges that intersect with this tree's\n        genomic interval.\n\n        Note that this may be greater than the number of branches that\n        are reachable from the tree's roots, since we can have topology\n        that is not associated with any samples.\n        \"\"\"\n        return self._ll_tree.get_num_edges()\n\n    @property\n    def left_root(self):\n        \"\"\"\n        The leftmost root in this tree. If there are multiple roots\n        in this tree, they are siblings of this node, and so we can\n        use :meth:`.right_sib` to iterate over all roots:\n\n        .. code-block:: python\n\n            u = tree.left_root\n            while u != tskit.NULL:\n                print(\"Root:\", u)\n                u = tree.right_sib(u)\n\n        The left-to-right ordering of roots is arbitrary and should\n        not be depended on; see the\n        :ref:`data model <sec_data_model_tree_structure>`\n        section for details.\n\n        This is a low-level method giving access to the quintuply linked\n        tree structure in memory; the :attr:`~Tree.roots` attribute is a more\n        convenient way to obtain the roots of a tree. If you are assuming\n        that there is a single root in the tree you should use the\n        :attr:`~Tree.root` property.\n\n        .. warning:: Do not use this property if you are assuming that there\n            is a single root in trees that are being processed. 
The\n            :attr:`~Tree.root` property should be used in this case, as it will\n            raise an error when multiple roots exist.\n\n        :rtype: int\n        \"\"\"\n        return self.left_child(self.virtual_root)\n\n    @property\n    def right_root(self):\n        return self.right_child(self.virtual_root)\n\n    def get_children(self, u):\n        # Deprecated alias for self.children\n        return self.children(u)\n\n    def children(self, u):\n        \"\"\"\n        Returns the children of the specified node ``u`` as a tuple of integer node IDs.\n        If ``u`` is a leaf, return the empty tuple. The ordering of children\n        is arbitrary and should not be depended on; see the\n        :ref:`data model <sec_data_model_tree_structure>` section\n        for details.\n\n        :param int u: The node of interest.\n        :return: The children of ``u`` as a tuple of integers\n        :rtype: tuple(int)\n        \"\"\"\n        return self._ll_tree.get_children(u)\n\n    def get_time(self, u):\n        # Deprecated alias for self.time\n        return self.time(u)\n\n    def time(self, u):\n        \"\"\"\n        Returns the time of the specified node. This is equivalent\n        to ``tree.tree_sequence.node(u).time`` except for the special\n        case of the tree's :ref:`virtual root <sec_data_model_tree_roots>`,\n        which is defined as positive infinity.\n\n        :param int u: The node of interest.\n        :return: The time of u.\n        :rtype: float\n        \"\"\"\n        return self._ll_tree.get_time(u)\n\n    def depth(self, u):\n        \"\"\"\n        Returns the number of nodes on the path from ``u`` to a\n        root, not including ``u``. 
Thus, the depth of a root is\n        zero.\n\n        As a special case, the depth of the :ref:`virtual root\n        <sec_data_model_tree_roots>` is defined as -1.\n\n        :param int u: The node of interest.\n        :return: The depth of u.\n        :rtype: int\n        \"\"\"\n        return self._ll_tree.depth(u)\n\n    def get_population(self, u):\n        # Deprecated alias for self.population\n        return self.population(u)\n\n    def population(self, u):\n        \"\"\"\n        Returns the population associated with the specified node.\n        Equivalent to ``tree.tree_sequence.node(u).population``.\n\n        :param int u: The node of interest.\n        :return: The ID of the population associated with node u.\n        :rtype: int\n        \"\"\"\n        return self._ll_tree.get_population(u)\n\n    def is_internal(self, u):\n        \"\"\"\n        Returns True if the specified node is not a leaf. A node is internal\n        if it has one or more children in the current tree.\n\n        :param int u: The node of interest.\n        :return: True if u is not a leaf node.\n        :rtype: bool\n        \"\"\"\n        return not self.is_leaf(u)\n\n    def is_leaf(self, u):\n        \"\"\"\n        Returns True if the specified node is a leaf. A node :math:`u` is a\n        leaf if it has zero children.\n\n        .. note::\n            :math:`u` can be any node in the entire tree sequence, including ones\n            which are not connected via branches to a root node of the tree (and which\n            are therefore not conventionally considered part of the tree). Indeed, if\n            there are many trees in the tree sequence, it is common for the majority of\n            non-sample nodes to be :meth:`isolated<is_isolated>` in any one\n            tree. By the definition above, this method will return ``True`` for such\n            a tree when a node of this sort is specified. 
Such nodes can be thought of\n            as \"dead leaves\", see :ref:`sec_data_model_tree_dead_leaves_and_branches`.\n\n        :param int u: The node of interest.\n        :return: True if u is a leaf node.\n        :rtype: bool\n        \"\"\"\n        return len(self.children(u)) == 0\n\n    def is_isolated(self, u):\n        \"\"\"\n        Returns True if the specified node is isolated in this tree: that is\n        it has no parents and no children (note that all isolated nodes in the tree\n        are therefore also :meth:`leaves<Tree.is_leaf>`). Sample nodes that are isolated\n        and have no mutations above them are used to represent\n        :ref:`missing data<sec_data_model_missing_data>`.\n\n        :param int u: The node of interest.\n        :return: True if u is an isolated node.\n        :rtype: bool\n        \"\"\"\n        return self.num_children(u) == 0 and self.parent(u) == NULL\n\n    def is_sample(self, u):\n        \"\"\"\n        Returns True if the specified node is a sample. A node :math:`u` is a\n        sample if it has been marked as a sample in the parent tree sequence.\n\n        :param int u: The node of interest.\n        :return: True if u is a sample.\n        :rtype: bool\n        \"\"\"\n        return bool(self._ll_tree.is_sample(u))\n\n    def is_descendant(self, u, v):\n        \"\"\"\n        Returns True if the specified node u is a descendant of node v and False\n        otherwise. A node :math:`u` is a descendant of another node :math:`v` if\n        :math:`v` is on the path from :math:`u` to root. 
A node is considered\n        to be a descendant of itself, so ``tree.is_descendant(u, u)`` will be\n        True for any valid node.\n\n        :param int u: The descendant node.\n        :param int v: The ancestral node.\n        :return: True if u is a descendant of v.\n        :rtype: bool\n        :raises ValueError: If u or v are not valid node IDs.\n        \"\"\"\n        return bool(self._ll_tree.is_descendant(u, v))\n\n    @property\n    def num_nodes(self):\n        \"\"\"\n        Returns the number of nodes in the :class:`TreeSequence` this tree is in.\n        Equivalent to ``tree.tree_sequence.num_nodes``.\n\n        .. deprecated:: 0.4\n            Use :attr:`Tree.tree_sequence.num_nodes<TreeSequence.num_nodes>` if you want\n            the number of nodes in the entire tree sequence, or\n            ``len(tree.preorder())`` to find the number of nodes that are\n            reachable from all roots in this tree.\n\n        :rtype: int\n\n        \"\"\"\n        warnings.warn(\n            \"This property is a deprecated alias for Tree.tree_sequence.num_nodes \"\n            \"and will be removed in the future. To obtain the number of nodes \"\n            \"in the topology of the current tree (i.e. reachable from the roots) \"\n            \"use len(tree.preorder()).\",\n            FutureWarning,\n            stacklevel=4,\n        )\n        return self.tree_sequence.num_nodes\n\n    @property\n    def num_roots(self):\n        \"\"\"\n        The number of roots in this tree, as defined in the :attr:`~Tree.roots`\n        attribute.\n\n        Only requires O(number of roots) time.\n\n        :rtype: int\n        \"\"\"\n        return self._ll_tree.get_num_roots()\n\n    @property\n    def has_single_root(self):\n        \"\"\"\n        ``True`` if this tree has a single root, ``False`` otherwise.\n        Equivalent to tree.num_roots == 1. 
This is a O(1) operation.\n\n        :rtype: bool\n        \"\"\"\n        root = self.left_root\n        if root != NULL and self.right_sib(root) == NULL:\n            return True\n        return False\n\n    @property\n    def has_multiple_roots(self):\n        \"\"\"\n        ``True`` if this tree has more than one root, ``False`` otherwise.\n        Equivalent to tree.num_roots > 1. This is a O(1) operation.\n\n        :rtype: bool\n        \"\"\"\n        root = self.left_root\n        if root != NULL and self.right_sib(root) != NULL:\n            return True\n        return False\n\n    @property\n    def roots(self):\n        \"\"\"\n        The list of roots in this tree. A root is defined as a unique endpoint of the\n        paths starting at samples, subject to the condition that it is connected to at\n        least :attr:`root_threshold` samples. We can define the set of roots as follows:\n\n        .. code-block:: python\n\n            roots = set()\n            for u in tree_sequence.samples():\n                while tree.parent(u) != tskit.NULL:\n                    u = tree.parent(u)\n                if tree.num_samples(u) >= tree.root_threshold:\n                    roots.add(u)\n            # roots is now the set of all roots in this tree.\n            assert sorted(roots) == sorted(tree.roots)\n\n        The roots of the tree are returned in a list, in no particular order.\n\n        Only requires O(number of roots) time.\n\n        .. 
note::\n            In trees with large amounts of :ref:`sec_data_model_missing_data`,\n            for example where a region of the genome lacks any ancestral information,\n            there can be a very large number of roots, potentially all the samples\n            in the tree sequence.\n\n        :return: The list of roots in this tree.\n        :rtype: list\n        \"\"\"\n        roots = []\n        u = self.left_root\n        while u != NULL:\n            roots.append(u)\n            u = self.right_sib(u)\n        return roots\n\n    def get_root(self):\n        # Deprecated alias for self.root\n        return self.root\n\n    @property\n    def root(self):\n        \"\"\"\n        The root of this tree. If the tree contains multiple roots, a ValueError is\n        raised indicating that the :attr:`~Tree.roots` attribute should be used instead.\n\n        :return: The root node.\n        :rtype: int\n        :raises ValueError: if this tree contains more than one root.\n        \"\"\"\n        if self.has_multiple_roots:\n            raise ValueError(\"More than one root exists. Use tree.roots instead\")\n        return self.left_root\n\n    def is_root(self, u) -> bool:\n        \"\"\"\n        Returns ``True`` if the specified node is a root in this tree (see\n        :attr:`~Tree.roots` for the definition of a root). This is exactly equivalent to\n        finding the node ID in :attr:`~Tree.roots`, but is more efficient for trees\n        with large numbers of roots, such as in regions with extensive\n        :ref:`sec_data_model_missing_data`.  
Note that ``False`` is returned for all\n        other nodes, including :ref:`isolated<sec_data_model_tree_isolated_nodes>`\n        non-sample nodes which are not found in the topology of the current tree.\n\n        :param int u: The node of interest.\n        :return: ``True`` if u is a root.\n        \"\"\"\n        return (\n            self.num_samples(u) >= self.root_threshold and self.parent(u) == tskit.NULL\n        )\n\n    def get_index(self):\n        # Deprecated alias for self.index\n        return self.index\n\n    @property\n    def index(self):\n        \"\"\"\n        Returns the index this tree occupies in the parent tree sequence.\n        This index is zero based, so the first tree in the sequence has index 0.\n\n        :return: The index of this tree.\n        :rtype: int\n        \"\"\"\n        return self._ll_tree.get_index()\n\n    def get_interval(self):\n        # Deprecated alias for self.interval\n        return self.interval\n\n    @property\n    def interval(self):\n        \"\"\"\n        Returns the coordinates of the genomic interval that this tree\n        represents the history of. The interval is returned as a tuple\n        :math:`(l, r)` and is a half-open interval such that the left\n        coordinate is inclusive and the right coordinate is exclusive. This\n        tree therefore applies to all genomic locations :math:`x` such that\n        :math:`l \\\\leq x < r`.\n\n        :return: A named tuple (l, r) representing the left-most (inclusive)\n            and right-most (exclusive) coordinates of the genomic region\n            covered by this tree. 
The coordinates can be accessed by index\n            (``0`` or ``1``) or equivalently by name (``.left`` or ``.right``)\n        :rtype: Interval\n        \"\"\"\n        return Interval(self._ll_tree.get_left(), self._ll_tree.get_right())\n\n    def get_length(self):\n        # Deprecated alias for self.span\n        return self.length\n\n    @property\n    def length(self):\n        # Deprecated alias for self.span\n        return self.span\n\n    @property\n    def span(self):\n        \"\"\"\n        Returns the genomic distance that this tree spans.\n        This is defined as :math:`r - l`, where :math:`(l, r)` is the genomic\n        interval returned by :attr:`~Tree.interval`.\n\n        :return: The genomic distance covered by this tree.\n        :rtype: float\n        \"\"\"\n        return self.interval.span\n\n    @property\n    def mid(self):\n        \"\"\"\n        Returns the midpoint of the genomic interval that this tree represents\n        the history of. This is defined as :math:`(l + (r - l) / 2)`, where\n        :math:`(l, r)` is the genomic interval returned by\n        :attr:`~Tree.interval`.\n\n        :return: The midpoint of the genomic interval covered by this tree.\n        :rtype: float\n        \"\"\"\n        return self.interval.mid\n\n    def get_sample_size(self):\n        # Deprecated alias for self.sample_size\n        return self.sample_size\n\n    @property\n    def sample_size(self):\n        # Deliberately undocumented but kept for backwards compatibility.\n        # The proper way to access this is via tree.tree_sequence.num_samples\n        return self._ll_tree.get_sample_size()\n\n    def draw_text(\n        self,\n        orientation=None,\n        *,\n        node_labels=None,\n        max_time=None,\n        use_ascii=False,\n        order=None,\n    ):\n        \"\"\"\n        Create a text representation of a tree.\n\n        :param str orientation: one of ``\"top\"``, ``\"left\"``, ``\"bottom\"``, or\n            ``\"right\"``, 
specifying the margin on which the root is placed. Specifying\n            ``\"left\"`` or ``\"right\"`` will lead to time being shown on the x axis (i.e.\n            a \"horizontal\" tree). If ``None`` (default) use the standard coalescent\n            arrangement of a vertical tree with recent nodes at the bottom of the plot\n            and older nodes above.\n        :param dict node_labels: If specified, show custom labels for the nodes\n            that are present in the map. Any nodes not specified in the map will\n            not have a node label.\n        :param str max_time: If equal to ``\"tree\"`` (the default), the maximum time\n            is set to be that of the oldest root in the tree. If equal to ``\"ts\"`` the\n            maximum time is set to be the time of the oldest root in the tree\n            sequence; this is useful when drawing trees from the same tree sequence as it\n            ensures that node heights are consistent.\n        :param bool use_ascii: If ``False`` (default) then use unicode\n            `box drawing characters \\\n<https://en.wikipedia.org/wiki/Box-drawing_character>`_\n            to render the tree. 
If ``True``, use plain ascii characters, which look\n            cruder but are less susceptible to misalignment or font substitution.\n            Alternatively, if you are having alignment problems with Unicode, you can try\n            out the solution documented `here \\\n<https://github.com/tskit-dev/tskit/issues/189#issuecomment-499114811>`_.\n        :param str order: The left-to-right ordering of child nodes in the drawn tree.\n            This can be either: ``\"minlex\"``, which minimises the differences\n            between adjacent trees (see also the ``\"minlex_postorder\"`` traversal\n            order for the :meth:`.nodes` method); or ``\"tree\"`` which draws trees\n            in the left-to-right order defined by the\n            :ref:`quintuply linked tree structure <sec_data_model_tree_structure>`.\n            If not specified or None, this defaults to ``\"minlex\"``.\n\n        :return: A text representation of a tree.\n        :rtype: str\n        \"\"\"\n        orientation = drawing.check_orientation(orientation)\n        if orientation in (drawing.LEFT, drawing.RIGHT):\n            text_tree = drawing.HorizontalTextTree(\n                self,\n                orientation=orientation,\n                node_labels=node_labels,\n                max_time=max_time,\n                use_ascii=use_ascii,\n                order=order,\n            )\n        else:\n            text_tree = drawing.VerticalTextTree(\n                self,\n                orientation=orientation,\n                node_labels=node_labels,\n                max_time=max_time,\n                use_ascii=use_ascii,\n                order=order,\n            )\n        return str(text_tree)\n\n    def draw_svg(\n        self,\n        path=None,\n        *,\n        size=None,\n        time_scale=None,\n        tree_height_scale=None,\n        title=None,\n        max_time=None,\n        min_time=None,\n        max_tree_height=None,\n        node_labels=None,\n        
mutation_labels=None,\n        node_titles=None,\n        mutation_titles=None,\n        root_svg_attributes=None,\n        style=None,\n        order=None,\n        force_root_branch=None,\n        symbol_size=None,\n        x_axis=None,\n        x_label=None,\n        x_regions=None,\n        y_axis=None,\n        y_label=None,\n        y_ticks=None,\n        y_gridlines=None,\n        all_edge_mutations=None,\n        omit_sites=None,\n        canvas_size=None,\n        preamble=None,\n        **kwargs,\n    ):\n        \"\"\"\n        Return an SVG representation of a single tree. By default, numeric\n        labels are drawn beside nodes and mutations: these can be altered using the\n        ``node_labels`` and ``mutation_labels`` parameters. See the\n        :ref:`visualization tutorial<tutorials:sec_tskit_viz>` for more details.\n\n        :param str path: The path to the file to write the output. If None, do not\n            write to file.\n        :param tuple(int, int) size: A tuple of (width, height) specifying a target\n            drawing size in abstract user units (usually interpreted as pixels on\n            initial display). Components of the drawing will be scaled so that the total\n            plot including labels etc. normally fits onto a canvas of this size (see\n            ``canvas_size`` below). If ``None``, pick a size appropriate for a tree\n            with a reasonably small number (i.e. tens) of samples. Default: ``None``\n        :type size:\n        :param str time_scale: Control how height values for nodes are computed.\n            If this is equal to ``\"time\"`` (the default), node heights are proportional\n            to their time values. If this is equal to ``\"log_time\"``, node heights are\n            proportional to their log(time) values. If it is equal to ``\"rank\"``, node\n            heights are spaced equally according to their ranked times.\n        :param str tree_height_scale: Deprecated alias for time_scale. 
(Deprecated in\n                0.3.6)\n        :param str title: A title string to be included in the SVG output. If ``None``\n            (default) no title is shown, which gives more vertical space for the tree.\n        :param str,float max_time: The maximum plotted time value in the current\n            scaling system (see ``time_scale``). Can be either a string or a\n            numeric value. If equal to ``\"tree\"`` (the default), the maximum time\n            is set to be that of the oldest root in the tree. If equal to ``\"ts\"`` the\n            maximum time is set to be the time of the oldest root in the tree\n            sequence; this is useful when drawing trees from the same tree sequence as it\n            ensures that node heights are consistent. If a numeric value, this is used as\n            the maximum plotted time by which to scale other nodes.\n        :param str,float min_time: The minimum plotted time value in the current\n            scaling system (see ``time_scale``). Can be either a string or a\n            numeric value. If equal to ``\"tree\"`` (the default), the minimum time\n            is set to be that of the youngest node in the tree. If equal to ``\"ts\"`` the\n            minimum time is set to be the time of the youngest node in the tree\n            sequence; this is useful when drawing trees from the same tree sequence as it\n            ensures that node heights are consistent. If a numeric value, this is used as\n            the minimum plotted time.\n        :param str,float max_tree_height: Deprecated alias for max_time. (Deprecated in\n            0.3.6)\n        :param node_labels: If specified, show custom labels for the nodes\n            (specified by ID) that are present in this map; any nodes not present will\n            not have a label. 
To use a metadata key, for example, use\n            ``node_labels={node.id: node.metadata[\"key\"] for node in ts.nodes()}``.\n        :type node_labels: dict(int, str)\n        :param mutation_labels: If specified, show custom labels for the\n            mutations (specified by ID) that are present in the map; any mutations\n            not present will not have a label.\n        :type mutation_labels: dict(int, str)\n        :param dict(int, str) node_titles: If specified, add a ``<title>`` string to\n            symbols for each node (specified by ID) present in this map. SVG visualizers\n            such as web browsers will commonly display this string on mousing over the\n            node symbol.\n        :param dict(int, str) mutation_titles: If specified, add a ``<title>`` string to\n            symbols for each mutation (specified by ID) present in this map. SVG\n            visualizers such as web browsers will commonly display this string on\n            mousing over the mutation symbol in the tree and (if shown) on the x axis.\n        :param dict root_svg_attributes: Additional attributes, such as an id, that will\n            be embedded in the root ``<svg>`` tag of the generated drawing.\n        :param str style: A\n            `css style string <https://www.w3.org/TR/CSS22/syndata.html>`_ that will be\n            included in the ``<style>`` tag of the generated svg.\n        :param str order: The left-to-right ordering of child nodes in the drawn tree.\n            This can be either: ``\"minlex\"``, which minimises the differences\n            between adjacent trees (see also the ``\"minlex_postorder\"`` traversal\n            order for the :meth:`.nodes` method); or ``\"tree\"`` which draws trees\n            in the left-to-right order defined by the\n            :ref:`quintuply linked tree structure <sec_data_model_tree_structure>`.\n            If not specified or None, this defaults to ``\"minlex\"``.\n        :param bool force_root_branch: 
If ``True`` always plot a branch (edge) above the\n            root(s) in the tree. If ``None`` (default) then only plot such root branches\n            if there is a mutation above a root of the tree.\n        :param float symbol_size: Change the default size of the node and mutation\n            plotting symbols. If ``None`` (default) use a standard size.\n        :param bool x_axis: Should the plot have an X axis line, showing the start and\n            end position of this tree along the genome. If ``None`` (default) do not\n            plot an X axis.\n        :param str x_label: Place a label under the plot. If ``None`` (default) and\n            there is an X axis, create and place an appropriate label.\n        :param dict x_regions: A dictionary mapping (left, right) tuples to names. This\n            draws a box, labelled with the name, on the X axis between the left and\n            right positions, and can be used for annotating genomic regions (e.g.\n            genes) on the X axis. If ``None`` (default) do not plot any regions.\n        :param bool y_axis: Should the plot have a Y axis line, showing time (or\n            ranked node time if ``time_scale=\"rank\"``). If ``None`` (default)\n            do not plot a Y axis.\n        :param str y_label: Place a label to the left of the plot. If ``None`` (default)\n            and there is a Y axis, create and place an appropriate label.\n        :param Union[list, dict] y_ticks: A list of Y values at which to plot\n            tickmarks, or a dictionary mapping Y values to labels (``[]`` gives no\n            tickmarks). If ``None`` (default), plot one tickmark for each unique node\n            value. 
Note that if ``time_scale=\"rank\"``, the Y values refer to the\n            zero-based rank of the plotted nodes, rather than the node time itself.\n        :param bool y_gridlines: Whether to plot horizontal lines behind the tree\n            at each y tickmark.\n        :param bool all_edge_mutations: The edge on which a mutation occurs may span\n            multiple trees. If ``False`` or ``None`` (default) mutations are only drawn\n            on an edge if their site position exists within the genomic interval covered\n            by this tree. If ``True``, all mutations on each edge of the tree are drawn,\n            even if their genomic position is to the left or right of the tree\n            itself. Note that this means that independent drawings of different trees\n            from the same tree sequence may share some plotted mutations.\n        :param bool omit_sites: If True, omit sites and mutations from the drawing.\n            Default: False\n        :param tuple(int, int) canvas_size: The (width, height) of the SVG canvas.\n            This will change the SVG width and height without rescaling graphical\n            elements, allowing extra room e.g. for unusually long labels. If ``None``\n            take the canvas size to be the same as the target drawing size (see\n            ``size``, above). Default: None\n        :param str preamble: SVG commands to be included at the start of the returned\n            object, immediately after the opening tag. These can include custom svg\n            elements such as legends or annotations or even entire ``<svg>`` elements.\n            The preamble is not checked for validity, so it is up to the user to\n            ensure that it is valid SVG. 
Default: None\n\n        :return: An SVG representation of a tree.\n        :rtype: SVGString\n        \"\"\"\n        svgtree = drawing.SvgTree(\n            self,\n            size,\n            time_scale=time_scale,\n            tree_height_scale=tree_height_scale,\n            title=title,\n            max_time=max_time,\n            min_time=min_time,\n            max_tree_height=max_tree_height,\n            node_labels=node_labels,\n            mutation_labels=mutation_labels,\n            node_titles=node_titles,\n            mutation_titles=mutation_titles,\n            root_svg_attributes=root_svg_attributes,\n            style=style,\n            order=order,  # NB undocumented: Tree.draw_svg can also take an iterable here\n            force_root_branch=force_root_branch,\n            symbol_size=symbol_size,\n            x_axis=x_axis,\n            x_label=x_label,\n            x_regions=x_regions,\n            y_axis=y_axis,\n            y_label=y_label,\n            y_ticks=y_ticks,\n            y_gridlines=y_gridlines,\n            all_edge_mutations=all_edge_mutations,\n            omit_sites=omit_sites,\n            canvas_size=canvas_size,\n            preamble=preamble,\n            **kwargs,\n        )\n        return svgtree.draw(path)\n\n    def draw(\n        self,\n        path=None,\n        width=None,\n        height=None,\n        node_labels=None,\n        node_colours=None,\n        mutation_labels=None,\n        mutation_colours=None,\n        format=None,  # noqa A002\n        edge_colours=None,\n        time_scale=None,\n        tree_height_scale=None,\n        max_time=None,\n        min_time=None,\n        max_tree_height=None,\n        order=None,\n        omit_sites=None,\n    ):\n        \"\"\"\n        Returns a drawing of this tree.\n\n        When working in a Jupyter notebook, use the ``IPython.display.SVG``\n        function to display the SVG output from this function inline in the notebook::\n\n            
SVG(tree.draw())\n\n        The unicode format uses unicode `box drawing characters\n        <https://en.wikipedia.org/wiki/Box-drawing_character>`_ to render the tree.\n        This allows rendered trees to be printed out to the terminal::\n\n            print(tree.draw(format=\"unicode\"))\n              6\n            ┏━┻━┓\n            ┃   5\n            ┃ ┏━┻┓\n            ┃ ┃  4\n            ┃ ┃ ┏┻┓\n            3 0 1 2\n\n        The ``node_labels`` argument allows the user to specify custom labels\n        for nodes, or no labels at all::\n\n            print(tree.draw(format=\"unicode\", node_labels={}))\n              ┃\n            ┏━┻━┓\n            ┃   ┃\n            ┃ ┏━┻┓\n            ┃ ┃  ┃\n            ┃ ┃ ┏┻┓\n            ┃ ┃ ┃ ┃\n\n        Note: in some environments such as Jupyter notebooks with Windows or Mac,\n        users have observed that the Unicode box drawings can be misaligned. In\n        these cases, we recommend using the SVG or ASCII display formats instead.\n        If you have a strong preference for aligned Unicode, you can try out the\n        solution documented\n        `here <https://github.com/tskit-dev/tskit/issues/189#issuecomment-499114811>`_.\n\n        :param str path: The path to the file to write the output. If None, do not\n            write to file.\n        :param int width: The width of the image in pixels. If not specified, either\n            defaults to the minimum size required to depict the tree (text formats)\n            or 200 pixels.\n        :param int height: The height of the image in pixels. If not specified, either\n            defaults to the minimum size required to depict the tree (text formats)\n            or 200 pixels.\n        :param dict node_labels: If specified, show custom labels for the nodes\n            that are present in the map. 
Any nodes not specified in the map will\n            not have a node label.\n        :param dict node_colours: If specified, show custom colours for the nodes\n            given in the map. Any nodes not specified in the map will take the default\n            colour; a value of ``None`` is treated as transparent and hence the node\n            symbol is not plotted. (Only supported in the SVG format.)\n        :param dict mutation_labels: If specified, show custom labels for the mutations\n            (specified by ID) that are present in the map. Any mutations not in the map\n            will not have a label. (Showing mutations is currently only supported in the\n            SVG format)\n        :param dict mutation_colours: If specified, show custom colours for the mutations\n            given in the map (specified by ID). As for ``node_colours``, mutations not\n            present in the map take the default colour, and those mapping to ``None``\n            are not drawn. (Only supported in the SVG format.)\n        :param str format: The format of the returned image. Currently supported\n            are 'svg', 'ascii' and 'unicode'. Note that the :meth:`Tree.draw_svg`\n            method provides more comprehensive functionality for creating SVGs.\n        :param dict edge_colours: If specified, show custom colours for the edge\n            joining each node in the map to its parent. As for ``node_colours``,\n            unspecified edges take the default colour, and ``None`` values result in the\n            edge being omitted. (Only supported in the SVG format.)\n        :param str time_scale: Control how height values for nodes are computed.\n            If this is equal to ``\"time\"``, node heights are proportional to their time\n            values. If this is equal to ``\"log_time\"``, node heights are proportional to\n            their log(time) values. 
If it is equal to ``\"rank\"``, node heights are spaced\n            equally according to their ranked times. For SVG output the default is\n            'time'-scale whereas for text output the default is 'rank'-scale.\n            Time scaling is not currently supported for text output.\n        :param str tree_height_scale: Deprecated alias for time_scale. (Deprecated in\n                0.3.6)\n        :param str,float max_time: The maximum time value in the current\n            scaling system (see ``time_scale``). Can be either a string or a\n            numeric value. If equal to ``\"tree\"``, the maximum time is set to be\n            that of the oldest root in the tree. If equal to ``\"ts\"`` the maximum\n            time is set to be the time of the oldest root in the tree sequence;\n            this is useful when drawing trees from the same tree sequence as it ensures\n            that node heights are consistent. If a numeric value, this is used as the\n            maximum time by which to scale other nodes. This parameter\n            is not currently supported for text output.\n        :param str,float min_time: The minimum time value in the current\n            scaling system (see ``time_scale``). Can be either a string or a\n            numeric value. If equal to ``\"tree\"``, the minimum time is set to be\n            that of the youngest node in the tree. If equal to ``\"ts\"`` the minimum\n            time is set to be the time of the youngest node in the tree sequence;\n            this is useful when drawing trees from the same tree sequence as it ensures\n            that node heights are consistent. If a numeric value, this is used as the\n            minimum time to display. This parameter is not currently supported for text\n            output.\n        :param str max_tree_height: Deprecated alias for max_time. 
(Deprecated in\n                0.3.6)\n        :param str order: The left-to-right ordering of child nodes in the drawn tree.\n            This can be either: ``\"minlex\"``, which minimises the differences\n            between adjacent trees (see also the ``\"minlex_postorder\"`` traversal\n            order for the :meth:`.nodes` method); or ``\"tree\"`` which draws trees\n            in the left-to-right order defined by the\n            :ref:`quintuply linked tree structure <sec_data_model_tree_structure>`.\n            If not specified or None, this defaults to ``\"minlex\"``.\n        :param bool omit_sites: If True, omit sites and mutations from the drawing\n            (only relevant to the SVG format). Default: False\n        :return: A representation of this tree in the requested format.\n        :rtype: str\n        \"\"\"\n        output = drawing.draw_tree(\n            self,\n            format=format,\n            width=width,\n            height=height,\n            node_labels=node_labels,\n            node_colours=node_colours,\n            mutation_labels=mutation_labels,\n            mutation_colours=mutation_colours,\n            edge_colours=edge_colours,\n            time_scale=time_scale,\n            tree_height_scale=tree_height_scale,\n            max_time=max_time,\n            min_time=min_time,\n            max_tree_height=max_tree_height,\n            order=order,\n            omit_sites=omit_sites,\n        )\n        if path is not None:\n            with open(path, \"w\") as f:\n                f.write(output)\n        return output\n\n    def get_num_mutations(self):\n        return self.num_mutations\n\n    @property\n    def num_mutations(self):\n        \"\"\"\n        Returns the total number of mutations across all sites on this tree.\n\n        :return: The total number of mutations over all sites on this tree.\n        :rtype: int\n        \"\"\"\n        return sum(len(site.mutations) for site in self.sites())\n\n    
@property\n    def num_sites(self):\n        \"\"\"\n        Returns the number of sites on this tree.\n\n        :return: The number of sites on this tree.\n        :rtype: int\n        \"\"\"\n        return self._ll_tree.get_num_sites()\n\n    def sites(self):\n        \"\"\"\n        Returns an iterator over all the :ref:`sites <sec_site_table_definition>`\n        in this tree. Sites are returned in order of increasing ID\n        (and also position). See the :class:`Site` class for details on\n        the available fields for each site.\n\n        :return: An iterator over all sites in this tree.\n        \"\"\"\n        # TODO change the low-level API to just return the IDs of the sites.\n        for ll_site in self._ll_tree.get_sites():\n            _, _, _, id_, _ = ll_site\n            yield self.tree_sequence.site(id_)\n\n    def mutations(self):\n        \"\"\"\n        Returns an iterator over all the\n        :ref:`mutations <sec_mutation_table_definition>` in this tree.\n        Mutations are returned in their\n        :ref:`order in the mutations table<sec_mutation_requirements>`,\n        that is, by nondecreasing site ID, and within a site, by decreasing\n        mutation time with parent mutations before their children.\n        See the :class:`Mutation` class for details on the available fields for\n        each mutation.\n\n        The returned iterator is equivalent to iterating over all sites\n        and all mutations in each site, i.e.::\n\n            for site in tree.sites():\n                for mutation in site.mutations:\n                    yield mutation\n\n        :return: An iterator over all :class:`Mutation` objects in this tree.\n        :rtype: iter(:class:`Mutation`)\n        \"\"\"\n        for site in self.sites():\n            yield from site.mutations\n\n    def get_leaves(self, u):\n        # Deprecated alias for samples. 
See the discussion in the get_num_leaves\n        # method for why this method is here and why it is semantically incorrect.\n        # The 'leaves' iterator below correctly returns the leaves below a given\n        # node.\n        return self.samples(u)\n\n    def leaves(self, u=None):\n        \"\"\"\n        Returns an iterator over all the leaves in this tree that descend from\n        the specified node. If :math:`u`  is not specified, return all leaves on\n        the tree (i.e. all leaves reachable from the tree root(s), see note below).\n\n        .. note::\n            :math:`u` can be any node in the entire tree sequence, including ones\n            which are not connected via branches to a root node of the tree. If\n            called on such a node, the iterator will return \"dead\" leaves\n            (see :ref:`sec_data_model_tree_dead_leaves_and_branches`) which cannot\n            be reached from a root of this tree. However, dead leaves will never be\n            returned if :math:`u` is left unspecified.\n\n        :param int u: The node of interest.\n        :return: An iterator over all leaves in the subtree rooted at u.\n        :rtype: collections.abc.Iterable\n        \"\"\"\n        roots = [u]\n        if u is None:\n            roots = self.roots\n        for root in roots:\n            for v in self.nodes(root):\n                if self.is_leaf(v):\n                    yield v\n\n    def _sample_generator(self, u):\n        if self._ll_tree.get_options() & _tskit.SAMPLE_LISTS:\n            samples = self.tree_sequence.samples()\n            index = self.left_sample(u)\n            if index != NULL:\n                stop = self.right_sample(u)\n                while True:\n                    yield samples[index]\n                    if index == stop:\n                        break\n                    index = self.next_sample(index)\n        else:\n            # Fall back on iterating over all nodes in the tree, yielding\n            # 
samples as we see them.\n            for v in self.nodes(u):\n                if self.is_sample(v):\n                    yield v\n\n    def samples(self, u=None):\n        \"\"\"\n        Returns an iterator over the numerical IDs of all the sample nodes in\n        this tree that are underneath the node with ID ``u``. If ``u`` is a sample,\n        it is included in the returned iterator. If ``u`` is not a sample, it is\n        possible for the returned iterator to be empty, for example if ``u`` is an\n        :meth:`isolated<Tree.is_isolated>` node that is not part of the current\n        topology. If u is not specified, return all sample node IDs in the tree\n        (equivalent to all the sample node IDs in the tree sequence).\n\n        If the :meth:`TreeSequence.trees` method is called with\n        ``sample_lists=True``, this method uses an efficient algorithm to find\n        the sample nodes. If not, a simple traversal based method is used.\n\n        .. note::\n\n            The iterator is *not* guaranteed to return the sample node IDs in\n            numerical or any other particular order.\n\n        :param int u: The node of interest.\n        :return: An iterator over all sample node IDs in the subtree rooted at u.\n        :rtype: collections.abc.Iterable\n        \"\"\"\n        roots = [u]\n        if u is None:\n            roots = self.roots\n        for root in roots:\n            yield from self._sample_generator(root)\n\n    def num_children(self, u):\n        \"\"\"\n        Returns the number of children of the specified\n        node (i.e., ``len(tree.children(u))``)\n\n        :param int u: The node of interest.\n        :return: The number of immediate children of the node u in this tree.\n        :rtype: int\n        \"\"\"\n        return self._ll_tree.get_num_children(u)\n\n    def get_num_leaves(self, u):\n        # Deprecated alias for num_samples. 
The method name is inaccurate\n        # as this will count the number of _samples_. This is only provided to\n        # avoid breaking existing code and should not be used in new code. We could\n        # change this method to be semantically correct and just count the\n        # number of leaves we hit in the leaves() iterator. However, this would\n        # have the undesirable effect of making code that depends on the constant\n        # time performance of get_num_leaves many times slower. So, the best option\n        # is to leave this method as is, and to slowly deprecate it out. Once this\n        # has been removed, we might add in a ``num_leaves`` method that returns the\n        # length of the leaves() iterator as one would expect.\n        return self.num_samples(u)\n\n    def get_num_samples(self, u=None):\n        # Deprecated alias for num_samples.\n        return self.num_samples(u)\n\n    def num_samples(self, u=None):\n        \"\"\"\n        Returns the number of sample nodes in this tree underneath the specified\n        node (including the node itself). If u is not specified return\n        the total number of samples in the tree.\n\n        This is a constant time operation.\n\n        :param int u: The node of interest.\n        :return: The number of samples in the subtree rooted at u.\n        :rtype: int\n        \"\"\"\n        u = self.virtual_root if u is None else u\n        return self._ll_tree.get_num_samples(u)\n\n    def get_num_tracked_leaves(self, u):\n        # Deprecated alias for num_tracked_samples. The method name is inaccurate\n        # as this will count the number of tracked _samples_. 
This is only provided to\n        # avoid breaking existing code and should not be used in new code.\n        return self.num_tracked_samples(u)\n\n    def get_num_tracked_samples(self, u=None):\n        # Deprecated alias for num_tracked_samples\n        return self.num_tracked_samples(u)\n\n    def num_tracked_samples(self, u=None):\n        \"\"\"\n        Returns the number of samples in the set specified in the\n        ``tracked_samples`` parameter of the :meth:`TreeSequence.trees` method\n        underneath the specified node. If the input node is not specified,\n        return the total number of tracked samples in the tree.\n\n        This is a constant time operation.\n\n        :param int u: The node of interest.\n        :return: The number of samples within the set of tracked samples in\n            the subtree rooted at u.\n        :rtype: int\n        \"\"\"\n        u = self.virtual_root if u is None else u\n        return self._ll_tree.get_num_tracked_samples(u)\n\n    def preorder(self, u=NULL):\n        \"\"\"\n        Returns a numpy array of node ids in `preorder\n        <https://en.wikipedia.org/wiki/Tree_traversal#Pre-order_(NLR)>`_. If the node u\n        is specified the traversal is rooted at this node (and it will be the first\n        element in the returned array). Otherwise, all nodes reachable from the tree\n        roots will be returned. See :ref:`tutorials:sec_analysing_trees_traversals` for\n        examples.\n\n        :param int u: If specified, return all nodes in the subtree rooted at u\n            (including u) in traversal order.\n        :return: Array of node ids\n        :rtype: numpy.ndarray (dtype=np.int32)\n        \"\"\"\n        return self._ll_tree.get_preorder(u)\n\n    def postorder(self, u=NULL):\n        \"\"\"\n        Returns a numpy array of node ids in `postorder\n        <https://en.wikipedia.org/wiki/Tree_traversal#Post-order_(LRN)>`_. 
If the node u\n        is specified the traversal is rooted at this node (and it will be the last\n        element in the returned array). Otherwise, all nodes reachable from the tree\n        roots will be returned. See :ref:`tutorials:sec_analysing_trees_traversals` for\n        examples.\n\n        :param int u: If specified, return all nodes in the subtree rooted at u\n            (including u) in traversal order.\n        :return: Array of node ids\n        :rtype: numpy.ndarray (dtype=np.int32)\n        \"\"\"\n        return self._ll_tree.get_postorder(u)\n\n    def timeasc(self, u=NULL):\n        \"\"\"\n        Returns a numpy array of node ids. Starting at `u`, returns the reachable\n        descendant nodes in order of increasing time (most recent first), falling back\n        to increasing ID if times are equal. Also see\n        :ref:`tutorials:sec_analysing_trees_traversals` for examples of how to use\n        traversals.\n\n        :param int u: If specified, return all nodes in the subtree rooted at u\n            (including u) in traversal order.\n        :return: Array of node ids\n        :rtype: numpy.ndarray (dtype=np.int32)\n        \"\"\"\n        nodes = self.preorder(u)\n        is_virtual_root = u == self.virtual_root\n        time = self.tree_sequence.nodes_time\n        if is_virtual_root:\n            # We could avoid creating this array if we wanted to, but\n            # it's not that often people will be using this with the\n            # virtual_root as an argument, so doesn't seem worth\n            # the complexity\n            time = np.append(time, [np.inf])\n        order = np.lexsort([nodes, time[nodes]])\n        return nodes[order]\n\n    def timedesc(self, u=NULL):\n        \"\"\"\n        Returns a numpy array of node ids. Starting at `u`, returns the reachable\n        descendant nodes in order of decreasing time (least recent first), falling back\n        to decreasing ID if times are equal. 
Also see\n        :ref:`tutorials:sec_analysing_trees_traversals` for examples of how to use\n        traversals.\n\n        :param int u: If specified, return all nodes in the subtree rooted at u\n            (including u) in traversal order.\n        :return: Array of node ids\n        :rtype: numpy.ndarray (dtype=np.int32)\n        \"\"\"\n        return self.timeasc(u)[::-1]\n\n    def _preorder_traversal(self, root):\n        # Return Python integers for compatibility\n        return map(int, self.preorder(root))\n\n    def _postorder_traversal(self, root):\n        # Return Python integers for compatibility\n        return map(int, self.postorder(root))\n\n    def _inorder_traversal(self, root):\n        # TODO add a nonrecursive version of the inorder traversal.\n\n        def traverse(u):\n            children = self.get_children(u)\n            mid = len(children) // 2\n            for c in children[:mid]:\n                yield from traverse(c)\n            yield u\n            for c in children[mid:]:\n                yield from traverse(c)\n\n        roots = self.roots if root == NULL else [root]\n        for root in roots:\n            yield from traverse(root)\n\n    def _levelorder_traversal(self, root):\n        roots = self.roots if root == NULL else [root]\n        queue = collections.deque(roots)\n        # For perf we store these to avoid lookups in the tight loop\n        pop = queue.popleft\n        extend = queue.extend\n        children = self.children\n        # Note: the usual style is to be explicit about what we're testing\n        # and use while len(queue) > 0, but this form is slightly faster.\n        while queue:\n            v = pop()\n            extend(children(v))\n            yield v\n\n    def _timeasc_traversal(self, root):\n        \"\"\"\n        Sorts by increasing time but falls back to increasing ID for equal times.\n        \"\"\"\n        return map(int, self.timeasc(root))\n\n    def _timedesc_traversal(self, root):\n 
       \"\"\"\n        The reverse of timeasc.\n        \"\"\"\n        return map(int, self.timedesc(root))\n\n    def _minlex_postorder_traversal(self, root):\n        \"\"\"\n        Postorder traversal that visits leaves in minimum lexicographic order.\n\n        Minlex stands for minimum lexicographic. We wish to visit a tree in such\n        a way that the leaves visited, when their IDs are listed out, have\n        minimum lexicographic order. This is a useful ordering for drawing\n        multiple Trees of a TreeSequence, as it leads to more consistency\n        between adjacent Trees.\n        \"\"\"\n\n        # We compute a dictionary mapping from internal node ID to min leaf ID\n        # under the node, using a first postorder traversal\n        min_leaf = {}\n        for u in self.nodes(root, order=\"postorder\"):\n            if self.is_leaf(u):\n                min_leaf[u] = u\n            else:\n                min_leaf[u] = min(min_leaf[v] for v in self.children(u))\n\n        # If we deliberately specify the virtual root, it should also be returned\n        is_virtual_root = root == self.virtual_root\n        if root == -1:\n            root = self.virtual_root\n\n        stack = [(root, False)]\n        while len(stack) > 0:\n            u, visited = stack.pop()\n            if visited:\n                if u != self.virtual_root or is_virtual_root:\n                    yield u\n            else:\n                stack.append((u, True))  # Reappend, marking visited\n                stack.extend(\n                    sorted(\n                        ((c, False) for c in self.children(u)),\n                        key=lambda v: min_leaf[v[0]],\n                        reverse=True,\n                    )\n                )\n\n    def nodes(self, root=None, order=\"preorder\"):\n        \"\"\"\n        Returns an iterator over the node IDs reachable from the specified node in this\n        tree in the specified traversal order.\n\n        .. 
note::\n            Unlike the :meth:`TreeSequence.nodes` method, this iterator produces\n            integer node IDs, not :class:`Node` objects.\n\n        If the ``root`` parameter is not provided or ``None``, iterate over all\n        nodes reachable from the roots (see :attr:`Tree.roots` for details\n        on which nodes are considered roots). If the ``root`` parameter\n        is provided, only the nodes in the subtree rooted at this node\n        (including the specified node) will be iterated over. If the\n        :attr:`.virtual_root` is specified as the traversal root, it will\n        be included in the traversed nodes in the appropriate position\n        for the given ordering. (See the\n        :ref:`tree roots <sec_data_model_tree_virtual_root>` section for more\n        information on the virtual root.)\n\n        The ``order`` parameter defines the order in which tree nodes are visited\n        in the iteration (also see the :ref:`sec_analysing_trees_traversals` section\n        in the `tutorials <https://tskit.dev/tutorials>`__). The available orders are:\n\n        - 'preorder': starting at root, yield the current node, then recurse\n          and do a preorder on each child of the current node. See also `Wikipedia\n          <https://en.wikipedia.org/wiki/Tree_traversal#Pre-order_(NLR)>`__.\n        - 'inorder': starting at root, assuming binary trees, recurse and do\n          an inorder on the first child, then yield the current node, then\n          recurse and do an inorder on the second child. In the case of ``n``\n          child nodes (not necessarily 2), the first ``n // 2`` children are\n          visited in the first stage, and the remaining ``n - n // 2`` children\n          are visited in the second stage. 
See also `Wikipedia\n          <https://en.wikipedia.org/wiki/Tree_traversal#In-order_(LNR)>`__.\n        - 'postorder': starting at root, recurse and do a postorder on each\n          child of the current node, then yield the current node. See also\n          `Wikipedia\n          <https://en.wikipedia.org/wiki/Tree_traversal#Post-order_(LRN)>`__.\n        - 'levelorder' ('breadthfirst'): visit the nodes under root (including\n          the root) in increasing order of their depth from root. See also\n          `Wikipedia\n          <https://en.wikipedia.org/wiki/Tree_traversal\\\n#Breadth-first_search_/_level_order>`__.\n        - 'timeasc': visits the nodes in order of increasing time, falling back to\n          increasing ID if times are equal.\n        - 'timedesc': visits the nodes in order of decreasing time, falling back to\n          decreasing ID if times are equal.\n        - 'minlex_postorder': a usual postorder has ambiguity in the order in\n          which children of a node are visited. We constrain this by outputting\n          a postorder such that the leaves visited, when their IDs are\n          listed out, have minimum `lexicographic order\n          <https://en.wikipedia.org/wiki/Lexicographical_order>`__ out of all valid\n          traversals. This traversal is useful for drawing multiple trees of\n          a ``TreeSequence``, as it leads to more consistency between adjacent\n          trees. Note that internal non-leaf nodes are not counted in\n          assessing the lexicographic order.\n\n        :param int root: The root of the subtree we are traversing.\n        :param str order: The traversal ordering. 
Currently 'preorder',\n            'inorder', 'postorder', 'levelorder' ('breadthfirst'), 'timeasc' and\n            'timedesc' and 'minlex_postorder' are supported.\n        :return: An iterator over the node IDs in the tree in some traversal order.\n        :rtype: collections.abc.Iterable, int\n        \"\"\"\n        methods = {\n            \"preorder\": self._preorder_traversal,\n            \"inorder\": self._inorder_traversal,\n            \"postorder\": self._postorder_traversal,\n            \"levelorder\": self._levelorder_traversal,\n            \"breadthfirst\": self._levelorder_traversal,\n            \"timeasc\": self._timeasc_traversal,\n            \"timedesc\": self._timedesc_traversal,\n            \"minlex_postorder\": self._minlex_postorder_traversal,\n        }\n        try:\n            iterator = methods[order]\n        except KeyError:\n            raise ValueError(f\"Traversal ordering '{order}' not supported\")\n\n        root = -1 if root is None else root\n        return iterator(root)\n\n    def _as_newick_fast(self, *, root, precision, legacy_ms_labels):\n        \"\"\"\n        Call into the fast but limited C implementation of the newick conversion.\n        \"\"\"\n        root_time = max(1, self.time(root))\n        max_label_size = math.ceil(math.log10(self.tree_sequence.num_nodes))\n        single_node_size = (\n            5 + max_label_size + math.ceil(math.log10(root_time)) + precision\n        )\n        buffer_size = 1 + single_node_size * self.tree_sequence.num_nodes\n        return self._ll_tree.get_newick(\n            precision=precision,\n            root=root,\n            buffer_size=buffer_size,\n            legacy_ms_labels=legacy_ms_labels,\n        )\n\n    def as_newick(\n        self,\n        *,\n        root=None,\n        precision=None,\n        node_labels=None,\n        include_branch_lengths=None,\n    ):\n        \"\"\"\n        Returns a `newick encoding\n        
<https://en.wikipedia.org/wiki/Newick_format>`_ of this tree.\n        For example, a binary tree with 3 leaves generated by\n        :meth:`Tree.generate_balanced(3)<Tree.generate_balanced>`\n        encodes as::\n\n            (n0:2,(n1:1,n2:1):1);\n\n        By default :ref:`sample nodes<sec_data_model_definitions>` are\n        labelled using the form ``f\"n{node_id}\"``, i.e. the sample node's\n        ID prefixed with the string ``\"n\"``. Node labels can be specified\n        explicitly using the ``node_labels`` argument, which is a mapping from\n        integer node IDs to the corresponding string label. If a node is not\n        present in the mapping, no label is associated with the node in\n        output.\n\n        .. warning:: Node labels are **not** Newick escaped, so care must be taken\n            to provide labels that will not break the encoding.\n\n        .. note:: Specifying a ``node_labels`` dictionary or setting\n            ``include_branch_lengths=False`` results in a less efficient\n            method being used to generate the newick output. The performance\n            difference can be substantial for large trees.\n\n        By default, branch lengths are printed out with sufficient precision\n        for them to be recovered exactly in double precision (although note\n        that this does not necessarily mean that we can precisely recover the\n        corresponding node times, since branch lengths are obtained by\n        subtraction). If all times on the tree sequence are discrete, then\n        branch lengths are printed as integers. Otherwise, branch lengths are\n        printed with 17 digits of precision (i.e., ``\"%.17f\"`` in\n        printf-notation).\n\n        The precision for branch lengths can be specified using the ``precision``\n        argument. 
Branch lengths can be omitted entirely by setting\n        ``include_branch_lengths=False``.\n\n        If the ``root`` argument is specified, we return the newick encoding of\n        the specified subtree, otherwise the full tree is returned. If the tree\n        has :ref:`multiple roots <sec_data_model_tree_roots>` and a root node\n        is not explicitly specified, we raise a ``ValueError``. This is because\n        most libraries and downstream software consider a newick string that\n        contains multiple disconnected subtrees an error, and it is therefore\n        best to consider how such topologies should be interchanged on a\n        case-by-case basis. A list of the newick strings for each root can be\n        obtained by ``[tree.as_newick(root=root) for root in tree.roots]``.\n\n        :param int precision: The numerical precision with which branch lengths are\n            printed. If not specified or None default to 0 if the tree sequence\n            contains only integer node times, or 17 otherwise.\n        :param int root: If specified, return the tree rooted at this node.\n        :param dict node_labels: If specified, show custom labels for the nodes\n            that are present in the map. Any nodes not specified in the map will\n            not have a node label.\n        :param include_branch_lengths: If True (default), output branch lengths in the\n            Newick string. If False, only output the topology, without branch lengths.\n        :return: A newick representation of this tree.\n        :rtype: str\n        \"\"\"\n        if root is None:\n            if not self.has_single_root:\n                raise ValueError(\n                    \"Cannot get newick unless a tree has a single root. 
Try \"\n                    \"[t.as_newick(root) for root in t.roots] to get a list of \"\n                    \"newick trees, one for each root.\"\n                )\n            root = self.root\n\n        if precision is None:\n            # 17 decimal digits provides the full precision of an IEEE double,\n            # as defined by DBL_DECIMAL_DIG macro. If we have discrete time\n            # then write out integer branch lengths.\n            precision = 0 if self.tree_sequence.discrete_time else 17\n        include_branch_lengths = (\n            True if include_branch_lengths is None else include_branch_lengths\n        )\n        # Can we run this through the fast path?\n        if include_branch_lengths and node_labels in [LEGACY_MS_LABELS, None]:\n            # Note the LEGACY_MS_LABELS code path is not part of the documented\n            # interface and should not be depended on by client code.\n            return self._as_newick_fast(\n                root=root,\n                precision=precision,\n                legacy_ms_labels=node_labels == LEGACY_MS_LABELS,\n            )\n\n        # No, we have to use the slower Python code.\n        if node_labels is None:\n            node_labels = {u: f\"n{u}\" for u in self.tree_sequence.samples()}\n        elif node_labels == LEGACY_MS_LABELS:\n            # NOTE in the ms format it's the *leaf* nodes we label not\n            # necessarily the samples. 
We keep this behaviour to avoid\n            # breaking legacy code that may depend on it.\n            node_labels = {u: f\"{u + 1}\" for u in self.leaves()}\n        return text_formats.build_newick(\n            self,\n            root=root,\n            precision=precision,\n            node_labels=node_labels,\n            include_branch_lengths=include_branch_lengths,\n        )\n\n    def newick(\n        self,\n        precision=14,\n        *,\n        root=None,\n        node_labels=None,\n        include_branch_lengths=True,\n    ):\n        \"\"\"\n        .. warning:: This method is deprecated and may be removed in future\n            versions of tskit. Please use the :meth:`.as_newick` method\n            in new code.\n\n        This method is a deprecated version of the :meth:`.as_newick` method.\n        Functionality is equivalent, except for the default node labels.\n\n        By default, *leaf* nodes are labelled with their numerical ID + 1,\n        and internal nodes are not labelled. This default strategy was originally\n        used to mimic the output of the ``ms`` simulator. However, the choice\n        of labelling leaf nodes rather than samples is problematic, and this\n        behaviour is only retained to avoid breaking existing code which may\n        rely on it.\n\n        Other parameters behave as documented in the :meth:`.as_newick` method.\n\n        :param int precision: The numerical precision with which branch lengths are\n            printed. Defaults to 14.\n        :param int root: If specified, return the tree rooted at this node.\n        :param dict node_labels: If specified, show custom labels for the nodes\n            that are present in the map. Any nodes not specified in the map will\n            not have a node label.\n        :param include_branch_lengths: If True (default), output branch lengths in the\n            Newick string. 
If False, only output the topology, without branch lengths.\n        :return: A newick representation of this tree.\n        :rtype: str\n        \"\"\"\n        node_labels = LEGACY_MS_LABELS if node_labels is None else node_labels\n        return self.as_newick(\n            root=root,\n            precision=precision,\n            node_labels=node_labels,\n            include_branch_lengths=include_branch_lengths,\n        )\n\n    def as_dict_of_dicts(self):\n        \"\"\"\n        Convert tree to dict of dicts for conversion to a\n        `networkx graph <https://networkx.github.io/documentation/stable/\n        reference/classes/digraph.html>`_.\n\n        For example::\n\n            import networkx as nx\n            nx.DiGraph(tree.as_dict_of_dicts())\n            # undirected graphs work as well\n            nx.Graph(tree.as_dict_of_dicts())\n\n        :return: Dictionary of dictionaries of dictionaries where the first key\n            is the source, the second key is the target of an edge, and the\n            third key is an edge annotation. 
At this point the only annotation\n            is \"branch_length\", the length of the branch (in units of time).\n        \"\"\"\n        dod = {}\n        for parent in self.nodes():\n            dod[parent] = {}\n            for child in self.children(parent):\n                dod[parent][child] = {\"branch_length\": self.branch_length(child)}\n        return dod\n\n    @property\n    def parent_dict(self):\n        return self.get_parent_dict()\n\n    def get_parent_dict(self):\n        pi = {\n            u: self.parent(u)\n            for u in range(self.tree_sequence.num_nodes)\n            if self.parent(u) != NULL\n        }\n        return pi\n\n    def __str__(self):\n        \"\"\"\n        Return a plain text summary of a tree in a tree sequence\n        \"\"\"\n        tree_rows = [\n            [\"Index\", f\"{util.format_number(self.index, sep=',')}\"],\n            [\n                \"Interval\",\n                f\"{util.format_number(self.interval.left, sep=',')}-\"\n                f\"{util.format_number(self.interval.right, sep=',')}\"\n                f\"({util.format_number(self.span, sep=',')})\",\n            ],\n            [\"Roots\", f\"{util.format_number(self.num_roots, sep=',')}\"],\n            [\"Nodes\", f\"{util.format_number(len(self.preorder()), sep=',')}\"],\n            [\"Sites\", f\"{util.format_number(self.num_sites, sep=',')}\"],\n            [\"Mutations\", f\"{util.format_number(self.num_mutations, sep=',')}\"],\n            [\n                \"Total Branch Length\",\n                f\"{util.format_number(self.total_branch_length, sep=',')}\",\n            ],\n        ]\n        return util.unicode_table(tree_rows, title=\"Tree\")\n\n    def _repr_html_(self):\n        \"\"\"\n        Return an html summary of a tree in a tree sequence. 
Called by jupyter\n        notebooks to render trees\n        \"\"\"\n        return util.tree_html(self)\n\n    def map_mutations(self, genotypes, alleles, ancestral_state=None):\n        \"\"\"\n        Given observations for the samples in this tree described by the specified\n        set of genotypes and alleles, return a parsimonious set of state transitions\n        explaining these observations. The genotypes array is interpreted as indexes\n        into the alleles list in the same manner as described in the\n        :meth:`TreeSequence.variants` method. Thus, if sample ``j`` carries the\n        allele at index ``k``, then we have ``genotypes[j] = k``.\n        Missing observations can be specified for a sample using the value\n        ``tskit.MISSING_DATA`` (-1), in which case the state at this sample does not\n        influence the ancestral state or the position of mutations returned. At least\n        one non-missing observation must be provided. A maximum of 64 alleles are\n        supported.\n\n        The current implementation uses the Hartigan parsimony algorithm to determine\n        the minimum number of state transitions required to explain the data. In this\n        model, transitions between any of the non-missing states are equally likely.\n\n        The returned values correspond directly to the data model for describing\n        variation at sites using mutations. 
See the :ref:`sec_site_table_definition`\n        and :ref:`sec_mutation_table_definition` definitions for details and background.\n\n        The state reconstruction is returned as a two-tuple, ``(ancestral_state,\n        mutations)``, where ``ancestral_state`` is the allele assigned to the\n        tree root(s) and ``mutations`` is a list of :class:`Mutation` objects,\n        ordered as :ref:`required in a mutation table<sec_mutation_requirements>`.\n        For each mutation, ``derived_state`` is the new state after this mutation and\n        ``node`` is the tree node immediately beneath the mutation (if there are unary\n        nodes between two branch points, hence multiple nodes above which the\n        mutation could be parsimoniously placed, the oldest node is used). The\n        ``parent`` property contains the index in the returned list of the previous\n        mutation on the path to root, or ``tskit.NULL``\n        if there are no previous mutations (see the :ref:`sec_mutation_table_definition`\n        for more information on the concept of mutation parents). All other attributes\n        of the :class:`Mutation` object are undefined and should not be used.\n\n        .. note::\n            Sample states observed as missing in the input ``genotypes`` need\n            not correspond to samples whose nodes are actually \"missing\" (i.e.,\n            :ref:`isolated<sec_data_model_missing_data>`) in the tree. 
In this\n            case, mapping the mutations returned by this method onto the tree\n            will result in these missing observations being imputed to the\n            most parsimonious state.\n\n        Because the ``parent`` in the returned list of mutations refers to the index\n        in that list, if you are adding mutations to an existing tree sequence, you\n        will need to maintain a map of list IDs to the newly added mutations, for\n        instance::\n\n            last_tree = ts.last()\n            anc_state, parsimonious_muts = last_tree.map_mutations([0, 1, 0], (\"A\", \"T\"))\n            # Edit the tree sequence, see the \"Tables and Editing\" tutorial\n            tables = ts.dump_tables()\n            # add a new site at the end of ts, assumes there isn't one there already\n            site_id = tables.sites.add_row(ts.sequence_length - 1, anc_state)\n\n            mut_id_map = {tskit.NULL: tskit.NULL}  # don't change if parent id is -1\n            for list_id, mutation in enumerate(parsimonious_muts):\n                mut_id_map[list_id] = tables.mutations.append(\n                    mutation.replace(site=site_id, parent=mut_id_map[mutation.parent]))\n            tables.sort()  # Redundant here, but needed if the site is not the last one\n            new_ts = tables.tree_sequence()\n\n        See the :ref:`tutorials:sec_analysing_trees_parsimony` section in the tutorial\n        for further examples of how to use this method.\n\n        :param array_like genotypes: The input observations for the samples in this tree.\n        :param tuple(str) alleles: The alleles for the specified ``genotypes``. 
Each\n            positive value in the ``genotypes`` array is treated as an index into this\n            list of alleles.\n        :param ancestral_state: A fixed ancestral state, specified either as a\n            non-negative integer less than the number of alleles, or a string which\n            must be one of the ``alleles`` provided above. If ``None`` (default) then\n            an ancestral state is chosen arbitrarily from among those that provide\n            the most parsimonious placement of mutations. Note that if the ancestral\n            state is specified, the placement of mutations may not be as parsimonious\n            as that which could be achieved by leaving the ancestral state unspecified;\n            additionally it may lead to mutations being placed above the root node(s) of\n            the tree (for example if all the samples have a genotype of 1 but the\n            ancestral state is fixed to be 0).\n        :type ancestral_state: Union[int, str]\n        :return: The inferred ancestral state and list of mutations on this tree\n            that encode the specified observations.\n        :rtype: (str, list(tskit.Mutation))\n        \"\"\"\n        genotypes = util.safe_np_int_cast(genotypes, np.int8)\n        max_alleles = np.max(genotypes)\n        if ancestral_state is not None:\n            if isinstance(ancestral_state, str):\n                # Will raise a ValueError if not in the list\n                ancestral_state = alleles.index(ancestral_state)\n            if ancestral_state < 0 or ancestral_state >= len(alleles):\n                raise ValueError(\"ancestral_state not between 0 and (num_alleles-1)\")\n            max_alleles = max(ancestral_state, max_alleles)\n        if max_alleles >= 64:\n            raise ValueError(\"A maximum of 64 states is supported\")\n        ancestral_state, transitions = self._ll_tree.map_mutations(\n            genotypes, ancestral_state\n        )\n        # Translate back into string 
alleles\n        ancestral_state = alleles[ancestral_state]\n        mutations = [\n            Mutation(\n                node=node,\n                derived_state=alleles[derived_state],\n                parent=parent,\n                metadata=self.tree_sequence.table_metadata_schemas.mutation.empty_value,\n            )\n            for node, parent, derived_state in transitions\n        ]\n        return ancestral_state, mutations\n\n    def kc_distance(self, other, lambda_=0.0):\n        \"\"\"\n        Returns the Kendall-Colijn distance between the specified pair of trees.\n        The ``lambda_`` parameter  determines the relative weight of topology\n        vs branch lengths in calculating the distance. If ``lambda_`` is 0\n        (the default) we only consider topology, and if it is 1 we only\n        consider branch lengths. See `Kendall & Colijn (2016)\n        <https://academic.oup.com/mbe/article/33/10/2735/2925548>`_ for details.\n\n        The trees we are comparing to must have identical lists of sample\n        nodes (i.e., the same IDs in the same order). The metric operates on\n        samples, not leaves, so internal samples are treated identically to\n        sample tips. 
Subtrees with no samples do not contribute to the metric.\n\n        :param Tree other: The other tree to compare to.\n        :param float lambda_: The KC metric lambda parameter determining the\n            relative weight of topology and branch length.\n        :return: The computed KC distance between this tree and other.\n        :rtype: float\n        \"\"\"\n        return self._ll_tree.get_kc_distance(other._ll_tree, lambda_)\n\n    def _get_sample_sets(self):\n        ret = {}\n        for u in self.nodes(order=\"postorder\"):\n            u_sample_set = set()\n            if self.is_sample(u):\n                u_sample_set.add(u)\n            for v in self.children(u):\n                u_sample_set |= ret[v]\n            ret[u] = frozenset(u_sample_set)\n        return ret\n\n    def rf_distance(self, other):\n        \"\"\"\n        Returns the (unweighted) Robinson-Foulds distance between the specified pair\n        of trees, where corresponding samples between the two trees are identified by\n        node ID. The Robinson-Foulds distance (also known as the symmetric difference)\n        is defined as the number of bipartitions that are present in one tree but\n        not the other (see\n        `Robinson & Foulds (1981) <https://doi.org/10.1016/0025-5564(81)90043-2>`_).\n        This method returns the unnormalised RF distance: if the\n        trees are strictly bifurcating, i.e. binary, the value can be\n        normalised by dividing by the maximum, which is $2n-4$ for two rooted\n        trees of $n$ samples (however, if the trees contain polytomies, the maximum\n        RF distance is less easily defined).\n\n        .. note::\n            The RF distance can be sensitive to small changes in topology: in some\n            cases, changing the position of a single leaf can result in the maximum\n            RF distance. 
Therefore even if adjacent trees in a tree sequence differ\n            by a single subtree-prune-and-regraft operation, the RF distance\n            between them can be large.\n\n        :param Tree other: The other tree to compare to. Trees are treated as rooted.\n        :return: The unweighted Robinson-Foulds distance between this tree and ``other``.\n        :rtype: int\n        :raises ValueError: If either tree has multiple roots, or the trees have\n            different sample nodes.\n        \"\"\"\n        if self.num_roots != 1 or other.num_roots != 1:\n            raise ValueError(\"Trees must have a single root\")\n\n        s1 = set(self._get_sample_sets().values())\n        s2 = set(other._get_sample_sets().values())\n\n        return len(s1.symmetric_difference(s2))\n\n    def path_length(self, u, v):\n        \"\"\"\n        Returns the number of edges on the path in this tree between the two nodes.\n        If the two nodes have a most recent common ancestor, then this is defined as\n        ``tree.depth(u) + tree.depth(v) - 2 * tree.depth(tree.mrca(u, v))``. If the nodes\n        do not have an MRCA (i.e., they are in disconnected subtrees) the path length\n        is infinity.\n\n        .. note:: This counts the number of \"hops\" between two nodes. To find the branch\n            length distance between them, in units of time (i.e. the sum of edge lengths\n            that separate two nodes) use the :meth:`.distance_between` method instead.\n\n        .. 
seealso:: See also the :meth:`.depth` method\n\n        :param int u: The first node for path length computation.\n        :param int v: The second node for path length computation.\n        :return: The number of edges between the two nodes.\n        :rtype: int\n        \"\"\"\n        mrca = self.mrca(u, v)\n        if mrca == -1:\n            return math.inf\n        return self.depth(u) + self.depth(v) - 2 * self.depth(mrca)\n\n    def distance_between(self, u, v):\n        \"\"\"\n        Returns the total distance between two nodes in the tree, expressed as\n        the sum of \"branch lengths\" from both nodes to their most recent common ancestor.\n\n        :param int u: The first node for path length computation.\n        :param int v: The second node for path length computation.\n        :return: The distance between the two nodes, the sum of \"branch lengths\".\n        :rtype: float\n        \"\"\"\n        tmrca = self.tmrca(u, v)\n        return tmrca - self.time(u) + tmrca - self.time(v)\n\n    def b1_index(self):\n        \"\"\"\n        Returns the\n        `B1 balance index <https://treebalance.wordpress.com/b₁-index/>`_\n        for this tree. This is defined as the inverse of the sum of all\n        longest paths to leaves for each node besides roots.\n\n        .. seealso:: See `Shao and Sokal (1990)\n            <https://www.jstor.org/stable/2992186>`_ for details.\n\n        :return: The B1 balance index.\n        :rtype: float\n        \"\"\"\n        return self._ll_tree.get_b1_index()\n\n    def b2_index(self, base=10):\n        \"\"\"\n        Returns the\n        `B2 balance index <https://treebalance.wordpress.com/b₂-index/>`_\n        for this tree.\n        This is defined as the Shannon entropy of the probability\n        distribution to reach leaves assuming a random walk\n        from a root. The default base is 10, following Shao and Sokal (1990).\n\n        .. 
seealso:: See `Shao and Sokal (1990)\n            <https://www.jstor.org/stable/2992186>`_ for details.\n\n        :param int base: The base used for the logarithm in the\n            Shannon entropy computation.\n        :return: The B2 balance index.\n        :rtype: float\n        \"\"\"\n        # Let Python decide if the base is acceptable\n        math.log(10, base)\n        return self._ll_tree.get_b2_index(base)\n\n    def colless_index(self):\n        \"\"\"\n        Returns the\n        `Colless imbalance index <https://treebalance.wordpress.com/colless-index/>`_\n        for this tree. This is defined as the sum of all differences between\n        number of leaves subtended by the left and right child of each node.\n        The Colless index is undefined for non-binary trees and trees with\n        multiple roots. This method will raise a LibraryError if the tree is\n        not singly-rooted and binary.\n\n        .. seealso:: See `Shao and Sokal (1990)\n            <https://www.jstor.org/stable/2992186>`_ for details.\n\n        :return: The Colless imbalance index.\n        :rtype: int\n        \"\"\"\n        return self._ll_tree.get_colless_index()\n\n    def sackin_index(self):\n        \"\"\"\n        Returns the\n        `Sackin imbalance index <https://treebalance.wordpress.com/sackin-index/>`_\n        for this tree. This is defined as the sum of the depths of all leaves\n        in the tree. Equivalent to ``sum(tree.depth(u) for u in\n        tree.leaves())``\n\n        .. seealso:: See `Shao and Sokal (1990)\n            <https://www.jstor.org/stable/2992186>`_ for details.\n\n        :return: The Sackin imbalance index.\n        :rtype: int\n        \"\"\"\n        return self._ll_tree.get_sackin_index()\n\n    def num_lineages(self, t):\n        \"\"\"\n        Returns the number of lineages present in this tree at time ``t``. 
This\n        is defined as the number of branches in this tree (reachable from the\n        samples) that intersect with ``t``. Thus, ``tree.num_lineages(t)``\n        is equal to 0 for any ``t`` greater than or equal to the time of\n        the root in a singly-rooted tree.\n\n        .. note:: Note that this definition means that if a (non root) node\n            with three children has time ``t``, then it will count as one lineage,\n            not three.\n\n        :param int t: The time to count lineages at.\n        :return: The number of lineages in the tree at time t.\n        :rtype: int\n        \"\"\"\n        return self._ll_tree.get_num_lineages(t)\n\n    def split_polytomies(\n        self,\n        *,\n        epsilon=None,\n        method=None,\n        record_provenance=True,\n        random_seed=None,\n        **kwargs,\n    ):\n        \"\"\"\n        Return a new :class:`.Tree` where extra nodes and edges have been inserted\n        so that any node ``u`` with greater than 2 children --- a multifurcation\n        or \"polytomy\" --- is resolved into successive bifurcations. New nodes are\n        inserted at times fractionally less than the time of node ``u``.\n        Times are allocated to different levels of the tree, such that any newly\n        inserted sibling nodes will have the same time.\n\n        By default, the times of the newly generated children of a particular\n        node are the minimum representable distance in floating point arithmetic\n        from their parents (using the `nextafter\n        <https://numpy.org/doc/stable/reference/generated/numpy.nextafter.html>`_\n        function). Thus, the generated branches have the shortest possible nonzero\n        length. A fixed branch length between inserted nodes and their parents\n        can also be specified by using the ``epsilon`` parameter.\n\n        .. 
note::\n            A tree sequence :ref:`requires<sec_valid_tree_sequence_requirements>` that\n            parents be older than children and that mutations are younger than the\n            parent of the edge on which they lie. If a fixed ``epsilon`` is specified\n            and is not small enough compared to the distance between a polytomy and\n            its oldest child (or oldest child mutation) these requirements may not\n            be met. In this case an error will be raised.\n\n        If the ``method`` is ``\"random\"`` (currently the only option, and the default\n        when no method is specified), then a node with :math:`n` children is\n        resolved into one of the :math:`(2n - 3)! / (2^{n - 2} (n - 2)!)` possible\n        binary trees, each chosen with equal probability.\n\n        The returned :class:`.Tree` will have the same genomic span as this tree,\n        and node IDs will be conserved (that is, node ``u`` in this tree will\n        be the same node in the returned tree). The returned tree is derived from a\n        tree sequence that contains only one non-degenerate tree, that is, where\n        edges cover only the interval spanned by this tree.\n\n        :param epsilon: If specified, the fixed branch length between inserted\n            nodes and their parents. If None (the default), the minimal possible\n            nonzero branch length is generated for each node.\n        :param str method: The method used to break polytomies. Currently only \"random\"\n            is supported, which can also be specified by ``method=None``\n            (Default: ``None``).\n        :param bool record_provenance: If True, add details of this operation to the\n            provenance information of the returned tree sequence. (Default: True).\n        :param int random_seed: The random seed. If this is None, a random seed will\n            be automatically generated. 
Valid random seeds must be between 1 and\n            :math:`2^{32} - 1`.\n        :param \\\\**kwargs: Further arguments used as parameters when constructing the\n            returned :class:`Tree`. For example\n            ``tree.split_polytomies(sample_lists=True)`` will\n            return a :class:`Tree` created with ``sample_lists=True``.\n        :return: A new tree with polytomies split into random bifurcations.\n        :rtype: tskit.Tree\n        \"\"\"\n        return combinatorics.split_polytomies(\n            self,\n            epsilon=epsilon,\n            method=method,\n            record_provenance=record_provenance,\n            random_seed=random_seed,\n            **kwargs,\n        )\n\n    @staticmethod\n    def generate_star(\n        num_leaves, *, span=1, branch_length=1, record_provenance=True, **kwargs\n    ):\n        \"\"\"\n        Generate a :class:`Tree` whose leaf nodes all have the same parent (i.e.,\n        a \"star\" tree). The leaf nodes are all at time 0 and are marked as sample nodes.\n\n        The tree produced by this method is identical to\n        ``tskit.Tree.unrank(n, (0, 0))``, but generated more efficiently for large ``n``.\n\n        :param int num_leaves: The number of leaf nodes in the returned tree (must be\n            2 or greater).\n        :param float span: The span of the tree, and therefore the\n            :attr:`~TreeSequence.sequence_length` of the :attr:`.tree_sequence`\n            property of the returned :class:`Tree`.\n        :param float branch_length: The length of every branch in the tree (equivalent\n            to the time of the root node).\n        :param bool record_provenance: If True, add details of this operation to the\n            provenance information of the returned tree sequence. (Default: True).\n        :param \\\\**kwargs: Further arguments used as parameters when constructing the\n            returned :class:`Tree`. 
For example\n            ``tskit.Tree.generate_star(sample_lists=True)`` will\n            return a :class:`Tree` created with ``sample_lists=True``.\n        :return: A star-shaped tree. Its corresponding :class:`TreeSequence` is available\n            via the :attr:`.tree_sequence` attribute.\n        :rtype: Tree\n        \"\"\"\n        return combinatorics.generate_star(\n            num_leaves,\n            span=span,\n            branch_length=branch_length,\n            record_provenance=record_provenance,\n            **kwargs,\n        )\n\n    @staticmethod\n    def generate_balanced(\n        num_leaves,\n        *,\n        arity=2,\n        span=1,\n        branch_length=1,\n        record_provenance=True,\n        **kwargs,\n    ):\n        \"\"\"\n        Generate a :class:`Tree` with the specified number of leaves that is maximally\n        balanced. By default, the tree returned is binary, such that for each\n        node that subtends :math:`n` leaves, the left child will subtend\n        :math:`\\\\lfloor{n / 2}\\\\rfloor` leaves and the right child the\n        remainder. Balanced trees with higher arity can also be generated using the\n        ``arity`` parameter, where the leaves subtending a node are distributed\n        among its children analogously.\n\n        In the returned tree, the leaf nodes are all at time 0, marked as samples,\n        and labelled 0 to n from left-to-right. 
Internal node IDs are assigned\n        sequentially from n in a postorder traversal, and the time of an internal\n        node is the maximum time of its children plus the specified ``branch_length``.\n\n        :param int num_leaves: The number of leaf nodes in the returned tree (must be\n            2 or greater).\n        :param int arity: The maximum number of children a node can have in the returned\n            tree.\n        :param float span: The span of the tree, and therefore the\n            :attr:`~TreeSequence.sequence_length` of the :attr:`.tree_sequence`\n            property of the returned :class:`Tree`.\n        :param float branch_length: The minimum length of a branch in the tree (see\n            above for details on how internal node times are assigned).\n        :param bool record_provenance: If True, add details of this operation to the\n            provenance information of the returned tree sequence. (Default: True).\n        :param \\\\**kwargs: Further arguments used as parameters when constructing the\n            returned :class:`Tree`. For example\n            ``tskit.Tree.generate_balanced(sample_lists=True)`` will\n            return a :class:`Tree` created with ``sample_lists=True``.\n        :return: A balanced tree. Its corresponding :class:`TreeSequence` is available\n            via the :attr:`.tree_sequence` attribute.\n        :rtype: Tree\n        \"\"\"\n        return combinatorics.generate_balanced(\n            num_leaves,\n            arity=arity,\n            span=span,\n            branch_length=branch_length,\n            record_provenance=record_provenance,\n            **kwargs,\n        )\n\n    @staticmethod\n    def generate_comb(\n        num_leaves, *, span=1, branch_length=1, record_provenance=True, **kwargs\n    ):\n        \"\"\"\n        Generate a :class:`Tree` in which all internal nodes have two children\n        and the left child is a leaf. 
This is a \"comb\", \"ladder\" or \"pectinate\"\n        phylogeny, and also known as a `caterpillar tree\n        <https://en.wikipedia.org/wiki/Caterpillar_tree>`_.\n\n        The leaf nodes are all at time 0, marked as samples,\n        and labelled 0 to n from left-to-right. Internal node IDs are assigned\n        sequentially from n as we ascend the tree, and the time of an internal\n        node is the maximum time of its children plus the specified ``branch_length``.\n\n        :param int num_leaves: The number of leaf nodes in the returned tree (must be\n            2 or greater).\n        :param float span: The span of the tree, and therefore the\n            :attr:`~TreeSequence.sequence_length` of the :attr:`.tree_sequence`\n            property of the returned :class:`Tree`.\n        :param float branch_length: The branch length between each internal node; the\n            root node is therefore placed at time ``branch_length * (num_leaves - 1)``.\n        :param bool record_provenance: If True, add details of this operation to the\n            provenance information of the returned tree sequence. (Default: True).\n        :param \\\\**kwargs: Further arguments used as parameters when constructing the\n            returned :class:`Tree`. For example\n            ``tskit.Tree.generate_comb(sample_lists=True)`` will\n            return a :class:`Tree` created with ``sample_lists=True``.\n        :return: A comb-shaped bifurcating tree. 
Its corresponding :class:`TreeSequence`\n            is available via the :attr:`.tree_sequence` attribute.\n        :rtype: Tree\n        \"\"\"\n        return combinatorics.generate_comb(\n            num_leaves,\n            span=span,\n            branch_length=branch_length,\n            record_provenance=record_provenance,\n            **kwargs,\n        )\n\n    @staticmethod\n    def generate_random_binary(\n        num_leaves,\n        *,\n        span=1,\n        branch_length=1,\n        random_seed=None,\n        record_provenance=True,\n        **kwargs,\n    ):\n        \"\"\"\n        Generate a random binary :class:`Tree` with :math:`n` = ``num_leaves``\n        leaves with an equal probability of returning any topology and\n        leaf label permutation among the :math:`(2n - 3)! / (2^{n - 2} (n - 2)!)`\n        leaf-labelled binary trees.\n\n        The leaf nodes are marked as samples, labelled 0 to n, and placed at\n        time 0. Internal node IDs are assigned sequentially from n as we ascend\n        the tree, and the time of an internal node is the maximum time of its\n        children plus the specified ``branch_length``.\n\n        .. note::\n            The returned tree has not been created under any explicit model of\n            evolution. In order to simulate such trees, additional software\n            such as `msprime <https://github.com/tskit-dev/msprime>`_ is required.\n\n        :param int num_leaves: The number of leaf nodes in the returned tree (must\n            be 2 or greater).\n        :param float span: The span of the tree, and therefore the\n            :attr:`~TreeSequence.sequence_length` of the :attr:`.tree_sequence`\n            property of the returned :class:`Tree`.\n        :param float branch_length: The minimum time between parent and child nodes.\n        :param int random_seed: The random seed. If this is None, a random seed will\n            be automatically generated. 
Valid random seeds must be between 1 and\n            :math:`2^{32} - 1`.\n        :param bool record_provenance: If True, add details of this operation to the\n            provenance information of the returned tree sequence. (Default: True).\n        :param \\\\**kwargs: Further arguments used as parameters when constructing the\n            returned :class:`Tree`. For example\n            ``tskit.Tree.generate_random_binary(sample_lists=True)`` will\n            return a :class:`Tree` created with ``sample_lists=True``.\n        :return: A random binary tree. Its corresponding :class:`TreeSequence` is\n            available via the :attr:`.tree_sequence` attribute.\n        :rtype: Tree\n        \"\"\"\n        return combinatorics.generate_random_binary(\n            num_leaves,\n            span=span,\n            branch_length=branch_length,\n            random_seed=random_seed,\n            record_provenance=record_provenance,\n            **kwargs,\n        )\n\n\ndef load(file, *, skip_tables=False, skip_reference_sequence=False):\n    \"\"\"\n    Return a :class:`TreeSequence` instance loaded from the specified file object or\n    path. The file must be in the\n    :ref:`tree sequence file format <sec_tree_sequence_file_format>`\n    produced by the :meth:`TreeSequence.dump` method.\n\n    .. warning:: With any of the ``skip_tables`` or ``skip_reference_sequence``\n        options set, it is not possible to load data from a non-seekable stream\n        (e.g. 
a socket or STDIN) of multiple tree sequences using consecutive\n        calls to :meth:`tskit.load`.\n\n    :param str file: The file object or path of the ``.trees`` file containing the\n        tree sequence we wish to load.\n    :param bool skip_tables: If True, no tables are read from the ``.trees``\n        file and only the top-level information is populated in the tree\n        sequence object.\n    :param bool skip_reference_sequence: If True, the tree sequence is read\n        without loading its reference sequence.\n    :return: The tree sequence object containing the information\n        stored in the specified file path.\n    :rtype: :class:`tskit.TreeSequence`\n    \"\"\"\n    return TreeSequence.load(\n        file, skip_tables=skip_tables, skip_reference_sequence=skip_reference_sequence\n    )\n\n\ndef parse_individuals(\n    source, strict=True, encoding=\"utf8\", base64_metadata=True, table=None\n):\n    \"\"\"\n    Parse the specified file-like object containing a whitespace delimited\n    description of an individual table and returns the corresponding\n    :class:`IndividualTable` instance. See the :ref:`individual text format\n    <sec_individual_text_format>` section for the details of the required\n    format and the :ref:`individual table definition\n    <sec_individual_table_definition>` section for the required properties of\n    the contents.\n\n    See :func:`tskit.load_text` for a detailed explanation of the ``strict``\n    parameter.\n\n    :param io.TextIOBase source: The file-like object containing the text.\n    :param bool strict: If True, require strict tab delimiting (default). If\n        False, a relaxed whitespace splitting algorithm is used.\n    :param str encoding: Encoding used for text representation.\n    :param bool base64_metadata: If True, metadata is encoded using Base64\n        encoding; otherwise, as plain text.\n    :param IndividualTable table: If specified write into this table. 
If not,\n        create a new :class:`IndividualTable` instance.\n    \"\"\"\n    sep = None\n    if strict:\n        sep = \"\\t\"\n    if table is None:\n        table = tables.IndividualTable()\n    # Read the header and find the indexes of the required fields.\n    header = source.readline().rstrip(\"\\n\").split(sep)\n    flags_index = header.index(\"flags\")\n    location_index = None\n    parents_index = None\n    metadata_index = None\n    try:\n        location_index = header.index(\"location\")\n    except ValueError:\n        pass\n    try:\n        parents_index = header.index(\"parents\")\n    except ValueError:\n        pass\n    try:\n        metadata_index = header.index(\"metadata\")\n    except ValueError:\n        pass\n    for line in source:\n        tokens = line.rstrip(\"\\n\").split(sep)\n        if len(tokens) >= 1:\n            flags = int(tokens[flags_index])\n            location = ()\n            if location_index is not None:\n                location_string = tokens[location_index]\n                if len(location_string) > 0:\n                    location = tuple(map(float, location_string.split(\",\")))\n            parents = ()\n            if parents_index is not None:\n                parents_string = tokens[parents_index]\n                if len(parents_string) > 0:\n                    parents = tuple(map(int, parents_string.split(\",\")))\n            metadata = b\"\"\n            if metadata_index is not None and metadata_index < len(tokens):\n                metadata = tokens[metadata_index].encode(encoding)\n                if base64_metadata:\n                    metadata = base64.b64decode(metadata)\n            table.add_row(\n                flags=flags, location=location, parents=parents, metadata=metadata\n            )\n    return table\n\n\ndef parse_nodes(source, strict=True, encoding=\"utf8\", base64_metadata=True, table=None):\n    \"\"\"\n    Parse the specified file-like object containing a whitespace 
delimited\n    description of a node table and returns the corresponding :class:`NodeTable`\n    instance. See the :ref:`node text format <sec_node_text_format>` section\n    for the details of the required format and the\n    :ref:`node table definition <sec_node_table_definition>` section for the\n    required properties of the contents.\n\n    See :func:`tskit.load_text` for a detailed explanation of the ``strict``\n    parameter.\n\n    :param io.TextIOBase source: The file-like object containing the text.\n    :param bool strict: If True, require strict tab delimiting (default). If\n        False, a relaxed whitespace splitting algorithm is used.\n    :param str encoding: Encoding used for text representation.\n    :param bool base64_metadata: If True, metadata is encoded using Base64\n        encoding; otherwise, as plain text.\n    :param NodeTable table: If specified write into this table. If not,\n        create a new :class:`NodeTable` instance.\n    \"\"\"\n    sep = None\n    if strict:\n        sep = \"\\t\"\n    if table is None:\n        table = tables.NodeTable()\n    # Read the header and find the indexes of the required fields.\n    header = source.readline().rstrip(\"\\n\").split(sep)\n    is_sample_index = header.index(\"is_sample\")\n    time_index = header.index(\"time\")\n    population_index = None\n    individual_index = None\n    metadata_index = None\n    try:\n        population_index = header.index(\"population\")\n    except ValueError:\n        pass\n    try:\n        individual_index = header.index(\"individual\")\n    except ValueError:\n        pass\n    try:\n        metadata_index = header.index(\"metadata\")\n    except ValueError:\n        pass\n    for line in source:\n        tokens = line.rstrip(\"\\n\").split(sep)\n        if len(tokens) >= 2:\n            is_sample = int(tokens[is_sample_index])\n            time = float(tokens[time_index])\n            flags = 0\n            if is_sample != 0:\n                flags |= 
NODE_IS_SAMPLE\n            population = NULL\n            if population_index is not None:\n                population = int(tokens[population_index])\n            individual = NULL\n            if individual_index is not None:\n                individual = int(tokens[individual_index])\n            metadata = b\"\"\n            if metadata_index is not None and metadata_index < len(tokens):\n                metadata = tokens[metadata_index].encode(encoding)\n                if base64_metadata:\n                    metadata = base64.b64decode(metadata)\n            table.add_row(\n                flags=flags,\n                time=time,\n                population=population,\n                individual=individual,\n                metadata=metadata,\n            )\n    return table\n\n\ndef parse_edges(source, strict=True, table=None, encoding=\"utf8\", base64_metadata=True):\n    \"\"\"\n    Parse the specified file-like object containing a whitespace delimited\n    description of a edge table and returns the corresponding :class:`EdgeTable`\n    instance. See the :ref:`edge text format <sec_edge_text_format>` section\n    for the details of the required format and the\n    :ref:`edge table definition <sec_edge_table_definition>` section for the\n    required properties of the contents.\n\n    See :func:`tskit.load_text` for a detailed explanation of the ``strict`` parameter.\n\n    :param io.TextIOBase source: The file-like object containing the text.\n    :param bool strict: If True, require strict tab delimiting (default). If\n        False, a relaxed whitespace splitting algorithm is used.\n    :param EdgeTable table: If specified, write the edges into this table. 
If\n        not, create a new :class:`EdgeTable` instance and return.\n    :param str encoding: Encoding used for text representation.\n    :param bool base64_metadata: If True, metadata is encoded using Base64\n        encoding; otherwise, as plain text.\n    \"\"\"\n    sep = None\n    if strict:\n        sep = \"\\t\"\n    if table is None:\n        table = tables.EdgeTable()\n    header = source.readline().rstrip(\"\\n\").split(sep)\n    left_index = header.index(\"left\")\n    right_index = header.index(\"right\")\n    parent_index = header.index(\"parent\")\n    children_index = header.index(\"child\")\n    metadata_index = None\n    try:\n        metadata_index = header.index(\"metadata\")\n    except ValueError:\n        pass\n    default_metadata = b\"\"\n    for line in source:\n        tokens = line.rstrip(\"\\n\").split(sep)\n        if len(tokens) >= 4:\n            left = float(tokens[left_index])\n            right = float(tokens[right_index])\n            parent = int(tokens[parent_index])\n            children = tuple(map(int, tokens[children_index].split(\",\")))\n            metadata = default_metadata\n            if metadata_index is not None and metadata_index < len(tokens):\n                metadata = tokens[metadata_index].encode(encoding)\n                if base64_metadata:\n                    metadata = base64.b64decode(metadata)\n            for child in children:\n                table.add_row(\n                    left=left,\n                    right=right,\n                    parent=parent,\n                    child=child,\n                    metadata=metadata,\n                )\n    return table\n\n\ndef parse_sites(source, strict=True, encoding=\"utf8\", base64_metadata=True, table=None):\n    \"\"\"\n    Parse the specified file-like object containing a whitespace delimited\n    description of a site table and returns the corresponding :class:`SiteTable`\n    instance. 
See the :ref:`site text format <sec_site_text_format>` section\n    for the details of the required format and the\n    :ref:`site table definition <sec_site_table_definition>` section for the\n    required properties of the contents.\n\n    See :func:`tskit.load_text` for a detailed explanation of the ``strict``\n    parameter.\n\n    :param io.TextIOBase source: The file-like object containing the text.\n    :param bool strict: If True, require strict tab delimiting (default). If\n        False, a relaxed whitespace splitting algorithm is used.\n    :param str encoding: Encoding used for text representation.\n    :param bool base64_metadata: If True, metadata is encoded using Base64\n        encoding; otherwise, as plain text.\n    :param SiteTable table: If specified write site into this table. If not,\n        create a new :class:`SiteTable` instance.\n    \"\"\"\n    sep = None\n    if strict:\n        sep = \"\\t\"\n    if table is None:\n        table = tables.SiteTable()\n    header = source.readline().rstrip(\"\\n\").split(sep)\n    position_index = header.index(\"position\")\n    ancestral_state_index = header.index(\"ancestral_state\")\n    metadata_index = None\n    try:\n        metadata_index = header.index(\"metadata\")\n    except ValueError:\n        pass\n    for line in source:\n        tokens = line.rstrip(\"\\n\").split(sep)\n        if len(tokens) >= 2:\n            position = float(tokens[position_index])\n            ancestral_state = tokens[ancestral_state_index]\n            metadata = b\"\"\n            if metadata_index is not None and metadata_index < len(tokens):\n                metadata = tokens[metadata_index].encode(encoding)\n                if base64_metadata:\n                    metadata = base64.b64decode(metadata)\n            table.add_row(\n                position=position, ancestral_state=ancestral_state, metadata=metadata\n            )\n    return table\n\n\ndef parse_mutations(\n    source, strict=True, 
encoding=\"utf8\", base64_metadata=True, table=None\n):\n    \"\"\"\n    Parse the specified file-like object containing a whitespace delimited\n    description of a mutation table and returns the corresponding :class:`MutationTable`\n    instance. See the :ref:`mutation text format <sec_mutation_text_format>` section\n    for the details of the required format and the\n    :ref:`mutation table definition <sec_mutation_table_definition>` section for the\n    required properties of the contents. Note that if the ``time`` column is missing its\n    entries are filled with ``UNKNOWN_TIME``.\n\n    See :func:`tskit.load_text` for a detailed explanation of the ``strict``\n    parameter.\n\n    :param io.TextIOBase source: The file-like object containing the text.\n    :param bool strict: If True, require strict tab delimiting (default). If\n        False, a relaxed whitespace splitting algorithm is used.\n    :param str encoding: Encoding used for text representation.\n    :param bool base64_metadata: If True, metadata is encoded using Base64\n        encoding; otherwise, as plain text.\n    :param MutationTable table: If specified, write mutations into this table.\n        If not, create a new :class:`MutationTable` instance.\n    \"\"\"\n    sep = None\n    if strict:\n        sep = \"\\t\"\n    if table is None:\n        table = tables.MutationTable()\n    header = source.readline().rstrip(\"\\n\").split(sep)\n    site_index = header.index(\"site\")\n    node_index = header.index(\"node\")\n    try:\n        time_index = header.index(\"time\")\n    except ValueError:\n        time_index = None\n    derived_state_index = header.index(\"derived_state\")\n    parent_index = None\n    parent = NULL\n    try:\n        parent_index = header.index(\"parent\")\n    except ValueError:\n        pass\n    metadata_index = None\n    try:\n        metadata_index = header.index(\"metadata\")\n    except ValueError:\n        pass\n    for line in source:\n        tokens = 
line.rstrip(\"\\n\").split(sep)\n        if len(tokens) >= 3:\n            site = int(tokens[site_index])\n            node = int(tokens[node_index])\n            if time_index is None or tokens[time_index] == tskit.TIME_UNITS_UNKNOWN:\n                time = UNKNOWN_TIME\n            else:\n                time = float(tokens[time_index])\n            derived_state = tokens[derived_state_index]\n            if parent_index is not None:\n                parent = int(tokens[parent_index])\n            metadata = b\"\"\n            if metadata_index is not None and metadata_index < len(tokens):\n                metadata = tokens[metadata_index].encode(encoding)\n                if base64_metadata:\n                    metadata = base64.b64decode(metadata)\n            table.add_row(\n                site=site,\n                node=node,\n                time=time,\n                derived_state=derived_state,\n                parent=parent,\n                metadata=metadata,\n            )\n    return table\n\n\ndef parse_populations(\n    source, strict=True, encoding=\"utf8\", base64_metadata=True, table=None\n):\n    \"\"\"\n    Parse the specified file-like object containing a whitespace delimited\n    description of a population table and returns the corresponding\n    :class:`PopulationTable` instance. See the :ref:`population text format\n    <sec_population_text_format>` section for the details of the required\n    format and the :ref:`population table definition\n    <sec_population_table_definition>` section for the required properties of\n    the contents.\n\n    See :func:`tskit.load_text` for a detailed explanation of the ``strict``\n    parameter.\n\n    :param io.TextIOBase source: The file-like object containing the text.\n    :param bool strict: If True, require strict tab delimiting (default). 
If\n        False, a relaxed whitespace splitting algorithm is used.\n    :param str encoding: Encoding used for text representation.\n    :param bool base64_metadata: If True, metadata is encoded using Base64\n        encoding; otherwise, as plain text.\n    :param PopulationTable table: If specified write into this table. If not,\n        create a new :class:`PopulationTable` instance.\n    \"\"\"\n    sep = None\n    if strict:\n        sep = \"\\t\"\n    if table is None:\n        table = tables.PopulationTable()\n    # Read the header and find the indexes of the required fields.\n    header = source.readline().rstrip(\"\\n\").split(sep)\n    metadata_index = header.index(\"metadata\")\n    for line in source:\n        tokens = line.rstrip(\"\\n\").split(sep)\n        if len(tokens) >= 1:\n            metadata = tokens[metadata_index].encode(encoding)\n            if base64_metadata:\n                metadata = base64.b64decode(metadata)\n            table.add_row(metadata=metadata)\n    return table\n\n\ndef parse_migrations(\n    source, strict=True, encoding=\"utf8\", base64_metadata=True, table=None\n):\n    \"\"\"\n    Parse the specified file-like object containing a whitespace delimited\n    description of a migration table and returns the corresponding\n    :class:`MigrationTable` instance.\n\n    See the :ref:`migration text format <sec_migration_text_format>` section\n    for the details of the required format and the\n    :ref:`migration table definition <sec_migration_table_definition>` section\n    for the required properties of the contents. Note that if the ``time`` column\n    is missing its entries are filled with :data:`UNKNOWN_TIME`.\n\n    See :func:`tskit.load_text` for a detailed explanation of the ``strict``\n    parameter.\n\n    :param io.TextIOBase source: The file-like object containing the text.\n    :param bool strict: If True, require strict tab delimiting (default). 
If\n        False, a relaxed whitespace splitting algorithm is used.\n    :param str encoding: Encoding used for text representation.\n    :param bool base64_metadata: If True, metadata is encoded using Base64\n        encoding; otherwise, as plain text.\n    :param MigrationTable table: If specified, write migrations into this table.\n        If not, create a new :class:`MigrationTable` instance.\n    \"\"\"\n    sep = None\n    if strict:\n        sep = \"\\t\"\n    if table is None:\n        table = tables.MigrationTable()\n    header = source.readline().rstrip(\"\\n\").split(sep)\n    left_index = header.index(\"left\")\n    right_index = header.index(\"right\")\n    node_index = header.index(\"node\")\n    source_index = header.index(\"source\")\n    dest_index = header.index(\"dest\")\n    time_index = header.index(\"time\")\n    metadata_index = None\n    try:\n        metadata_index = header.index(\"metadata\")\n    except ValueError:\n        pass\n    for line in source:\n        tokens = line.rstrip(\"\\n\").split(sep)\n        if len(tokens) >= 6:\n            left = float(tokens[left_index])\n            right = float(tokens[right_index])\n            node = int(tokens[node_index])\n            source = int(tokens[source_index])\n            dest = int(tokens[dest_index])\n            time = float(tokens[time_index])\n            metadata = b\"\"\n            if metadata_index is not None and metadata_index < len(tokens):\n                metadata = tokens[metadata_index].encode(encoding)\n                if base64_metadata:\n                    metadata = base64.b64decode(metadata)\n            table.add_row(\n                left=left,\n                right=right,\n                node=node,\n                source=source,\n                dest=dest,\n                time=time,\n                metadata=metadata,\n            )\n    return table\n\n\ndef load_text(\n    nodes,\n    edges,\n    sites=None,\n    mutations=None,\n    
individuals=None,\n    populations=None,\n    migrations=None,\n    sequence_length=0,\n    strict=True,\n    encoding=\"utf8\",\n    base64_metadata=True,\n):\n    \"\"\"\n    Return a :class:`TreeSequence` instance parsed from tabulated text data\n    contained in the specified file-like objects. The format\n    for these files is documented in the :ref:`sec_text_file_format` section,\n    and is produced by the :meth:`TreeSequence.dump_text` method. Further\n    properties required for an input tree sequence are described in the\n    :ref:`sec_valid_tree_sequence_requirements` section. This method is intended as a\n    convenient interface for importing external data into tskit; the binary\n    file format used by :meth:`tskit.load` is many times more efficient than\n    this text format.\n\n    The ``nodes`` and ``edges`` parameters are mandatory and must be file-like\n    objects containing text with whitespace delimited columns, parsable by\n    :func:`parse_nodes` and :func:`parse_edges`, respectively. ``sites``,\n    ``individuals``, ``populations``, ``mutations``, and ``migrations`` are optional,\n    and must be parsable by :func:`parse_sites`, :func:`parse_individuals`,\n    :func:`parse_populations`, :func:`parse_mutations`, and :func:`parse_migrations`,\n    respectively. For convenience, if the node table refers to populations,\n    but the ``populations`` parameter is not provided, a minimal set of rows are\n    added to the population table, so that a valid tree sequence can be returned.\n\n    The ``sequence_length`` parameter determines the\n    :attr:`TreeSequence.sequence_length` of the returned tree sequence. If it\n    is 0 or not specified, the value is taken to be the maximum right\n    coordinate of the input edges. This parameter is useful in degenerate\n    situations (such as when there are zero edges), but can usually be ignored.\n\n    The ``strict`` parameter controls the field delimiting algorithm that\n    is used. 
If ``strict`` is True (the default), we require exactly one\n    tab character separating each field. If ``strict`` is False, a more relaxed\n    whitespace delimiting algorithm is used, such that any run of whitespace\n    is regarded as a field separator. In most situations, ``strict=False``\n    is more convenient, but it can lead to errors in certain situations. For\n    example, if a deletion is encoded in the mutation table this will not\n    be parseable when ``strict=False``.\n\n    After parsing the tables, :meth:`TableCollection.sort` is called to ensure that\n    the loaded tables satisfy the tree sequence :ref:`ordering requirements\n    <sec_valid_tree_sequence_requirements>`. Note that this may result in the\n    IDs of various entities changing from their positions in the input file.\n\n    :param io.TextIOBase nodes: The file-like object containing text describing a\n        :class:`NodeTable`.\n    :param io.TextIOBase edges: The file-like object containing text\n        describing an :class:`EdgeTable`.\n    :param io.TextIOBase sites: The file-like object containing text describing a\n        :class:`SiteTable`.\n    :param io.TextIOBase mutations: The file-like object containing text\n        describing a :class:`MutationTable`.\n    :param io.TextIOBase individuals: The file-like object containing text\n        describing an :class:`IndividualTable`.\n    :param io.TextIOBase populations: The file-like object containing text\n        describing a :class:`PopulationTable`.\n    :param io.TextIOBase migrations: The file-like object containing text\n        describing a :class:`MigrationTable`.\n    :param float sequence_length: The sequence length of the returned tree sequence. If\n        not supplied or zero this will be inferred from the set of edges.\n    :param bool strict: If True, require strict tab delimiting (default). 
If\n        False, a relaxed whitespace splitting algorithm is used.\n    :param str encoding: Encoding used for text representation.\n    :param bool base64_metadata: If True, metadata is encoded using Base64\n        encoding; otherwise, as plain text.\n    :return: The tree sequence object containing the information\n        stored in the specified file paths.\n    :rtype: :class:`tskit.TreeSequence`\n    \"\"\"\n    # We need to parse the edges so we can figure out the sequence length, and\n    # TableCollection.sequence_length is immutable so we need to create a temporary\n    # edge table.\n    edge_table = parse_edges(edges, strict=strict)\n    if sequence_length == 0 and len(edge_table) > 0:\n        sequence_length = edge_table.right.max()\n    tc = tables.TableCollection(sequence_length)\n    tc.edges.set_columns(\n        left=edge_table.left,\n        right=edge_table.right,\n        parent=edge_table.parent,\n        child=edge_table.child,\n    )\n    parse_nodes(\n        nodes,\n        strict=strict,\n        encoding=encoding,\n        base64_metadata=base64_metadata,\n        table=tc.nodes,\n    )\n    if sites is not None:\n        parse_sites(\n            sites,\n            strict=strict,\n            encoding=encoding,\n            base64_metadata=base64_metadata,\n            table=tc.sites,\n        )\n    if mutations is not None:\n        parse_mutations(\n            mutations,\n            strict=strict,\n            encoding=encoding,\n            base64_metadata=base64_metadata,\n            table=tc.mutations,\n        )\n    if individuals is not None:\n        parse_individuals(\n            individuals,\n            strict=strict,\n            encoding=encoding,\n            base64_metadata=base64_metadata,\n            table=tc.individuals,\n        )\n    if populations is None:\n        # As a convenience we add any populations referenced in the node table.\n        if len(tc.nodes) > 0:\n            max_population = 
tc.nodes.population.max()\n            if max_population != NULL:\n                for _ in range(max_population + 1):\n                    tc.populations.add_row()\n    else:\n        parse_populations(\n            populations,\n            strict=strict,\n            encoding=encoding,\n            base64_metadata=base64_metadata,\n            table=tc.populations,\n        )\n    if migrations is not None:\n        parse_migrations(\n            migrations,\n            strict=strict,\n            encoding=encoding,\n            base64_metadata=base64_metadata,\n            table=tc.migrations,\n        )\n    tc.sort()\n    return tc.tree_sequence()\n\n\nclass TreeIterator:\n    \"\"\"\n    Simple class providing forward and backward iteration over a tree sequence.\n    \"\"\"\n\n    def __init__(self, tree):\n        self.tree = tree\n        self.more_trees = True\n        self.forward = True\n\n    def __iter__(self):\n        return self\n\n    def __reversed__(self):\n        self.forward = False\n        return self\n\n    def __next__(self):\n        if self.forward:\n            self.more_trees = self.more_trees and self.tree.next()\n        else:\n            self.more_trees = self.more_trees and self.tree.prev()\n        if not self.more_trees:\n            raise StopIteration()\n        return self.tree\n\n    def __len__(self):\n        return self.tree.tree_sequence.num_trees\n\n\nclass SimpleContainerSequence:\n    \"\"\"\n    Simple wrapper to allow arrays of SimpleContainers (e.g. edges, nodes) that have a\n    function allowing access by index (e.g. 
ts.edge(i), ts.node(i)) to be treated as a\n    python sequence, allowing forward and reverse iteration.\n\n    To generate a sequence of items in a different order, the ``order`` parameter allows\n    an array of indexes to be passed in, such as returned from np.argsort or np.lexsort.\n    \"\"\"\n\n    def __init__(self, getter, length, order=None):\n        if order is None:\n            self.getter = getter\n        else:\n            self.getter = lambda index: getter(order[index])\n        self.length = length\n\n    def __len__(self):\n        return self.length\n\n    def __getitem__(self, index):\n        return self.getter(index)\n\n\n@dataclass(frozen=True)\nclass TableMetadataSchemas:\n    \"\"\"\n    Convenience class for returning the schemas of all the tables in a tree sequence.\n    \"\"\"\n\n    node: metadata_module.MetadataSchema = None\n    \"\"\"\n    The metadata schema of the node table.\n    \"\"\"\n\n    edge: metadata_module.MetadataSchema = None\n    \"\"\"\n    The metadata schema of the edge table.\n    \"\"\"\n\n    site: metadata_module.MetadataSchema = None\n    \"\"\"\n    The metadata schema of the site table.\n    \"\"\"\n\n    mutation: metadata_module.MetadataSchema = None\n    \"\"\"\n    The metadata schema of the mutation table.\n    \"\"\"\n\n    migration: metadata_module.MetadataSchema = None\n    \"\"\"\n    The metadata schema of the migration table.\n    \"\"\"\n\n    individual: metadata_module.MetadataSchema = None\n    \"\"\"\n    The metadata schema of the individual table.\n    \"\"\"\n\n    population: metadata_module.MetadataSchema = None\n    \"\"\"\n    The metadata schema of the population table.\n    \"\"\"\n\n\nclass TreeSequence:\n    \"\"\"\n    A single tree sequence, as defined by the :ref:`data model <sec_data_model>`.\n    A TreeSequence instance can be created from a set of\n    :ref:`tables <sec_table_definitions>` using\n    :meth:`TableCollection.tree_sequence`, or loaded from a set of text files\n 
   using :func:`tskit.load_text`, or loaded from a native binary file using\n    :func:`tskit.load`.\n\n    TreeSequences are immutable. To change the data held in a particular\n    tree sequence, first get the table information as a :class:`TableCollection`\n    instance (using :meth:`.dump_tables`), edit those tables using the\n    :ref:`tables api <sec_tables_api>`, and create a new tree sequence using\n    :meth:`TableCollection.tree_sequence`.\n\n    The :meth:`.trees` method iterates over all trees in a tree sequence, and\n    the :meth:`.variants` method iterates over all sites and their genotypes.\n    \"\"\"\n\n    def __init__(self, ll_tree_sequence):\n        self._ll_tree_sequence = ll_tree_sequence\n        self._immutable_tables = None\n        metadata_schema_strings = self._ll_tree_sequence.get_table_metadata_schemas()\n        metadata_schema_instances = {\n            name: metadata_module.parse_metadata_schema(\n                getattr(metadata_schema_strings, name)\n            )\n            for name in vars(TableMetadataSchemas)\n            if not name.startswith(\"_\")\n        }\n        self._table_metadata_schemas = TableMetadataSchemas(**metadata_schema_instances)\n        self._individuals_time = None\n        self._individuals_population = None\n        self._individuals_location = None\n        self._individuals_nodes = None\n        self._mutations_edge = None\n        self._mutations_inherited_state = None\n        self._sites_ancestral_state = None\n        self._mutations_derived_state = None\n        # NOTE: when we've implemented read-only access via the underlying\n        # tables we can replace these arrays with reference to the read-only\n        # tables here (and remove the low-level boilerplate).\n        llts = self._ll_tree_sequence\n        self._individuals_flags = llts.individuals_flags\n        self._individuals_metadata = llts.individuals_metadata\n        self._nodes_time = llts.nodes_time\n        
self._nodes_flags = llts.nodes_flags\n        self._nodes_population = llts.nodes_population\n        self._nodes_individual = llts.nodes_individual\n        self._nodes_metadata = llts.nodes_metadata\n        self._edges_left = llts.edges_left\n        self._edges_right = llts.edges_right\n        self._edges_parent = llts.edges_parent\n        self._edges_child = llts.edges_child\n        self._edges_metadata = llts.edges_metadata\n        self._sites_position = llts.sites_position\n        self._sites_metadata = llts.sites_metadata\n        self._mutations_site = llts.mutations_site\n        self._mutations_node = llts.mutations_node\n        self._mutations_parent = llts.mutations_parent\n        self._mutations_time = llts.mutations_time\n        self._mutations_metadata = llts.mutations_metadata\n        self._migrations_left = llts.migrations_left\n        self._migrations_right = llts.migrations_right\n        self._migrations_node = llts.migrations_node\n        self._migrations_source = llts.migrations_source\n        self._migrations_dest = llts.migrations_dest\n        self._migrations_time = llts.migrations_time\n        self._migrations_metadata = llts.migrations_metadata\n        self._populations_metadata = llts.populations_metadata\n        self._indexes_edge_insertion_order = llts.indexes_edge_insertion_order\n        self._indexes_edge_removal_order = llts.indexes_edge_removal_order\n\n    # Implement the pickle protocol for TreeSequence\n    def __getstate__(self):\n        return self.dump_tables()\n\n    def __setstate__(self, tc):\n        self.__init__(tc.tree_sequence().ll_tree_sequence)\n\n    def __eq__(self, other):\n        return self.tables == other.tables\n\n    def equals(\n        self,\n        other,\n        *,\n        ignore_metadata=False,\n        ignore_ts_metadata=False,\n        ignore_provenance=False,\n        ignore_timestamps=False,\n        ignore_tables=False,\n        ignore_reference_sequence=False,\n    ):\n      
  \"\"\"\n        Returns True if  `self` and `other` are equal. Uses the underlying table\n        equality, see :meth:`TableCollection.equals` for details and options.\n        \"\"\"\n        return self.tables.equals(\n            other.tables,\n            ignore_metadata=ignore_metadata,\n            ignore_ts_metadata=ignore_ts_metadata,\n            ignore_provenance=ignore_provenance,\n            ignore_timestamps=ignore_timestamps,\n            ignore_tables=ignore_tables,\n            ignore_reference_sequence=ignore_reference_sequence,\n        )\n\n    @property\n    def ll_tree_sequence(self):\n        return self.get_ll_tree_sequence()\n\n    def get_ll_tree_sequence(self):\n        return self._ll_tree_sequence\n\n    def aslist(self, **kwargs):\n        \"\"\"\n        Returns the trees in this tree sequence as a list. Each tree is\n        represented by a different instance of :class:`Tree`. As such, this\n        method is inefficient and may use a large amount of memory, and should\n        not be used when performance is a consideration. The :meth:`.trees`\n        method is the recommended way to efficiently iterate over the trees\n        in a tree sequence.\n\n        :param \\\\**kwargs: Further arguments used as parameters when constructing the\n            returned trees. 
For example ``ts.aslist(sample_lists=True)`` will result\n            in a list of :class:`Tree` instances created with ``sample_lists=True``.\n        :return: A list of the trees in this tree sequence.\n        :rtype: list\n        \"\"\"\n        return [tree.copy() for tree in self.trees(**kwargs)]\n\n    @classmethod\n    def load(cls, file_or_path, *, skip_tables=False, skip_reference_sequence=False):\n        file, local_file = util.convert_file_like_to_open_file(file_or_path, \"rb\")\n        try:\n            ts = _tskit.TreeSequence()\n            ts.load(\n                file,\n                skip_tables=skip_tables,\n                skip_reference_sequence=skip_reference_sequence,\n            )\n            return TreeSequence(ts)\n        except tskit.FileFormatError as e:\n            util.raise_known_file_format_errors(file, e)\n        finally:\n            if local_file:\n                file.close()\n\n    @classmethod\n    def load_tables(cls, tables, *, build_indexes=False):\n        ts = _tskit.TreeSequence()\n        ts.load_tables(tables._ll_tables, build_indexes=build_indexes)\n        return TreeSequence(ts)\n\n    def dump(self, file_or_path, zlib_compression=False):\n        \"\"\"\n        Writes the tree sequence to the specified path or file object.\n\n        :param str file_or_path: The file object or path to write the TreeSequence to.\n        :param bool zlib_compression: This parameter is deprecated and ignored.\n        \"\"\"\n        if zlib_compression:\n            # Note: the msprime CLI before version 1.0 uses this option, so we need\n            # to keep it indefinitely.\n            warnings.warn(\n                \"The zlib_compression option is no longer supported and is ignored\",\n                RuntimeWarning,\n                stacklevel=4,\n            )\n        file, local_file = util.convert_file_like_to_open_file(file_or_path, \"wb\")\n        try:\n            self._ll_tree_sequence.dump(file)\n        
finally:\n            if local_file:\n                file.close()\n\n    @property\n    def reference_sequence(self):\n        \"\"\"\n        The :class:`.ReferenceSequence` associated with this :class:`.TreeSequence`\n        if one is defined (see :meth:`.TreeSequence.has_reference_sequence`),\n        or None otherwise.\n        \"\"\"\n        if self.has_reference_sequence():\n            return tables.ReferenceSequence(self._ll_tree_sequence.reference_sequence)\n        return None\n\n    def has_reference_sequence(self):\n        \"\"\"\n        Returns True if this :class:`.TreeSequence` has an associated\n        :ref:`reference sequence<sec_data_model_reference_sequence>`.\n        \"\"\"\n        return bool(self._ll_tree_sequence.has_reference_sequence())\n\n    @property\n    def tables_dict(self):\n        \"\"\"\n        Returns a dictionary mapping names to tables in the\n        underlying :class:`.TableCollection`. Equivalent to calling\n        ``ts.tables.table_name_map``.\n        \"\"\"\n        return self.tables.table_name_map\n\n    @property\n    def tables(self):\n        \"\"\"\n        Returns an immutable view of the tables underlying this tree sequence.\n\n        This view shares the same data as the TreeSequence (zero-copy).\n        Use :meth:`.dump_tables` for a modifiable copy.\n\n        Note that if tskit was built with Numpy 1, this method acts as\n        :meth:`.dump_tables` and returns a mutable TableCollection.\n\n        :return: An immutable view of the TableCollection underlying this tree sequence.\n        \"\"\"\n        if not _tskit.HAS_NUMPY_2:\n            warnings.warn(\n                \"Immutable table views require tskit to be built against NumPy 2.0 or \"\n                \"newer. 
Falling back to returning a mutable TableCollection.\",\n                UserWarning,\n                stacklevel=2,\n            )\n            return self.dump_tables()\n        if self._immutable_tables is None:\n            self._immutable_tables = tables.ImmutableTableCollection(\n                self._ll_tree_sequence\n            )\n        return self._immutable_tables\n\n    @property\n    def nbytes(self):\n        \"\"\"\n        Returns the total number of bytes required to store the data\n        in this tree sequence. Note that this may not be equal to\n        the actual memory footprint.\n        \"\"\"\n        return self.tables.nbytes\n\n    def dump_tables(self):\n        \"\"\"\n        Returns a modifiable copy of the :class:`tables<TableCollection>` defining\n        this tree sequence.\n\n        :return: A :class:`TableCollection` containing all tables underlying\n            the tree sequence.\n        :rtype: TableCollection\n        \"\"\"\n        ll_tables = _tskit.TableCollection(self.sequence_length)\n        self._ll_tree_sequence.dump_tables(ll_tables)\n        return tables.TableCollection(ll_tables=ll_tables)\n\n    def link_ancestors(self, samples, ancestors):\n        \"\"\"\n        Equivalent to :meth:`TableCollection.link_ancestors`; see that method for full\n        documentation and parameter semantics.\n\n        :param list[int] samples: Node IDs to retain as samples.\n        :param list[int] ancestors: Node IDs to treat as ancestors.\n        :return: An :class:`tables.EdgeTable` containing the genealogical links between\n            the supplied ``samples`` and ``ancestors``.\n        :rtype: tables.EdgeTable\n        \"\"\"\n        samples = util.safe_np_int_cast(samples, np.int32)\n        ancestors = util.safe_np_int_cast(ancestors, np.int32)\n        ll_edge_table = self._ll_tree_sequence.link_ancestors(samples, ancestors)\n        return tables.EdgeTable(ll_table=ll_edge_table)\n\n    def dump_text(\n        
self,\n        nodes=None,\n        edges=None,\n        sites=None,\n        mutations=None,\n        individuals=None,\n        populations=None,\n        migrations=None,\n        provenances=None,\n        precision=6,\n        encoding=\"utf8\",\n        base64_metadata=True,\n    ):\n        \"\"\"\n        Writes a text representation of the tables underlying the tree sequence\n        to the specified connections.\n\n        If Base64 encoding is not used, then metadata will be saved directly, possibly\n        resulting in errors reading the tables back in if metadata includes whitespace.\n\n        :param io.TextIOBase nodes: The file-like object (having a .write() method) to\n            write the NodeTable to.\n        :param io.TextIOBase edges: The file-like object to write the EdgeTable to.\n        :param io.TextIOBase sites: The file-like object to write the SiteTable to.\n        :param io.TextIOBase mutations: The file-like object to write the\n            MutationTable to.\n        :param io.TextIOBase individuals: The file-like object to write the\n            IndividualTable to.\n        :param io.TextIOBase populations: The file-like object to write the\n            PopulationTable to.\n        :param io.TextIOBase migrations: The file-like object to write the\n            MigrationTable to.\n        :param io.TextIOBase provenances: The file-like object to write the\n            ProvenanceTable to.\n        :param int precision: The number of digits of precision.\n        :param str encoding: Encoding used for text representation.\n        :param bool base64_metadata: Only used if a schema is not present on each table\n            being dumped. 
If True, metadata is encoded using Base64\n            encoding; otherwise, as plain text.\n        \"\"\"\n        text_formats.dump_text(\n            self,\n            nodes=nodes,\n            edges=edges,\n            sites=sites,\n            mutations=mutations,\n            individuals=individuals,\n            populations=populations,\n            migrations=migrations,\n            provenances=provenances,\n            precision=precision,\n            encoding=encoding,\n            base64_metadata=base64_metadata,\n        )\n\n    def __str__(self):\n        \"\"\"\n        Return a plain text summary of the contents of a tree sequence\n        \"\"\"\n        ts_rows = [\n            [\"Trees\", util.format_number(self.num_trees, sep=\",\")],\n            [\"Sequence Length\", util.format_number(self.sequence_length, sep=\",\")],\n            [\"Time Units\", self.time_units],\n            [\"Sample Nodes\", util.format_number(self.num_samples, sep=\",\")],\n            [\"Total Size\", util.naturalsize(self.nbytes)],\n        ]\n        header = [\"Table\", \"Rows\", \"Size\", \"Has Metadata\"]\n        table_rows = []\n        for name, table in self.tables.table_name_map.items():\n            table_rows.append(\n                [\n                    name.capitalize(),\n                    f\"{util.format_number(table.num_rows, sep=',')}\",\n                    util.naturalsize(table.nbytes),\n                    (\n                        \"Yes\"\n                        if hasattr(table, \"metadata\") and len(table.metadata) > 0\n                        else \"No\"\n                    ),\n                ]\n            )\n        return util.unicode_table(ts_rows, title=\"TreeSequence\") + util.unicode_table(\n            table_rows, header=header\n        )\n\n    def _repr_html_(self):\n        \"\"\"\n        Return an html summary of a tree sequence. 
Called by jupyter notebooks\n        to render a TreeSequence.\n        \"\"\"\n        return util.tree_sequence_html(self)\n\n    # num_samples was originally called sample_size, and so we must keep sample_size\n    # around as a deprecated alias.\n    @property\n    def num_samples(self):\n        \"\"\"\n        Returns the number of sample nodes in this tree sequence. This is also the\n        number of sample nodes in each tree.\n\n        :return: The number of sample nodes in this tree sequence.\n        :rtype: int\n        \"\"\"\n        return self._ll_tree_sequence.get_num_samples()\n\n    @property\n    def table_metadata_schemas(self) -> TableMetadataSchemas:\n        \"\"\"\n        The set of metadata schemas for the tables in this tree sequence.\n        \"\"\"\n        return self._table_metadata_schemas\n\n    @property\n    def sample_size(self):\n        # Deprecated alias for num_samples\n        return self.num_samples\n\n    def get_sample_size(self):\n        # Deprecated alias for num_samples\n        return self.num_samples\n\n    @property\n    def file_uuid(self):\n        return self._ll_tree_sequence.get_file_uuid()\n\n    @property\n    def discrete_genome(self):\n        \"\"\"\n        Returns True if all genome coordinates in this TreeSequence are\n        discrete integer values. This is true iff all the following are true:\n\n        - The sequence length is discrete\n        - All site positions are discrete\n        - All left and right edge coordinates are discrete\n        - All migration left and right coordinates are discrete\n\n        :return: True if this TreeSequence uses discrete genome coordinates.\n        :rtype: bool\n        \"\"\"\n        return bool(self._ll_tree_sequence.get_discrete_genome())\n\n    @property\n    def discrete_time(self):\n        \"\"\"\n        Returns True if all time coordinates in this TreeSequence are\n        discrete integer values. 
This is true iff all the following are true:\n\n        - All node times are discrete\n        - All mutation times are discrete\n        - All migration times are discrete\n\n        Note that ``tskit.UNKNOWN_TIME`` counts as discrete.\n\n        :return: True if this TreeSequence uses discrete time coordinates.\n        :rtype: bool\n        \"\"\"\n        return bool(self._ll_tree_sequence.get_discrete_time())\n\n    @property\n    def min_time(self):\n        \"\"\"\n        Returns the min time in this tree sequence. This is the minimum\n        of the node times and mutation times.\n\n        Note that mutation times with the value ``tskit.UNKNOWN_TIME``\n        are ignored.\n\n        :return: The min time of the nodes and mutations in this tree sequence.\n        :rtype: float\n        \"\"\"\n        return self._ll_tree_sequence.get_min_time()\n\n    @property\n    def max_time(self):\n        \"\"\"\n        Returns the max time in this tree sequence. This is the maximum\n        of the node times and mutation times.\n\n        Note that mutation times with the value ``tskit.UNKNOWN_TIME``\n        are ignored.\n\n        :return: The max time of the nodes and mutations in this tree sequence.\n        :rtype: float\n        \"\"\"\n        return self._ll_tree_sequence.get_max_time()\n\n    @property\n    def sequence_length(self):\n        \"\"\"\n        Returns the sequence length in this tree sequence. This defines the\n        genomic scale over which tree coordinates are defined. Given a\n        tree sequence with a sequence length :math:`L`, the constituent\n        trees will be defined over the half-closed interval\n        :math:`[0, L)`. 
Each tree then covers some subset of this\n        interval --- see :attr:`tskit.Tree.interval` for details.\n\n        :return: The length of the sequence in this tree sequence in bases.\n        :rtype: float\n        \"\"\"\n        return self.get_sequence_length()\n\n    def get_sequence_length(self):\n        return self._ll_tree_sequence.get_sequence_length()\n\n    @property\n    def metadata(self) -> Any:\n        \"\"\"\n        The decoded metadata for this TreeSequence.\n        \"\"\"\n        return self.metadata_schema.decode_row(self._ll_tree_sequence.get_metadata())\n\n    @property\n    def metadata_schema(self) -> metadata_module.MetadataSchema:\n        \"\"\"\n        The :class:`tskit.MetadataSchema` for this TreeSequence.\n        \"\"\"\n        return metadata_module.parse_metadata_schema(\n            self._ll_tree_sequence.get_metadata_schema()\n        )\n\n    @property\n    def time_units(self) -> str:\n        \"\"\"\n        String describing the units of the time dimension for this TreeSequence.\n        \"\"\"\n        return self._ll_tree_sequence.get_time_units()\n\n    @property\n    def num_edges(self):\n        \"\"\"\n        Returns the number of :ref:`edges <sec_edge_table_definition>` in this\n        tree sequence.\n\n        :return: The number of edges in this tree sequence.\n        :rtype: int\n        \"\"\"\n        return self._ll_tree_sequence.get_num_edges()\n\n    def get_num_trees(self):\n        # Deprecated alias for self.num_trees\n        return self.num_trees\n\n    @property\n    def num_trees(self):\n        \"\"\"\n        Returns the number of distinct trees in this tree sequence. 
This\n        is equal to the number of trees returned by the :meth:`.trees`\n        method.\n\n        :return: The number of trees in this tree sequence.\n        :rtype: int\n        \"\"\"\n        return self._ll_tree_sequence.get_num_trees()\n\n    def get_num_sites(self):\n        # Deprecated alias for self.num_sites\n        return self._ll_tree_sequence.get_num_sites()\n\n    @property\n    def num_sites(self):\n        \"\"\"\n        Returns the number of :ref:`sites <sec_site_table_definition>` in\n        this tree sequence.\n\n        :return: The number of sites in this tree sequence.\n        :rtype: int\n        \"\"\"\n        return self.get_num_sites()\n\n    def get_num_mutations(self):\n        # Deprecated alias for self.num_mutations\n        return self.num_mutations\n\n    @property\n    def num_mutations(self):\n        \"\"\"\n        Returns the number of :ref:`mutations <sec_mutation_table_definition>`\n        in this tree sequence.\n\n        :return: The number of mutations in this tree sequence.\n        :rtype: int\n        \"\"\"\n        return self._ll_tree_sequence.get_num_mutations()\n\n    def get_num_nodes(self):\n        # Deprecated alias for self.num_nodes\n        return self.num_nodes\n\n    @property\n    def num_individuals(self):\n        \"\"\"\n        Returns the number of :ref:`individuals <sec_individual_table_definition>` in\n        this tree sequence.\n\n        :return: The number of individuals in this tree sequence.\n        :rtype: int\n        \"\"\"\n        return self._ll_tree_sequence.get_num_individuals()\n\n    @property\n    def num_nodes(self):\n        \"\"\"\n        Returns the number of :ref:`nodes <sec_node_table_definition>` in\n        this tree sequence.\n\n        :return: The number of nodes in this tree sequence.\n        :rtype: int\n        \"\"\"\n        return self._ll_tree_sequence.get_num_nodes()\n\n    @property\n    def num_provenances(self):\n        \"\"\"\n        
Returns the number of :ref:`provenances <sec_provenance_table_definition>`\n        in this tree sequence.\n\n        :return: The number of provenances in this tree sequence.\n        :rtype: int\n        \"\"\"\n        return self._ll_tree_sequence.get_num_provenances()\n\n    @property\n    def num_populations(self):\n        \"\"\"\n        Returns the number of :ref:`populations <sec_population_table_definition>`\n        in this tree sequence.\n\n        :return: The number of populations in this tree sequence.\n        :rtype: int\n        \"\"\"\n        return self._ll_tree_sequence.get_num_populations()\n\n    @property\n    def num_migrations(self):\n        \"\"\"\n        Returns the number of :ref:`migrations <sec_migration_table_definition>`\n        in this tree sequence.\n\n        :return: The number of migrations in this tree sequence.\n        :rtype: int\n        \"\"\"\n        return self._ll_tree_sequence.get_num_migrations()\n\n    @property\n    def max_root_time(self):\n        \"\"\"\n        Returns the time of the oldest root in any of the trees in this tree sequence.\n        This is usually equal to ``np.max(ts.tables.nodes.time)`` but may not be\n        since there can be non-sample nodes that are not present in any tree. 
Note that\n        isolated samples are also defined as roots (so there can be a max_root_time\n        even in a tree sequence with no edges).\n\n        :return: The maximum time of a root in this tree sequence.\n        :rtype: float\n        :raises ValueError: If there are no samples in the tree, and hence no roots (as\n            roots are defined by the ends of the upward paths from the set of samples).\n        \"\"\"\n        if self.num_samples == 0:\n            raise ValueError(\n                \"max_root_time is not defined in a tree sequence with 0 samples\"\n            )\n        ret = max(self.nodes_time[u] for u in self.samples())\n        if self.num_edges > 0:\n            # Edges are guaranteed to be listed in parent-time order, so we can get the\n            # last one to get the oldest root\n            edge = self.edge(self.num_edges - 1)\n            # However, we can have situations where there is a sample older than a\n            # 'proper' root\n            ret = max(ret, self.nodes_time[edge.parent])\n        return ret\n\n    def migrations(self):\n        \"\"\"\n        Returns an iterable sequence of all the\n        :ref:`migrations <sec_migration_table_definition>` in this tree sequence.\n\n        Migrations are returned in nondecreasing order of the ``time`` value.\n\n        :return: An iterable sequence of all migrations.\n        :rtype: Sequence(:class:`.Migration`)\n        \"\"\"\n        return SimpleContainerSequence(self.migration, self.num_migrations)\n\n    def individuals(self):\n        \"\"\"\n        Returns an iterable sequence of all the\n        :ref:`individuals <sec_individual_table_definition>` in this tree sequence.\n\n        :return: An iterable sequence of all individuals.\n        :rtype: Sequence(:class:`.Individual`)\n        \"\"\"\n        return SimpleContainerSequence(self.individual, self.num_individuals)\n\n    def nodes(self, *, order=None):\n        \"\"\"\n        Returns an iterable 
sequence of all the :ref:`nodes <sec_node_table_definition>`\n        in this tree sequence.\n\n        .. note::\n            Although node ids are commonly ordered by node time, this is not a\n            formal tree sequence requirement. If you wish to iterate over nodes in\n            time order, you should therefore use ``order=\"timeasc\"`` (and wrap the\n            resulting sequence in the standard Python :func:`python:reversed` function\n            if you wish to iterate over older nodes before younger ones)\n\n        :param str order: The order in which the nodes should be returned: must be\n            one of \"id\" (default) or \"timeasc\" (ascending order of time, then by\n            ascending node id, matching the first two ordering requirements of\n            parent nodes in a :meth:`sorted <TableCollection.sort>` edge table).\n        :return: An iterable sequence of all nodes.\n        :rtype: Sequence(:class:`.Node`)\n        \"\"\"\n        order = \"id\" if order is None else order\n        if order not in [\"id\", \"timeasc\"]:\n            raise ValueError('order must be \"id\" or \"timeasc\"')\n        odr = None\n        if order == \"timeasc\":\n            odr = np.lexsort((np.arange(self.num_nodes), self.nodes_time))\n        return SimpleContainerSequence(self.node, self.num_nodes, order=odr)\n\n    def edges(self):\n        \"\"\"\n        Returns an iterable sequence of all the :ref:`edges <sec_edge_table_definition>`\n        in this tree sequence. Edges are returned in the order required\n        for a :ref:`valid tree sequence <sec_valid_tree_sequence_requirements>`. 
So,\n        edges are guaranteed to be ordered such that (a) all parents with a\n        given ID are contiguous; (b) edges are returned in non-decreasing\n        order of parent time ago; (c) within the edges for a given parent, edges\n        are sorted first by child ID and then by left coordinate.\n\n        :return: An iterable sequence of all edges.\n        :rtype: Sequence(:class:`.Edge`)\n        \"\"\"\n        return SimpleContainerSequence(self.edge, self.num_edges)\n\n    def edgesets(self):\n        # TODO the order that these records are returned in is not well specified.\n        # Hopefully this does not matter, and we can just state that the ordering\n        # should not be depended on.\n        children = collections.defaultdict(set)\n        active_edgesets = {}\n        for (left, right), edges_out, edges_in in self.edge_diffs():\n            # Complete and return any edgesets that are affected by this tree\n            # transition\n            parents = iter(edge.parent for edge in itertools.chain(edges_out, edges_in))\n            for parent in parents:\n                if parent in active_edgesets:\n                    edgeset = active_edgesets.pop(parent)\n                    edgeset.right = left\n                    edgeset.children = sorted(children[parent])\n                    yield edgeset\n            for edge in edges_out:\n                children[edge.parent].remove(edge.child)\n            for edge in edges_in:\n                children[edge.parent].add(edge.child)\n            # Update the active edgesets\n            for edge in itertools.chain(edges_out, edges_in):\n                if len(children[edge.parent]) > 0 and edge.parent not in active_edgesets:\n                    active_edgesets[edge.parent] = Edgeset(left, right, edge.parent, [])\n\n        for parent in active_edgesets.keys():\n            edgeset = active_edgesets[parent]\n            edgeset.right = self.sequence_length\n            edgeset.children = 
sorted(children[edgeset.parent])\n            yield edgeset\n\n    def _edge_diffs_forward(self, include_terminal=False):\n        metadata_decoder = self.table_metadata_schemas.edge.decode_row\n        edge_left = self.edges_left\n        edge_right = self.edges_right\n        sequence_length = self.sequence_length\n        in_order = self.indexes_edge_insertion_order\n        out_order = self.indexes_edge_removal_order\n        M = self.num_edges\n        j = 0\n        k = 0\n        left = 0.0\n        while j < M or left < sequence_length:\n            edges_out = []\n            edges_in = []\n            while k < M and edge_right[out_order[k]] == left:\n                edges_out.append(\n                    Edge(\n                        *self._ll_tree_sequence.get_edge(out_order[k]),\n                        id=out_order[k],\n                        metadata_decoder=metadata_decoder,\n                    )\n                )\n                k += 1\n            while j < M and edge_left[in_order[j]] == left:\n                edges_in.append(\n                    Edge(\n                        *self._ll_tree_sequence.get_edge(in_order[j]),\n                        id=in_order[j],\n                        metadata_decoder=metadata_decoder,\n                    )\n                )\n                j += 1\n            right = sequence_length\n            if j < M:\n                right = min(right, edge_left[in_order[j]])\n            if k < M:\n                right = min(right, edge_right[out_order[k]])\n            yield EdgeDiff(Interval(left, right), edges_out, edges_in)\n            left = right\n\n        if include_terminal:\n            edges_out = []\n            while k < M:\n                edges_out.append(\n                    Edge(\n                        *self._ll_tree_sequence.get_edge(out_order[k]),\n                        id=out_order[k],\n                        metadata_decoder=metadata_decoder,\n                    )\n                
)\n                k += 1\n            yield EdgeDiff(Interval(left, right), edges_out, [])\n\n    def _edge_diffs_reverse(self, include_terminal=False):\n        metadata_decoder = self.table_metadata_schemas.edge.decode_row\n        edge_left = self.edges_left\n        edge_right = self.edges_right\n        sequence_length = self.sequence_length\n        in_order = self.indexes_edge_removal_order\n        out_order = self.indexes_edge_insertion_order\n        M = self.num_edges\n        j = M - 1\n        k = M - 1\n        right = sequence_length\n        while j >= 0 or right > 0:\n            edges_out = []\n            edges_in = []\n            while k >= 0 and edge_left[out_order[k]] == right:\n                edges_out.append(\n                    Edge(\n                        *self._ll_tree_sequence.get_edge(out_order[k]),\n                        id=out_order[k],\n                        metadata_decoder=metadata_decoder,\n                    )\n                )\n                k -= 1\n            while j >= 0 and edge_right[in_order[j]] == right:\n                edges_in.append(\n                    Edge(\n                        *self._ll_tree_sequence.get_edge(in_order[j]),\n                        id=in_order[j],\n                        metadata_decoder=metadata_decoder,\n                    )\n                )\n                j -= 1\n            left = 0\n            if j >= 0:\n                left = max(left, edge_right[in_order[j]])\n            if k >= 0:\n                left = max(left, edge_left[out_order[k]])\n            yield EdgeDiff(Interval(left, right), edges_out, edges_in)\n            right = left\n\n        if include_terminal:\n            edges_out = []\n            while k >= 0:\n                edges_out.append(\n                    Edge(\n                        *self._ll_tree_sequence.get_edge(out_order[k]),\n                        id=out_order[k],\n                        metadata_decoder=metadata_decoder,\n           
         )\n                )\n                k -= 1\n            yield EdgeDiff(Interval(left, right), edges_out, [])\n\n    def edge_diffs(self, include_terminal=False, *, direction=tskit.FORWARD):\n        \"\"\"\n        Returns an iterator over all the :ref:`edges <sec_edge_table_definition>` that\n        are inserted and removed to build the trees as we move from left-to-right along\n        the tree sequence. Each iteration yields a named tuple consisting of 3 values,\n        ``(interval, edges_out, edges_in)``. The first value, ``interval``, is the\n        genomic interval ``(left, right)`` covered by the incoming tree\n        (see :attr:`Tree.interval`). The second, ``edges_out`` is a list of the edges\n        that were just-removed to create the tree covering the interval\n        (hence ``edges_out`` will always be empty for the first tree). The last value,\n        ``edges_in``, is a list of edges that were just\n        inserted to construct the tree covering the current interval.\n\n        The edges returned within each ``edges_in`` list are ordered by ascending\n        time of the parent node, then ascending parent id, then ascending child id.\n        The edges within each ``edges_out`` list are the reverse order (e.g.\n        descending parent time, parent id, then child_id). This means that within\n        each list, edges with the same parent appear consecutively.\n\n        The ``direction`` argument can be used to control whether diffs are produced\n        in the forward (left-to-right, increasing genome coordinate value)\n        or reverse (right-to-left, decreasing genome coordinate value) direction.\n\n        :param bool include_terminal: If False (default), the iterator terminates\n            after the final interval in the tree sequence (i.e., it does not\n            report a final removal of all remaining edges), and the number\n            of iterations will be equal to the number of trees in the tree\n            sequence. 
If True, an additional iteration takes place, with the last\n            ``edges_out`` value reporting all the edges contained in the final\n            tree (with both ``left`` and ``right`` equal to the sequence length).\n        :param int direction: The direction of travel along the sequence for\n            diffs. Must be one of :data:`.FORWARD` or :data:`.REVERSE`.\n            (Default: :data:`.FORWARD`).\n        :return: An iterator over the (interval, edges_out, edges_in) tuples. This\n            is a named tuple, so the 3 values can be accessed by position\n            (e.g. ``returned_tuple[0]``) or name (e.g. ``returned_tuple.interval``).\n        :rtype: :class:`collections.abc.Iterable`\n        \"\"\"\n        if direction == _tskit.FORWARD:\n            return self._edge_diffs_forward(include_terminal=include_terminal)\n        elif direction == _tskit.REVERSE:\n            return self._edge_diffs_reverse(include_terminal=include_terminal)\n        else:\n            raise ValueError(\"direction must be either tskit.FORWARD or tskit.REVERSE\")\n\n    def sites(self):\n        \"\"\"\n        Returns an iterable sequence of all the :ref:`sites <sec_site_table_definition>`\n        in this tree sequence. Sites are returned in order of increasing ID\n        (and also position). 
See the :class:`Site` class for details on\n        the available fields for each site.\n\n        :return: An iterable sequence of all sites.\n        :rtype: Sequence(:class:`.Site`)\n        \"\"\"\n        return SimpleContainerSequence(self.site, self.num_sites)\n\n    def mutations(self):\n        \"\"\"\n        Returns an iterator over all the\n        :ref:`mutations <sec_mutation_table_definition>` in this tree sequence.\n        Mutations are returned in order of nondecreasing site ID.\n        See the :class:`Mutation` class for details on the available fields for\n        each mutation.\n\n        The returned iterator is equivalent to iterating over all sites\n        and all mutations in each site, i.e.::\n\n            for site in tree_sequence.sites():\n                for mutation in site.mutations:\n                    yield mutation\n\n        :return: An iterator over all mutations in this tree sequence.\n        :rtype: iter(:class:`Mutation`)\n        \"\"\"\n        for site in self.sites():\n            yield from site.mutations\n\n    def populations(self):\n        \"\"\"\n        Returns an iterable sequence of all the\n        :ref:`populations <sec_population_table_definition>` in this tree sequence.\n\n        :return: An iterable sequence of all populations.\n        :rtype: Sequence(:class:`.Population`)\n        \"\"\"\n        return SimpleContainerSequence(self.population, self.num_populations)\n\n    def provenances(self):\n        \"\"\"\n        Returns an iterable sequence of all the\n        :ref:`provenances <sec_provenance_table_definition>` in this tree sequence.\n\n        :return: An iterable sequence of all provenances.\n        :rtype: Sequence(:class:`.Provenance`)\n        \"\"\"\n        return SimpleContainerSequence(self.provenance, self.num_provenances)\n\n    def breakpoints(self, as_array=False):\n        \"\"\"\n        Returns the breakpoints that separate trees along the chromosome, including the\n        
two extreme points 0 and L. This is equivalent to::\n\n            iter([0] + [t.interval.right for t in self.trees()])\n\n        By default we return an iterator over the breakpoints as Python float objects;\n        if ``as_array`` is True we return them as a numpy array.\n\n        Note that the ``as_array`` form will be more efficient and convenient in most\n        cases; the default iterator behaviour is mainly kept to ensure compatibility\n        with existing code.\n\n        :param bool as_array: If True, return the breakpoints as a numpy array.\n        :return: The breakpoints defined by the tree intervals along the sequence.\n        :rtype: collections.abc.Iterable or numpy.ndarray\n        \"\"\"\n        breakpoints = self.ll_tree_sequence.get_breakpoints()\n        if not as_array:\n            # Convert to Python floats for backward compatibility.\n            breakpoints = map(float, breakpoints)\n        return breakpoints\n\n    def at(self, position, **kwargs):\n        \"\"\"\n        Returns the tree covering the specified genomic location. The returned tree\n        will have ``tree.interval.left`` <= ``position`` < ``tree.interval.right``.\n        See also :meth:`Tree.seek`.\n\n        .. include:: substitutions/linear_traversal_warning.rst\n\n        :param float position: A genomic location.\n        :param \\\\**kwargs: Further arguments used as parameters when constructing the\n            returned :class:`Tree`. For example ``ts.at(2.5, sample_lists=True)`` will\n            result in a :class:`Tree` created with ``sample_lists=True``.\n        :return: A new instance of :class:`Tree` positioned to cover the specified\n            genomic location.\n        :rtype: Tree\n        \"\"\"\n        tree = Tree(self, **kwargs)\n        tree.seek(position)\n        return tree\n\n    def at_index(self, index, **kwargs):\n        \"\"\"\n        Returns the tree at the specified index. See also :meth:`Tree.seek_index`.\n\n        .. 
include:: substitutions/linear_traversal_warning.rst\n\n        :param int index: The index of the required tree.\n        :param \\\\**kwargs: Further arguments used as parameters when constructing the\n            returned :class:`Tree`. For example ``ts.at_index(4, sample_lists=True)``\n            will result in a :class:`Tree` created with ``sample_lists=True``.\n        :return: A new instance of :class:`Tree` positioned at the specified index.\n        :rtype: Tree\n        \"\"\"\n        tree = Tree(self, **kwargs)\n        tree.seek_index(index)\n        return tree\n\n    def first(self, **kwargs):\n        \"\"\"\n        Returns the first tree in this :class:`TreeSequence`. To iterate over all\n        trees in the sequence, use the :meth:`.trees` method.\n\n        :param \\\\**kwargs: Further arguments used as parameters when constructing the\n            returned :class:`Tree`. For example ``ts.first(sample_lists=True)`` will\n            result in a :class:`Tree` created with ``sample_lists=True``.\n        :return: The first tree in this tree sequence.\n        :rtype: :class:`Tree`.\n        \"\"\"\n        tree = Tree(self, **kwargs)\n        tree.first()\n        return tree\n\n    def last(self, **kwargs):\n        \"\"\"\n        Returns the last tree in this :class:`TreeSequence`. To iterate over all\n        trees in the sequence, use the :meth:`.trees` method.\n\n        :param \\\\**kwargs: Further arguments used as parameters when constructing the\n            returned :class:`Tree`. 
For example ``ts.last(sample_lists=True)`` will\n            result in a :class:`Tree` created with ``sample_lists=True``.\n        :return: The last tree in this tree sequence.\n        :rtype: :class:`Tree`.\n        \"\"\"\n        tree = Tree(self, **kwargs)\n        tree.last()\n        return tree\n\n    def trees(\n        self,\n        tracked_samples=None,\n        *,\n        sample_lists=False,\n        root_threshold=1,\n        sample_counts=None,\n        tracked_leaves=None,\n        leaf_counts=None,\n        leaf_lists=None,\n    ):\n        \"\"\"\n        Returns an iterator over the trees in this tree sequence. Each value\n        returned in this iterator is an instance of :class:`Tree`. Upon\n        successful termination of the iterator, the tree will be in the\n        \"cleared\" null state.\n\n        The ``sample_lists`` and ``tracked_samples`` parameters are passed\n        to the :class:`Tree` constructor, and control\n        the options that are set in the returned tree instance.\n\n        .. warning:: Do not store the results of this iterator in a list!\n           For performance reasons, the same underlying object is used\n           for every tree returned which will most likely lead to unexpected\n           behaviour. If you wish to obtain a list of trees in a tree sequence\n           please use ``ts.aslist()`` instead.\n\n        :param list tracked_samples: The list of samples to be tracked and\n            counted using the :meth:`Tree.num_tracked_samples` method.\n        :param bool sample_lists: If True, provide more efficient access\n            to the samples beneath a given node using the\n            :meth:`Tree.samples` method.\n        :param int root_threshold: The minimum number of samples that a node\n            must be ancestral to for it to be in the list of roots. By default\n            this is 1, so that isolated samples (representing missing data)\n            are roots. 
To efficiently restrict the roots of the tree to\n            those subtending meaningful topology, set this to 2. This value\n            is only relevant when trees have multiple roots.\n        :param bool sample_counts: Deprecated since 0.2.4.\n        :return: An iterator over the Trees in this tree sequence.\n        :rtype: collections.abc.Iterable, :class:`Tree`\n        \"\"\"\n        # tracked_leaves, leaf_counts and leaf_lists are deprecated aliases\n        # for tracked_samples, sample_counts and sample_lists respectively.\n        # These are left over from an older version of the API when leaves\n        # and samples were synonymous.\n        if tracked_leaves is not None:\n            tracked_samples = tracked_leaves\n        if leaf_counts is not None:\n            sample_counts = leaf_counts\n        if leaf_lists is not None:\n            sample_lists = leaf_lists\n        tree = Tree(\n            self,\n            tracked_samples=tracked_samples,\n            sample_lists=sample_lists,\n            root_threshold=root_threshold,\n            sample_counts=sample_counts,\n        )\n        return TreeIterator(tree)\n\n    def coiterate(self, other, **kwargs):\n        \"\"\"\n        Returns an iterator over the pairs of trees for each distinct\n        interval in the specified pair of tree sequences.\n\n        :param TreeSequence other: The other tree sequence from which to take trees. The\n            sequence length must be the same as the current tree sequence.\n        :param \\\\**kwargs: Further named arguments that will be passed to the\n            :meth:`.trees` method when constructing the returned trees.\n\n        :return: An iterator returning successive tuples of the form\n            ``(interval, tree_self, tree_other)``. 
For example, the first item returned\n            will consist of an tuple of the initial interval, the first tree of the\n            current tree sequence, and the first tree of the ``other`` tree sequence;\n            the ``.left`` attribute of the initial interval will be 0 and the ``.right``\n            attribute will be the smallest non-zero breakpoint of the 2 tree sequences.\n        :rtype: iter(:class:`Interval`, :class:`Tree`, :class:`Tree`)\n\n        \"\"\"\n        if self.sequence_length != other.sequence_length:\n            raise ValueError(\"Tree sequences must be of equal sequence length.\")\n        L = self.sequence_length\n        trees1 = self.trees(**kwargs)\n        trees2 = other.trees(**kwargs)\n        tree1 = next(trees1)\n        tree2 = next(trees2)\n        right = 0\n        while right != L:\n            left = right\n            right = min(tree1.interval.right, tree2.interval.right)\n            yield Interval(left, right), tree1, tree2\n            # Advance\n            if tree1.interval.right == right:\n                tree1 = next(trees1, None)\n            if tree2.interval.right == right:\n                tree2 = next(trees2, None)\n\n    def _check_genomic_range(self, left, right, ensure_integer=False):\n        if left is None:\n            left = 0\n        if right is None:\n            right = self.sequence_length\n        if np.isnan(left) or left < 0 or left >= self.sequence_length:\n            raise ValueError(\n                \"`left` not between zero (inclusive) and sequence length (exclusive)\"\n            )\n        if np.isnan(right) or right <= 0 or right > self.sequence_length:\n            raise ValueError(\n                \"`right` not between zero (exclusive) and sequence length (inclusive)\"\n            )\n        if left >= right:\n            raise ValueError(\"`left` must be less than `right`\")\n        if ensure_integer:\n            if left != int(left) or right != int(right):\n               
 raise ValueError(\"`left` and `right` must be integers\")\n            return Interval(int(left), int(right))\n        return Interval(left, right)\n\n    def _haplotypes_array(\n        self,\n        *,\n        interval,\n        isolated_as_missing=None,\n        missing_data_character=None,\n        samples=None,\n    ):\n        # return an array of haplotypes and the first and last site positions\n        if missing_data_character is None:\n            missing_data_character = \"N\"\n\n        start_site, stop_site = np.searchsorted(self.sites_position, interval)\n        H = np.empty(\n            (\n                self.num_samples if samples is None else len(samples),\n                stop_site - start_site,\n            ),\n            dtype=np.int8,\n        )\n        missing_int8 = ord(missing_data_character.encode(\"ascii\"))\n        for var in self.variants(\n            samples=samples,\n            isolated_as_missing=isolated_as_missing,\n            left=interval.left,\n            right=interval.right,\n        ):\n            alleles = np.full(len(var.alleles), missing_int8, dtype=np.int8)\n            for i, allele in enumerate(var.alleles):\n                if allele is not None:\n                    if len(allele) != 1:\n                        raise TypeError(\n                            \"Multi-letter allele or deletion detected at site {}\".format(\n                                var.site.id\n                            )\n                        )\n                    try:\n                        ascii_allele = allele.encode(\"ascii\")\n                    except UnicodeEncodeError:\n                        raise TypeError(\n                            \"Non-ascii character in allele at site {}\".format(\n                                var.site.id\n                            )\n                        )\n                    allele_int8 = ord(ascii_allele)\n                    if allele_int8 == missing_int8:\n                      
  raise ValueError(\n                            \"The missing data character '{}' clashes with an \"\n                            \"existing allele at site {}\".format(\n                                missing_data_character, var.site.id\n                            )\n                        )\n                    alleles[i] = allele_int8\n            H[:, var.site.id - start_site] = alleles[var.genotypes]\n        return H, (start_site, stop_site - 1)\n\n    def haplotypes(\n        self,\n        *,\n        isolated_as_missing=None,\n        missing_data_character=None,\n        samples=None,\n        left=None,\n        right=None,\n        impute_missing_data=None,\n    ):\n        \"\"\"\n        Returns an iterator over the strings of haplotypes that result from\n        the trees and mutations in this tree sequence. Each haplotype string\n        is guaranteed to be of the same length. A tree sequence with\n        :math:`n` requested nodes (default: the number of sample nodes) and with\n        :math:`s` sites lying between ``left`` and ``right`` will return a total\n        of :math:`n` strings of :math:`s` alleles concatenated together, where an allele\n        consists of a single ascii character (tree sequences that include alleles\n        which are not a single character in length, or where the character is\n        non-ascii, will raise an error). The first string returned is the\n        haplotype for the first requested node, and so on.\n\n        The alleles at each site must be represented by single byte characters,\n        (i.e., variants must be single nucleotide polymorphisms, or SNPs), hence\n        the strings returned will all be of length :math:`s`. 
If the ``left``\n        position is less than or equal to the position of the first site, for a\n        haplotype ``h``, the value of ``h[j]`` will therefore be the observed\n        allelic state at site ``j``.\n\n        If ``isolated_as_missing`` is True (the default), isolated nodes without\n        mutations directly above them (whether samples or non-samples) will be treated as\n        :ref:`missing data<sec_data_model_missing_data>` and will be\n        represented in the string by the ``missing_data_character``. If\n        instead it is set to False, missing data will be assigned the ancestral state\n        (unless they have mutations directly above them, in which case they will take\n        the most recent derived mutational state for that node). This was the default\n        behaviour in versions prior to 0.2.0. Prior to 0.3.0 the `impute_missing_data`\n        argument controlled this behaviour.\n\n        It is also possible to provide **non-sample** nodes via the ``samples``\n        argument if you wish to output haplotypes for (e.g.) internal nodes.\n        See also the :meth:`.variants` iterator for site-centric access\n        to genotypes for the requested nodes.\n\n        .. warning::\n            For large datasets, this method can consume a **very large** amount of\n            memory! To output all the sample data, it is more efficient to iterate\n            over sites rather than over samples.\n\n        :return: An iterator over the haplotype strings for the samples in\n            this tree sequence.\n        :param bool isolated_as_missing: If True, the allele assigned to\n            missing samples (i.e., isolated samples without mutations) is\n            the ``missing_data_character``. 
If False,\n            missing samples will be assigned the ancestral state.\n            Default: True.\n        :param str missing_data_character: A single ascii character that will\n            be used to represent missing data.\n            If any normal allele contains this character, an error is raised.\n            Default: 'N'.\n        :param list[int] samples: The node IDs for which to output haplotypes. If\n            ``None`` (default), return haplotypes for all the sample nodes in the tree\n            sequence, in the order given by the :meth:`.samples` method. Non-sample\n            nodes may also be provided.\n        :param int left: Haplotype strings will start with the first site at or after\n            this genomic position. If ``None`` (default) start at the first site.\n        :param int right: Haplotype strings will end with the last site before this\n            position. If ``None`` (default) assume ``right`` is the sequence length\n            (i.e. the last character in the string will be the last site in the tree\n            sequence).\n        :param bool impute_missing_data:\n            *Deprecated in 0.3.0. Use ``isolated_as_missing``, but inverting value.\n            Will be removed in a future version*\n        :rtype: collections.abc.Iterable\n        :raises TypeError: if the ``missing_data_character`` or any of the alleles\n            at a site are not a single ascii character.\n        :raises ValueError: if the ``missing_data_character`` exists in one of the\n            alleles\n        \"\"\"\n        if impute_missing_data is not None:\n            warnings.warn(\n                \"The impute_missing_data parameter was deprecated in 0.3.0 and will\"\n                \" be removed. 
Use ``isolated_as_missing=False`` instead of\"\n                \"``impute_missing_data=True``.\",\n                FutureWarning,\n                stacklevel=4,\n            )\n        # Only use impute_missing_data if isolated_as_missing has the default value\n        if isolated_as_missing is None:\n            isolated_as_missing = not impute_missing_data\n        interval = self._check_genomic_range(left, right)\n        H, _ = self._haplotypes_array(\n            interval=interval,\n            isolated_as_missing=isolated_as_missing,\n            missing_data_character=missing_data_character,\n            samples=samples,\n        )\n        for h in H:\n            yield h.tobytes().decode(\"ascii\")\n\n    def variants(\n        self,\n        *,\n        samples=None,\n        isolated_as_missing=None,\n        alleles=None,\n        impute_missing_data=None,\n        copy=None,\n        left=None,\n        right=None,\n    ):\n        \"\"\"\n        Returns an iterator over the variants between the ``left`` (inclusive)\n        and ``right`` (exclusive) genomic positions in this tree sequence. Each\n        returned :class:`Variant` object has a site, a list of possible allelic\n        states and an array of genotypes for the specified ``samples``. The\n        ``genotypes`` value is a numpy array containing indexes into the\n        ``alleles`` list. By default, this list is generated automatically for\n        each site such that the first entry, ``alleles[0]``, is the ancestral\n        state and subsequent alleles are listed in no\n        particular order. This means that the encoding of alleles in\n        terms of genotype values can vary from site-to-site, which is\n        sometimes inconvenient. It is possible to specify a fixed mapping\n        from allele strings to genotype values using the ``alleles``\n        parameter. 
For example, if we set ``alleles=(\"A\", \"C\", \"G\", \"T\")``,\n        this will map allele \"A\" to 0, \"C\" to 1 and so on (the\n        :data:`ALLELES_ACGT` constant provides a shortcut for this\n        common mapping).\n\n        By default, genotypes are generated for all samples. The ``samples``\n        parameter allows us to specify the nodes for which genotypes are\n        generated; output order of genotypes in the returned variants\n        corresponds to the order of the samples in this list. It is also\n        possible to provide **non-sample** nodes as an argument here, if you\n        wish to generate genotypes for (e.g.) internal nodes. Missingness is\n        detected for any requested node (sample or non-sample) when\n        ``isolated_as_missing`` is True: if a node is isolated at a site (i.e.,\n        has no parent and no children in the marginal tree) and has no mutation\n        above it at that site, its genotype will be reported as\n        :data:`MISSING_DATA` (-1). If ``isolated_as_missing`` is False, such\n        nodes are assigned the site's ancestral allele index.\n\n        If isolated samples are present at a given site without mutations above them,\n        they are interpreted by default as\n        :ref:`missing data<sec_data_model_missing_data>`, and the genotypes array\n        will contain a special value :data:`MISSING_DATA` (-1) to identify them\n        while the ``alleles`` tuple will end with the value ``None`` (note that this\n        will be the case whether or not we specify a fixed mapping using the\n        ``alleles`` parameter; see the :class:`Variant` class for more details).\n        Alternatively, if ``isolated_as_missing`` is set to False, such isolated\n        samples will not be treated as missing, and instead assigned the ancestral\n        state (this was the default behaviour in versions prior to 0.2.0). 
Prior to\n        0.3.0 the `impute_missing_data` argument controlled this behaviour.\n\n        :param array_like samples: An array of node IDs for which to generate\n            genotypes, or None for all sample nodes. Default: None.\n        :param bool isolated_as_missing: If True, the genotype value assigned to\n            missing samples (i.e., isolated samples without mutations) is\n            :data:`.MISSING_DATA` (-1). If False, missing samples will be\n            assigned the allele index for the ancestral state.\n            Default: True.\n        :param tuple alleles: A tuple of strings defining the encoding of\n            alleles as integer genotype values. At least one allele must be provided.\n            If duplicate alleles are provided, output genotypes will always be\n            encoded as the first occurrence of the allele. If None (the default),\n            the alleles are encoded as they are encountered during genotype\n            generation.\n        :param bool impute_missing_data:\n            *Deprecated in 0.3.0. Use ``isolated_as_missing``, but inverting value.\n            Will be removed in a future version*\n        :param bool copy:\n            If False re-use the same Variant object for each site such that any\n            references held to it are overwritten when the next site is visited.\n            If True return a fresh :class:`Variant` for each site. Default: True.\n        :param int left: Start with the first site at or after\n            this genomic position. If ``None`` (default) start at the first site.\n        :param int right: End with the last site before this position. 
If ``None``\n            (default) assume ``right`` is the sequence length, so that the last\n            variant corresponds to the last site in the tree sequence.\n        :return: An iterator over all variants in this tree sequence.\n        :rtype: iter(:class:`Variant`)\n        \"\"\"\n        interval = self._check_genomic_range(left, right)\n        if impute_missing_data is not None:\n            warnings.warn(\n                \"The impute_missing_data parameter was deprecated in 0.3.0 and will\"\n                \" be removed. Use ``isolated_as_missing=False`` instead of\"\n                \"``impute_missing_data=True``.\",\n                FutureWarning,\n                stacklevel=4,\n            )\n        # Only use impute_missing_data if isolated_as_missing has the default value\n        if isolated_as_missing is None:\n            isolated_as_missing = not impute_missing_data\n        if copy is None:\n            copy = True\n        # See comments for the Variant type for discussion on why the\n        # present form was chosen.\n        variant = tskit.Variant(\n            self,\n            samples=samples,\n            isolated_as_missing=isolated_as_missing,\n            alleles=alleles,\n        )\n        if left == 0 and right == self.sequence_length:\n            start = 0\n            stop = self.num_sites\n        else:\n            start, stop = np.searchsorted(self.sites_position, interval)\n\n        if copy:\n            for site_id in range(start, stop):\n                variant.decode(site_id)\n                yield variant.copy()\n        else:\n            for site_id in range(start, stop):\n                variant.decode(site_id)\n                yield variant\n\n    def genotype_matrix(\n        self,\n        *,\n        samples=None,\n        isolated_as_missing=None,\n        alleles=None,\n        impute_missing_data=None,\n    ):\n        \"\"\"\n        Returns an :math:`m \\\\times n` numpy array of the genotypes in 
this\n        tree sequence, where :math:`m` is the number of sites and :math:`n`\n        is the number of requested nodes (default: the number of sample nodes).\n        The genotypes are the indexes into the array of ``alleles``, as\n        described for the :class:`Variant` class.\n\n        It is possible to provide **non-sample** nodes via the ``samples``\n        argument if you wish to generate genotypes for (e.g.) internal nodes.\n        Missingness is detected for any requested node (sample or non-sample)\n        when ``isolated_as_missing`` is True: if a node is isolated at a site\n        (i.e., has no parent and no children in the marginal tree) and has no\n        mutation above it at that site, its genotype will be reported as\n        :data:`MISSING_DATA` (-1).\n\n        Such nodes are treated as missing data by default. If\n        ``isolated_as_missing`` is set to False, they will not be treated as\n        missing, and will instead be assigned the ancestral state. This was the\n        default behaviour in versions prior to 0.2.0. Prior to 0.3.0 the\n        ``impute_missing_data`` argument controlled this behaviour.\n\n        .. warning::\n            This method can consume a **very large** amount of memory! If\n            all genotypes are not needed at once, it is usually better to\n            access them sequentially using the :meth:`.variants` iterator.\n\n        :param array_like samples: An array of node IDs for which to generate\n            genotypes. If ``None`` (default), generate genotypes for all sample\n            nodes. Non-sample nodes may also be provided, in which case genotypes\n            will be generated for those nodes too.\n        :param bool isolated_as_missing: If True, the genotype value assigned to\n            isolated nodes without mutations (samples or non-samples) is\n            :data:`.MISSING_DATA` (-1). 
If False, such nodes will be\n            assigned the allele index for the ancestral state.\n            Default: True.\n        :param tuple alleles: A tuple of strings describing the encoding of\n            alleles to genotype values. At least one allele must be provided.\n            If duplicate alleles are provided, output genotypes will always be\n            encoded as the first occurrence of the allele. If None (the default),\n            the alleles are encoded as they are encountered during genotype\n            generation.\n        :param bool impute_missing_data:\n            *Deprecated in 0.3.0. Use ``isolated_as_missing``, but inverting value.\n            Will be removed in a future version*\n\n        :return: The full matrix of genotypes.\n        :rtype: numpy.ndarray (dtype=np.int32)\n        \"\"\"\n        if impute_missing_data is not None:\n            warnings.warn(\n                \"The impute_missing_data parameter was deprecated in 0.3.0 and will\"\n                \" be removed. 
Use ``isolated_as_missing=False`` instead of\"\n                \"``impute_missing_data=True``.\",\n                FutureWarning,\n                stacklevel=4,\n            )\n        # Only use impute_missing_data if isolated_as_missing has the default value\n        if isolated_as_missing is None:\n            isolated_as_missing = not impute_missing_data\n\n        variant = tskit.Variant(\n            self,\n            samples=samples,\n            isolated_as_missing=isolated_as_missing,\n            alleles=alleles,\n        )\n\n        num_samples = self.num_samples if samples is None else len(samples)\n        ret = np.zeros(shape=(self.num_sites, num_samples), dtype=np.int32)\n\n        for site_id in range(self.num_sites):\n            variant.decode(site_id)\n            ret[site_id, :] = variant.genotypes\n\n        return ret\n\n    def alignments(\n        self,\n        *,\n        reference_sequence=None,\n        missing_data_character=None,\n        isolated_as_missing=None,\n        samples=None,\n        left=None,\n        right=None,\n    ):\n        \"\"\"\n        Returns an iterator over the full sequence alignments for the defined samples\n        in this tree sequence. Each yielded alignment ``a`` is a string of length\n        ``L`` where the first character is the genomic sequence at the ``left``\n        position in the genome (defaulting to 0) and the last character is the\n        genomic sequence one position before the ``right`` value (defaulting to the\n        :attr:`.sequence_length` of this tree sequence, which must have\n        :attr:`.discrete_genome` equal to True). By default ``L`` is therefore equal\n        to the :attr:`.sequence_length`, and ``a[j]`` is the nucleotide value at\n        genomic position ``j``.\n\n        .. note::\n            This is inherently a **zero-based** representation of the sequence\n            coordinate space. 
Care will be needed when interacting with other\n            libraries and upstream coordinate spaces.\n\n\n        The :ref:`sites<sec_data_model_definitions_site>` in a tree sequence will\n        usually only define the variation for a subset of the ``L`` nucleotide\n        positions along the genome, and the remaining positions are filled using\n        a :ref:`reference sequence <sec_data_model_reference_sequence>`.\n        The reference sequence data is defined either via the\n        ``reference_sequence`` parameter to this method, or embedded within\n        the tree sequence itself via the :attr:`.TreeSequence.reference_sequence`.\n\n        Site information from the tree sequence takes precedence over the reference\n        sequence so that, for example, at a site with no mutations all samples\n        will have the site's ancestral state.\n\n        The reference sequence bases are determined in the following way:\n\n        - If the ``reference_sequence`` parameter is supplied this will be\n          used, regardless of whether the tree sequence has an embedded\n          reference sequence.\n        - Otherwise, if the tree sequence has an embedded reference sequence,\n          this will be used.\n        - If the ``reference_sequence`` parameter is not specified and\n          there is no embedded reference sequence, ``L`` copies of the\n          ``missing_data_character`` (which defaults to 'N') are used\n          instead.\n\n        .. warning:: The :class:`.ReferenceSequence` API is preliminary and\n           some behaviours may change in the future. In particular, a\n           tree sequence is currently regarded as having an embedded reference\n           sequence even if it only has some metadata defined. In this case\n           the ``reference_sequence`` parameter will need to be explicitly set.\n\n        .. 
note::\n            Two common options for setting a reference sequence are:\n\n            - Mark them as missing data, by setting\n              ``reference_sequence=\"N\" * int(ts.sequence_length)``\n            - Fill the gaps with random nucleotides, by setting\n              ``reference_sequence=tskit.random_nucleotides(ts.sequence_length)``.\n              See the :func:`.random_nucleotides` function for more information.\n\n        .. warning:: Insertions and deletions are not currently supported and\n           the alleles at each site must be represented by\n           single byte characters, (i.e., variants must be single nucleotide\n           polymorphisms, or SNPs).\n\n        Missing data handling\n\n        - If ``isolated_as_missing=True`` (default), nodes that are isolated\n          (no parent and no children) are rendered as the missing character across\n          each tree interval. At site positions, the per-site allele overrides the\n          missing character; if a genotype is missing (``-1``), the missing\n          character is retained.\n        - If ``isolated_as_missing=False``, no missing overlay is applied. At sites,\n          genotypes are decoded as usual; at non-sites, bases come from the\n          reference sequence.\n\n        See also the :meth:`.variants` iterator for site-centric access\n        to sample genotypes and :meth:`.haplotypes` for access to sample sequences\n        at just the sites in the tree sequence.\n\n        :param str reference_sequence: The reference sequence to fill in\n            gaps between sites in the alignments. 
If provided, it must be a\n            string of length equal to :attr:`.sequence_length`; the sequence is\n            sliced internally to the requested ``[left, right)`` interval.\n        :param str missing_data_character: A single ascii character that will\n            be used to represent missing data.\n            If any normal allele contains this character, an error is raised.\n            Default: 'N'.\n        :param bool isolated_as_missing: If True, treat isolated nodes as missing\n            across the covered tree intervals (see above). If None (default), this\n            is treated as True.\n        :param list[int] samples: The nodes for which to output alignments. If\n            ``None`` (default), return alignments for all sample nodes in the order\n            given by the :meth:`.samples` method. Non-sample nodes are also supported\n            and will be decoded at sites in the same way as samples.\n        :param int left: Alignments will start at this genomic position. If ``None``\n            (default) alignments start at 0.\n        :param int right: Alignments will stop before this genomic position.\n            If ``None`` (default) alignments will continue until the end of the\n            tree sequence.\n        :return: An iterator over the alignment strings for specified samples in\n            this tree sequence, in the order given in ``samples``. 
Each string has\n            length ``L = right - left``.\n        :rtype: collections.abc.Iterable\n        :raises ValueError: if any genome coordinate in this tree sequence is not\n            discrete, or if the ``reference_sequence`` is not of the correct length.\n        :raises TypeError: if any of the alleles at a site are not a\n            single ascii character.\n        \"\"\"\n        if not self.discrete_genome:\n            raise ValueError(\"sequence alignments only defined for discrete genomes\")\n        interval = self._check_genomic_range(left, right, ensure_integer=True)\n        missing_data_character = (\n            \"N\" if missing_data_character is None else missing_data_character\n        )\n\n        if isolated_as_missing is None:\n            isolated_as_missing = True\n\n        if len(missing_data_character) != 1:\n            raise TypeError(\"missing_data_character must be a single character\")\n\n        # Determine the reference sequence for the whole tree sequence\n        full_ref = None\n        if reference_sequence is not None:\n            full_ref = reference_sequence\n        elif self.has_reference_sequence():\n            # This may be inefficient - see #1989. However, since we're\n            # making n copies of the reference sequence anyway, this is a relatively\n            # minor tweak. We may also want to recode the below not to use direct\n            # access to the .data attribute, e.g. 
if we allow reference sequences\n            # to start at non-zero positions\n            full_ref = self.reference_sequence.data\n\n        if full_ref is None:\n            full_ref = missing_data_character * int(self.sequence_length)\n        else:\n            if len(full_ref) != int(self.sequence_length):\n                raise ValueError(\n                    \"The reference sequence must be equal to the tree sequence length\"\n                )\n\n        try:\n            ref_bytes = full_ref.encode(\"ascii\")\n            missing_data_character.encode(\"ascii\")\n        except UnicodeEncodeError:\n            raise\n\n        sample_ids = self.samples() if samples is None else list(samples)\n\n        flat = self._ll_tree_sequence.decode_alignments(\n            ref_bytes,\n            sample_ids,\n            int(interval.left),\n            int(interval.right),\n            missing_data_character,\n            bool(isolated_as_missing),\n        )\n\n        span = int(interval.span)\n        for j in range(len(sample_ids)):\n            offset = j * span\n            yield flat[offset : offset + span].decode(\"ascii\")\n\n    @property\n    def individuals_population(self):\n        \"\"\"\n        Returns the length-``num_individuals`` array containing, for each\n        individual, the ``population`` attribute of their nodes, or\n        ``tskit.NULL`` for individuals with no nodes. 
Errors if any individual\n        has nodes with inconsistent non-NULL populations.\n        \"\"\"\n        if self._individuals_population is None:\n            self._individuals_population = (\n                self._ll_tree_sequence.get_individuals_population()\n            )\n        return self._individuals_population\n\n    @property\n    def individual_populations(self):\n        # Undocumented alias for individuals_population to avoid breaking\n        # pre-1.0 pyslim code\n        return self.individuals_population\n\n    @property\n    def individuals_time(self):\n        \"\"\"\n        Returns the length-``num_individuals`` array containing, for each\n        individual, the ``time`` attribute of their nodes or ``np.nan`` for\n        individuals with no nodes. Errors if any individual has nodes with\n        inconsistent times.\n        \"\"\"\n        if self._individuals_time is None:\n            self._individuals_time = self._ll_tree_sequence.get_individuals_time()\n        return self._individuals_time\n\n    @property\n    def individual_times(self):\n        # Undocumented alias for individuals_time to avoid breaking\n        # pre-1.0 pyslim code\n        return self.individuals_time\n\n    @property\n    def individuals_location(self):\n        \"\"\"\n        Convenience method returning the ``num_individuals x n`` array\n        whose k-th row contains the ``location`` property of the k-th\n        individual. 
The method only works if all individuals' locations\n        have the same length (which is ``n``), and errors otherwise.\n        \"\"\"\n        if self._individuals_location is None:\n            individuals = self.tables.individuals\n            n = 0\n            lens = np.unique(np.diff(individuals.location_offset))\n            if len(lens) > 1:\n                raise ValueError(\"Individual locations are not all the same length.\")\n            if len(lens) > 0:\n                n = lens[0]\n            self._individuals_location = individuals.location.reshape(\n                (self.num_individuals, n)\n            )\n        return self._individuals_location\n\n    @property\n    def individual_locations(self):\n        # Undocumented alias for individuals_location to avoid breaking\n        # pre-1.0 pyslim code\n        return self.individuals_location\n\n    @property\n    def individuals_flags(self):\n        \"\"\"\n        Efficient access to the bitwise ``flags`` column in the\n        :ref:`sec_individual_table_definition` as a numpy array (dtype=np.uint32).\n        Equivalent to ``ts.tables.individuals.flags`` (but avoiding the full copy\n        of the table data that accessing ``ts.tables`` currently entails).\n        \"\"\"\n        return self._individuals_flags\n\n    @property\n    def individuals_metadata(self):\n        \"\"\"\n        Efficient access to the ``metadata`` column in the\n        :ref:`sec_individual_table_definition` as a structured numpy array.\n        The returned dtype will depend on the metadata schema used. 
Only a subset\n        of `struct` metadata schemas are supported.\n        See :ref:`sec_structured_array_metadata` for more information.\n        \"\"\"\n        return self.table_metadata_schemas.individual.structured_array_from_buffer(\n            self._individuals_metadata\n        )\n\n    @property\n    def individuals_nodes(self):\n        \"\"\"\n        Return an array of node IDs for each individual in the tree sequence.\n\n        :return: Array of shape (num_individuals, max_ploidy) containing node IDs.\n            Values of -1 indicate unused slots for individuals with ploidy\n            less than the maximum.\n        :rtype: numpy.ndarray (dtype=np.int32)\n        \"\"\"\n        if self._individuals_nodes is None:\n            self._individuals_nodes = self._ll_tree_sequence.get_individuals_nodes()\n        return self._individuals_nodes\n\n    @property\n    def nodes_metadata(self):\n        \"\"\"\n        Efficient access to the ``metadata`` column in the\n        :ref:`sec_node_table_definition` as a structured numpy array.\n        The returned dtype will depend on the metadata schema used. 
Only a subset\n        of `struct` metadata schemas are supported.\n        See :ref:`sec_structured_array_metadata` for more information.\n        \"\"\"\n        return self.table_metadata_schemas.node.structured_array_from_buffer(\n            self._nodes_metadata\n        )\n\n    @property\n    def nodes_time(self):\n        \"\"\"\n        Efficient access to the ``time`` column in the\n        :ref:`sec_node_table_definition` as a numpy array (dtype=np.float64).\n        Equivalent to ``ts.tables.nodes.time`` (but avoiding the full copy\n        of the table data that accessing ``ts.tables`` currently entails).\n        \"\"\"\n        return self._nodes_time\n\n    @property\n    def nodes_flags(self):\n        \"\"\"\n        Efficient access to the bitwise ``flags`` column in the\n        :ref:`sec_node_table_definition` as a numpy array (dtype=np.uint32).\n        Equivalent to ``ts.tables.nodes.flags`` (but avoiding the full copy\n        of the table data that accessing ``ts.tables`` currently entails).\n        \"\"\"\n        return self._nodes_flags\n\n    @property\n    def nodes_population(self):\n        \"\"\"\n        Efficient access to the ``population`` column in the\n        :ref:`sec_node_table_definition` as a numpy array (dtype=np.int32).\n        Equivalent to ``ts.tables.nodes.population`` (but avoiding the full copy\n        of the table data that accessing ``ts.tables`` currently entails).\n        \"\"\"\n        return self._nodes_population\n\n    @property\n    def nodes_individual(self):\n        \"\"\"\n        Efficient access to the ``individual`` column in the\n        :ref:`sec_node_table_definition` as a numpy array (dtype=np.int32).\n        Equivalent to ``ts.tables.nodes.individual`` (but avoiding the full copy\n        of the table data that accessing ``ts.tables`` currently entails).\n        \"\"\"\n        return self._nodes_individual\n\n    @property\n    def edges_left(self):\n        \"\"\"\n        Efficient 
access to the ``left`` column in the\n        :ref:`sec_edge_table_definition` as a numpy array (dtype=np.float64).\n        Equivalent to ``ts.tables.edges.left`` (but avoiding the full copy\n        of the table data that accessing ``ts.tables`` currently entails).\n        \"\"\"\n        return self._edges_left\n\n    @property\n    def edges_right(self):\n        \"\"\"\n        Efficient access to the ``right`` column in the\n        :ref:`sec_edge_table_definition` as a numpy array (dtype=np.float64).\n        Equivalent to ``ts.tables.edges.right`` (but avoiding the full copy\n        of the table data that accessing ``ts.tables`` currently entails).\n        \"\"\"\n        return self._edges_right\n\n    @property\n    def edges_parent(self):\n        \"\"\"\n        Efficient access to the ``parent`` column in the\n        :ref:`sec_edge_table_definition` as a numpy array (dtype=np.int32).\n        Equivalent to ``ts.tables.edges.parent`` (but avoiding the full copy\n        of the table data that accessing ``ts.tables`` currently entails).\n        \"\"\"\n        return self._edges_parent\n\n    @property\n    def edges_child(self):\n        \"\"\"\n        Efficient access to the ``child`` column in the\n        :ref:`sec_edge_table_definition` as a numpy array (dtype=np.int32).\n        Equivalent to ``ts.tables.edges.child`` (but avoiding the full copy\n        of the table data that accessing ``ts.tables`` currently entails).\n        \"\"\"\n        return self._edges_child\n\n    @property\n    def edges_metadata(self):\n        \"\"\"\n        Efficient access to the ``metadata`` column in the\n        :ref:`sec_edge_table_definition` as a structured numpy array.\n        The returned dtype will depend on the metadata schema used. 
Only a subset\n        of `struct` metadata schemas are supported.\n        See :ref:`sec_structured_array_metadata` for more information.\n        \"\"\"\n        return self.table_metadata_schemas.edge.structured_array_from_buffer(\n            self._edges_metadata\n        )\n\n    @property\n    def sites_position(self):\n        \"\"\"\n        Efficient access to the ``position`` column in the\n        :ref:`sec_site_table_definition` as a numpy array (dtype=np.float64).\n        Equivalent to ``ts.tables.sites.position`` (but avoiding the full copy\n        of the table data that accessing ``ts.tables`` currently entails).\n        \"\"\"\n        return self._sites_position\n\n    @property\n    def sites_ancestral_state(self):\n        \"\"\"\n        The ``ancestral_state`` column in the\n        :ref:`sec_site_table_definition` as a numpy array (dtype=StringDtype).\n        \"\"\"\n        if not _tskit.HAS_NUMPY_2:\n            raise RuntimeError(\n                \"The sites_ancestral_state property requires numpy 2.0 or later.\"\n            )\n        if self._sites_ancestral_state is None:\n            self._sites_ancestral_state = (\n                self._ll_tree_sequence.sites_ancestral_state_string\n            )\n        return self._sites_ancestral_state\n\n    @property\n    def sites_metadata(self):\n        \"\"\"\n        Efficient access to the ``metadata`` column in the\n        :ref:`sec_site_table_definition` as a structured numpy array.\n        The returned dtype will depend on the metadata schema used. 
Only a subset\n        of `struct` metadata schemas are supported.\n        See :ref:`sec_structured_array_metadata` for more information.\n        \"\"\"\n        return self.table_metadata_schemas.site.structured_array_from_buffer(\n            self._sites_metadata\n        )\n\n    @property\n    def mutations_site(self):\n        \"\"\"\n        Efficient access to the ``site`` column in the\n        :ref:`sec_mutation_table_definition` as a numpy array (dtype=np.int32).\n        Equivalent to ``ts.tables.mutations.site`` (but avoiding the full copy\n        of the table data that accessing ``ts.tables`` currently entails).\n\n        .. note::\n            To efficiently get an array of the number of mutations per site, you\n            can use ``np.bincount(ts.mutations_site, minlength=ts.num_sites)``.\n        \"\"\"\n        return self._mutations_site\n\n    @property\n    def mutations_node(self):\n        \"\"\"\n        Efficient access to the ``node`` column in the\n        :ref:`sec_mutation_table_definition` as a numpy array (dtype=np.int32).\n        Equivalent to ``ts.tables.mutations.node`` (but avoiding the full copy\n        of the table data that accessing ``ts.tables`` currently entails).\n        \"\"\"\n        return self._mutations_node\n\n    @property\n    def mutations_parent(self):\n        \"\"\"\n        Efficient access to the ``parent`` column in the\n        :ref:`sec_mutation_table_definition` as a numpy array (dtype=np.int32).\n        Equivalent to ``ts.tables.mutations.parent`` (but avoiding the full copy\n        of the table data that accessing ``ts.tables`` currently entails).\n        \"\"\"\n        return self._mutations_parent\n\n    @property\n    def mutations_time(self):\n        \"\"\"\n        Efficient access to the ``time`` column in the\n        :ref:`sec_mutation_table_definition` as a numpy array (dtype=np.float64).\n        Equivalent to ``ts.tables.mutations.time`` (but avoiding the full copy\n        of the 
table data that accessing ``ts.tables`` currently entails).\n        \"\"\"\n        return self._mutations_time\n\n    @property\n    def mutations_derived_state(self):\n        \"\"\"\n        Access to the ``derived_state`` column in the\n        :ref:`sec_mutation_table_definition` as a numpy array (dtype=StringDtype).\n        \"\"\"\n        if not _tskit.HAS_NUMPY_2:\n            raise RuntimeError(\n                \"The mutations_derived_state property requires numpy 2.0 or later.\"\n            )\n        if self._mutations_derived_state is None:\n            self._mutations_derived_state = (\n                self._ll_tree_sequence.mutations_derived_state_string\n            )\n        return self._mutations_derived_state\n\n    @property\n    def mutations_metadata(self):\n        \"\"\"\n        Efficient access to the ``metadata`` column in the\n        :ref:`sec_mutation_table_definition` as a structured numpy array.\n        The returned dtype will depend on the metadata schema used. 
Only a subset\n        of `struct` metadata schemas are supported.\n        See :ref:`sec_structured_array_metadata` for more information.\n        \"\"\"\n        return self.table_metadata_schemas.mutation.structured_array_from_buffer(\n            self._mutations_metadata\n        )\n\n    @property\n    def mutations_edge(self):\n        \"\"\"\n        Return an array of the ID of the edge each mutation sits on in the tree sequence.\n\n        :return: Array of shape (num_mutations,) containing edge IDs.\n        :rtype: numpy.ndarray (dtype=np.int32)\n        \"\"\"\n        if self._mutations_edge is None:\n            self._mutations_edge = self._ll_tree_sequence.get_mutations_edge()\n        return self._mutations_edge\n\n    @property\n    def mutations_inherited_state(self):\n        \"\"\"\n        Return an array of the inherited state for each mutation in the tree sequence.\n\n        The inherited state for a mutation is the state that existed at the site\n        before the mutation occurred. 
This is either the ancestral state of the site\n        (if the mutation has no parent) or the derived state of the mutation's\n        parent mutation (if it has a parent).\n\n        :return: Array of shape (num_mutations,) containing inherited states.\n        :rtype: numpy.ndarray\n        \"\"\"\n        if not _tskit.HAS_NUMPY_2:\n            raise RuntimeError(\n                \"The mutations_inherited_state property requires numpy 2.0 or later.\"\n            )\n        if self._mutations_inherited_state is None:\n            self._mutations_inherited_state = (\n                self._ll_tree_sequence.mutations_inherited_state_string\n            )\n        return self._mutations_inherited_state\n\n    @property\n    def migrations_left(self):\n        \"\"\"\n        Efficient access to the ``left`` column in the\n        :ref:`sec_migration_table_definition` as a numpy array (dtype=np.float64).\n        Equivalent to ``ts.tables.migrations.left`` (but avoiding the full copy\n        of the table data that accessing ``ts.tables`` currently entails).\n        \"\"\"\n        return self._migrations_left\n\n    @property\n    def migrations_right(self):\n        \"\"\"\n        Efficient access to the ``right`` column in the\n        :ref:`sec_migration_table_definition` as a numpy array (dtype=np.float64).\n        Equivalent to ``ts.tables.migrations.right`` (but avoiding the full copy\n        of the table data that accessing ``ts.tables`` currently entails).\n        \"\"\"\n        return self._migrations_right\n\n    @property\n    def migrations_node(self):\n        \"\"\"\n        Efficient access to the ``node`` column in the\n        :ref:`sec_migration_table_definition` as a numpy array (dtype=np.int32).\n        Equivalent to ``ts.tables.migrations.node`` (but avoiding the full copy\n        of the table data that accessing ``ts.tables`` currently entails).\n        \"\"\"\n        return self._migrations_node\n\n    @property\n    def 
migrations_source(self):\n        \"\"\"\n        Efficient access to the ``source`` column in the\n        :ref:`sec_migration_table_definition` as a numpy array (dtype=np.int32).\n        Equivalent to ``ts.tables.migrations.source`` (but avoiding the full copy\n        of the table data that accessing ``ts.tables`` currently entails).\n        \"\"\"\n        return self._migrations_source\n\n    @property\n    def migrations_dest(self):\n        \"\"\"\n        Efficient access to the ``dest`` column in the\n        :ref:`sec_migration_table_definition` as a numpy array (dtype=np.int32).\n        Equivalent to ``ts.tables.migrations.dest`` (but avoiding the full copy\n        of the table data that accessing ``ts.tables`` currently entails).\n        \"\"\"\n        return self._migrations_dest\n\n    @property\n    def migrations_time(self):\n        \"\"\"\n        Efficient access to the ``time`` column in the\n        :ref:`sec_migration_table_definition` as a numpy array (dtype=np.float64).\n        Equivalent to ``ts.tables.migrations.time`` (but avoiding the full copy\n        of the table data that accessing ``ts.tables`` currently entails).\n        \"\"\"\n        return self._migrations_time\n\n    @property\n    def migrations_metadata(self):\n        \"\"\"\n        Efficient access to the ``metadata`` column in the\n        :ref:`sec_migration_table_definition` as a structured numpy array.\n        The returned dtype will depend on the metadata schema used. 
Only a subset\n        of `struct` metadata schemas are supported.\n        See :ref:`sec_structured_array_metadata` for more information.\n        \"\"\"\n        return self.table_metadata_schemas.migration.structured_array_from_buffer(\n            self._migrations_metadata\n        )\n\n    @property\n    def populations_metadata(self):\n        \"\"\"\n        Efficient access to the ``metadata`` column in the\n        :ref:`sec_population_table_definition` as a structured numpy array.\n        The returned dtype will depend on the metadata schema used. Only a subset\n        of `struct` metadata schemas are supported.\n        See :ref:`sec_structured_array_metadata` for more information.\n        \"\"\"\n        return self.table_metadata_schemas.population.structured_array_from_buffer(\n            self._populations_metadata\n        )\n\n    @property\n    def indexes_edge_insertion_order(self):\n        \"\"\"\n        Efficient access to the ``edge_insertion_order`` column in the\n        :ref:`sec_table_indexes` as a numpy array (dtype=np.int32).\n        Equivalent to ``ts.tables.indexes.edge_insertion_order`` (but avoiding\n        the full copy of the table data that accessing ``ts.tables``\n        currently entails).\n        \"\"\"\n        return self._indexes_edge_insertion_order\n\n    @property\n    def indexes_edge_removal_order(self):\n        \"\"\"\n        Efficient access to the ``edge_removal_order`` column in the\n        :ref:`sec_table_indexes` as a numpy array (dtype=np.int32).\n        Equivalent to ``ts.tables.indexes.edge_removal_order`` (but avoiding\n        the full copy of the table data that accessing ``ts.tables``\n        currently entails).\n        \"\"\"\n        return self._indexes_edge_removal_order\n\n    def individual(self, id_):\n        \"\"\"\n        Returns the :ref:`individual <sec_individual_table_definition>`\n        in this tree sequence with the specified ID.  
As with python lists, negative\n        IDs can be used to index backwards from the last individual.\n\n        :rtype: :class:`Individual`\n        \"\"\"\n        id_ = self.check_index(id_, self.num_individuals)\n        (\n            flags,\n            location,\n            parents,\n            metadata,\n            nodes,\n        ) = self._ll_tree_sequence.get_individual(id_)\n        ind = Individual(\n            id=id_,\n            flags=flags,\n            location=location,\n            parents=parents,\n            metadata=metadata,\n            nodes=nodes,\n            metadata_decoder=self.table_metadata_schemas.individual.decode_row,\n            tree_sequence=self,\n        )\n        return ind\n\n    def node(self, id_):\n        \"\"\"\n        Returns the :ref:`node <sec_node_table_definition>` in this tree sequence\n        with the specified ID. As with python lists, negative IDs can be used to\n        index backwards from the last node.\n\n        :rtype: :class:`Node`\n        \"\"\"\n        id_ = self.check_index(id_, self.num_nodes)\n        (\n            flags,\n            time,\n            population,\n            individual,\n            metadata,\n        ) = self._ll_tree_sequence.get_node(id_)\n        return Node(\n            id=id_,\n            flags=flags,\n            time=time,\n            population=population,\n            individual=individual,\n            metadata=metadata,\n            metadata_decoder=self.table_metadata_schemas.node.decode_row,\n        )\n\n    @staticmethod\n    def check_index(index, length):\n        if not isinstance(index, numbers.Integral):\n            raise TypeError(\n                f\"Index must be of integer type, not '{type(index).__name__}'\"\n            )\n        if index < 0:\n            index += length\n        if index < 0 or index >= length:\n            raise IndexError(\"Index out of bounds\")\n        return index\n\n    def edge(self, id_):\n        \"\"\"\n     
   Returns the :ref:`edge <sec_edge_table_definition>` in this tree sequence\n        with the specified ID. As with python lists, negative IDs can be used to\n        index backwards from the last edge.\n\n        :rtype: :class:`Edge`\n        \"\"\"\n        id_ = self.check_index(id_, self.num_edges)\n        left, right, parent, child, metadata = self._ll_tree_sequence.get_edge(id_)\n        return Edge(\n            id=id_,\n            left=left,\n            right=right,\n            parent=parent,\n            child=child,\n            metadata=metadata,\n            metadata_decoder=self.table_metadata_schemas.edge.decode_row,\n        )\n\n    def migration(self, id_):\n        \"\"\"\n        Returns the :ref:`migration <sec_migration_table_definition>` in this tree\n        sequence with the specified ID. As with python lists, negative IDs can be\n        used to index backwards from the last migration.\n\n        :rtype: :class:`.Migration`\n        \"\"\"\n        id_ = self.check_index(id_, self.num_migrations)\n        (\n            left,\n            right,\n            node,\n            source,\n            dest,\n            time,\n            metadata,\n        ) = self._ll_tree_sequence.get_migration(id_)\n        return Migration(\n            id=id_,\n            left=left,\n            right=right,\n            node=node,\n            source=source,\n            dest=dest,\n            time=time,\n            metadata=metadata,\n            metadata_decoder=self.table_metadata_schemas.migration.decode_row,\n        )\n\n    def mutation(self, id_):\n        \"\"\"\n        Returns the :ref:`mutation <sec_mutation_table_definition>` in this tree sequence\n        with the specified ID. 
As with python lists, negative IDs can be used to\n        index backwards from the last mutation.\n\n        :rtype: :class:`Mutation`\n        \"\"\"\n        id_ = self.check_index(id_, self.num_mutations)\n        (\n            site,\n            node,\n            derived_state,\n            parent,\n            metadata,\n            time,\n            edge,\n            inherited_state,\n        ) = self._ll_tree_sequence.get_mutation(id_)\n        return Mutation(\n            id=id_,\n            site=site,\n            node=node,\n            derived_state=derived_state,\n            parent=parent,\n            metadata=metadata,\n            time=time,\n            edge=edge,\n            inherited_state=inherited_state,\n            metadata_decoder=self.table_metadata_schemas.mutation.decode_row,\n        )\n\n    def site(self, id_=None, *, position=None):\n        \"\"\"\n        Returns the :ref:`site <sec_site_table_definition>` in this tree sequence\n        with either the specified ID or position. 
As with python lists, negative IDs\n        can be used to index backwards from the last site.\n\n        When position is specified instead of site ID, a binary search is done\n        on the list of positions of the sites to try to find a site\n        with the user-specified position.\n\n        :rtype: :class:`Site`\n        \"\"\"\n        if id_ is None and position is None:\n            raise TypeError(\"Site id or position must be provided.\")\n        elif id_ is not None and position is not None:\n            raise TypeError(\"Only one of site id or position needs to be provided.\")\n        elif id_ is None:\n            position = np.array(position)\n            if len(position.shape) > 0:\n                raise ValueError(\"Position must be provided as a scalar value.\")\n            if position < 0 or position >= self.sequence_length:\n                raise ValueError(\n                    \"Position is beyond the coordinates defined by sequence length.\"\n                )\n            site_pos = self.sites_position\n            id_ = site_pos.searchsorted(position)\n            if id_ >= len(site_pos) or site_pos[id_] != position:\n                raise ValueError(f\"There is no site at position {position}.\")\n        else:\n            id_ = self.check_index(id_, self.num_sites)\n        ll_site = self._ll_tree_sequence.get_site(id_)\n        pos, ancestral_state, ll_mutations, _, metadata = ll_site\n        mutations = [self.mutation(mut_id) for mut_id in ll_mutations]\n        return Site(\n            id=id_,\n            position=pos,\n            ancestral_state=ancestral_state,\n            mutations=mutations,\n            metadata=metadata,\n            metadata_decoder=self.table_metadata_schemas.site.decode_row,\n        )\n\n    def population(self, id_):\n        \"\"\"\n        Returns the :ref:`population <sec_population_table_definition>`\n        in this tree sequence with the specified ID.  
As with python lists, negative\n        IDs can be used to index backwards from the last population.\n\n        :rtype: :class:`Population`\n        \"\"\"\n        id_ = self.check_index(id_, self.num_populations)\n        (metadata,) = self._ll_tree_sequence.get_population(id_)\n        return Population(\n            id=id_,\n            metadata=metadata,\n            metadata_decoder=self.table_metadata_schemas.population.decode_row,\n        )\n\n    def provenance(self, id_):\n        \"\"\"\n        Returns the :ref:`provenance <sec_provenance_table_definition>`\n        in this tree sequence with the specified ID.  As with python lists,\n        negative IDs can be used to index backwards from the last provenance.\n        \"\"\"\n        id_ = self.check_index(id_, self.num_provenances)\n        timestamp, record = self._ll_tree_sequence.get_provenance(id_)\n        return Provenance(id=id_, timestamp=timestamp, record=record)\n\n    def get_samples(self, population_id=None):\n        # Deprecated alias for samples()\n        return self.samples(population_id)\n\n    def samples(self, population=None, *, population_id=None, time=None):\n        \"\"\"\n        Returns an array of the sample node IDs in this tree sequence. If\n        `population` is specified, only return sample IDs from that population.\n        It is also possible to restrict samples by time using the parameter\n        `time`. If `time` is a numeric value, only return sample IDs whose node\n        time is approximately equal to the specified time. If `time` is a pair\n        of values of the form `(min_time, max_time)`, only return sample IDs\n        whose node time `t` is in this interval such that `min_time <= t < max_time`.\n\n        :param int population: The population of interest. 
If None, do not\n            filter samples by population.\n        :param int population_id: Deprecated alias for ``population``.\n        :param float,tuple time: The time or time interval of interest. If\n            None, do not filter samples by time.\n        :return: A numpy array of the node IDs for the samples of interest,\n            listed in numerical order.\n        :rtype: numpy.ndarray (dtype=np.int32)\n        \"\"\"\n        if population is not None and population_id is not None:\n            raise ValueError(\n                \"population_id and population are aliases. Cannot specify both\"\n            )\n        if population_id is not None:\n            population = population_id\n        samples = self._ll_tree_sequence.get_samples()\n        keep = np.full(shape=samples.shape, fill_value=True)\n        if population is not None:\n            if not isinstance(population, numbers.Integral):\n                raise ValueError(\"`population` must be an integer ID\")\n            population = int(population)\n            sample_population = self.nodes_population[samples]\n            keep = np.logical_and(keep, sample_population == population)\n        if time is not None:\n            # ndmin is set so that scalars are converted into 1d arrays\n            time = np.array(time, ndmin=1, dtype=float)\n            sample_times = self.nodes_time[samples]\n            if time.shape == (1,):\n                keep = np.logical_and(keep, np.isclose(sample_times, time))\n            elif time.shape == (2,):\n                if time[1] <= time[0]:\n                    raise ValueError(\"time_interval max is less than or equal to min.\")\n                keep = np.logical_and(keep, sample_times >= time[0])\n                keep = np.logical_and(keep, sample_times < time[1])\n            else:\n                raise ValueError(\n                    \"time must be either a single value or a pair of values \"\n                    \"(min_time, max_time).\"\n 
               )\n        return samples[keep]\n\n    def as_vcf(self, *args, **kwargs):\n        \"\"\"\n        Return the result of :meth:`.write_vcf` as a string.\n        Keyword parameters are as defined in :meth:`.write_vcf`.\n\n        :return: A VCF encoding of the variants in this tree sequence as a string.\n        :rtype: str\n        \"\"\"\n        buff = io.StringIO()\n        self.write_vcf(buff, *args, **kwargs)\n        return buff.getvalue()\n\n    def write_vcf(\n        self,\n        output,\n        ploidy=None,\n        *,\n        contig_id=\"1\",\n        individuals=None,\n        individual_names=None,\n        position_transform=None,\n        site_mask=None,\n        sample_mask=None,\n        isolated_as_missing=None,\n        allow_position_zero=None,\n        include_non_sample_nodes=None,\n    ):\n        \"\"\"\n        Convert the genetic variation data in this tree sequence to Variant\n        Call Format and write to the specified file-like object.\n\n        .. seealso:: See the :ref:`sec_export_vcf` section for examples\n            and explanations of how we map VCF to the tskit data model.\n\n        Multiploid samples in the output VCF are generated either using\n        individual information in the data model (see\n        :ref:`sec_individual_table_definition`), or by combining genotypes for\n        adjacent sample nodes using the ``ploidy`` argument. See the\n        :ref:`sec_export_vcf_constructing_gt` section for more details\n        and examples.\n\n        If individuals are defined in the\n        data model (see :ref:`sec_individual_table_definition`), the genotypes\n        for each of the individual's nodes are combined into phased\n        multiploid values at each site. By default, all individuals are\n        included with their sample nodes, individuals with no nodes are\n        omitted. 
The ``include_non_sample_nodes`` argument can be used to\n        include non-sample nodes in the output VCF.\n\n        Subsets or permutations of the sample individuals may be specified\n        using the ``individuals`` argument.\n\n        Mixed-sample individuals (e.g., those associated with one node\n        that is a sample and another that is not) in the data model will\n        only have the sample nodes output by default. However, non-sample\n        nodes can be included using the ``include_non_sample_nodes`` argument.\n\n        If there are no individuals in the tree sequence,\n        synthetic individuals are created by combining adjacent samples, and\n        the number of samples combined is equal to the ``ploidy`` value (1 by\n        default). For example, if we have a ``ploidy`` of 2 and 6 sample nodes,\n        then we will have 3 diploid samples in the VCF, consisting of the\n        combined genotypes for samples [0, 1], [2, 3] and [4, 5]. If we had\n        genotypes 011110 at a particular variant, then we would output the\n        diploid genotypes 0|1, 1|1 and 1|0 in VCF.\n\n        Each individual in the output is identified by a string; these are the\n        VCF \"sample\" names. By default, these are of the form ``tsk_0``,\n        ``tsk_1`` etc, up to the number of individuals, but can be manually\n        specified using the ``individual_names`` argument. We do not check\n        for duplicates in this array, or perform any checks to ensure that\n        the output VCF is well-formed.\n\n        .. note::\n            The default individual names (VCF sample IDs) are always of\n            the form ``tsk_0``, ``tsk_1``, ..., ``tsk_{N - 1}``, where\n            N is the number of individuals we output. These numbers\n            are **not** necessarily the individual IDs.\n\n        The REF value in the output VCF is the ancestral allele for a site\n        and ALT values are the remaining alleles. 
It is important to note,\n        therefore, that for real data this means that the REF value for a given\n        site **may not** be equal to the reference allele. We also do not\n        check that the alleles result in a valid VCF---for example, it is possible\n        to use the tab character as an allele, leading to a broken VCF.\n\n        The ID value in the output VCF file is the integer ID of the\n        corresponding :ref:`site <sec_site_table_definition>` (``site.id``).\n        These ID values can be utilized to match the contents of the VCF file\n        to the sites in the tree sequence object.\n\n        .. note::\n            Older code often uses the ``ploidy=2`` argument, because old\n            versions of msprime did not output individual data. Specifying\n            individuals in the tree sequence is more robust, and since tree\n            sequences now typically contain individuals (e.g., as produced by\n            ``msprime.sim_ancestry()``), this is not necessary, and the\n            ``ploidy`` argument can safely be removed as part of the process\n            of updating from the msprime 0.x legacy API.\n\n        :param io.IOBase output: The file-like object to write the VCF output.\n        :param int ploidy: The ploidy of the individuals to be written to\n            VCF. The sample size must be evenly divisible by ploidy. Cannot be\n            used if there is individual data in the tree sequence.\n        :param str contig_id: The value of the CHROM column in the output VCF.\n        :param list(int) individuals: A list containing the individual IDs\n            corresponding to the VCF samples. Defaults to all individuals\n            associated with sample nodes in the tree sequence.\n            See the :ref:`sec_export_vcf_constructing_gt` section for more\n            details and examples.\n        :param list(str) individual_names: A list of string names to identify\n            individual columns in the VCF. 
In VCF nomenclature, these are the\n            sample IDs. If specified, this must be a list of strings of\n            length equal to the number of individuals to be output. Note that\n            we do not check the form of these strings in any way, so that it\n            is possible to output malformed VCF (for example, by embedding a\n            tab character within one of the names). The default is to output\n            ``tsk_j`` for the jth individual.\n            See the :ref:`sec_export_vcf_individual_names` for examples\n            and more information.\n        :param position_transform: A callable that transforms the\n            site position values into integer valued coordinates suitable for\n            VCF. The function takes a single positional parameter x and must\n            return an integer numpy array the same dimension as x. By default,\n            this is set to ``numpy.round()`` which will round values to the\n            nearest integer. If the string \"legacy\" is provided here, the\n            pre 0.2.0 legacy behaviour of rounding values to the nearest integer\n            (starting from 1) and avoiding the output of identical positions\n            by incrementing is used.\n            See the :ref:`sec_export_vcf_modifying_coordinates` for examples\n            and more information.\n        :param site_mask: A numpy boolean array (or something convertible to\n            a numpy boolean array) with num_sites elements, used to mask out\n            sites in the output. 
If ``site_mask[j]`` is True, then this\n            site (i.e., the line in the VCF file) will be omitted.\n            See the :ref:`sec_export_vcf_masking_output` for examples\n            and more information.\n        :param sample_mask: A numpy boolean array (or something convertible to\n            a numpy boolean array) with num_samples elements, or a callable\n            that returns such an array, such that if\n            ``sample_mask[j]`` is True, then the genotype for sample ``j``\n            will be marked as missing using a \".\". If ``sample_mask`` is a\n            callable, it must take a single argument and return a boolean\n            numpy array. This function will be called for each (unmasked) site\n            with the corresponding :class:`.Variant` object, allowing\n            for dynamic masks to be generated.\n            See the :ref:`sec_export_vcf_masking_output` for examples\n            and more information.\n        :param bool isolated_as_missing: If True, the genotype value assigned to\n            missing samples (i.e., isolated samples without mutations) is \".\".\n            If False, missing samples will be assigned the ancestral allele.\n            See :meth:`.variants` for more information. Default: True.\n        :param bool allow_position_zero: If True allow sites with position zero to be\n            output to the VCF, otherwise if one is present an error will be raised.\n            The VCF spec does not allow for sites at position 0. However, in practice\n            many tools will be fine with this. Default: False.\n        :param bool include_non_sample_nodes: If True, include non-sample nodes\n            in the output VCF. 
By default, only sample nodes are included.\n        \"\"\"\n        if allow_position_zero is None:\n            allow_position_zero = False\n        writer = vcf.VcfWriter(\n            self,\n            ploidy=ploidy,\n            contig_id=contig_id,\n            individuals=individuals,\n            individual_names=individual_names,\n            position_transform=position_transform,\n            site_mask=site_mask,\n            sample_mask=sample_mask,\n            isolated_as_missing=isolated_as_missing,\n            allow_position_zero=allow_position_zero,\n            include_non_sample_nodes=include_non_sample_nodes,\n        )\n        writer.write(output)\n\n    def write_fasta(\n        self,\n        file_or_path,\n        *,\n        wrap_width=60,\n        reference_sequence=None,\n        missing_data_character=None,\n        isolated_as_missing=None,\n    ):\n        \"\"\"\n        Writes the :meth:`.alignments` for this tree sequence to file in\n        `FASTA <https://en.wikipedia.org/wiki/FASTA_format>`__ format.\n        Please see the :meth:`.alignments` method for details on how\n        reference sequences are handled.\n\n        Alignments are returned for the\n        :ref:`sample nodes<sec_data_model_definitions>` in this tree\n        sequence, and a sample with node id ``u`` is given the label\n        ``f\"n{u}\"``, following the same convention as the\n        :meth:`.write_nexus` and :meth:`Tree.as_newick` methods.\n\n        The ``wrap_width`` parameter controls the maximum width of lines\n        of sequence data in the output. By default this is 60\n        characters in accordance with fasta standard outputs. To turn off\n        line-wrapping of sequences, set ``wrap_width`` = 0.\n\n        Example usage:\n\n        .. 
code-block:: python\n\n            ts.write_fasta(\"output.fa\")\n\n        :param file_or_path: The file object or path to write the output.\n            Paths can be either strings or :class:`python:pathlib.Path` objects.\n        :param int wrap_width: The number of sequence\n            characters to include on each line in the fasta file, before wrapping\n            to the next line for each sequence, or 0 to turn off line wrapping.\n            (Default=60).\n        :param str reference_sequence: As for the :meth:`.alignments` method.\n        :param str missing_data_character: As for the :meth:`.alignments` method.\n        :param bool isolated_as_missing: As for the :meth:`.alignments` method.\n        \"\"\"\n        text_formats.write_fasta(\n            self,\n            file_or_path,\n            wrap_width=wrap_width,\n            reference_sequence=reference_sequence,\n            missing_data_character=missing_data_character,\n            isolated_as_missing=isolated_as_missing,\n        )\n\n    def as_fasta(self, **kwargs):\n        \"\"\"\n        Return the result of :meth:`.write_fasta` as a string.\n        Keyword parameters are as defined in :meth:`.write_fasta`.\n\n        :return: A FASTA encoding of the alignments in this tree sequence as a string.\n        :rtype: str\n        \"\"\"\n        buff = io.StringIO()\n        self.write_fasta(buff, **kwargs)\n        return buff.getvalue()\n\n    def write_nexus(\n        self,\n        file_or_path,\n        *,\n        precision=None,\n        include_trees=None,\n        include_alignments=None,\n        reference_sequence=None,\n        missing_data_character=None,\n        isolated_as_missing=None,\n        node_labels=None,\n    ):\n        \"\"\"\n        Returns a `nexus encoding <https://en.wikipedia.org/wiki/Nexus_file>`_\n        of this tree sequence. 
By default, tree topologies are included\n        in the output, and sequence data alignments are included by default\n        if this tree sequence has discrete genome coordinates and one or\n        more sites. Inclusion of these sections can be controlled manually\n        using the ``include_trees`` and ``include_alignments`` parameters.\n\n        Tree topologies and branch lengths are listed\n        sequentially in the TREES block and the spatial location of each tree\n        encoded within the tree name labels. Specifically, a tree spanning\n        the interval :math:`[x, y)` is given the name ``f\"t{x}^{y}\"``\n        (See below for a description of the precision at which these spatial\n        coordinates are printed out).\n\n        The :ref:`sample nodes<sec_data_model_definitions>` in this tree\n        sequence are regarded as taxa, and a sample with node id ``u``\n        is given the label ``f\"n{u}\"``, following the same convention\n        as the :meth:`Tree.as_newick` method.\n\n        By default, genome positions are printed out with sufficient\n        precision for them to be recovered exactly in double precision.\n        If the tree sequence is defined on a :attr:`.discrete_genome`,\n        then positions are written out as integers. Otherwise, 17 digits\n        of precision is used. 
Branch length precision defaults are handled\n        in the same way as :meth:`.Tree.as_newick`.\n\n        If the ``precision`` argument is provided, genome positions and\n        branch lengths are printed out with this many digits of precision.\n\n        For example, here is the nexus encoding of a simple tree sequence\n        with integer times and genome coordinates with three samples\n        and two trees::\n\n            #NEXUS\n            BEGIN TAXA;\n              DIMENSIONS NTAX=3;\n              TAXLABELS n0 n1 n2;\n            END;\n            BEGIN TREES;\n              TREE t0^2 = [&R] (n0:3,(n1:2,n2:2):1);\n              TREE t2^10 = [&R] (n1:2,(n0:1,n2:1):1);\n            END;\n\n        If sequence data :meth:`.alignments` are defined for this tree sequence\n        and there is at least one site present, sequence alignment data will also\n        be included by default (this can be suppressed by setting\n        ``include_alignments=False``). For example, this tree sequence has\n        a sequence length of 10, two variable sites and no\n        :ref:`reference sequence<sec_data_model_reference_sequence>`::\n\n            #NEXUS\n            BEGIN TAXA;\n              DIMENSIONS NTAX=3;\n              TAXLABELS n0 n1 n2;\n            END;\n            BEGIN DATA;\n              DIMENSIONS NCHAR=10;\n              FORMAT DATATYPE=DNA MISSING=?;\n              MATRIX\n                n0 ??G??????T\n                n1 ??A??????C\n                n2 ??A??????C\n              ;\n            END;\n            BEGIN TREES;\n              TREE t0^10 = [&R] (n0:2,(n1:1,n2:1):1);\n            END;\n\n        Please see the :meth:`.alignments` method for details on how\n        reference sequences are handled.\n\n        .. note:: Note the default ``missing_data_character`` for this method\n            is \"?\" rather than \"N\", in keeping with common conventions for\n            nexus data. 
This can be changed using the ``missing_data_character``\n            parameter.\n\n        .. warning:: :ref:`Missing data<sec_data_model_missing_data>`\n            is not supported for encoding tree topology information\n            as our convention of using trees with multiple roots\n            is not often supported by newick parsers. Thus, the method\n            will raise a ValueError if we try to output trees with\n            multiple roots.\n\n        .. seealso:: See also the :meth:`.as_nexus` method which will\n            return this nexus representation as a string.\n\n        :param int precision: The numerical precision with which branch lengths\n            and tree positions are printed.\n        :param bool include_trees: True if the tree topology information should\n            be included; False otherwise (default=True).\n        :param bool include_alignments: True if the sequence data alignment information\n            should be included; False otherwise (default=True if sequence alignments\n            are well-defined and the tree sequence contains at least one site).\n        :param str reference_sequence: As for the :meth:`.alignments` method.\n        :param str missing_data_character: As for the :meth:`.alignments` method,\n            but defaults to \"?\".\n        :param bool isolated_as_missing: As for the :meth:`.alignments` method.\n        :param node_labels: A map of type `{node_id: name}`.  Samples present\n            in the map will have the given name instead of `n{node_id}`.  
Note\n            that the names must not have whitespace (spaces should be replaced\n            by underscores) or punctuation in them.\n        :return: A nexus representation of this :class:`TreeSequence`\n        :rtype: str\n        \"\"\"\n        text_formats.write_nexus(\n            self,\n            file_or_path,\n            precision=precision,\n            include_trees=include_trees,\n            include_alignments=include_alignments,\n            reference_sequence=reference_sequence,\n            missing_data_character=missing_data_character,\n            isolated_as_missing=isolated_as_missing,\n            node_labels=node_labels,\n        )\n\n    def as_nexus(self, **kwargs):\n        \"\"\"\n        Return the result of :meth:`.write_nexus` as a string.\n        Keyword parameters are as defined in :meth:`.write_nexus`.\n\n        :return: A nexus encoding of the alignments in this tree sequence as a string.\n        :rtype: str\n        \"\"\"\n        buff = io.StringIO()\n        self.write_nexus(buff, **kwargs)\n        return buff.getvalue()\n\n    # TODO\n    # (1) Move the definition to text_formats.py\n    # (2) Rename to as_macs and keep to_macs as a deprecated synonym\n    def to_macs(self):\n        \"\"\"\n        Return a `macs encoding <https://github.com/gchen98/macs>`_\n        of this tree sequence.\n\n        :return: The macs representation of this TreeSequence as a string.\n        :rtype: str\n        \"\"\"\n        n = self.get_sample_size()\n        m = self.get_sequence_length()\n        output = [f\"COMMAND:\\tnot_macs {n} {m}\"]\n        output.append(\"SEED:\\tASEED\")\n        for variant in self.variants(copy=False):\n            if any(len(allele) > 1 for allele in variant.alleles):\n                raise ValueError(\"macs output only supports single letter alleles\")\n            bytes_genotypes = np.empty(self.num_samples, dtype=np.uint8)\n            lookup = np.array([ord(a[0]) for a in variant.alleles], 
dtype=np.uint8)\n            bytes_genotypes[:] = lookup[variant.genotypes]\n            genotypes = bytes_genotypes.tobytes().decode()\n            output.append(\n                f\"SITE:\\t{variant.index}\\t{variant.position / m}\\t0.0\\t{genotypes}\"\n            )\n        return \"\\n\".join(output) + \"\\n\"\n\n    def simplify(\n        self,\n        samples=None,\n        *,\n        map_nodes=False,\n        reduce_to_site_topology=False,\n        filter_populations=None,\n        filter_individuals=None,\n        filter_sites=None,\n        filter_nodes=None,\n        update_sample_flags=None,\n        keep_unary=False,\n        keep_unary_in_individuals=None,\n        keep_input_roots=False,\n        record_provenance=True,\n        filter_zero_mutation_sites=None,  # Deprecated alias for filter_sites\n    ):\n        \"\"\"\n        Returns a simplified tree sequence that retains only the history of\n        the nodes given in the list ``samples``. If ``map_nodes`` is true,\n        also return a numpy array whose ``u``-th element is the ID of the node\n        in the simplified tree sequence that corresponds to node ``u`` in the\n        original tree sequence, or :data:`tskit.NULL` (-1) if ``u`` is no longer\n        present in the simplified tree sequence.\n\n        .. note::\n            If you wish to simplify a set of tables that do not satisfy all\n            requirements for building a TreeSequence, then use\n            :meth:`TableCollection.simplify`.\n\n        If the ``reduce_to_site_topology`` parameter is True, the returned tree\n        sequence will contain only topological information that is necessary to\n        represent the trees that contain sites. If there are zero sites in this\n        tree sequence, this will result in an output tree sequence with zero edges.\n        When the number of sites is greater than zero, every tree in the output\n        tree sequence will contain at least one site. 
For a given site, the\n        topology of the tree containing that site will be identical\n        (up to node ID remapping) to the topology of the corresponding tree\n        in the input tree sequence.\n\n        If ``filter_populations``, ``filter_individuals``, ``filter_sites``, or\n        ``filter_nodes`` is True, any of the corresponding objects that are not\n        referenced elsewhere are filtered out. As this is the default behaviour,\n        it is important to realise IDs for these objects may change through\n        simplification. By setting these parameters to False, however, the\n        corresponding tables can be preserved without changes.\n\n        If ``filter_nodes`` is False, then the output node table will be\n        unchanged except for updating the sample status of nodes and any ID\n        remappings caused by filtering individuals and populations (if the\n        ``filter_individuals`` and ``filter_populations`` options are enabled).\n        Nodes that are in the specified list of ``samples`` will be marked as\n        samples in the output, and nodes that are currently marked as samples\n        in the node table but not in the specified list of ``samples`` will\n        have their :data:`tskit.NODE_IS_SAMPLE` flag cleared. Note also that\n        the order of the ``samples`` list is not meaningful when\n        ``filter_nodes`` is False. In this case, the returned node mapping is\n        always the identity mapping, such that ``a[u] == u`` for all nodes.\n\n        Setting the ``update_sample_flags`` parameter to False prevents the\n        automatic sample status update of nodes (described above) from\n        occurring, making it the responsibility of calling code to keep track of\n        the ultimate sample status of nodes. 
This is an advanced option, mostly\n        of use when combined with the ``filter_nodes=False``,\n        ``filter_populations=False`` and ``filter_individuals=False`` options,\n        which then guarantees that the node table will not be altered by\n        simplification.\n\n        :param list[int] samples: A list of node IDs to retain as samples. They\n            need not be nodes marked as samples in the original tree sequence, but\n            will constitute the entire set of samples in the returned tree sequence.\n            If not specified or None, use all nodes marked with the IS_SAMPLE flag.\n            The list may be provided as a numpy array (or array-like) object\n            (dtype=np.int32).\n        :param bool map_nodes: If True, return a tuple containing the resulting\n            tree sequence and a numpy array mapping node IDs in the current tree\n            sequence to their corresponding node IDs in the returned tree sequence.\n            If False (the default), return only the tree sequence object itself.\n        :param bool reduce_to_site_topology: Whether to reduce the topology down\n            to the trees that are present at sites. (Default: False)\n        :param bool filter_populations: If True, remove any populations that are\n            not referenced by nodes after simplification; new population IDs are\n            allocated sequentially from zero. If False, the population table will\n            not be altered in any way. (Default: None, treated as True)\n        :param bool filter_individuals: If True, remove any individuals that are\n            not referenced by nodes after simplification; new individual IDs are\n            allocated sequentially from zero. If False, the individual table will\n            not be altered in any way. 
(Default: None, treated as True)\n        :param bool filter_sites: If True, remove any sites that are\n            not referenced by mutations after simplification; new site IDs are\n            allocated sequentially from zero. If False, the site table will not\n            be altered in any way. (Default: None, treated as True)\n        :param bool filter_nodes: If True, remove any nodes that are\n            not referenced by edges after simplification. If False, the only\n            potential change to the node table may be to change the node flags\n            (if ``samples`` is specified and different from the existing samples).\n            (Default: None, treated as True)\n        :param bool update_sample_flags: If True, update node flags so that\n            nodes in the specified list of samples have the NODE_IS_SAMPLE\n            flag after simplification, and nodes that are not in this list\n            do not. (Default: None, treated as True)\n        :param bool keep_unary: If True, preserve unary nodes (i.e., nodes with\n            exactly one child) that exist on the path from samples to root.\n            (Default: False)\n        :param bool keep_unary_in_individuals: If True, preserve unary nodes\n            that exist on the path from samples to root, but only if they are\n            associated with an individual in the individuals table. Cannot be\n            specified at the same time as ``keep_unary``. (Default: ``None``,\n            equivalent to False)\n        :param bool keep_input_roots: Whether to retain history ancestral to the\n            MRCA of the samples. If ``False``, no topology older than the MRCAs of the\n            samples will be included. 
If ``True`` the roots of all trees in the returned\n            tree sequence will be the same roots as in the original tree sequence.\n            (Default: False)\n        :param bool record_provenance: If True, record details of this call to\n            simplify in the returned tree sequence's provenance information\n            (Default: True).\n        :param bool filter_zero_mutation_sites: Deprecated alias for ``filter_sites``.\n        :return: The simplified tree sequence, or (if ``map_nodes`` is True)\n            a tuple consisting of the simplified tree sequence and a numpy array\n            mapping source node IDs to their corresponding IDs in the new tree\n            sequence.\n        :rtype: tskit.TreeSequence or (tskit.TreeSequence, numpy.ndarray)\n        \"\"\"\n        tables = self.dump_tables()\n        assert tables.sequence_length == self.sequence_length\n        node_map = tables.simplify(\n            samples=samples,\n            reduce_to_site_topology=reduce_to_site_topology,\n            filter_populations=filter_populations,\n            filter_individuals=filter_individuals,\n            filter_sites=filter_sites,\n            filter_nodes=filter_nodes,\n            update_sample_flags=update_sample_flags,\n            keep_unary=keep_unary,\n            keep_unary_in_individuals=keep_unary_in_individuals,\n            keep_input_roots=keep_input_roots,\n            record_provenance=record_provenance,\n            filter_zero_mutation_sites=filter_zero_mutation_sites,\n        )\n        new_ts = tables.tree_sequence()\n        assert new_ts.sequence_length == self.sequence_length\n        if map_nodes:\n            return new_ts, node_map\n        else:\n            return new_ts\n\n    def delete_sites(self, site_ids, record_provenance=True):\n        \"\"\"\n        Returns a copy of this tree sequence with the specified sites (and their\n        associated mutations) entirely removed. 
The site IDs do not need to be in any\n        particular order, and specifying the same ID multiple times does not have any\n        effect (i.e., calling ``tree_sequence.delete_sites([0, 1, 1])`` has the same\n        effect as calling ``tree_sequence.delete_sites([0, 1])``).\n\n        .. note::\n            To remove only the mutations associated with a site, but keep the site\n            itself, use the :meth:`MutationTable.keep_rows` method.\n\n        :param list[int] site_ids: A list of site IDs specifying the sites to remove.\n        :param bool record_provenance: If ``True``, add details of this operation to the\n            provenance information of the returned tree sequence. (Default: ``True``).\n        \"\"\"\n        tables = self.dump_tables()\n        tables.delete_sites(site_ids, record_provenance)\n        return tables.tree_sequence()\n\n    def delete_intervals(self, intervals, simplify=True, record_provenance=True):\n        \"\"\"\n        Returns a copy of this tree sequence for which information in the\n        specified list of genomic intervals has been deleted. Edges spanning these\n        intervals are truncated or deleted, and sites and mutations falling within\n        them are discarded. Note that it is the information in the intervals that\n        is deleted, not the intervals themselves, so in particular, all samples\n        will be isolated in the deleted intervals.\n\n        Note that node IDs may change as a result of this operation,\n        as by default :meth:`.simplify` is called on the returned tree sequence\n        to remove redundant nodes. 
If you wish to map node IDs onto the same\n        nodes before and after this method has been called, specify ``simplify=False``.\n\n        See also :meth:`.keep_intervals`, :meth:`.ltrim`, :meth:`.rtrim`, and\n        :ref:`missing data<sec_data_model_missing_data>`.\n\n        :param array_like intervals: A list of (start, end) pairs describing the\n            genomic intervals to delete. Intervals must be non-overlapping and\n            in increasing order. The list of intervals must be interpretable as a\n            2D numpy array with shape (N, 2), where N is the number of intervals.\n        :param bool simplify: If True, return a simplified tree sequence where nodes\n            no longer used are discarded. (Default: True).\n        :param bool record_provenance: If ``True``, add details of this operation to the\n            provenance information of the returned tree sequence. (Default: ``True``).\n        :rtype: tskit.TreeSequence\n        \"\"\"\n        tables = self.dump_tables()\n        tables.delete_intervals(intervals, simplify, record_provenance)\n        return tables.tree_sequence()\n\n    def keep_intervals(self, intervals, simplify=True, record_provenance=True):\n        \"\"\"\n        Returns a copy of this tree sequence which includes only information in\n        the specified list of genomic intervals. Edges are truncated to lie within\n        these intervals, and sites and mutations falling outside these intervals\n        are discarded.  Note that it is the information outside the intervals that\n        is deleted, not the intervals themselves, so in particular, all samples\n        will be isolated outside of the retained intervals.\n\n        Note that node IDs may change as a result of this operation,\n        as by default :meth:`.simplify` is called on the returned tree sequence\n        to remove redundant nodes. 
If you wish to map node IDs onto the same\n        nodes before and after this method has been called, specify ``simplify=False``.\n\n        See also :meth:`.delete_intervals`, :meth:`.ltrim`, :meth:`.rtrim`, and\n        :ref:`missing data<sec_data_model_missing_data>`.\n\n        :param array_like intervals: A list of (start, end) pairs describing the\n            genomic intervals to keep. Intervals must be non-overlapping and\n            in increasing order. The list of intervals must be interpretable as a\n            2D numpy array with shape (N, 2), where N is the number of intervals.\n        :param bool simplify: If True, return a simplified tree sequence where nodes\n            no longer used are discarded. (Default: True).\n        :param bool record_provenance: If True, add details of this operation to the\n            provenance information of the returned tree sequence.\n            (Default: True).\n        :rtype: tskit.TreeSequence\n        \"\"\"\n        tables = self.dump_tables()\n        tables.keep_intervals(intervals, simplify, record_provenance)\n        return tables.tree_sequence()\n\n    def ltrim(self, record_provenance=True):\n        \"\"\"\n        Returns a copy of this tree sequence with a potentially changed coordinate\n        system, such that empty regions (i.e., those not covered by any edge) at the\n        start of the tree sequence are trimmed away, and the leftmost edge starts at\n        position 0. This affects the reported position of sites and\n        edges. Additionally, sites and their associated mutations to the left of\n        the new zero point are thrown away.\n\n        :param bool record_provenance: If True, add details of this operation to the\n            provenance information of the returned tree sequence. 
(Default: True).\n        \"\"\"\n        tables = self.dump_tables()\n        tables.ltrim(record_provenance)\n        return tables.tree_sequence()\n\n    def rtrim(self, record_provenance=True):\n        \"\"\"\n        Returns a copy of this tree sequence with the ``sequence_length`` property reset\n        so that the sequence ends at the end of the rightmost edge. Additionally, sites\n        and their associated mutations at positions greater than the new\n        ``sequence_length`` are thrown away.\n\n        :param bool record_provenance: If True, add details of this operation to the\n            provenance information of the returned tree sequence. (Default: True).\n        \"\"\"\n        tables = self.dump_tables()\n        tables.rtrim(record_provenance)\n        return tables.tree_sequence()\n\n    def trim(self, record_provenance=True):\n        \"\"\"\n        Returns a copy of this tree sequence with any empty regions (i.e., those not\n        covered by any edge) on the right and left trimmed away. This may reset both the\n        coordinate system and the ``sequence_length`` property. It is functionally\n        equivalent to :meth:`.rtrim` followed by :meth:`.ltrim`. Sites and their\n        associated mutations in the empty regions are thrown away.\n\n        :param bool record_provenance: If True, add details of this operation to the\n            provenance information of the returned tree sequence. (Default: True).\n        \"\"\"\n        tables = self.dump_tables()\n        tables.trim(record_provenance)\n        return tables.tree_sequence()\n\n    def shift(self, value, sequence_length=None, record_provenance=True):\n        \"\"\"\n        Shift the coordinate system (used by edges and sites) of this tree sequence by\n        a given value. Positive values shift the coordinate system to the right, negative\n        values to the left. 
The sequence length of the tree sequence will be changed by\n        ``value``, unless ``sequence_length`` is given, in which case this will be used\n        for the new sequence length.\n\n        .. note::\n            By setting ``value=0``, this method will simply return a tree sequence\n            with a new sequence length.\n\n        :param value: The amount by which to shift the coordinate system.\n        :param sequence_length: The new sequence length of the tree sequence. If\n            ``None`` (default) add ``value`` to the sequence length.\n        :raises ValueError: If the new coordinate system is invalid (e.g., if\n            shifting the coordinate system results in negative coordinates).\n        \"\"\"\n        tables = self.dump_tables()\n        tables.shift(\n            value=value,\n            sequence_length=sequence_length,\n            record_provenance=record_provenance,\n        )\n        return tables.tree_sequence()\n\n    def concatenate(\n        self, *args, node_mappings=None, record_provenance=True, add_populations=None\n    ):\n        r\"\"\"\n        Concatenate a set of tree sequences to the right of this one, by shifting\n        their coordinate systems and adding all edges, sites, mutations, and\n        any additional nodes, individuals, or populations needed for these.\n        Concretely, to concatenate an ``other`` tree sequence to ``self``, the value\n        of ``self.sequence_length`` is added to all genomic coordinates in ``other``,\n        and then the concatenated tree sequence  will contain all edges, sites, and\n        mutations in both. Which nodes in ``other`` are treated as \"new\", and hence\n        added as well, is controlled by ``node_mappings``. 
Any individuals to which\n        new nodes belong are added as well.\n\n        The method uses :meth:`.shift` followed by :meth:`.union`, with\n        ``all_mutations=True``, ``all_edges=True``, and ``check_shared_equality=False``.\n\n        By default, the samples in current and input tree sequences are assumed to\n        refer to the same nodes, and are matched based on the numerical order of\n        sample node IDs; all other nodes are assumed to be new. This can be\n        changed by providing explicit ``node_mappings`` for each input tree sequence\n        (see below).\n\n        .. note::\n            To add gaps between the concatenated tree sequences, use :meth:`shift`\n            or to remove gaps, use :meth:`trim` before concatenating.\n\n        :param TreeSequence \\*args: A list of other tree sequences to append to\n            the right of this one.\n        :param Union[list, None] node_mappings: A list of node mappings for each\n            input tree sequence in ``args``. Each should either be an array of\n            integers of the same length as the number of nodes in the equivalent\n            input tree sequence (see :meth:`~TreeSequence.union` for details), or\n            ``None``. 
If ``None``, only sample nodes are mapped to each other.\n            Default: ``None``, treated as ``[None] * len(args)``.\n        :param bool record_provenance: If True (default), record details of this\n            call to ``concatenate`` in the returned tree sequence's provenance\n            information (Default: True).\n        :param bool add_populations: If True (default), nodes new to ``self`` will\n            be assigned new population IDs (see :meth:`~TreeSequence.union`)\n        \"\"\"\n        if node_mappings is None:\n            node_mappings = [None] * len(args)\n        if add_populations is None:\n            add_populations = True\n        if len(node_mappings) != len(args):\n            raise ValueError(\"You must provide the same number of node_mappings as args\")\n\n        samples = self.samples()\n        tables = self.dump_tables()\n        tables.drop_index()\n\n        for node_mapping, other in zip(node_mappings, args):\n            if node_mapping is None:\n                other_samples = other.samples()\n                if len(other_samples) != len(samples):\n                    raise ValueError(\n                        \"each `other` must have the same number of samples as `self`\"\n                    )\n                node_mapping = np.full(other.num_nodes, tskit.NULL, dtype=np.int32)\n                node_mapping[other_samples] = samples\n            other_tables = other.dump_tables()\n            other_tables.shift(tables.sequence_length, record_provenance=False)\n            tables.sequence_length = other_tables.sequence_length\n            # NB: should we use a different default for add_populations?\n            tables.union(\n                other_tables,\n                node_mapping=node_mapping,\n                check_shared_equality=False,  # Else checks fail with internal samples\n                all_mutations=True,\n                all_edges=True,\n                record_provenance=False,\n                
add_populations=add_populations,\n            )\n        if record_provenance:\n            parameters = {\n                \"command\": \"concatenate\",\n                \"TODO\": \"add concatenate parameters\",  # tricky as both have provenances\n            }\n            tables.provenances.add_row(\n                record=json.dumps(provenance.get_provenance_dict(parameters))\n            )\n\n        return tables.tree_sequence()\n\n    def split_edges(self, time, *, flags=None, population=None, metadata=None):\n        \"\"\"\n        Returns a copy of this tree sequence in which we replace any\n        edge ``(left, right, parent, child)`` in which\n        ``node_time[child] < time < node_time[parent]`` with two edges\n        ``(left, right, parent, u)`` and ``(left, right, u, child)``,\n        where ``u`` is a newly added node for each intersecting edge.\n\n        If ``metadata``, ``flags``, or ``population`` are specified, newly\n        added nodes will be assigned these values. Otherwise, default values\n        will be used. The default metadata is an empty dictionary if a metadata\n        schema is defined for the node table, and is an empty byte string\n        otherwise. The default population for the new node is\n        :data:`tskit.NULL`. Newly added nodes have a default ``flags`` value of 0.\n\n        Any metadata associated with a split edge will be copied to the new edge.\n\n        .. warning:: This method currently does not support migrations\n            and an error will be raised if the migration table is not\n            empty. Future versions may take migrations that intersect with the\n            edge into account when determining the default population\n            assignments for new nodes.\n\n        Any mutations lying on the edge whose time is >= ``time`` will have\n        their node value set to ``u``. 
Note that the time of the mutation is\n        defined as the time of the child node if the mutation's time is\n        unknown.\n\n        :param float time: The cutoff time.\n        :param int flags: The flags value for newly-inserted nodes. (Default = 0)\n        :param int population: The population value for newly inserted nodes.\n            Defaults to ``tskit.NULL`` if not specified.\n        :param metadata: The metadata for any newly inserted nodes. See\n            :meth:`.NodeTable.add_row` for details on how default metadata\n            is produced for a given schema (or none).\n        :return: A copy of this tree sequence with edges split at the specified time.\n        :rtype: tskit.TreeSequence\n        \"\"\"\n        population = tskit.NULL if population is None else population\n        flags = 0 if flags is None else flags\n        schema = self.table_metadata_schemas.node\n        if metadata is None:\n            metadata = schema.empty_value\n        metadata = schema.validate_and_encode_row(metadata)\n        ll_ts = self._ll_tree_sequence.split_edges(\n            time=time,\n            flags=flags,\n            population=population,\n            metadata=metadata,\n        )\n        return TreeSequence(ll_ts)\n\n    def decapitate(self, time, *, flags=None, population=None, metadata=None):\n        \"\"\"\n        Delete all edge topology and mutational information at least as old\n        as the specified time from this tree sequence.\n\n        Removes all edges in which the time of the child is >= the specified\n        time ``t``, and breaks edges that intersect with ``t``. For each edge\n        intersecting with ``t`` we create a new node with time equal to ``t``,\n        and set the parent of the edge to this new node. The node table\n        is not altered in any other way. 
Newly added nodes have values\n        for ``flags``, ``population`` and ``metadata`` controlled by parameters\n        to this function in the same way as :meth:`.split_edges`.\n\n        .. note::\n            Note that each edge is treated independently, so that even if two\n            edges that are broken by this operation share the same parent and\n            child nodes, there will be two different new parent nodes inserted.\n\n        Any mutation whose time is >= ``t`` will be removed. A mutation's time\n        is its associated ``time`` value, or the time of its node if the\n        mutation's time was marked as unknown (:data:`UNKNOWN_TIME`).\n\n        Migrations are not supported, and a LibraryError will be raised if\n        called on a tree sequence containing migration information.\n\n        .. seealso:: This method is implemented using the :meth:`.split_edges`\n            and :meth:`TableCollection.delete_older` functions.\n\n        :param float time: The cutoff time.\n        :param int flags: The flags value for newly-inserted nodes. (Default = 0)\n        :param int population: The population value for newly inserted nodes.\n            Defaults to ``tskit.NULL`` if not specified.\n        :param metadata: The metadata for any newly inserted nodes. 
See\n            :meth:`.NodeTable.add_row` for details on how default metadata\n            is produced for a given schema (or none).\n        :return: A copy of this tree sequence with edge topology and mutations at\n            least as old as the specified time removed.\n        :rtype: tskit.TreeSequence\n        \"\"\"\n        split_ts = self.split_edges(\n            time, flags=flags, population=population, metadata=metadata\n        )\n        tables = split_ts.dump_tables()\n        del split_ts\n        tables.delete_older(time)\n        return tables.tree_sequence()\n\n    def extend_haplotypes(self, max_iter=10):\n        \"\"\"\n        Returns a new tree sequence in which the span covered by ancestral nodes\n        is \"extended\" to regions of the genome according to the following rule:\n        If an ancestral segment corresponding to node `n` has ancestor `p` and\n        descendant `c` on some portion of the genome, and on an adjacent segment of\n        genome `p` is still an ancestor of `c`, then `n` is inserted into the\n        path from `p` to `c`. For instance, if `p` is the parent of `n` and `n`\n        is the parent of `c`, then the span of the edges from `p` to `n` and\n        `n` to `c` are extended, and the span of the edge from `p` to `c` is\n        reduced. Thus, the ancestral haplotype represented by `n` is extended\n        to a longer span of the genome. However, any edges whose child node is\n        a sample are not modified. See\n        `Fritze et al. 
(2025) <https://doi.org/10.1093/genetics/iyaf198>`_\n        for more details.\n\n        Since some edges may be removed entirely, this process usually reduces\n        the number of edges in the tree sequence.\n\n        The method works by iterating over the genome to look for paths that can\n        be extended in this way; the maximum number of such iterations is\n        controlled by ``max_iter``.\n\n        The rationale is that we know that `n` carries a portion of the segment\n        of ancestral genome inherited by `c` from `p`, and so likely carries\n        the *entire* inherited segment (since the implication otherwise would\n        be that distinct recombined segments were passed down separately from\n        `p` to `c`).\n\n        In the example above, if there was a mutation on the node above `c`\n        older than the time of `n` in the span into which `n` was extended,\n        then the mutation will now occur above `n`. So, this operation may change\n        mutations' nodes (but will not affect genotypes).  This is only\n        unambiguous if the mutation's time is known, so the method requires\n        known mutation times.  See :meth:`.impute_unknown_mutations_time` if\n        mutation times are not known.\n\n        .. note::\n            The method will not affect the marginal trees (so, if the original tree\n            sequence was simplified, then following up with `simplify` will recover\n            the original tree sequence, possibly with edges in a different order).\n            It will also not affect the genotype matrix, or any of the tables other\n            than the edge table or the node column in the mutation table.\n\n        :param int max_iter: The maximum number of iterations over the tree\n            sequence. 
Defaults to 10.\n        :return: A new tree sequence with unary nodes extended.\n        :rtype: tskit.TreeSequence\n        \"\"\"\n        max_iter = int(max_iter)\n        ll_ts = self._ll_tree_sequence.extend_haplotypes(max_iter)\n        return TreeSequence(ll_ts)\n\n    def subset(\n        self,\n        nodes,\n        record_provenance=True,\n        reorder_populations=True,\n        remove_unreferenced=True,\n    ):\n        \"\"\"\n        Returns a tree sequence containing only information directly\n        referencing the provided list of nodes to retain.  The result will\n        retain only the nodes whose IDs are listed in ``nodes``, only edges for\n        which both parent and child are in ``nodes``, only mutations whose\n        node is in ``nodes``, and only individuals that are referred to by one\n        of the retained nodes.  Note that this does *not* retain\n        the ancestry of these nodes - for that, see :meth:`.simplify`.\n\n        This has the side effect that it may change the order of the nodes,\n        populations, individuals, and migrations in the tree sequence. Nodes\n        in the new tree sequence will be in the order provided in ``nodes``.\n        Populations will be ordered in ascending order of the lowest ID of\n        the nodes that refer to them. Individuals will be not only ordered\n        so that :attr:`~Individual.parents` come before children (see\n        :meth:`~TableCollection.sort_individuals`) but in addition\n        will be secondarily sorted in ascending order of the lowest ID of\n        their referring nodes. (However, ``reorder_populations`` may be set\n        to ``False`` to keep the population table unchanged.)\n\n        By default, the method removes all individuals and populations not\n        referenced by any nodes, and all sites not referenced by any mutations.\n        To retain these unreferenced individuals, populations, and sites, pass\n        ``remove_unreferenced=False``. 
If this is done, the site table will\n        remain unchanged, unreferenced individuals will appear at the end of\n        the individuals table (and in their original order), and unreferenced\n        populations will appear at the end of the population table (unless\n        ``reorder_populations=False``).\n\n        .. seealso::\n\n            :meth:`.keep_intervals` for subsetting a given portion of the genome;\n            :meth:`.simplify` for retaining the ancestry of a subset of nodes.\n\n        :param list nodes: The list of nodes for which to retain information. This\n            may be a numpy array (or array-like) object (dtype=np.int32).\n        :param bool record_provenance: Whether to record a provenance entry\n            in the provenance table for this operation.\n        :param bool reorder_populations: Whether to reorder populations\n            (default: True).  If False, the population table will not be altered in\n            any way.\n        :param bool remove_unreferenced: Whether sites, individuals, and populations\n            that are not referred to by any retained entries in the tables should\n            be removed (default: True). 
See the description for details.\n        :rtype: tskit.TreeSequence\n        \"\"\"\n        tables = self.dump_tables()\n        tables.subset(\n            nodes,\n            record_provenance=record_provenance,\n            reorder_populations=reorder_populations,\n            remove_unreferenced=remove_unreferenced,\n        )\n        return tables.tree_sequence()\n\n    def union(\n        self,\n        other,\n        node_mapping,\n        check_shared_equality=True,\n        add_populations=True,\n        record_provenance=True,\n        *,\n        all_edges=False,\n        all_mutations=False,\n    ):\n        \"\"\"\n        Returns an expanded tree sequence which contains the node-wise union of\n        ``self`` and ``other``, obtained by adding the non-shared portions of\n        ``other`` onto ``self``. The \"shared\" portions are specified using a\n        map that specifies which nodes in ``other`` are equivalent to those in\n        ``self``: the ``node_mapping`` argument should be an array of length\n        equal to the number of nodes in ``other`` and whose entries are the ID\n        of the matching node in ``self``, or ``tskit.NULL`` if there is no\n        matching node. Those nodes in ``other`` that map to ``tskit.NULL`` will\n        be added to ``self``, along with:\n\n        1. Individuals whose nodes are new to ``self``.\n        2. Edges whose parent or child are new to ``self``.\n        3. Mutations whose nodes are new to ``self``.\n        4. 
Sites whose positions are not present in the site positions in\n           ``self``, if the site contains a newly added mutation.\n\n        This can be thought of as a \"node-wise\" union: for instance, it can not\n        be used to add new edges between two nodes already in ``self`` or new\n        mutations above nodes already in ``self``.\n\n        By default, with ``add_populations=True``, populations of all newly added\n        nodes are assumed to be new populations, and added to the end of the\n        population table as well. This is appropriate if all nodes to be added\n        are from distinct populations not already in ``self`` and ordering of\n        populations is not important. On the other hand, if\n        ``add_populations=False`` then no new populations are added, so any\n        populations referred to in ``other`` must already exist in ``self``.\n        If some new nodes are in populations already in ``self`` but other new\n        nodes are in entirely new populations, then you must set up the\n        population table first, and then union with ``add_populations=False``.\n\n        This method makes sense if the \"shared\" portions of the tree sequences\n        are equal; the option ``check_shared_equality`` performs a consistency\n        check that this is true. If this check is disabled, it is very easy to\n        produce nonsensical results via subtle inconsistencies.\n\n        The behavior above can be changed by ``all_edges`` and ``all_mutations``.\n        If ``all_edges`` is True, then all edges in ``other`` are added to\n        ``self``, instead of only edges adjacent to added nodes. 
If\n        ``all_mutations`` is True, then similarly all mutations in ``other``\n        are added (not just those on added nodes); furthermore, all sites\n        at positions without a site already present are added to ``self``.\n        The intended use case for these options is a \"disjoint\" union,\n        where for instance the two tree sequences contain information about\n        disjoint segments of the genome (see :meth:`.concatenate`).\n        For some such applications it may be necessary to set\n        ``check_shared_equality=False``: for instance, if ``other`` has\n        an identical copy of the node table but no edges, then\n        ``all_mutations=True, check_shared_equality=False`` can be used\n        to add mutations to ``self``.\n\n        .. warning::\n            If an equivalent node is specified in ``other``, the\n            version in ``self`` is used without checking the node\n            properties are the same. Similarly, if the same site position\n            is present in both ``self`` and ``other``, the version in\n            ``self`` is used without checking that site properties are\n            the same. In these cases metadata and e.g. node times or ancestral\n            states in ``other`` are simply ignored.\n\n        .. 
note::\n            This operation also sorts the resulting tables, so the resulting\n            tree sequence may not be equal to ``self`` even if nothing new\n            was added (although it would differ only in ordering of the tables).\n\n        :param TreeSequence other: Another tree sequence.\n        :param list node_mapping: An array of node IDs that relate nodes in\n            ``other`` to nodes in ``self``.\n        :param bool all_edges: If True, then all edges in ``other`` are added\n            to ``self``.\n        :param bool all_mutations: If True, then all mutations and sites in\n            ``other`` are added to ``self``.\n        :param bool check_shared_equality: If True, the shared portions of the\n            tree sequences will be checked for equality. It does so by\n            running :meth:`TreeSequence.subset` on both ``self`` and ``other``\n            for the equivalent nodes specified in ``node_mapping``, and then\n            checking for equality of the subsets.\n        :param bool add_populations: If True, nodes new to ``self`` will be\n            assigned new population IDs.\n        :param bool record_provenance: Whether to record a provenance entry\n            in the provenance table for this operation.\n        :return: The union of the two tree sequences.\n        :rtype: tskit.TreeSequence\n        :raises: **tskit.LibraryError** -- If the resulting tree sequence is invalid\n            (for instance, a node is specified to have two distinct\n            parents on the same interval)\n        \"\"\"\n        tables = self.dump_tables()\n        other_tables = other.dump_tables()\n        tables.union(\n            other_tables,\n            node_mapping,\n            check_shared_equality=check_shared_equality,\n            add_populations=add_populations,\n            record_provenance=record_provenance,\n            all_edges=all_edges,\n            all_mutations=all_mutations,\n        )\n        return 
tables.tree_sequence()\n\n    def draw_svg(\n        self,\n        path=None,\n        *,\n        size=None,\n        x_scale=None,\n        time_scale=None,\n        tree_height_scale=None,\n        title=None,\n        node_labels=None,\n        mutation_labels=None,\n        node_titles=None,\n        mutation_titles=None,\n        root_svg_attributes=None,\n        style=None,\n        order=None,\n        force_root_branch=None,\n        symbol_size=None,\n        x_axis=None,\n        x_label=None,\n        x_lim=None,\n        x_regions=None,\n        y_axis=None,\n        y_label=None,\n        y_ticks=None,\n        y_gridlines=None,\n        omit_sites=None,\n        canvas_size=None,\n        max_num_trees=None,\n        preamble=None,\n        **kwargs,\n    ):\n        \"\"\"\n        Return an SVG representation of a tree sequence. See the\n        :ref:`visualization tutorial<tutorials:sec_tskit_viz>` for more details.\n\n        :param str path: The path to the file to write the output. If None, do not write\n            to file.\n        :param tuple(int, int) size: A tuple of (width, height) specifying a target\n            drawing size in abstract user units (usually interpreted as pixels on\n            initial display). Components of the drawing will be scaled so that the total\n            plot including labels etc. normally fits onto a canvas of this size (see\n            ``canvas_size`` below). If ``None``, choose values such that each tree is\n            drawn at a size appropriate for a reasonably small set of samples (this will\n            nevertheless result in a very wide drawing if there are many trees to\n            display). Default: ``None``\n        :param str x_scale: Control how the X axis is drawn. 
If \"physical\" (the default)\n            the axis scales linearly with physical distance along the sequence,\n            background shading is used to indicate the position of the trees along the\n            X axis, and sites (with associated mutations) are marked at the\n            appropriate physical position on axis line. If \"treewise\", each axis tick\n            corresponds to a tree boundary, which are positioned evenly along the axis,\n            so that the X axis is of variable scale, no background scaling is required,\n            and site positions are not marked on the axis.\n        :param str time_scale: Control how height values for nodes are computed.\n            If this is equal to ``\"time\"``, node heights are proportional to their time\n            values (this is the default). If this is equal to ``\"log_time\"``, node\n            heights are proportional to their log(time) values. If it is equal to\n            ``\"rank\"``, node heights are spaced equally according to their ranked times.\n        :param str tree_height_scale: Deprecated alias for time_scale. (Deprecated in\n            0.3.6)\n        :param str title: A title string to be included in the SVG output. If ``None``\n            (default) no title is shown, which gives more vertical space for the tree.\n        :param node_labels: If specified, show custom labels for the nodes\n            (specified by ID) that are present in this map; any nodes not present will\n            not have a label.\n        :type node_labels: dict(int, str)\n        :param mutation_labels: If specified, show custom labels for the\n            mutations (specified by ID) that are present in the map; any mutations\n            not present will not have a label.\n        :param dict(int, str) node_titles: If specified, add a ``<title>`` string to\n            symbols for each node (specified by ID) present in this map. 
SVG visualizers\n            such as web browsers will commonly display this string on mousing over\n            node symbol.\n        :param dict(int, str) mutation_titles: If specified, add a ``<title>`` string to\n            symbols for each mutation (specified by ID) present in this map. SVG\n            visualizers such as web browsers will commonly display this string on\n            mousing over the mutation symbol in the tree and (if shown) on the x axis.\n        :param dict root_svg_attributes: Additional attributes, such as an id, that will\n            be embedded in the root ``<svg>`` tag of the generated drawing.\n        :param str style: A `css string <https://www.w3.org/TR/CSS21/syndata.htm>`_\n            that will be included in the ``<style>`` tag of the generated svg.\n        :param str order: The left-to-right ordering of child nodes in each drawn tree.\n            This can be either: ``\"minlex\"``, which minimises the differences\n            between adjacent trees (see also the ``\"minlex_postorder\"`` traversal\n            order for the :meth:`.Tree.nodes` method); or ``\"tree\"`` which draws trees\n            in the left-to-right order defined by the\n            :ref:`quintuply linked tree structure <sec_data_model_tree_structure>`.\n            If not specified or None, this defaults to ``\"minlex\"``.\n        :param bool force_root_branch: If ``True`` plot a branch (edge) above every tree\n            root in the tree sequence. If ``None`` (default) then only plot such\n            root branches if any root in the tree sequence has a mutation above it.\n        :param float symbol_size: Change the default size of the node and mutation\n            plotting symbols. If ``None`` (default) use a standard size.\n        :param bool x_axis: Should the plot have an X axis line, showing the positions\n            of trees along the genome. The scale used is determined by the ``x_scale``\n            parameter. 
If ``None`` (default) plot an X axis.\n        :param str x_label: Place a label under the plot. If ``None`` (default) and\n            there is an X axis, create and place an appropriate label.\n        :param list x_lim: A list of size two giving the genomic positions between which\n            trees should be plotted. If the first is ``None``, then plot from the first\n            non-empty region of the tree sequence. If the second is ``None``, then plot\n            up to the end of the last non-empty region of the tree sequence. The default\n            value ``x_lim=None`` is shorthand for the list [``None``, ``None``]. If\n            numerical values are given, then regions outside the interval have all\n            information discarded: this means that mutations outside the interval will\n            not be shown. To force display of the entire tree sequence, including empty\n            flanking regions, specify ``x_lim=[0, ts.sequence_length]``.\n        :param dict x_regions: A dictionary mapping (left, right) tuples to names. This\n            draws a box, labelled with the name, on the X axis between the left and\n            right positions, and can be used for annotating genomic regions (e.g.\n            genes) on the X axis. If ``None`` (default) do not plot any regions.\n        :param Union[bool, str] y_axis: Should the plot have a Y axis line, showing\n            time. If ``False`` do not plot a Y axis. If ``True``, plot the Y axis on\n            the left hand side of the plot. Can also take the strings ``\"left\"`` or\n            ``\"right\"``, specifying the side of the plot on which to plot the Y axis.\n            Default: ``None``, treated as ``False``.\n        :param str y_label: Place a label to the left of the plot. 
If ``None`` (default)\n            and there is a Y axis, create and place an appropriate label.\n        :param Union[list, dict] y_ticks: A list of Y values at which to plot\n            tickmarks, or a dictionary mapping Y values to labels (``[]`` gives no\n            tickmarks). If ``None`` (default), plot one tickmark for each unique node\n            value. Note that if ``time_scale=\"rank\"``, the Y values refer to the\n            zero-based rank of the plotted nodes, rather than the node time itself.\n        :param bool y_gridlines: Whether to plot horizontal lines behind the tree\n            at each y tickmark.\n        :param bool omit_sites: If True, omit sites and mutations from the drawing.\n            Default: False\n        :param tuple(int, int) canvas_size: The (width, height) of the SVG canvas.\n            This will change the SVG width and height without rescaling graphical\n            elements, allowing extra room e.g. for unusually long labels. If ``None``\n            take the canvas size to be the same as the target drawing size (see\n            ``size``, above). Default: None\n        :param int max_num_trees: The maximum number of trees to plot. If there are\n            more trees than this in the tree sequence, the middle trees will be skipped\n            from the plot and a message \"XX trees skipped\" displayed in their place.\n            If ``None``, all the trees will be plotted: this can produce a very wide\n            plot if there are many trees in the tree sequence. Default: None\n        :param str preamble: SVG commands to be included at the start of the returned\n            object, immediately after the opening tag. These can include custom svg\n            elements such as legends or annotations or even entire ``<svg>`` elements.\n            The preamble is not checked for validity, so it is up to the user to\n            ensure that it is valid SVG. 
Default: None\n\n        :return: An SVG representation of a tree sequence.\n        :rtype: SVGString\n\n        .. note::\n            Technically, x_lim[0] specifies a *minimum* value for the start of the X\n            axis, and x_lim[1] specifies a *maximum* value for the end. This is only\n            relevant if the tree sequence contains \"empty\" regions with no edges or\n            mutations. In this case if x_lim[0] lies strictly within an empty region\n            (i.e., ``empty_tree.interval.left < x_lim[0] < empty_tree.interval.right``)\n            then that tree will not be plotted on the left hand side, and the X axis\n            will start at ``empty_tree.interval.right``. Similarly, if x_lim[1] lies\n            strictly within an empty region then that tree will not be plotted on the\n            right hand side, and the X axis will end at ``empty_tree.interval.left``\n        \"\"\"\n        svgtreesequence = drawing.SvgTreeSequence(\n            self,\n            size,\n            x_scale=x_scale,\n            time_scale=time_scale,\n            tree_height_scale=tree_height_scale,\n            title=title,\n            node_labels=node_labels,\n            mutation_labels=mutation_labels,\n            node_titles=node_titles,\n            mutation_titles=mutation_titles,\n            root_svg_attributes=root_svg_attributes,\n            style=style,\n            order=order,\n            force_root_branch=force_root_branch,\n            symbol_size=symbol_size,\n            x_axis=x_axis,\n            x_label=x_label,\n            x_lim=x_lim,\n            x_regions=x_regions,\n            y_axis=y_axis,\n            y_label=y_label,\n            y_ticks=y_ticks,\n            y_gridlines=y_gridlines,\n            omit_sites=omit_sites,\n            canvas_size=canvas_size,\n            max_num_trees=max_num_trees,\n            preamble=preamble,\n            **kwargs,\n        )\n        return svgtreesequence.draw(path)\n\n    def 
draw_text(\n        self,\n        *,\n        node_labels=None,\n        use_ascii=False,\n        time_label_format=None,\n        position_label_format=None,\n        order=None,\n        **kwargs,\n    ):\n        \"\"\"\n        Create a text representation of a tree sequence.\n\n        :param dict node_labels: If specified, show custom labels for the nodes\n            that are present in the map. Any nodes not specified in the map will\n            not have a node label.\n        :param bool use_ascii: If ``False`` (default) then use unicode\n            `box drawing characters \\\n<https://en.wikipedia.org/wiki/Box-drawing_character>`_\n            to render the tree. If ``True``, use plain ascii characters, which look\n            cruder but are less susceptible to misalignment or font substitution.\n            Alternatively, if you are having alignment problems with Unicode, you can try\n            out the solution documented `here \\\n<https://github.com/tskit-dev/tskit/issues/189#issuecomment-499114811>`_.\n        :param str time_label_format: A python format string specifying the format (e.g.\n            number of decimal places or significant figures) used to print the numerical\n            time values on the time axis. If ``None``, this defaults to ``\"{:.2f}\"``.\n        :param str position_label_format: A python format string specifying the format\n            (e.g. number of decimal places or significant figures) used to print genomic\n            positions. 
If ``None``, this defaults to ``\"{:.2f}\"``.\n        :param str order: The left-to-right ordering of child nodes in the drawn tree.\n            This can be either: ``\"minlex\"``, which minimises the differences\n            between adjacent trees (see also the ``\"minlex_postorder\"`` traversal\n            order for the :meth:`.Tree.nodes` method); or ``\"tree\"`` which draws trees\n            in the left-to-right order defined by the\n            :ref:`quintuply linked tree structure <sec_data_model_tree_structure>`.\n            If not specified or None, this defaults to ``\"minlex\"``.\n\n        :return: A text representation of a tree sequence.\n        :rtype: str\n        \"\"\"\n        return str(\n            drawing.TextTreeSequence(\n                self,\n                node_labels=node_labels,\n                use_ascii=use_ascii,\n                time_label_format=time_label_format,\n                position_label_format=position_label_format,\n                order=order,\n            )\n        )\n\n    ############################################\n    #\n    # Statistics computation\n    #\n    ############################################\n\n    def general_stat(\n        self,\n        W,\n        f,\n        output_dim,\n        windows=None,\n        polarised=False,\n        mode=None,\n        span_normalise=True,\n        strict=True,\n    ):\n        \"\"\"\n        Compute a windowed statistic from weights and a summary function.\n        See the :ref:`statistics interface <sec_stats_interface>` section for details on\n        :ref:`windows <sec_stats_windows>`,\n        :ref:`mode <sec_stats_mode>`,\n        :ref:`span normalise <sec_stats_span_normalise>`,\n        and :ref:`return value <sec_stats_output_format>`.\n        On each tree, this\n        propagates the weights ``W`` up the tree, so that the \"weight\" of each\n        node is the sum of the weights of all samples at or below the node.\n        Then the summary 
function ``f`` is applied to the weights, giving a\n        summary for each node in each tree. How this is then aggregated depends\n        on ``mode``:\n\n        \"site\"\n            Adds together the total summary value across all alleles in each window.\n\n        \"branch\"\n            Adds together the summary value for each node, multiplied by the\n            length of the branch above the node and the span of the tree.\n\n        \"node\"\n            Returns each node's summary value added across trees and multiplied\n            by the span of the tree.\n\n        Both the weights and the summary can be multidimensional: if ``W`` has ``k``\n        columns, and ``f`` takes a ``k``-vector and returns an ``m``-vector,\n        then the output will be ``m``-dimensional for each node or window (depending\n        on \"mode\").\n\n        .. note::\n            The summary function ``f`` should return zero when given both 0 and\n            the total weight (i.e., ``f(0) = 0`` and ``f(np.sum(W, axis=0)) = 0``),\n            unless ``strict=False``.  
This is necessary for the statistic to be\n            unaffected by parts of the tree sequence ancestral to none or all\n            of the samples, respectively.\n\n        :param numpy.ndarray W: An array of values with one row for each sample and one\n            column for each weight.\n        :param f: A function that takes a one-dimensional array of length\n            equal to the number of columns of ``W`` and returns a one-dimensional\n            array.\n        :param int output_dim: The length of ``f``'s return value.\n        :param list windows: An increasing list of breakpoints between the windows\n            to compute the statistic in.\n        :param bool polarised: Whether to leave the ancestral state out of computations:\n            see :ref:`sec_stats` for more details.\n        :param str mode: A string giving the \"type\" of the statistic to be computed\n            (defaults to \"site\").\n        :param bool span_normalise: Whether to divide the result by the span of the\n            window (defaults to True).\n        :param bool strict: Whether to check that f(0) and f(total weight) are zero.\n        :return: A ndarray with shape equal to (num windows, num statistics).\n        \"\"\"\n        if mode is None:\n            mode = \"site\"\n        if strict:\n            total_weights = np.sum(W, axis=0)\n            for x in [total_weights, total_weights * 0.0]:\n                with np.errstate(invalid=\"ignore\", divide=\"ignore\"):\n                    fx = np.array(f(x))\n                fx[np.isnan(fx)] = 0.0\n                if not np.allclose(fx, np.zeros((output_dim,))):\n                    raise ValueError(\n                        \"Summary function does not return zero for both \"\n                        \"zero weight and total weight.\"\n                    )\n        return self.__run_windowed_stat(\n            windows,\n            self.ll_tree_sequence.general_stat,\n            W,\n            f,\n            
output_dim,\n            polarised=polarised,\n            span_normalise=span_normalise,\n            mode=mode,\n        )\n\n    def sample_count_stat(\n        self,\n        sample_sets,\n        f,\n        output_dim,\n        windows=None,\n        polarised=False,\n        mode=None,\n        span_normalise=True,\n        strict=True,\n    ):\n        \"\"\"\n        Compute a windowed statistic from sample counts and a summary function.\n        This is a wrapper around :meth:`.general_stat` for the common case in\n        which the weights are all either 1 or 0, i.e., functions of the joint\n        allele frequency spectrum.\n        See the :ref:`statistics interface <sec_stats_interface>` section for details on\n        :ref:`sample sets <sec_stats_sample_sets>`,\n        :ref:`windows <sec_stats_windows>`,\n        :ref:`mode <sec_stats_mode>`,\n        :ref:`span normalise <sec_stats_span_normalise>`,\n        and :ref:`return value <sec_stats_output_format>`.\n        If ``sample_sets`` is a list of ``k`` sets of samples, then\n        ``f`` should be a function that takes an argument of length ``k`` and\n        returns a one-dimensional array. The ``j``-th element of the argument\n        to ``f`` will be the number of samples in ``sample_sets[j]`` that lie\n        below the node that ``f`` is being evaluated for. See\n        :meth:`.general_stat`  for more details.\n\n        Here is a contrived example: suppose that ``A`` and ``B`` are two sets\n        of samples with ``nA`` and ``nB`` elements, respectively. Passing these\n        as sample sets will give ``f`` an argument of length two, giving the number\n        of samples in ``A`` and ``B`` below the node in question. So, if we define\n\n\n        .. code-block:: python\n\n            def f(x):\n                pA = x[0] / nA\n                pB = x[1] / nB\n                return np.array([pA * pB])\n\n        then if all sites are biallelic,\n\n        .. 
code-block:: python\n\n            ts.sample_count_stat(\n                [A, B], f, 1, windows=\"sites\", polarised=False, mode=\"site\")\n\n        would compute, for each site, the product of the derived allele\n        frequencies in the two sample sets, in a (num sites, 1) array.  If\n        instead ``f`` returns ``np.array([pA, pB, pA * pB])``, then the\n        output would be a (num sites, 3) array, with the first two columns\n        giving the allele frequencies in ``A`` and ``B``, respectively.\n\n        .. note::\n            The summary function ``f`` should return zero when given both 0 and\n            the sample size (i.e., ``f(0) = 0`` and\n            ``f(np.array([len(x) for x in sample_sets])) = 0``).  This is\n            necessary for the statistic to be unaffected by parts of the tree\n            sequence ancestral to none or all of the samples, respectively.\n\n        :param list sample_sets: A list of lists of Node IDs, specifying the\n            groups of nodes to compute the statistic with.\n        :param f: A function that takes a one-dimensional array of length\n            equal to the number of sample sets and returns a one-dimensional array.\n        :param int output_dim: The length of ``f``'s return value.\n        :param list windows: An increasing list of breakpoints between the windows\n            to compute the statistic in.\n        :param bool polarised: Whether to leave the ancestral state out of computations:\n            see :ref:`sec_stats` for more details.\n        :param str mode: A string giving the \"type\" of the statistic to be computed\n            (defaults to \"site\").\n        :param bool span_normalise: Whether to divide the result by the span of the\n            window (defaults to True).\n        :param bool strict: Whether to check that f(0) and f(total weight) are zero.\n        :return: A ndarray with shape equal to (num windows, num statistics).\n        \"\"\"\n        # helper function for 
common case where weights are indicators of sample sets\n        for U in sample_sets:\n            if len(U) != len(set(U)):\n                raise ValueError(\n                    \"Elements of sample_sets must be lists without repeated elements.\"\n                )\n            if len(U) == 0:\n                raise ValueError(\"Elements of sample_sets cannot be empty.\")\n            for u in U:\n                if not self.node(u).is_sample():\n                    raise ValueError(\"Not all elements of sample_sets are samples.\")\n\n        W = np.array([[float(u in A) for A in sample_sets] for u in self.samples()])\n        return self.general_stat(\n            W,\n            f,\n            output_dim,\n            windows=windows,\n            polarised=polarised,\n            mode=mode,\n            span_normalise=span_normalise,\n            strict=strict,\n        )\n\n    def parse_windows(self, windows):\n        # Note: need to make sure windows is a string or we try to compare the\n        # target with a numpy array elementwise.\n        if windows is None:\n            windows = [0.0, self.sequence_length]\n        elif isinstance(windows, str):\n            if windows == \"trees\":\n                windows = self.breakpoints(as_array=True)\n            elif windows == \"sites\":\n                # breakpoints are at 0.0 and at the sites and at the end\n                windows = np.concatenate(\n                    [\n                        [] if self.num_sites > 0 else [0.0],\n                        self.sites_position,\n                        [self.sequence_length],\n                    ]\n                )\n                windows[0] = 0.0\n            else:\n                raise ValueError(\n                    f\"Unrecognized window specification {windows}:\",\n                    \"the only allowed strings are 'sites' or 'trees'\",\n                )\n        return np.array(windows)\n\n    def parse_time_windows(self, time_windows):\n 
       if time_windows is None:\n            time_windows = [0.0, math.inf]\n        return np.array(time_windows)\n\n    def __run_windowed_stat(self, windows, method, *args, **kwargs):\n        strip_win = windows is None\n        windows = self.parse_windows(windows)\n        stat = method(*args, **kwargs, windows=windows)\n        if strip_win:\n            stat = stat[0]\n        return stat\n\n    # only for temporary tw version\n    def __run_windowed_stat_tw(self, windows, time_windows, method, *args, **kwargs):\n        strip_win = windows is None\n        strip_timewin = time_windows is None\n        windows = self.parse_windows(windows)\n        time_windows = self.parse_time_windows(time_windows)\n        stat = method(*args, **kwargs, windows=windows, time_windows=time_windows)\n        if strip_win and strip_timewin:\n            stat = stat[0, 0, :]\n        elif strip_win:\n            stat = stat[0, :, :]\n        elif strip_timewin:\n            stat = stat[:, 0, :]\n        return stat\n\n    def __one_way_sample_set_stat(\n        self,\n        ll_method,\n        sample_sets,\n        windows=None,\n        time_windows=None,\n        mode=None,\n        span_normalise=True,\n        polarised=False,\n    ):\n        if sample_sets is None:\n            sample_sets = self.samples()\n        # First try to convert to a 1D numpy array. 
If it is, then we strip off\n        # the corresponding dimension from the output.\n        drop_dimension = False\n        try:\n            sample_sets = np.array(sample_sets, dtype=np.uint64)\n        except ValueError:\n            pass\n        else:\n            # If we've successfully converted sample_sets to a 1D numpy array\n            # of integers then drop the dimension\n            if len(sample_sets.shape) == 1:\n                sample_sets = [sample_sets]\n                drop_dimension = True\n        sample_set_sizes = np.array(\n            [len(sample_set) for sample_set in sample_sets], dtype=np.uint32\n        )\n        if np.any(sample_set_sizes == 0):\n            raise ValueError(\"Sample sets must contain at least one element\")\n\n        flattened = util.safe_np_int_cast(np.hstack(sample_sets), np.int32)\n        # this next line is temporary, while time windows are implemented\n        # in other methods\n        use_tw = ll_method.__name__ == \"allele_frequency_spectrum\"\n        if use_tw:\n            stat = self.__run_windowed_stat_tw(\n                windows,\n                time_windows,\n                ll_method,\n                sample_set_sizes,\n                flattened,\n                mode=mode,\n                span_normalise=span_normalise,\n                polarised=polarised,\n            )\n        else:\n            stat = self.__run_windowed_stat(\n                windows,\n                ll_method,\n                sample_set_sizes,\n                flattened,\n                mode=mode,\n                span_normalise=span_normalise,\n                polarised=polarised,\n            )\n        if drop_dimension:\n            stat = stat.reshape(stat.shape[:-1])\n            if stat.shape == () and windows is None and time_windows is None:\n                stat = stat[()]\n        return stat\n\n    def parse_sites(self, sites):\n        row_sites, col_sites = None, None\n        if sites is not None:\n     
       if any(not hasattr(a, \"__getitem__\") or isinstance(a, str) for a in sites):\n                raise ValueError(\"Sites must be a list of lists, tuples, or ndarrays\")\n            if len(sites) == 2:\n                row_sites, col_sites = sites\n            elif len(sites) == 1:\n                row_sites = col_sites = sites[0]\n            else:\n                raise ValueError(\n                    f\"Sites must be a length 1 or 2 list, got a length {len(sites)} list\"\n                )\n        return row_sites, col_sites\n\n    def parse_positions(self, positions):\n        row_positions, col_positions = None, None\n        if positions is not None:\n            if any(\n                not hasattr(a, \"__getitem__\") or isinstance(a, str) for a in positions\n            ):\n                raise ValueError(\n                    \"Positions must be a list of lists, tuples, or ndarrays\"\n                )\n            if len(positions) == 2:\n                row_positions, col_positions = positions\n            elif len(positions) == 1:\n                row_positions = col_positions = positions[0]\n            else:\n                raise ValueError(\n                    \"Positions must be a length 1 or 2 list, \"\n                    f\"got a length {len(positions)} list\"\n                )\n        return row_positions, col_positions\n\n    def __two_locus_sample_set_stat(\n        self,\n        ll_method,\n        sample_sets,\n        sites=None,\n        positions=None,\n        mode=None,\n    ):\n        if sample_sets is None:\n            sample_sets = self.samples()\n        row_sites, col_sites = self.parse_sites(sites)\n        row_positions, col_positions = self.parse_positions(positions)\n\n        # First try to convert to a 1D numpy array. 
If we succeed, then we strip off\n        # the corresponding dimension from the output.\n        drop_dimension = False\n        try:\n            sample_sets = np.array(sample_sets, dtype=np.uint64)\n        except ValueError:\n            pass\n        else:\n            # If we've successfully converted sample_sets to a 1D numpy array\n            # of integers then drop the dimension\n            if len(sample_sets.shape) == 1:\n                sample_sets = [sample_sets]\n                drop_dimension = True\n\n        sample_set_sizes = np.array(\n            [len(sample_set) for sample_set in sample_sets], dtype=np.uint32\n        )\n        if np.any(sample_set_sizes == 0):\n            raise ValueError(\"Sample sets must contain at least one element\")\n\n        flattened = util.safe_np_int_cast(np.hstack(sample_sets), np.int32)\n\n        result = ll_method(\n            sample_set_sizes,\n            flattened,\n            row_sites,\n            col_sites,\n            row_positions,\n            col_positions,\n            mode,\n        )\n\n        if drop_dimension:\n            result = result.reshape(result.shape[:2])\n        else:\n            # Orient the data so that the first dimension is the sample set.\n            # With this orientation, we get one LD matrix per sample set.\n            result = result.swapaxes(0, 2).swapaxes(1, 2)\n\n        return result\n\n    def __k_way_two_locus_sample_set_stat(\n        self,\n        ll_method,\n        k,\n        sample_sets,\n        indexes=None,\n        sites=None,\n        positions=None,\n        mode=None,\n    ):\n        sample_set_sizes = np.array(\n            [len(sample_set) for sample_set in sample_sets], dtype=np.uint32\n        )\n        if np.any(sample_set_sizes == 0):\n            raise ValueError(\"Sample sets must contain at least one element\")\n        flattened = util.safe_np_int_cast(np.hstack(sample_sets), np.int32)\n        row_sites, col_sites = 
self.parse_sites(sites)\n        row_positions, col_positions = self.parse_positions(positions)\n        drop_dimension = False\n        indexes = util.safe_np_int_cast(indexes, np.int32)\n        if len(indexes.shape) == 1:\n            indexes = indexes.reshape((1, indexes.shape[0]))\n            drop_dimension = True\n        if len(indexes.shape) != 2 or indexes.shape[1] != k:\n            raise ValueError(\n                \"Indexes must be convertable to a 2D numpy array with {} columns\".format(\n                    k\n                )\n            )\n        result = ll_method(\n            sample_set_sizes,\n            flattened,\n            indexes,\n            row_sites,\n            col_sites,\n            row_positions,\n            col_positions,\n            mode,\n        )\n        if drop_dimension:\n            result = result.reshape(result.shape[:2])\n        else:\n            # Orient the data so that the first dimension is the sample set.\n            # With this orientation, we get one LD matrix per sample set.\n            result = result.swapaxes(0, 2).swapaxes(1, 2)\n        return result\n\n    def __k_way_sample_set_stat(\n        self,\n        ll_method,\n        k,\n        sample_sets,\n        indexes=None,\n        windows=None,\n        mode=None,\n        span_normalise=True,\n        polarised=False,\n        centre=True,\n    ):\n        sample_set_sizes = np.array(\n            [len(sample_set) for sample_set in sample_sets], dtype=np.uint32\n        )\n        if np.any(sample_set_sizes == 0):\n            raise ValueError(\"Sample sets must contain at least one element\")\n        flattened = util.safe_np_int_cast(np.hstack(sample_sets), np.int32)\n        drop_based_on_index = False\n        if indexes is None:\n            drop_based_on_index = True\n            if len(sample_sets) != k:\n                raise ValueError(\n                    \"Must specify indexes if there are not exactly {} sample \"\n              
      \"sets.\".format(k)\n                )\n            indexes = np.arange(k, dtype=np.int32)\n        drop_dimension = False\n        indexes = util.safe_np_int_cast(indexes, np.int32)\n        if len(indexes.shape) == 1:\n            indexes = indexes.reshape((1, indexes.shape[0]))\n            drop_dimension = True\n        if len(indexes.shape) != 2 or indexes.shape[1] != k:\n            raise ValueError(\n                \"Indexes must be convertable to a 2D numpy array with {} columns\".format(\n                    k\n                )\n            )\n        stat = self.__run_windowed_stat(\n            windows,\n            ll_method,\n            sample_set_sizes,\n            flattened,\n            indexes,\n            mode=mode,\n            span_normalise=span_normalise,\n            polarised=polarised,\n            centre=centre,\n        )\n        if drop_dimension:\n            stat = stat.reshape(stat.shape[:-1])\n            if stat.shape == () and windows is None and drop_based_on_index:\n                stat = stat[()]\n        return stat\n\n    def __k_way_weighted_stat(\n        self,\n        ll_method,\n        k,\n        W,\n        indexes=None,\n        windows=None,\n        mode=None,\n        span_normalise=True,\n        polarised=False,\n        centre=True,\n    ):\n        W = np.asarray(W)\n        if indexes is None:\n            if W.shape[1] != k:\n                raise ValueError(\n                    \"Must specify indexes if there are not exactly {} columns \"\n                    \"in W.\".format(k)\n                )\n            indexes = np.arange(k, dtype=np.int32)\n        drop_dimension = False\n        indexes = util.safe_np_int_cast(indexes, np.int32)\n        if len(indexes.shape) == 1:\n            indexes = indexes.reshape((1, indexes.shape[0]))\n            drop_dimension = True\n        if len(indexes.shape) != 2 or indexes.shape[1] != k:\n            raise ValueError(\n                \"Indexes must be 
convertable to a 2D numpy array with {} columns\".format(\n                    k\n                )\n            )\n        stat = self.__run_windowed_stat(\n            windows,\n            ll_method,\n            W,\n            indexes,\n            mode=mode,\n            span_normalise=span_normalise,\n            polarised=polarised,\n            centre=centre,\n        )\n        if drop_dimension:\n            stat = stat.reshape(stat.shape[:-1])\n        return stat\n\n    def __weighted_vector_stat(\n        self,\n        ll_method,\n        W,\n        windows=None,\n        mode=None,\n        span_normalise=True,\n        centre=True,\n        nodes=None,\n    ):\n        W = np.asarray(W)\n        if len(W.shape) == 1:\n            W = W.reshape(W.shape[0], 1)\n        if nodes is None:\n            nodes = list(self.samples())\n        else:\n            if centre:\n                raise ValueError(\"If `nodes` is provided, must have centre=False.\")\n        try:\n            nodes = util.safe_np_int_cast(nodes, np.int32)\n        except Exception:\n            raise ValueError(\"Could not interpret `nodes` as a list of node IDs.\")\n        stat = self.__run_windowed_stat(\n            windows,\n            ll_method,\n            W,\n            mode=mode,\n            span_normalise=span_normalise,\n            centre=centre,\n            nodes=nodes,\n        )\n        return stat\n\n    ############################################\n    # Statistics definitions\n    ############################################\n\n    def diversity(\n        self, sample_sets=None, windows=None, mode=\"site\", span_normalise=True\n    ):\n        \"\"\"\n        Computes mean genetic diversity (also known as \"pi\") in each of the\n        sets of nodes from ``sample_sets``.  
The statistic is also known as\n        \"sample heterozygosity\"; a common citation for the definition is\n        `Nei and Li (1979) <https://doi.org/10.1073/pnas.76.10.5269>`_\n        (equation 22), so it is sometimes called \"Nei's pi\"\n        (but also sometimes \"Tajima's pi\").\n\n        Please see the :ref:`one-way statistics <sec_stats_sample_sets_one_way>`\n        section for details on how the ``sample_sets`` argument is interpreted\n        and how it interacts with the dimensions of the output array.\n        See the :ref:`statistics interface <sec_stats_interface>` section for details on\n        :ref:`windows <sec_stats_windows>`,\n        :ref:`mode <sec_stats_mode>`,\n        :ref:`span normalise <sec_stats_span_normalise>`,\n        and :ref:`return value <sec_stats_output_format>`.\n\n        Note that this quantity can also be computed by the\n        :meth:`divergence <.TreeSequence.divergence>` method.\n\n        What is computed depends on ``mode``:\n\n        \"site\"\n            Mean pairwise genetic diversity: the average over all n choose 2 pairs of\n            sample nodes, of the density of sites at\n            which the two carry different alleles, per unit of chromosome length.\n\n        \"branch\"\n            Mean distance in the tree: the average over all n choose 2 pairs of\n            sample nodes and locations in the window, of the mean distance in\n            the tree between the two samples (in units of time).\n\n        \"node\"\n            For each node, the proportion of genome on which the node is an ancestor to\n            only one of a pair of sample nodes from the sample set, averaged\n            over all n choose 2 pairs of sample nodes.\n\n        :param list sample_sets: A list of lists of Node IDs, specifying the\n            groups of nodes for which the statistic is computed. 
If any of the\n            sample sets contain only a single node, the returned diversity will be\n            NaN. If ``None`` (default), average over all n choose 2 pairs of distinct\n            sample nodes in the tree sequence.\n        :param list windows: An increasing list of breakpoints between the windows\n            to compute the statistic in.\n        :param str mode: A string giving the \"type\" of the statistic to be computed\n            (defaults to \"site\").\n        :param bool span_normalise: Whether to divide the result by the span of the\n            window (defaults to True).\n        :return: A numpy array whose length is equal to the number of sample sets.\n            If there is one sample set and windows=None, a numpy scalar is returned.\n        \"\"\"\n        return self.__one_way_sample_set_stat(\n            self._ll_tree_sequence.diversity,\n            sample_sets,\n            windows=windows,\n            mode=mode,\n            span_normalise=span_normalise,\n        )\n\n    def divergence(\n        self, sample_sets, indexes=None, windows=None, mode=\"site\", span_normalise=True\n    ):\n        r\"\"\"\n        Computes mean genetic divergence between (and within) pairs of\n        sets of nodes from ``sample_sets``.\n        This is the \"average number of differences\", usually referred to as \"dxy\";\n        a common citation for this definition is Nei and Li (1979), who called it\n        :math:`\\pi_{XY}`. 
Note that the mean pairwise nucleotide diversity of a\n        sample set to itself (computed by passing an index of the form (j,j))\n        is its :meth:`diversity <.TreeSequence.diversity>` (see the note below).\n\n        Operates on ``k = 2`` sample sets at a time; please see the\n        :ref:`multi-way statistics <sec_stats_sample_sets_multi_way>`\n        section for details on how the ``sample_sets`` and ``indexes`` arguments are\n        interpreted and how they interact with the dimensions of the output array.\n        See the :ref:`statistics interface <sec_stats_interface>` section for details on\n        :ref:`windows <sec_stats_windows>`,\n        :ref:`mode <sec_stats_mode>`,\n        :ref:`span normalise <sec_stats_span_normalise>`,\n        and :ref:`return value <sec_stats_output_format>`.\n\n        .. note::\n            To avoid unexpected results, sample sets should be nonoverlapping,\n            since comparisons of individuals to themselves are not removed when computing\n            divergence between distinct sample sets. 
(However, specifying an index\n            ``(j, j)`` computes the :meth:`diversity <.TreeSequence.diversity>`\n            of ``sample_set[j]``, which removes self comparisons to provide\n            an unbiased estimate.)\n\n        What is computed depends on ``mode``:\n\n        \"site\"\n            Mean pairwise genetic divergence: the average across every possible pair of\n            chromosomes (one from each sample set), of the density of sites at which\n            the two carry different alleles, per unit of chromosome length.\n\n        \"branch\"\n            Mean distance in the tree: the average across every possible pair of\n            chromosomes (one from each sample set) and locations in the window, of\n            the mean distance in the tree between the two samples (in units of time).\n\n        \"node\"\n            For each node, the proportion of genome on which the node is an ancestor to\n            only one of a pair of chromosomes from the sample set, averaged\n            over all possible pairs.\n\n        :param list sample_sets: A list of lists of Node IDs, specifying the\n            groups of nodes to compute the statistic with.\n        :param list indexes: A list of 2-tuples, or None.\n        :param list windows: An increasing list of breakpoints between the windows\n            to compute the statistic in.\n        :param str mode: A string giving the \"type\" of the statistic to be computed\n            (defaults to \"site\").\n        :param bool span_normalise: Whether to divide the result by the span of the\n            window (defaults to True).\n        :return: A ndarray with shape equal to (num windows, num statistics).\n            If there is one pair of sample sets and windows=None, a numpy scalar is\n            returned.\n\n        \"\"\"\n        return self.__k_way_sample_set_stat(\n            self._ll_tree_sequence.divergence,\n            2,\n            sample_sets,\n            indexes=indexes,\n          
  windows=windows,\n            mode=mode,\n            span_normalise=span_normalise,\n        )\n\n    ############################################\n    # Pairwise sample x sample statistics\n    ############################################\n\n    def _chunk_sequence_by_tree(self, num_chunks):\n        \"\"\"\n        Return list of (left, right) genome interval tuples that contain\n        approximately equal numbers of trees as a 2D numpy array. A\n        maximum of self.num_trees single-tree intervals can be returned.\n        \"\"\"\n        if num_chunks <= 0 or int(num_chunks) != num_chunks:\n            raise ValueError(\"Number of chunks must be an integer > 0\")\n        num_chunks = min(self.num_trees, num_chunks)\n        breakpoints = self.breakpoints(as_array=True)[:-1]\n        splits = np.array_split(breakpoints, num_chunks)\n        chunks = []\n        for j in range(num_chunks - 1):\n            chunks.append((splits[j][0], splits[j + 1][0]))\n        chunks.append((splits[-1][0], self.sequence_length))\n        return chunks\n\n    @staticmethod\n    def _chunk_windows(windows, num_chunks):\n        \"\"\"\n        Returns a list of (at most) num_chunks windows, which represent splitting\n        up the specified list of windows into roughly equal work.\n\n        Currently this is implemented by just splitting up into roughly equal\n        numbers of windows in each chunk.\n        \"\"\"\n        if num_chunks <= 0 or int(num_chunks) != num_chunks:\n            raise ValueError(\"Number of chunks must be an integer > 0\")\n        num_chunks = min(len(windows) - 1, num_chunks)\n        splits = np.array_split(windows[:-1], num_chunks)\n        chunks = []\n        for j in range(num_chunks - 1):\n            chunk = np.append(splits[j], splits[j + 1][0])\n            chunks.append(chunk)\n        chunk = np.append(splits[-1], windows[-1])\n        chunks.append(chunk)\n        return chunks\n\n    def _parallelise_divmat_by_tree(self, 
num_threads, span_normalise, **kwargs):\n        \"\"\"\n        No windows were specified, so we can chunk up the whole genome by\n        tree, and do a simple sum of the results. This means that we have to\n        handle span_normalise specially, though.\n        \"\"\"\n\n        def worker(interval):\n            return self._ll_tree_sequence.divergence_matrix(interval, **kwargs)\n\n        work = self._chunk_sequence_by_tree(num_threads)\n        with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as pool:\n            results = pool.map(worker, work)\n        total = sum(results)\n        if span_normalise:\n            total /= self.sequence_length\n        return total\n\n    def _parallelise_divmat_by_window(self, windows, num_threads, **kwargs):\n        \"\"\"\n        We assume we have a number of windows that's >= to the number\n        of threads available, and let each thread have a chunk of the\n        windows. There will definitely be cases where this leads to\n        pathological behaviour, so we may need a more sophisticated\n        strategy at some point.\n        \"\"\"\n\n        def worker(sub_windows):\n            return self._ll_tree_sequence.divergence_matrix(sub_windows, **kwargs)\n\n        work = self._chunk_windows(windows, num_threads)\n        with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:\n            futures = [executor.submit(worker, sub_windows) for sub_windows in work]\n            concurrent.futures.wait(futures)\n        return np.vstack([future.result() for future in futures])\n\n    @staticmethod\n    def _parse_stat_matrix_sample_sets(ids):\n        \"\"\"\n        Returns a flattened list of sets of IDs. If ids is a 1D list,\n        interpret as n one-element sets. 
Otherwise, it must be a sequence\n        of ID lists.\n        \"\"\"\n        id_dtype = np.int32\n        size_dtype = np.uint64\n        # Exclude some types that could be specified accidentally, and\n        # we may want to reserve for future use.\n        if isinstance(ids, (str, bytes, collections.abc.Mapping, numbers.Number)):\n            raise TypeError(f\"ID specification cannot be a {type(ids)}\")\n        if len(ids) == 0:\n            return np.array([], dtype=id_dtype), np.array([], dtype=size_dtype)\n        if isinstance(ids[0], numbers.Number):\n            # Interpret as a 1D array\n            flat = util.safe_np_int_cast(ids, id_dtype)\n            sizes = np.ones(len(flat), dtype=size_dtype)\n        else:\n            set_lists = []\n            sizes = []\n            for id_list in ids:\n                a = util.safe_np_int_cast(id_list, id_dtype)\n                if len(a.shape) != 1:\n                    raise ValueError(\"ID sets must be 1D integer arrays\")\n                set_lists.append(a)\n                sizes.append(len(a))\n            flat = np.hstack(set_lists)\n            sizes = np.array(sizes, dtype=size_dtype)\n        return flat, sizes\n\n    def divergence_matrix(\n        self,\n        sample_sets=None,\n        *,\n        windows=None,\n        num_threads=0,\n        mode=None,\n        span_normalise=True,\n    ):\n        \"\"\"\n        Finds the matrix of pairwise :meth:`.divergence` values between groups\n        of sample nodes. Returns a numpy array indexed by (window,\n        sample_set, sample_set): the [k,i,j]th value of the result gives the\n        mean divergence between pairs of samples from the i-th and j-th\n        sample sets in the k-th window. As for :meth:`.divergence`,\n        diagonal entries are corrected so that the\n        value gives the mean divergence for *distinct* samples,\n        and so diagonal entries are given by the :meth:`.diversity` of that\n        sample set.  
For this reason, if an element of `sample_sets` has only\n        one element, the corresponding :meth:`.diversity` will be NaN.\n        However, this method will place a value of 0 in the diagonal instead of NaN\n        in such cases; otherwise, this is equivalent to computing values with\n        :meth:`.divergence`.\n        However, this is (usually) more efficient than computing many\n        pairwise values using the `indexes` argument to :meth:`.divergence`,\n        so see :meth:`.divergence` for a description of what exactly is computed.\n\n        :param list sample_sets: A list of sets of IDs of samples.\n        :param list windows: The breakpoints of the windows (including start\n            and end, so has one more entry than number of windows).\n        :param str mode: A string giving the \"type\" of the statistic to be computed\n            (defaults to \"site\"; the other option is \"branch\").\n        :return: An array indexed by (window, sample_set, sample_set), or if windows is\n            `None`, an array indexed by (sample_set, sample_set).\n        \"\"\"\n        # NOTE for documentation of sample_sets. We *must* use samples currently because\n        # the normalisation for non-sample nodes is tricky. Do we normalise by the\n        # total span of the ts where the node is 'present' in the tree? We avoid this\n        # by insisting on sample nodes.\n\n        # NOTE for documentation of num_threads. Need to explain that\n        # it's best to think of it as the number of background *worker* threads.\n        # The default is to run without any worker threads. 
If you want to run\n        # with all the cores on the machine, use num_threads=os.cpu_count().\n        windows_specified = windows is not None\n        windows = self.parse_windows(windows)\n        mode = \"site\" if mode is None else mode\n\n        if sample_sets is None:\n            sample_sets = self.samples()\n            flattened_samples = self.samples()\n            sample_set_sizes = np.ones(len(sample_sets), dtype=np.uint32)\n        else:\n            flattened_samples, sample_set_sizes = self._parse_stat_matrix_sample_sets(\n                sample_sets\n            )\n\n        # FIXME this logic should be merged into __run_windowed_stat if\n        # we generalise the num_threads argument to all stats.\n        if num_threads <= 0:\n            D = self._ll_tree_sequence.divergence_matrix(\n                windows,\n                sample_sets=flattened_samples,\n                sample_set_sizes=sample_set_sizes,\n                mode=mode,\n                span_normalise=span_normalise,\n            )\n        else:\n            if windows_specified:\n                D = self._parallelise_divmat_by_window(\n                    windows,\n                    num_threads,\n                    sample_sets=flattened_samples,\n                    sample_set_sizes=sample_set_sizes,\n                    mode=mode,\n                    span_normalise=span_normalise,\n                )\n            else:\n                D = self._parallelise_divmat_by_tree(\n                    num_threads,\n                    span_normalise=span_normalise,\n                    sample_sets=flattened_samples,\n                    sample_set_sizes=sample_set_sizes,\n                    mode=mode,\n                )\n\n        if not windows_specified:\n            # Drop the windows dimension\n            D = D[0]\n        return D\n\n    def genetic_relatedness(\n        self,\n        sample_sets,\n        indexes=None,\n        windows=None,\n        mode=\"site\",\n    
    span_normalise=True,\n        polarised=True,\n        proportion=True,\n        centre=True,\n    ):\n        \"\"\"\n        Computes genetic relatedness between (and within) pairs of\n        sets of nodes from ``sample_sets``.\n        Operates on ``k = 2`` sample sets at a time; please see the\n        :ref:`multi-way statistics <sec_stats_sample_sets_multi_way>`\n        section for details on how the ``sample_sets`` and ``indexes`` arguments are\n        interpreted and how they interact with the dimensions of the output array.\n        See the :ref:`statistics interface <sec_stats_interface>` section for details on\n        :ref:`windows <sec_stats_windows>`,\n        :ref:`mode <sec_stats_mode>`,\n        :ref:`span normalise <sec_stats_span_normalise>`,\n        :ref:`polarised <sec_stats_polarisation>`,\n        and :ref:`return value <sec_stats_output_format>`.\n\n        What is computed depends on ``mode``:\n\n        \"site\"\n            Frequency of pairwise allelic matches in the window between two\n            sample sets relative to the rest of the sample sets. To be precise,\n            let `m(u,v)` denote the total number of alleles shared between\n            nodes `u` and `v`, and let `m(I,J)` be the average of `m(u,v)` over\n            all nodes `u` in sample set `I` and `v` in sample set `J`. Let `S`\n            and `T` be independently chosen sample sets. Then, for sample sets\n            `I` and `J`, this computes `E[m(I,J) - m(I,S) - m(J,T) + m(S,T)]`\n            if centre=True (the default), or `E[m(I,J)]` if centre=False.\n            This can also be seen as the covariance of a quantitative trait\n            determined by additive contributions from the genomes in each\n            sample set. Let each derived allele be associated with an effect\n            drawn from a `N(0,1)` distribution, and let the trait value of a\n            sample be the sum of its allele effects. 
Then, this computes\n            the covariance between the average trait values of two sample sets.\n            For example, to compute covariance between the traits of diploid\n            individuals, each sample set would be the pair of genomes of each\n            individual, with the trait being the average of the two genomes.\n            If ``proportion=True``, this then corresponds to :math:`K_{c0}` in\n            `Speed & Balding (2014) <https://www.nature.com/articles/nrg3821>`_,\n            multiplied by four (see below).\n\n        \"branch\"\n            Average area of branches in the window ancestral to pairs of samples\n            in two sample sets relative to the rest of the sample sets. To be\n            precise, let `B(u,v)` denote the total area of all branches\n            ancestral to nodes `u` and `v`, and let `B(I,J)` be the average of\n            `B(u,v)` over all nodes `u` in sample set `I` and `v` in sample set\n            `J`. Let `S` and `T` be two independently chosen sample sets. Then\n            for sample sets `I` and `J`, this computes\n            `E[B(I,J) - B(I,S) - B(J,T) + B(S,T)]` if centre=True (the default),\n            or `E[B(I,J)]` if centre=False.\n\n        \"node\"\n            For each node, the proportion of the window over which pairs of\n            samples in two sample sets are descendants, relative to the rest of\n            the sample sets. To be precise, for each node `n`, let `N(u,v)`\n            denote the proportion of the window over which samples `u` and `v`\n            are descendants of `n`, and let `N(I,J)` be the average of\n            `N(u,v)` over all nodes `u` in sample set `I` and `v` in sample set\n            `J`. Let `S` and `T` be two independently chosen sample sets. 
Then\n            for sample sets `I` and `J`, this computes\n            `E[N(I,J) - N(I,S) - N(J,T) + N(S,T)]` if centre=True (the default),\n            or `E[N(I,J)]` if centre=False.\n\n        *Note:* The default for this statistic - unlike most other statistics - is\n        ``polarised=True``. Using the default value ``centre=True``, setting\n        ``polarised=False`` will only multiply the result by a factor of two\n        for branch-mode, or site-mode if all sites are biallelic. (With\n        multiallelic sites the difference is more complicated.) The uncentred\n        and unpolarised value is probably not what you are looking for: for\n        instance, the unpolarised, uncentred site statistic between two samples\n        counts the number of alleles inherited by both *and* the number of\n        alleles inherited by neither of the two samples.\n\n        *Note:* Some authors\n        (see `Speed & Balding (2014) <https://www.nature.com/articles/nrg3821>`_)\n        compute relatedness between `I` and `J` as the total number of all pairwise\n        allelic matches between `I` and `J`, rather than the frequency,\n        which would define `m(I,J)` as the sum of `m(u,v)` rather than the average\n        in the definition of \"site\" relatedness above. If every sample set is the\n        samples of a :math:`k`-ploid individual, this would simply multiply the\n        result by :math:`k^2`. 
However, this definition would make the result not\n        useful as a summary statistic of typical relatedness for larger sample\n        sets.\n\n        :param list sample_sets: A list of lists of Node IDs, specifying the\n            groups of nodes to compute the statistic with.\n        :param list indexes: A list of 2-tuples, or None.\n        :param list windows: An increasing list of breakpoints between the windows\n            to compute the statistic in.\n        :param str mode: A string giving the \"type\" of the statistic to be computed\n            (defaults to \"site\").\n        :param bool span_normalise: Whether to divide the result by the span of the\n            window (defaults to True). Has no effect if ``proportion`` is True.\n        :param bool proportion: Defaults to True.  Whether to divide the result by\n            :meth:`.segregating_sites`, called with the same ``windows``,\n            ``mode``, and ``span_normalise``. Note that this counts sites\n            that are segregating between *any* of the samples of *any* of the\n            sample sets (rather than segregating between all of the samples of\n            the tree sequence).\n        :param bool polarised: Whether to leave the ancestral state out of computations:\n            see :ref:`sec_stats` for more details. Defaults to True.\n        :param bool centre: Defaults to True. 
Whether to 'centre' the result, as\n            described above (the usual definition is centred).\n        :return: A ndarray with shape equal to (num windows, num statistics).\n            If there is one pair of sample sets and windows=None, a numpy scalar is\n            returned.\n        \"\"\"\n\n        out = self.__k_way_sample_set_stat(\n            self._ll_tree_sequence.genetic_relatedness,\n            2,\n            sample_sets,\n            indexes=indexes,\n            windows=windows,\n            mode=mode,\n            span_normalise=span_normalise,\n            polarised=polarised,\n            centre=centre,\n        )\n        if proportion:\n            # TODO this should be done in C also\n            all_samples = np.array(list({u for s in sample_sets for u in s}))\n            denominator = self.segregating_sites(\n                sample_sets=all_samples,\n                windows=windows,\n                mode=mode,\n                span_normalise=span_normalise,\n            )\n            # the shapes of out and denominator should be the same except that\n            # out may have an extra dimension if indexes is not None\n            if indexes is not None and not isinstance(denominator, float):\n                oshape = list(out.shape)\n                oshape[-1] = 1\n                denominator = denominator.reshape(oshape)\n            with np.errstate(divide=\"ignore\", invalid=\"ignore\"):\n                out /= denominator\n\n        return out\n\n    def genetic_relatedness_matrix(\n        self,\n        sample_sets=None,\n        *,\n        windows=None,\n        num_threads=0,\n        mode=None,\n        span_normalise=True,\n    ):\n        \"\"\"\n        Computes the full matrix of pairwise genetic relatedness values\n        between (and within) pairs of sets of nodes from ``sample_sets``.\n        Returns a numpy array indexed by (window, sample_set, sample_set):\n        the [k,i,j]th value of the result gives the\n 
       genetic relatedness between pairs of samples from the i-th and j-th\n        sample sets in the k-th window.\n        This is (usually) more efficient than computing many pairwise\n        values using the `indexes` argument to :meth:`.genetic_relatedness`.\n        Specifically, this computes :meth:`.genetic_relatedness` with\n        ``centre=True`` and ``proportion=False`` (with caveats, see below).\n\n        *Warning:* in some cases, this does not compute exactly the same thing as\n        :meth:`.genetic_relatedness`: see below for more details.\n\n        If `mode=\"branch\"`, then the value obtained is the same as that from\n        :meth:`.genetic_relatedness`, using the options `centre=True` and\n        `proportion=False`. The same is true if `mode=\"site\"` and all sites have\n        at most one mutation.\n\n        However, if some sites have more than one mutation, the value may differ\n        from that given by :meth:`.genetic_relatedness`:, although if the proportion\n        of such sites is small, the difference will be small.\n        The reason is that this function (for efficiency) computes relatedness\n        using :meth:`.divergence_matrix` and the following relationship.\n        \"Relatedness\" measures the number of *shared* alleles (or branches),\n        while \"divergence\" measures the number of *non-shared* alleles (or branches).\n        Let :math:`T_i` be the total distance from sample :math:`i` up to the root;\n        then if :math:`D_{ij}` is the branch-mode divergence between :math:`i` and\n        :math:`j` and :math:`R_{ij}` is the branch-mode relatedness between :math:`i`\n        and :math:`j`, then :math:`T_i + T_j = D_{ij} + 2 R_{ij}.`\n        So, for any samples :math:`I`, :math:`J`, :math:`S`, :math:`T`\n        (that may now be random choices),\n        :math:`R_{IJ}-R_{IS}-R_{JT}+R_{ST} = (D_{IJ}-D_{IS}-D_{JT}+D_{ST})/ (-2)`.\n        This is exactly what we want for (centered) relatedness.\n        However, 
this relationship does not necessarily hold for `mode=\"site\"`:\n        it does hold if we can treat \"number of differing alleles\" as distances\n        on the tree, but this is not necessarily the case in the presence of\n        multiple mutations.\n\n        Another note regarding the above relationship between :math:`R` and :math:`D`\n        is that :meth:`.divergence` of a sample set to itself does not include\n        the \"self\" comparisons (so as to provide an unbiased estimator of a\n        population quantity), while the usual definition of genetic relatedness\n        *does* include such comparisons (to provide, for instance, an appropriate\n        value for prospective results beginning with only a given set of\n        individuals). So, diagonal entries in the relatedness matrix returned here\n        are obtained from :meth:`divergence_matrix` after first correcting\n        diagonals to include these \"self\" comparisons.\n\n        :param list sample_sets: A list of lists of Node IDs, specifying the\n            groups of nodes to compute the statistic with.\n        :param list windows: An increasing list of breakpoints between the windows\n            to compute the statistic in.\n        :param str mode: A string giving the \"type\" of the statistic to be computed\n            (defaults to \"site\").\n        :param bool span_normalise: Whether to divide the result by the span of the\n            window (defaults to True).\n        :return: An array indexed by (window, sample_set, sample_set), or if windows is\n            `None`, an array indexed by (sample_set, sample_set).\n        \"\"\"\n        # Further notes on the relationship between relatedness (R)\n        # and divergence (D) in mode=\"site\":\n        # The summary function for divergence is \"p (1-q)\",\n        # where p and q are the allele frequencies in the two sample sets;\n        # while for relatedness it is \"pq\". 
Summing across *all* alleles,\n        # we get that relatedness plus divergence is\n        # p1 (1-q1) + p1 q1 + ... + pk (1-qk) + pk qk = p1 + ... + pk = 1 .\n        # This implies that\n        # ts.divergence(..., span_normalise=False)\n        # + ts.genetic_relatedness(..., span_normalise=False, centre=False,\n        #       proportion=False, polarised=False)\n        # == ts.num_sites\n        # This could be the basis for a similar relationship between R and D.\n        # However, that relationship holds only with polarised=False, which is not\n        # the default, or what this function does (for good reason).\n        # So, without setting polarised=False, we have that for samples i and j,\n        # divergence plus relatedness is equal to (something like)\n        # the total number of sites at which both i and j are ancestral;\n        # this depends on the samples and so does not cancel out of the centred\n        # version. We could work through these relationships to figure out what exactly\n        # the difference between genetic_relatedness_matrix(mode=\"site\") and\n        # genetic_relatedness(mode=\"site\") is, in the general case of multiple\n        # mutations... 
but that would be confusing, probably not that useful,\n        # and the short version of all this is that \"it's complicated\".\n\n        D = self.divergence_matrix(\n            sample_sets,\n            windows=windows,\n            num_threads=num_threads,\n            mode=mode,\n            span_normalise=span_normalise,\n        )\n\n        if sample_sets is None:\n            n = np.ones(self.num_samples)\n        else:\n            n = np.array([len(x) for x in sample_sets])\n\n        def _normalise(B):\n            if len(B) == 0:\n                return B\n            # correct for lack of self comparisons in divergence\n            np.fill_diagonal(B, np.diag(B) * (n - 1) / n)\n            K = B + np.mean(B)\n            y = np.mean(B, axis=0)\n            X = y[:, np.newaxis] + y[np.newaxis, :]\n            K -= X\n            return K / -2\n\n        if windows is None:\n            return _normalise(D)\n        else:\n            for j in range(D.shape[0]):\n                D[j] = _normalise(D[j])\n        return D\n\n    def genetic_relatedness_weighted(\n        self,\n        W,\n        indexes=None,\n        windows=None,\n        mode=\"site\",\n        span_normalise=True,\n        polarised=False,\n        centre=True,\n    ):\n        r\"\"\"\n        Computes weighted genetic relatedness. 
If the :math:`k` th pair of indices\n        is (i, j) then the :math:`k` th column of output will be\n        :math:`\\sum_{a,b} W_{ai} W_{bj} C_{ab}`,\n        where :math:`W` is the matrix of weights, and :math:`C_{ab}` is the\n        :meth:`genetic_relatedness <.TreeSequence.genetic_relatedness>` between sample\n        a and sample b, summing over all pairs of samples in the tree sequence.\n\n        *Note:* the genetic relatedness matrix :math:`C` here is as returned by\n        :meth:`.genetic_relatedness`, rather than by :meth:`.genetic_relatedness_matrix`\n        (see the latter's documentation for the difference).\n\n        :param numpy.ndarray W: An array of values with one row for each sample node and\n            one column for each set of weights.\n        :param list indexes: A list of 2-tuples, or None (default). Note that if\n            indexes = None, then W must have exactly two columns and this is equivalent\n            to indexes = [(0,1)].\n        :param list windows: An increasing list of breakpoints between the windows\n            to compute the statistic in.\n        :param str mode: A string giving the \"type\" of the statistic to be computed\n            (defaults to \"site\").\n        :param bool span_normalise: Whether to divide the result by the span of the\n            window (defaults to True).\n        :param bool polarised: Whether to leave the ancestral state out of computations:\n            see :ref:`sec_stats` for more details. Defaults to False.\n        :param bool centre: Defaults to True. 
Whether to 'centre' the result, as\n            described above (the usual definition is centred).\n        :return: A ndarray with shape equal to (num windows, num statistics).\n        \"\"\"\n        if len(W) != self.num_samples:\n            raise ValueError(\"First trait dimension must be equal to number of samples.\")\n        return self.__k_way_weighted_stat(\n            self._ll_tree_sequence.genetic_relatedness_weighted,\n            2,\n            W,\n            indexes=indexes,\n            windows=windows,\n            mode=mode,\n            span_normalise=span_normalise,\n            polarised=polarised,\n            centre=centre,\n        )\n\n    def genetic_relatedness_vector(\n        self,\n        W,\n        windows=None,\n        mode=\"site\",\n        span_normalise=True,\n        centre=True,\n        nodes=None,\n    ):\n        r\"\"\"\n        Computes the product of the genetic relatedness matrix and a vector of weights\n        (one per sample). The output is a (num windows) x (num samples) x (num weights)\n        array whose :math:`(w,i,j)`-th element is :math:`\\sum_{b} W_{bj} C_{ib}`,\n        where :math:`W` is the matrix of weights, and :math:`C_{ab}` is the\n        :meth:`genetic_relatedness <.TreeSequence.genetic_relatedness>` between sample\n        `a` and sample `b` in window `w`, and the sum is over all samples in the tree\n        sequence.  Like other statistics, if windows is None, the first dimension in\n        the output is dropped.\n\n        The relatedness used here corresponds to `polarised=True`; no unpolarised option\n        is available for this method.\n\n        Optionally, you may provide a list of focal nodes that modifies the behavior\n        as follows. 
If `nodes` is a list of `n` node IDs (that do not need to be\n        samples), then the output will have dimension (num windows) x n x (num weights),\n        and the matrix :math:`C` used in the definition above is the rectangular matrix\n        with :math:`C_{ij}` the relatedness between `nodes[i]` and `samples[j]`. This\n        can only be used with `centre=False`; if relatedness between uncentred nodes\n        and centred samples is desired, then simply subtract column means from `W` first.\n        The default is `nodes=None`, which is equivalent to setting `nodes` equal to\n        `ts.samples()`.\n\n        :param numpy.ndarray W: An array of values with one row for each sample node and\n            one column for each set of weights.\n        :param list windows: An increasing list of breakpoints between the windows\n            to compute the statistic in.\n        :param str mode: A string giving the \"type\" of the statistic to be computed\n            (defaults to \"site\").\n        :param bool span_normalise: Whether to divide the result by the span of the\n            window (defaults to True).\n        :param bool centre: Whether to use the *centred* relatedness matrix or not:\n            see :meth:`genetic_relatedness <.TreeSequence.genetic_relatedness>`.\n        :param list nodes: Optionally, a list of focal nodes as described above\n            (default: None).\n        :return: A ndarray with shape equal to (num windows, num samples, num weights),\n            or (num samples, num weights) if windows is None.\n        \"\"\"\n        if (not hasattr(W, \"__len__\")) or (len(W) != self.num_samples):\n            raise ValueError(\n                \"First weight dimension must be equal to number of samples.\"\n            )\n\n        out = self.__weighted_vector_stat(\n            self._ll_tree_sequence.genetic_relatedness_vector,\n            W,\n            windows=windows,\n            mode=mode,\n            
span_normalise=span_normalise,\n            centre=centre,\n            nodes=nodes,\n        )\n        return out\n\n    def _expand_indices(self, x: np.ndarray, indices: np.ndarray) -> np.ndarray:\n        y = np.zeros((self.num_samples, x.shape[1]))\n        y[indices] = x\n\n        return y\n\n    def _genetic_relatedness_vector_node(\n        self,\n        arr: np.ndarray,\n        indices: np.ndarray,\n        mode: str,\n        centre: bool = True,\n        windows=None,\n    ) -> np.ndarray:\n        x = arr - arr.mean(axis=0) if centre else arr\n        x = self._expand_indices(x, indices)\n        x = self.genetic_relatedness_vector(\n            W=x,\n            windows=windows,\n            mode=mode,\n            centre=False,\n            nodes=indices,\n            span_normalise=False,  # <- non-default!\n        )[0]\n        x = x - x.mean(axis=0) if centre else x\n\n        return x\n\n    def _genetic_relatedness_vector_individual(\n        self,\n        arr: np.ndarray,\n        indices: np.ndarray,\n        mode: str,\n        centre: bool = True,\n        windows=None,\n    ) -> np.ndarray:\n        ij = np.vstack(\n            [[n, k] for k, i in enumerate(indices) for n in self.individual(i).nodes]\n        )\n        samples, sample_individuals = (\n            ij[:, 0],\n            ij[:, 1],\n        )\n        ploidy = np.bincount(sample_individuals)\n        x = arr - arr.mean(axis=0) if centre else arr\n        x = x[sample_individuals] / ploidy[sample_individuals, np.newaxis]\n        x = self._expand_indices(x, samples)\n        x = self.genetic_relatedness_vector(\n            W=x,\n            windows=windows,\n            mode=mode,\n            centre=False,\n            nodes=samples,\n            span_normalise=False,  # <- non-default!\n        )[0]\n\n        def bincount_fn(w):\n            return np.bincount(sample_individuals, w) / ploidy\n\n        x = np.apply_along_axis(bincount_fn, axis=0, arr=x)\n        x = x 
- x.mean(axis=0) if centre else x  # centering within index in cols\n\n        return x\n\n    def pca(\n        self,\n        num_components: int,\n        windows: list = None,\n        samples: np.ndarray = None,\n        individuals: np.ndarray = None,\n        time_windows: np.ndarray = None,\n        mode: str = \"branch\",\n        centre: bool = True,\n        num_iterations: int = 5,\n        num_oversamples: int = None,\n        random_seed: int = None,\n        range_sketch: np.ndarray = None,\n    ) -> (np.ndarray, np.ndarray, np.ndarray):\n        \"\"\"\n        Performs principal component analysis (PCA) for a given set of samples or\n        individuals (default: all samples). The principal components are the\n        eigenvectors of the genetic relatedness matrix, which are obtained by a\n        randomized singular value decomposition (rSVD) algorithm.\n\n        Concretely, take :math:`M` as the matrix of non-span-normalised\n        genetic relatedness values, for instance obtained by\n        setting :math:`M_{ij}` to be the :meth:`~.TreeSequence.genetic_relatedness`\n        between sample :math:`i` and sample :math:`j` with the specified ``mode``,\n        ``proportion=False`` and ``span_normalise=False``. 
Then by default this\n        returns the top ``num_components`` eigenvectors of :math:`M`, so that\n        ``output.factors[i,k]`` is the position of sample `i` on the `k` th PC.\n        If ``samples`` or ``individuals`` are provided, then this does the same\n        thing, except with :math:`M_{ij}` either the relatedness between\n        ``samples[i]`` and ``samples[j]`` or the average relatedness between the\n        nodes of ``individuals[i]`` and ``individuals[j]``, respectively.\n        Factors are normalized to have norm 1, i.e.,\n        ``(output.factors[:,k] ** 2).sum() == 1`` for any ``k``.\n\n        The parameters ``centre`` and ``mode`` are passed to\n        :meth:`~.TreeSequence.genetic_relatedness`: the default ``centre=True`` results\n        in factors whose elements sum to zero; ``mode`` currently only supports the\n        ``\"branch\"`` setting. If ``windows`` are provided then PCA is carried out\n        separately in each genomic window. If ``time_windows`` is provided, then genetic\n        relatedness is measured using only ancestral material within the given time\n        window (see :meth:`decapitate <.TreeSequence.decapitate>` for how this is\n        defined).\n\n        So that the method scales to large tree sequences, the underlying method\n        relies on a randomized SVD algorithm, using\n        :meth:`genetic_relatedness_vector <.TreeSequence.genetic_relatedness_vector>`.\n        Larger values of ``num_iterations`` and\n        ``num_oversamples`` should produce better approximations to the true eigenvalues,\n        at the cost of greater compute times and/or memory usage. 
The method relies on\n        constructing ``range_sketch``, a low-dimensional approximation to the range\n        of :math:`M`, so that the result of a previous call to ``pca()`` may be passed\n        in.\n\n        To check for convergence, compare\n        ``pc1 = ts.pca()`` and ``pc2 = ts.pca(range_sketch=pc1.range_sketch)``; the\n        difference between ``pc1.factors`` and ``pc2.factors`` provides a\n        diagnostic of the convergence of the algorithm (i.e., if they are close\n        then it has likely converged). Alternatively, the output value of ``error_bound``\n        gives an approximate upper bound for the spectral norm of the difference\n        between :math:`M` and the projection of :math:`M` into the space spanned by\n        the columns of ``range_sketch``.\n        Algorithms are based on Algorithms 8\n        and 9 in Martinsson and Tropp, https://arxiv.org/pdf/2002.01387 .\n\n        :param int num_components: Number of principal components to return.\n        :param list windows: An increasing list of breakpoints between the windows\n            to compute the statistic in (default: the entire genome).\n        :param numpy.ndarray samples: Samples to perform PCA with (default: all samples).\n        :param numpy.ndarray individuals: Individuals to perform PCA with. 
Cannot specify\n            both ``samples`` and ``individuals``.\n        :param numpy.ndarray time_windows: The time interval on which to apply PCA:\n            currently, this must be either None (default, covers all time)\n            or a single interval.\n        :param str mode: A string giving the \"type\" of relatedness to be computed\n            (defaults to \"branch\"; see\n            :meth:`genetic_relatedness_vector\n            <.TreeSequence.genetic_relatedness_vector>`).\n        :param bool centre: Whether to centre the genetic relatedness matrix.\n        :param int num_iterations: Number of power iterations used in the range finding\n            algorithm.\n        :param int num_oversamples: Number of additional test vectors (default: 10).\n            Cannot specify along with range_sketch.\n        :param int random_seed: The random seed. If this is None, a random seed will\n            be automatically generated. Valid random seeds are between 1 and\n            :math:`2^{32} - 1`. Only used if `range_sketch` is not provided.\n        :param numpy.ndarray range_sketch: Sketch matrix for each window. 
Default is\n            randomly generated; cannot specify along with num_oversamples.\n        :return: A :class:`PCAResult` object, containing estimated principal components,\n            eigenvalues, and other information:\n            the principal component loadings are in PCAResult.factors\n            and the principal values are in PCAResult.eigenvalues.\n        \"\"\"\n\n        if (not isinstance(num_iterations, int)) or num_iterations < 1:\n            raise ValueError(\"num_iterations should be a positive integer.\")\n\n        if samples is None and individuals is None:\n            samples = self.samples()\n\n        if samples is not None and individuals is not None:\n            raise ValueError(\"Samples and individuals cannot be used at the same time\")\n        elif samples is not None:\n            output_type = \"node\"\n            dim = len(samples)\n        else:\n            assert individuals is not None\n            output_type = \"individual\"\n            dim = len(individuals)\n\n        if time_windows is None:\n            tree_sequence_low, tree_sequence_high = None, self\n        else:\n            assert time_windows[0] < time_windows[1], (\n                \"The second argument should be larger.\"\n            )\n            tree_sequence_low, tree_sequence_high = (\n                self.decapitate(time_windows[0]),\n                self.decapitate(time_windows[1]),\n            )\n\n        drop_windows = windows is None\n        windows = self.parse_windows(windows)\n        num_windows = len(windows) - 1\n        if num_windows < 1:\n            raise ValueError(\"Must have at least one window.\")\n\n        if num_components > dim:\n            raise ValueError(\n                \"Number of components must be less than or equal to \"\n                \"the number of samples (or individuals, if specified).\"\n            )\n\n        if num_oversamples is not None and range_sketch is not None:\n            raise 
ValueError(\"Cannot specify both num_oversamples and range_sketch.\")\n\n        if range_sketch is None:\n            if num_oversamples is None:\n                num_oversamples = min(10, dim - num_components)\n\n            rng = np.random.default_rng(random_seed)\n            range_sketch = rng.normal(\n                size=(num_windows, dim, num_components + num_oversamples)\n            )\n        else:\n            if drop_windows:\n                range_sketch = np.expand_dims(range_sketch, 0)\n            if range_sketch.shape[-1] < num_components:\n                raise ValueError(\n                    \"range_sketch must have at least as many columns as num_components\"\n                )\n            num_oversamples = range_sketch.shape[-1] - num_components\n\n        num_vectors = num_components + num_oversamples\n        if num_vectors > dim:\n            raise ValueError(\n                \"Number of columns in range_sketch \"\n                \"(num_components + num_oversamples) must be less\"\n                \" than or equal to the number of samples\"\n                \" (or individuals, if specified).\"\n            )\n        rs_exp_dims = (num_windows, dim, num_vectors)\n        rs_obs_dims = range_sketch.shape\n        if rs_obs_dims != rs_exp_dims:\n            if drop_windows:\n                rs_obs_dims = rs_obs_dims[1:]\n                rs_exp_dims = rs_exp_dims[1:]\n            raise ValueError(\n                \"Incorrect shape of range_sketch:\"\n                f\" expected {rs_exp_dims}; got {rs_obs_dims}.\"\n            )\n\n        def _rand_pow_range_finder(\n            operator,\n            operator_dim: int,\n            rank: int,\n            depth: int,\n            num_vectors: int,\n            Q: np.ndarray,\n        ) -> np.ndarray:\n            \"\"\"\n            Algorithm 9 in https://arxiv.org/pdf/2002.01387\n            \"\"\"\n            assert num_vectors >= rank > 0, \"num_vectors should not be smaller than 
rank\"\n            for _ in range(depth):\n                Q = np.linalg.qr(Q)[0]\n                Q = operator(Q)\n            Q = np.linalg.qr(Q)[0]\n            return Q\n\n        def _rand_svd(\n            operator,\n            operator_dim: int,\n            rank: int,\n            depth: int,\n            num_vectors: int,\n            range_sketch: np.ndarray,\n        ) -> (np.ndarray, np.ndarray, np.ndarray, float):\n            \"\"\"\n            Algorithm 8 in https://arxiv.org/pdf/2002.01387\n            \"\"\"\n            assert num_vectors >= rank > 0\n            Q = _rand_pow_range_finder(\n                operator,\n                operator_dim,\n                rank=num_vectors,\n                depth=depth,\n                num_vectors=num_vectors,\n                Q=range_sketch,\n            )\n            C = operator(Q).T\n            U_hat, D, _ = np.linalg.svd(C, full_matrices=False)\n            U = Q @ U_hat\n\n            error_factor = np.power(\n                1 + 4 * np.sqrt(2 * operator_dim / max(1, (rank - 1))),\n                1 / (2 * depth + 1),\n            )\n            error_bound = D[rank] * (1 + error_factor)\n            return U[:, :rank], D[:rank], Q, error_bound\n\n        _f_high = (\n            tree_sequence_high._genetic_relatedness_vector_node\n            if output_type == \"node\"\n            else tree_sequence_high._genetic_relatedness_vector_individual\n        )\n        if time_windows is not None:\n            _f_low = (\n                tree_sequence_low._genetic_relatedness_vector_node\n                if output_type == \"node\"\n                else tree_sequence_low._genetic_relatedness_vector_individual\n            )\n        indices = samples if output_type == \"node\" else individuals\n\n        U = np.empty((num_windows, dim, num_components))\n        D = np.empty((num_windows, num_components))\n        Q = np.empty((num_windows, dim, num_vectors))\n        E = np.empty(num_windows)\n       
 for i in range(num_windows):\n\n            def _G(x, i=i):\n                high = _f_high(\n                    arr=x,\n                    indices=indices,\n                    mode=mode,\n                    centre=centre,\n                    windows=windows[i : i + 2],\n                )\n                if time_windows is None:\n                    return high\n                else:\n                    low = _f_low(\n                        arr=x,\n                        indices=indices,\n                        mode=mode,\n                        centre=centre,\n                        windows=windows[i : i + 2],\n                    )\n                    return high - low\n\n            U[i], D[i], Q[i], E[i] = _rand_svd(\n                operator=_G,\n                operator_dim=dim,\n                rank=num_components,\n                depth=num_iterations,\n                num_vectors=num_vectors,\n                range_sketch=range_sketch[i],\n            )\n\n        if drop_windows:\n            U, D, Q, E = U[0], D[0], Q[0], E[0]\n\n        pca_result = PCAResult(factors=U, eigenvalues=D, range_sketch=Q, error_bound=E)\n\n        return pca_result\n\n    def trait_covariance(self, W, windows=None, mode=\"site\", span_normalise=True):\n        \"\"\"\n        Computes the mean squared covariances between each of the columns of ``W``\n        (the \"phenotypes\") and inheritance along the tree sequence.\n        See the :ref:`statistics interface <sec_stats_interface>` section for details on\n        :ref:`windows <sec_stats_windows>`,\n        :ref:`mode <sec_stats_mode>`,\n        :ref:`span normalise <sec_stats_span_normalise>`,\n        and :ref:`return value <sec_stats_output_format>`.\n        Operates on all samples in the tree sequence.\n\n        Concretely, if `g` is a binary vector that indicates inheritance from an allele,\n        branch, or node and `w` is a column of W, normalised to have mean zero,\n        then the covariance of 
`g` and `w` is :math:`\\\\sum_i g_i w_i`, the sum of the\n        weights corresponding to entries of `g` that are `1`. Since weights sum to\n        zero, this is also equal to the sum of weights whose entries of `g` are 0.\n        So, :math:`cov(g,w)^2 = ((\\\\sum_i g_i w_i)^2 + (\\\\sum_i (1-g_i) w_i)^2)/2`.\n\n        What is computed depends on ``mode``:\n\n        \"site\"\n            The sum of squared covariances between presence/absence of each allele and\n            phenotypes, divided by length of the window (if ``span_normalise=True``).\n            This is computed as sum_a (sum(w[a])^2 / 2), where\n            w is a column of W with the average subtracted off,\n            and w[a] is the sum of all entries of w corresponding to samples\n            carrying allele \"a\", and the first sum is over all alleles.\n\n        \"branch\"\n            The sum of squared covariances between the split induced by each branch and\n            phenotypes, multiplied by branch length, averaged across trees in\n            the window. This is computed as above: a branch with total weight\n            w[b] below b contributes (branch length) * w[b]^2 to the total\n            value for a tree. 
(Since the sum of w is zero, the total weight\n            below b and not below b are equal, canceling the factor of 2\n            above.)\n\n        \"node\"\n            For each node, the squared covariance between the property of\n            inheriting from this node and phenotypes, computed as in \"branch\".\n\n        :param numpy.ndarray W: An array of values with one row for each sample and one\n            column for each \"phenotype\".\n        :param list windows: An increasing list of breakpoints between the windows\n            to compute the statistic in.\n        :param str mode: A string giving the \"type\" of the statistic to be computed\n            (defaults to \"site\").\n        :param bool span_normalise: Whether to divide the result by the span of the\n            window (defaults to True).\n        :return: A ndarray with shape equal to (num windows, num statistics).\n            If windows=None and W is a single column, a numpy scalar is returned.\n        \"\"\"\n        if W.shape[0] != self.num_samples:\n            raise ValueError(\"First trait dimension must be equal to number of samples.\")\n        return self.__run_windowed_stat(\n            windows,\n            self._ll_tree_sequence.trait_covariance,\n            W,\n            mode=mode,\n            span_normalise=span_normalise,\n        )\n\n    def trait_correlation(self, W, windows=None, mode=\"site\", span_normalise=True):\n        \"\"\"\n        Computes the mean squared correlations between each of the columns of ``W``\n        (the \"phenotypes\") and inheritance along the tree sequence.\n        See the :ref:`statistics interface <sec_stats_interface>` section for details on\n        :ref:`windows <sec_stats_windows>`,\n        :ref:`mode <sec_stats_mode>`,\n        :ref:`span normalise <sec_stats_span_normalise>`,\n        and :ref:`return value <sec_stats_output_format>`.\n        Operates on all samples in the tree sequence.\n\n        This is computed as 
squared covariance in\n        :meth:`trait_covariance <.TreeSequence.trait_covariance>`,\n        but divided by :math:`p (1-p)`, where `p` is the proportion of samples\n        inheriting from the allele, branch, or node in question.\n\n        What is computed depends on ``mode``:\n\n        \"site\"\n            The sum of squared correlations between presence/absence of each allele and\n            phenotypes, divided by length of the window (if ``span_normalise=True``).\n            This is computed as the\n            :meth:`trait_covariance <.TreeSequence.trait_covariance>`\n            divided by the variance of the relevant column of W\n            and by :math:`p * (1 - p)`, where :math:`p` is the allele frequency.\n\n        \"branch\"\n            The sum of squared correlations between the split induced by each branch and\n            phenotypes, multiplied by branch length, averaged across trees in\n            the window. This is computed as the\n            :meth:`trait_covariance <.TreeSequence.trait_covariance>`,\n            divided by the variance of the column of w\n            and by :math:`p * (1 - p)`, where :math:`p` is the proportion of\n            the samples lying below the branch.\n\n        \"node\"\n            For each node, the squared correlation between the property of\n            inheriting from this node and phenotypes, computed as in \"branch\".\n\n        Note that above we divide by the **sample** variance, which for a\n        vector x of length n is ``np.var(x) * n / (n-1)``.\n\n        :param numpy.ndarray W: An array of values with one row for each sample and one\n            column for each \"phenotype\". 
Each column must have positive standard\n            deviation.\n        :param list windows: An increasing list of breakpoints between the windows\n            to compute the statistic in.\n        :param str mode: A string giving the \"type\" of the statistic to be computed\n            (defaults to \"site\").\n        :param bool span_normalise: Whether to divide the result by the span of the\n            window (defaults to True).\n        :return: A ndarray with shape equal to (num windows, num statistics).\n            If windows=None and W is a single column, a numpy scalar is returned.\n        \"\"\"\n        if W.shape[0] != self.num_samples:\n            raise ValueError(\"First trait dimension must be equal to number of samples.\")\n        sds = np.std(W, axis=0)\n        if np.any(sds == 0):\n            raise ValueError(\n                \"Weight columns must have positive variance to compute correlation.\"\n            )\n        return self.__run_windowed_stat(\n            windows,\n            self._ll_tree_sequence.trait_correlation,\n            W,\n            mode=mode,\n            span_normalise=span_normalise,\n        )\n\n    def trait_regression(self, *args, **kwargs):\n        \"\"\"\n        Deprecated synonym for\n        :meth:`trait_linear_model <.TreeSequence.trait_linear_model>`.\n        \"\"\"\n        warnings.warn(\n            \"This is deprecated: please use trait_linear_model( ) instead.\",\n            FutureWarning,\n            stacklevel=4,\n        )\n        return self.trait_linear_model(*args, **kwargs)\n\n    def trait_linear_model(\n        self,\n        W,\n        Z=None,\n        windows=None,\n        mode=\"site\",\n        span_normalise=True,\n    ):\n        \"\"\"\n        Finds the relationship between trait and genotype after accounting for\n        covariates.  
Concretely, for each trait w (i.e., each column of W),\n        this does a least-squares fit of the linear model :math:`w \\\\sim g + Z`,\n        where :math:`g` is inheritance in the tree sequence (e.g., genotype)\n        and the columns of :math:`Z` are covariates, and returns the squared\n        coefficient of :math:`g` in this linear model.\n        See the :ref:`statistics interface <sec_stats_interface>` section for details on\n        :ref:`windows <sec_stats_windows>`,\n        :ref:`mode <sec_stats_mode>`,\n        :ref:`span normalise <sec_stats_span_normalise>`,\n        and :ref:`return value <sec_stats_output_format>`.\n        Operates on all samples in the tree sequence.\n\n        To do this, if `g` is a binary vector that indicates inheritance from an allele,\n        branch, or node and `w` is a column of W, there are :math:`k` columns of\n        :math:`Z`, and the :math:`k+2`-vector :math:`b` minimises\n        :math:`\\\\sum_i (w_i - b_0 - b_1 g_i - b_2 z_{1,i} - ... - b_{k+1} z_{k,i})^2`\n        then this returns the number :math:`b_1^2`. If :math:`g` lies in the linear span\n        of the columns of :math:`Z`, then :math:`b_1` is set to 0. 
To fit the\n        linear model without covariates (only the intercept), set `Z = None`.\n\n        What is computed depends on ``mode``:\n\n        \"site\"\n            Computes the sum of :math:`b_1^2/2` for each allele in the window,\n            as above with :math:`g` indicating presence/absence of the allele,\n            then divided by the length of the window if ``span_normalise=True``.\n            (For biallelic loci, this number is the same for both alleles, and so summing\n            over each cancels the factor of two.)\n\n        \"branch\"\n            The squared coefficient :math:`b_1^2`, computed for the split induced by each\n            branch (i.e., with :math:`g` indicating inheritance from that branch),\n            multiplied by branch length and tree span, summed over all trees\n            in the window, and divided by the length of the window if\n            ``span_normalise=True``.\n\n        \"node\"\n            For each node, the squared coefficient :math:`b_1^2`, computed for\n            the property of inheriting from this node, as in \"branch\".\n\n        :param numpy.ndarray W: An array of values with one row for each sample and one\n            column for each \"phenotype\".\n        :param numpy.ndarray Z: An array of values with one row for each sample and one\n            column for each \"covariate\", or `None`. 
Columns of `Z` must be linearly\n            independent.\n        :param list windows: An increasing list of breakpoints between the windows\n            to compute the statistic in.\n        :param str mode: A string giving the \"type\" of the statistic to be computed\n            (defaults to \"site\").\n        :param bool span_normalise: Whether to divide the result by the span of the\n            window (defaults to True).\n        :return: A ndarray with shape equal to (num windows, num statistics).\n            If windows=None and W is a single column, a numpy scalar is returned.\n        \"\"\"\n        if W.shape[0] != self.num_samples:\n            raise ValueError(\"First trait dimension must be equal to number of samples.\")\n        if Z is None:\n            Z = np.ones((self.num_samples, 1))\n        else:\n            tZ = np.column_stack([Z, np.ones((Z.shape[0], 1))])\n            if np.linalg.matrix_rank(tZ) == tZ.shape[1]:\n                Z = tZ\n        if Z.shape[0] != self.num_samples:\n            raise ValueError(\"First dimension of Z must equal the number of samples.\")\n        if np.linalg.matrix_rank(Z) < Z.shape[1]:\n            raise ValueError(\"Matrix of covariates is computationally singular.\")\n        # numpy returns a lower-triangular cholesky\n        K = np.linalg.cholesky(np.matmul(Z.T, Z)).T\n        Z = np.matmul(Z, np.linalg.inv(K))\n        return self.__run_windowed_stat(\n            windows,\n            self._ll_tree_sequence.trait_linear_model,\n            W,\n            Z,\n            mode=mode,\n            span_normalise=span_normalise,\n        )\n\n    def segregating_sites(\n        self, sample_sets=None, windows=None, mode=\"site\", span_normalise=True\n    ):\n        \"\"\"\n        Computes the density of segregating sites for each of the sets of nodes\n        from ``sample_sets``, and related quantities.\n        Please see the :ref:`one-way statistics <sec_stats_sample_sets_one_way>`\n        
section for details on how the ``sample_sets`` argument is interpreted\n        and how it interacts with the dimensions of the output array.\n        See the :ref:`statistics interface <sec_stats_interface>` section for details on\n        :ref:`windows <sec_stats_windows>`, :ref:`mode <sec_stats_mode>`,\n        :ref:`span normalise <sec_stats_span_normalise>`,\n        and :ref:`return value <sec_stats_output_format>`.\n\n        What is computed depends on ``mode``. For a sample set ``A``, computes:\n\n        \"site\"\n            The sum over sites of [the number of alleles found in ``A`` at each site\n            minus one], per unit of chromosome length.\n            If all sites have at most two alleles in ``A``,\n            this is the density of segregating/polymorphic sites in ``A``\n            (since the \"minus one\" reduces the sum for monoallelic sites).\n            For sites with more than two alleles, the sum is increased by\n            one for each additional allele segregating in ``A``.\n            To get the **number** of segregating alleles in ``A``,\n            use ``span_normalise=False``.\n\n        \"branch\"\n            The total length of all branches in the tree subtended by the samples in\n            ``A``, averaged across the window.\n\n        \"node\"\n            The proportion of the window on which the node is ancestral to some,\n            but not all, of the samples in ``A``.\n\n        :param list sample_sets: A list of lists of Node IDs, specifying the\n            groups of nodes to compute the statistic with.\n        :param list windows: An increasing list of breakpoints between the windows\n            to compute the statistic in.\n        :param str mode: A string giving the \"type\" of the statistic to be computed\n            (defaults to \"site\").\n        :param bool span_normalise: Whether to divide the result by the span of the\n            window (defaults to True).\n        :return: A ndarray with shape 
equal to (num windows, num statistics).\n            If there is one sample set and windows=None, a numpy scalar is returned.\n        \"\"\"\n        return self.__one_way_sample_set_stat(\n            self._ll_tree_sequence.segregating_sites,\n            sample_sets,\n            windows=windows,\n            mode=mode,\n            span_normalise=span_normalise,\n        )\n\n    def allele_frequency_spectrum(\n        self,\n        sample_sets=None,\n        windows=None,\n        time_windows=None,\n        mode=\"site\",\n        span_normalise=True,\n        polarised=False,\n    ):\n        \"\"\"\n        Computes the allele frequency spectrum (AFS) in windows across the genome\n        with respect to the specified ``sample_sets``.\n        See the :ref:`statistics interface <sec_stats_interface>` section for details on\n        :ref:`sample sets <sec_stats_sample_sets>`,\n        :ref:`windows <sec_stats_windows>`,\n        :ref:`mode <sec_stats_mode>`,\n        :ref:`span normalise <sec_stats_span_normalise>`,\n        :ref:`polarised <sec_stats_polarisation>`,\n        and :ref:`return value <sec_stats_output_format>`.\n        See :ref:`sec_tutorial_afs` for examples of how to use this method.\n\n        Similar to other windowed stats, the first dimension in the returned array\n        corresponds to windows, such that ``result[i]`` is the AFS in the ith\n        window. The AFS in each window is a k-dimensional numpy array, where k is\n        the number of input sample sets, such that ``result[i, j0, j1, ...]`` is the\n        value associated with frequency ``j0`` in ``sample_sets[0]``, ``j1`` in\n        ``sample_sets[1]``, etc, in window ``i``. 
From here, we will assume that\n        ``afs`` corresponds to the result in a single window, i.e.,\n        ``afs = result[i]``.\n\n        If a single sample set is specified, the allele frequency spectrum within\n        this set is returned, such that ``afs[j]`` is the value associated with\n        frequency ``j``. Thus, singletons are counted in ``afs[1]``, doubletons in\n        ``afs[2]``, and so on. The zeroth entry counts alleles or branches not\n        seen in the samples but that are polymorphic among the rest of the samples\n        of the tree sequence; likewise, the last entry counts alleles fixed in\n        the sample set but polymorphic in the entire set of samples. Please see\n        the :ref:`sec_tutorial_afs_zeroth_entry` for an illustration.\n\n        .. warning:: Please note that singletons are **not** counted in the initial\n            entry in each AFS array (i.e., ``afs[0]``), but in ``afs[1]``.\n\n        If ``sample_sets`` is None (the default), the allele frequency spectrum\n        for all samples in the tree sequence is returned. For convenience, if\n        there is only a single sample set, the outer list may be omitted (so that,\n        unlike other statistics, ``sample_sets=[0,1,2]`` is equivalent to\n        ``sample_sets=[[0,1,2]]``).\n\n        If more than one sample set is specified, the **joint** allele frequency\n        spectrum within windows is returned. For example, if we set\n        ``sample_sets = [S0, S1]``, then afs[1, 2] counts the number of sites that\n        are singletons within S0 and doubletons in S1. The dimensions of the\n        output array will be ``[num_windows] + [1 + len(S) for S in sample_sets]``.\n\n        If ``polarised`` is False (the default) the AFS will be *folded*, so that\n        the counts do not depend on knowing which allele is ancestral. 
If folded,\n        the frequency spectrum for a single sample set ``S`` has ``afs[j] = 0`` for\n        all ``j > len(S) / 2``, so that alleles at frequency ``j`` and ``len(S) - j``\n        both add to the same entry. If there is more than one sample set, the\n        returned array is \"lower triangular\" in a similar way. For more details,\n        especially about handling of multiallelic sites, see :ref:`sec_stats_notes_afs`.\n\n        What is computed depends on ``mode``:\n\n        \"site\"\n            The number of alleles at a given frequency within the specified sample\n            sets for each window, per unit of sequence length. To obtain the total\n            number of alleles, set ``span_normalise`` to False.\n\n        \"branch\"\n            The total length of branches in the trees subtended by subsets of the\n            specified sample sets, per unit of sequence length. To obtain the\n            total, set ``span_normalise`` to False.\n\n        \"node\"\n            Not supported for this method (raises a ValueError).\n\n        For example, suppose that `S0` is a list of 5 sample IDs, and `S1` is\n        a list of 3 other sample IDs. Then `afs = ts.allele_frequency_spectrum([S0, S1],\n        mode=\"site\", span_normalise=False)` will be a 6x4 numpy array, and if\n        there are six alleles that are present in only one sample of `S0` but\n        two samples of `S1`, then `afs[1,2]` will be equal to 6.  
Similarly,\n        `branch_afs = ts.allele_frequency_spectrum([S0, S1], mode=\"branch\",\n        span_normalise=False)` will also be a 6x4 array, and `branch_afs[1,2]`\n        will be the total area (i.e., length times span) of all branches that\n        are above exactly one sample of `S0` and two samples of `S1`.\n\n        :param list sample_sets: A list of lists of Node IDs, specifying the\n            groups of samples to compute the joint allele frequency.\n        :param list windows: An increasing list of breakpoints between windows\n            along the genome.\n        :param str mode: A string giving the \"type\" of the statistic to be computed\n            (defaults to \"site\").\n        :param bool span_normalise: Whether to divide the result by the span of the\n            window (defaults to True).\n        :return: A (k + 1) dimensional numpy array, where k is the number of sample\n            sets specified.\n            If there is one sample set and windows=None, a 1 dimensional array is\n            returned.\n        \"\"\"\n        if sample_sets is None:\n            sample_sets = [self.samples()]\n        try:\n            # this also happens in __one_way_sample_set_stat, but we need to do\n            # slightly different pre-processing here to allow for the case that\n            # sample sets is a single list of IDs (for most stats, this would mean\n            # dropping a dimension, but not for the AFS)\n            sample_sets = np.array(sample_sets, dtype=np.uint64)\n        except ValueError:\n            pass\n        else:\n            if len(sample_sets.shape) == 1:\n                sample_sets = [sample_sets]\n        return self.__one_way_sample_set_stat(\n            self._ll_tree_sequence.allele_frequency_spectrum,\n            sample_sets,\n            windows=windows,\n            time_windows=time_windows,\n            mode=mode,\n            span_normalise=span_normalise,\n            polarised=polarised,\n        
)\n\n    def Tajimas_D(self, sample_sets=None, windows=None, mode=\"site\"):\n        \"\"\"\n        Computes Tajima's D of sets of nodes from ``sample_sets`` in windows.\n        Please see the :ref:`one-way statistics <sec_stats_sample_sets_one_way>`\n        section for details on how the ``sample_sets`` argument is interpreted\n        and how it interacts with the dimensions of the output array.\n        See the :ref:`statistics interface <sec_stats_interface>` section for details on\n        :ref:`windows <sec_stats_windows>`, :ref:`mode <sec_stats_mode>`,\n        and :ref:`return value <sec_stats_output_format>`.\n        Operates on ``k = 1`` sample set at a\n        time. For a sample set ``X`` of ``n`` nodes, if ``T`` is the mean\n        number of pairwise differing sites in ``X`` and ``S`` is the number of\n        sites segregating in ``X`` (computed with :meth:`diversity\n        <.TreeSequence.diversity>` and :meth:`segregating sites\n        <.TreeSequence.segregating_sites>`, respectively, both not span\n        normalised), then Tajima's D is\n\n        .. code-block:: python\n\n            D = (T - S / h) / sqrt(a * S + (b / c) * S * (S - 1))\n            h = 1 + 1 / 2 + ... + 1 / (n - 1)\n            g = 1 + 1 / 2**2 + ... 
+ 1 / (n - 1) ** 2\n            a = (n + 1) / (3 * (n - 1) * h) - 1 / h**2\n            b = 2 * (n**2 + n + 3) / (9 * n * (n - 1)) - (n + 2) / (h * n) + g / h**2\n            c = h**2 + g\n\n        What is computed for diversity and segregating sites depends on ``mode``;\n        see those functions for more details.\n\n        :param list sample_sets: A list of lists of Node IDs, specifying the\n            groups of nodes to compute the statistic with.\n        :param list indexes: A list of 2-tuples, or None.\n        :param list windows: An increasing list of breakpoints between the windows\n            to compute the statistic in.\n        :param str mode: A string giving the \"type\" of the statistic to be computed\n            (defaults to \"site\").\n        :return: A ndarray with shape equal to (num windows, num statistics).\n            If there is one sample set and windows=None, a numpy scalar is returned.\n        \"\"\"\n\n        # TODO this should be done in C as we'll want to support this method there.\n        def tjd_func(sample_set_sizes, flattened, **kwargs):\n            n = sample_set_sizes\n            T = self.ll_tree_sequence.diversity(n, flattened, **kwargs)\n            S = self.ll_tree_sequence.segregating_sites(n, flattened, **kwargs)\n            h = np.array([np.sum(1 / np.arange(1, nn)) for nn in n])\n            g = np.array([np.sum(1 / np.arange(1, nn) ** 2) for nn in n])\n            with np.errstate(invalid=\"ignore\", divide=\"ignore\"):\n                a = (n + 1) / (3 * (n - 1) * h) - 1 / h**2\n                b = 2 * (n**2 + n + 3) / (9 * n * (n - 1)) - (n + 2) / (h * n) + g / h**2\n                D = (T - S / h) / np.sqrt(a * S + (b / (h**2 + g)) * S * (S - 1))\n            return D\n\n        return self.__one_way_sample_set_stat(\n            tjd_func, sample_sets, windows=windows, mode=mode, span_normalise=False\n        )\n\n    def Fst(\n        self, sample_sets, indexes=None, windows=None, mode=\"site\", 
span_normalise=True\n    ):\n        \"\"\"\n        Computes \"windowed\" Fst between pairs of sets of nodes from ``sample_sets``.\n        Operates on ``k = 2`` sample sets at a time; please see the\n        :ref:`multi-way statistics <sec_stats_sample_sets_multi_way>`\n        section for details on how the ``sample_sets`` and ``indexes`` arguments are\n        interpreted and how they interact with the dimensions of the output array.\n        See the :ref:`statistics interface <sec_stats_interface>` section for details on\n        :ref:`windows <sec_stats_windows>`,\n        :ref:`mode <sec_stats_mode>`,\n        :ref:`span normalise <sec_stats_span_normalise>`,\n        and :ref:`return value <sec_stats_output_format>`.\n\n        For sample sets ``X`` and ``Y``, if ``d(X, Y)`` is the\n        :meth:`divergence <.TreeSequence.divergence>`\n        between ``X`` and ``Y``, and ``d(X)`` is the\n        :meth:`diversity <.TreeSequence.diversity>` of ``X``, then what is\n        computed is\n\n        .. 
code-block:: python\n\n            Fst = 1 - 2 * (d(X) + d(Y)) / (d(X) + 2 * d(X, Y) + d(Y))\n\n        What is computed for diversity and divergence depends on ``mode``;\n        see those functions for more details.\n\n        For ``mode='site'``, this definition of Fst appears as equation (6) in\n        `Slatkin (1991) <https://doi.org/10.1017/S0016672300029827>`_, and\n        is also found as equation (9) in\n        `Nei (1973) <https://doi.org/10.1073/pnas.70.12.3321>`_.\n\n        :param list sample_sets: A list of lists of Node IDs, specifying the\n            groups of nodes to compute the statistic with.\n        :param list indexes: A list of 2-tuples.\n        :param list windows: An increasing list of breakpoints between the windows\n            to compute the statistic in.\n        :param str mode: A string giving the \"type\" of the statistic to be computed\n            (defaults to \"site\").\n        :param bool span_normalise: Whether to divide the result by the span of the\n            window (defaults to True).\n        :return: A ndarray with shape equal to (num windows, num statistics).\n            If there is one pair of sample sets and windows=None, a numpy scalar is\n            returned.\n        \"\"\"\n        # TODO this should really be implemented in C (presumably C programmers will want\n        # to compute Fst too), but in the mean time implementing using the low-level\n        # calls has two advantages: (a) we automatically change dimensions like the other\n        # two-way stats and (b) it's a bit more efficient because we're not messing\n        # around with indexes and samples sets twice.\n\n        def fst_func(\n            sample_set_sizes,\n            flattened,\n            indexes,\n            windows,\n            mode,\n            span_normalise,\n            polarised,\n            centre,\n        ):\n            # note: this is kinda hacky - polarised and centre are not used here -\n            # but this 
seems necessary to use our __k_way_sample_set_stat framework\n            divergences = self._ll_tree_sequence.divergence(\n                sample_set_sizes,\n                flattened,\n                indexes=indexes,\n                windows=windows,\n                mode=mode,\n                span_normalise=span_normalise,\n                polarised=polarised,\n                centre=centre,\n            )\n            diversities = self._ll_tree_sequence.diversity(\n                sample_set_sizes,\n                flattened,\n                windows=windows,\n                mode=mode,\n                span_normalise=span_normalise,\n                polarised=polarised,\n            )\n\n            orig_shape = divergences.shape\n            # \"node\" statistics produce a 3D array\n            if len(divergences.shape) == 2:\n                divergences.shape = (divergences.shape[0], 1, divergences.shape[1])\n                diversities.shape = (diversities.shape[0], 1, diversities.shape[1])\n\n            fst = np.repeat(1.0, np.prod(divergences.shape))\n            fst.shape = divergences.shape\n            for i, (u, v) in enumerate(indexes):\n                denom = (\n                    diversities[:, :, u]\n                    + diversities[:, :, v]\n                    + 2 * divergences[:, :, i]\n                )\n                with np.errstate(divide=\"ignore\", invalid=\"ignore\"):\n                    fst[:, :, i] -= (\n                        2 * (diversities[:, :, u] + diversities[:, :, v]) / denom\n                    )\n            fst.shape = orig_shape\n            return fst\n\n        return self.__k_way_sample_set_stat(\n            fst_func,\n            2,\n            sample_sets,\n            indexes=indexes,\n            windows=windows,\n            mode=mode,\n            span_normalise=span_normalise,\n        )\n\n    def Y3(\n        self, sample_sets, indexes=None, windows=None, mode=\"site\", span_normalise=True\n    
):\n        \"\"\"\n        Computes the 'Y' statistic between triples of sets of nodes from ``sample_sets``.\n        Operates on ``k = 3`` sample sets at a time; please see the\n        :ref:`multi-way statistics <sec_stats_sample_sets_multi_way>`\n        section for details on how the ``sample_sets`` and ``indexes`` arguments are\n        interpreted and how they interact with the dimensions of the output array.\n        See the :ref:`statistics interface <sec_stats_interface>` section for details on\n        :ref:`windows <sec_stats_windows>`,\n        :ref:`mode <sec_stats_mode>`,\n        :ref:`span normalise <sec_stats_span_normalise>`,\n        and :ref:`return value <sec_stats_output_format>`.\n\n        What is computed depends on ``mode``. Each is an average across every\n        combination of trios of samples ``(a, b, c)``, one chosen from each sample set:\n\n        \"site\"\n            The average density of sites at which ``a`` differs from ``b`` and\n            ``c``, per unit of chromosome length.\n\n        \"branch\"\n            The average length of all branches that separate ``a`` from ``b``\n            and ``c`` (in units of time).\n\n        \"node\"\n            For each node, the average proportion of the window on which ``a``\n            inherits from that node but ``b`` and ``c`` do not, or vice-versa.\n\n        :param list sample_sets: A list of lists of Node IDs, specifying the\n            groups of nodes to compute the statistic with.\n        :param list indexes: A list of 3-tuples, or None.\n        :param list windows: An increasing list of breakpoints between the windows\n            to compute the statistic in.\n        :param str mode: A string giving the \"type\" of the statistic to be computed\n            (defaults to \"site\").\n        :param bool span_normalise: Whether to divide the result by the span of the\n            window (defaults to True).\n        :return: A ndarray with shape equal to (num windows, num 
statistics).\n            If there is one triple of sample sets and windows=None, a numpy scalar is\n            returned.\n        \"\"\"\n        return self.__k_way_sample_set_stat(\n            self._ll_tree_sequence.Y3,\n            3,\n            sample_sets,\n            indexes=indexes,\n            windows=windows,\n            mode=mode,\n            span_normalise=span_normalise,\n        )\n\n    def Y2(\n        self, sample_sets, indexes=None, windows=None, mode=\"site\", span_normalise=True\n    ):\n        \"\"\"\n        Computes the 'Y2' statistic between pairs of sets of nodes from ``sample_sets``.\n        Operates on ``k = 2`` sample sets at a time; please see the\n        :ref:`multi-way statistics <sec_stats_sample_sets_multi_way>`\n        section for details on how the ``sample_sets`` and ``indexes`` arguments are\n        interpreted and how they interact with the dimensions of the output array.\n        See the :ref:`statistics interface <sec_stats_interface>` section for details on\n        :ref:`windows <sec_stats_windows>`,\n        :ref:`mode <sec_stats_mode>`,\n        :ref:`span normalise <sec_stats_span_normalise>`,\n        and :ref:`return value <sec_stats_output_format>`.\n\n        What is computed depends on ``mode``. 
Each is computed exactly as\n        ``Y3``, except that the average is across every possible trio of samples\n        ``(a, b1, b2)``, where ``a`` is chosen from the first sample set, and\n        ``b1, b2`` are chosen (without replacement) from the second sample set.\n        See :meth:`Y3 <.TreeSequence.Y3>` for more details.\n\n        :param list sample_sets: A list of lists of Node IDs, specifying the\n            groups of nodes to compute the statistic with.\n        :param list indexes: A list of 2-tuples, or None.\n        :param list windows: An increasing list of breakpoints between the windows\n            to compute the statistic in.\n        :param str mode: A string giving the \"type\" of the statistic to be computed\n            (defaults to \"site\").\n        :param bool span_normalise: Whether to divide the result by the span of the\n            window (defaults to True).\n        :return: A ndarray with shape equal to (num windows, num statistics).\n            If there is one pair of sample sets and windows=None, a numpy scalar is\n            returned.\n        \"\"\"\n        return self.__k_way_sample_set_stat(\n            self._ll_tree_sequence.Y2,\n            2,\n            sample_sets,\n            indexes=indexes,\n            windows=windows,\n            mode=mode,\n            span_normalise=span_normalise,\n        )\n\n    def Y1(self, sample_sets, windows=None, mode=\"site\", span_normalise=True):\n        \"\"\"\n        Computes the 'Y1' statistic within each of the sets of nodes given by\n        ``sample_sets``.\n        Please see the :ref:`one-way statistics <sec_stats_sample_sets_one_way>`\n        section for details on how the ``sample_sets`` argument is interpreted\n        and how it interacts with the dimensions of the output array.\n        See the :ref:`statistics interface <sec_stats_interface>` section for details on\n        :ref:`windows <sec_stats_windows>`, :ref:`mode <sec_stats_mode>`,\n        :ref:`span 
normalise <sec_stats_span_normalise>`,\n        and :ref:`return value <sec_stats_output_format>`.\n        Operates on ``k = 1`` sample set at a time.\n\n        What is computed depends on ``mode``. Each is computed exactly as\n        ``Y3``, except that the average is across every possible trio of\n        samples ``(a1, a2, a3)`` all chosen without replacement from the same\n        sample set. See :meth:`Y3 <.TreeSequence.Y3>` for more details.\n\n        :param list sample_sets: A list of lists of Node IDs, specifying the\n            groups of nodes to compute the statistic with.\n        :param list windows: An increasing list of breakpoints between the windows\n            to compute the statistic in.\n        :param str mode: A string giving the \"type\" of the statistic to be computed\n            (defaults to \"site\").\n        :param bool span_normalise: Whether to divide the result by the span of the\n            window (defaults to True).\n        :return: A ndarray with shape equal to (num windows, num statistics).\n            If there is one sample set and windows=None, a numpy scalar is returned.\n        \"\"\"\n        return self.__one_way_sample_set_stat(\n            self._ll_tree_sequence.Y1,\n            sample_sets,\n            windows=windows,\n            mode=mode,\n            span_normalise=span_normalise,\n        )\n\n    def f4(\n        self, sample_sets, indexes=None, windows=None, mode=\"site\", span_normalise=True\n    ):\n        \"\"\"\n        Computes Patterson's f4 statistic between four groups of nodes from\n        ``sample_sets``.\n        Operates on ``k = 4`` sample sets at a time; please see the\n        :ref:`multi-way statistics <sec_stats_sample_sets_multi_way>`\n        section for details on how the ``sample_sets`` and ``indexes`` arguments are\n        interpreted and how they interact with the dimensions of the output array.\n        See the :ref:`statistics interface <sec_stats_interface>` section 
for details on\n        :ref:`windows <sec_stats_windows>`,\n        :ref:`mode <sec_stats_mode>`,\n        :ref:`span normalise <sec_stats_span_normalise>`,\n        and :ref:`return value <sec_stats_output_format>`.\n\n        What is computed depends on ``mode``. Each is an average across every possible\n        combination of four samples ``(a, b; c, d)``, one chosen from each sample set:\n\n        \"site\"\n            The average density of sites at which ``a`` and ``c`` agree but\n            differ from ``b`` and ``d``, minus the average density of sites at\n            which ``a`` and ``d`` agree but differ from ``b`` and ``c``, per\n            unit of chromosome length.\n\n        \"branch\"\n            The average length of all branches that separate ``a`` and ``c``\n            from ``b`` and ``d``, minus the average length of all branches that\n            separate ``a`` and ``d`` from ``b`` and ``c`` (in units of time).\n\n        \"node\"\n            For each node, the average proportion of the window on which ``a`` and ``c``\n            inherit from that node but ``b`` and ``d`` do not, or vice-versa,\n            minus the average proportion of the window on which ``a`` and ``d``\n            inherit from that node but ``b`` and ``c`` do not, or vice-versa.\n\n        :param list sample_sets: A list of lists of Node IDs, specifying the\n            groups of nodes to compute the statistic with.\n        :param list indexes: A list of 4-tuples, or None.\n        :param list windows: An increasing list of breakpoints between the windows\n            to compute the statistic in.\n        :param str mode: A string giving the \"type\" of the statistic to be computed\n            (defaults to \"site\").\n        :param bool span_normalise: Whether to divide the result by the span of the\n            window (defaults to True).\n        :return: A ndarray with shape equal to (num windows, num statistics).\n            If there are four sample sets 
and windows=None, a numpy scalar is returned.\n        \"\"\"\n        return self.__k_way_sample_set_stat(\n            self._ll_tree_sequence.f4,\n            4,\n            sample_sets,\n            indexes=indexes,\n            windows=windows,\n            mode=mode,\n            span_normalise=span_normalise,\n        )\n\n    def f3(\n        self, sample_sets, indexes=None, windows=None, mode=\"site\", span_normalise=True\n    ):\n        r\"\"\"\n        Computes Patterson's f3 statistic between three groups of nodes from\n        ``sample_sets``.\n        Note that the order of the arguments of f3 differs across the literature:\n        here, ``f3([A, B, C])`` for sample sets ``A``, ``B``, and ``C``\n        will estimate\n        :math:`f_3(A; B, C) = \\mathbb{E}[(p_A - p_B) (p_A - p_C)]`,\n        where :math:`p_A` is the allele frequency in ``A``.\n        When used as a test for admixture, the putatively admixed population\n        is usually placed as population ``A`` (see\n        `Peter (2016) <https://doi.org/10.1534/genetics.115.183913>`_\n        for more discussion).\n\n        Operates on ``k = 3`` sample sets at a time; please see the\n        :ref:`multi-way statistics <sec_stats_sample_sets_multi_way>`\n        section for details on how the ``sample_sets`` and ``indexes`` arguments are\n        interpreted and how they interact with the dimensions of the output array.\n        See the :ref:`statistics interface <sec_stats_interface>` section for details on\n        :ref:`windows <sec_stats_windows>`,\n        :ref:`mode <sec_stats_mode>`,\n        :ref:`span normalise <sec_stats_span_normalise>`,\n        and :ref:`return value <sec_stats_output_format>`.\n\n        What is computed depends on ``mode``. 
Each works exactly as\n        :meth:`f4 <.TreeSequence.f4>`, except the average is across every possible\n        combination of four samples ``(a1, b; a2, c)`` where `a1` and `a2` have both\n        been chosen (without replacement) from the first sample set. See\n        :meth:`f4 <.TreeSequence.f4>` for more details.\n\n        :param list sample_sets: A list of lists of Node IDs, specifying the\n            groups of nodes to compute the statistic with.\n        :param list indexes: A list of 3-tuples, or None.\n        :param list windows: An increasing list of breakpoints between the windows\n            to compute the statistic in.\n        :param str mode: A string giving the \"type\" of the statistic to be computed\n            (defaults to \"site\").\n        :param bool span_normalise: Whether to divide the result by the span of the\n            window (defaults to True).\n        :return: A ndarray with shape equal to (num windows, num statistics).\n            If there are three sample sets and windows=None, a numpy scalar is returned.\n        \"\"\"\n        return self.__k_way_sample_set_stat(\n            self._ll_tree_sequence.f3,\n            3,\n            sample_sets,\n            indexes=indexes,\n            windows=windows,\n            mode=mode,\n            span_normalise=span_normalise,\n        )\n\n    def f2(\n        self, sample_sets, indexes=None, windows=None, mode=\"site\", span_normalise=True\n    ):\n        \"\"\"\n        Computes Patterson's f2 statistic between two groups of nodes from\n        ``sample_sets``.\n        Operates on ``k = 2`` sample sets at a time; please see the\n        :ref:`multi-way statistics <sec_stats_sample_sets_multi_way>`\n        section for details on how the ``sample_sets`` and ``indexes`` arguments are\n        interpreted and how they interact with the dimensions of the output array.\n        See the :ref:`statistics interface <sec_stats_interface>` section for details on\n        
:ref:`windows <sec_stats_windows>`,\n        :ref:`mode <sec_stats_mode>`,\n        :ref:`span normalise <sec_stats_span_normalise>`,\n        and :ref:`return value <sec_stats_output_format>`.\n\n        What is computed depends on ``mode``. Each works exactly as\n        :meth:`f4 <.TreeSequence.f4>`, except the average is across every possible\n        combination of four samples ``(a1, b1; a2, b2)`` where `a1` and `a2` have\n        both been chosen (without replacement) from the first sample set, and ``b1``\n        and ``b2`` have both been chosen (without replacement) from the second\n        sample set. See :meth:`f4 <.TreeSequence.f4>` for more details.\n\n        :param list sample_sets: A list of lists of Node IDs, specifying the\n            groups of nodes to compute the statistic with.\n        :param list indexes: A list of 2-tuples, or None.\n        :param list windows: An increasing list of breakpoints between the windows\n            to compute the statistic in.\n        :param str mode: A string giving the \"type\" of the statistic to be computed\n            (defaults to \"site\").\n        :param bool span_normalise: Whether to divide the result by the span of the\n            window (defaults to True).\n        :return: A ndarray with shape equal to (num windows, num statistics).\n            If there is one pair of sample sets and windows=None, a numpy scalar is\n            returned.\n        \"\"\"\n        return self.__k_way_sample_set_stat(\n            self._ll_tree_sequence.f2,\n            2,\n            sample_sets,\n            indexes=indexes,\n            windows=windows,\n            mode=mode,\n            span_normalise=span_normalise,\n        )\n\n    def mean_descendants(self, sample_sets):\n        \"\"\"\n        Computes for every node the mean number of samples in each of the\n        `sample_sets` that descend from that node, averaged over the\n        portions of the genome for which the node is ancestral to *any* 
sample.\n        The output is an array, `C[node, j]`, which reports the total span of\n        all genomes in `sample_sets[j]` that inherit from `node`, divided by\n        the total span of the genome on which `node` is an ancestor to any\n        sample in the tree sequence.\n\n        .. warning:: The interface for this method is preliminary and may be subject to\n            backwards incompatible changes in the near future. The long-term stable\n            API for this method will be consistent with other :ref:`sec_stats`.\n            In particular, the normalization by proportion of the genome that `node`\n            is an ancestor to anyone may not be the default behaviour in the future.\n\n        :param list sample_sets: A list of lists of node IDs.\n        :return: An array with dimensions (number of nodes in the tree sequence,\n            number of reference sets)\n        \"\"\"\n        return self._ll_tree_sequence.mean_descendants(sample_sets)\n\n    def genealogical_nearest_neighbours(self, focal, sample_sets, num_threads=0):\n        \"\"\"\n        Return the genealogical nearest neighbours (GNN) proportions for the given\n        focal nodes, with reference to two or more sets of interest, averaged over all\n        trees in the tree sequence.\n\n        The GNN proportions for a focal node in a single tree are given by first finding\n        the most recent common ancestral node :math:`a` between the focal node and any\n        other node present in the reference sets. 
The GNN proportion for a specific\n        reference set, :math:`S` is the number of nodes in :math:`S` that descend from\n        :math:`a`, as a proportion of the total number of descendant nodes in any of the\n        reference sets.\n\n        For example, consider a case with 2 sample sets, :math:`S_1` and :math:`S_2`.\n        For a given tree, :math:`a` is the node that includes at least one descendant in\n        :math:`S_1` or :math:`S_2` (not including the focal node). If the descendants of\n        :math:`a` include some nodes in :math:`S_1` but no nodes in :math:`S_2`, then the\n        GNN proportions for that tree will be 100% :math:`S_1` and 0% :math:`S_2`, or\n        :math:`[1.0, 0.0]`.\n\n        For a given focal node, the GNN proportions returned by this function are an\n        average of the GNNs for each tree, weighted by the genomic distance spanned by\n        that tree.\n\n        For a precise mathematical definition of GNN, see https://doi.org/10.1101/458067\n\n        .. note::\n            The reference sets need not include all the samples, hence the most\n            recent common ancestral node of the reference sets, :math:`a`, need not be\n            the immediate ancestor of the focal node. If the reference sets only comprise\n            sequences from relatively distant individuals, the GNN statistic may end up\n            as a measure of comparatively distant ancestry, even for tree sequences that\n            contain many closely related individuals.\n\n        .. warning:: The interface for this method is preliminary and may be subject to\n            backwards incompatible changes in the near future. 
The long-term stable\n            API for this method will be consistent with other :ref:`sec_stats`.\n\n        :param list focal: A list of :math:`n` nodes whose GNNs should be calculated.\n        :param list sample_sets: A list of :math:`m` lists of node IDs.\n        :return: An :math:`n` by :math:`m` array of focal nodes by GNN proportions.\n            Every focal node corresponds to a row. The numbers in each\n            row correspond to the GNN proportion for each of the passed-in reference\n            sets. Rows therefore sum to one.\n        :rtype: numpy.ndarray\n        \"\"\"\n        # TODO add windows=None option: https://github.com/tskit-dev/tskit/issues/193\n        if num_threads <= 0:\n            return self._ll_tree_sequence.genealogical_nearest_neighbours(\n                focal, sample_sets\n            )\n        else:\n            worker = functools.partial(\n                self._ll_tree_sequence.genealogical_nearest_neighbours,\n                reference_sets=sample_sets,\n            )\n            focal = util.safe_np_int_cast(focal, np.int32)\n            splits = np.array_split(focal, num_threads)\n            with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as pool:\n                arrays = pool.map(worker, splits)\n            return np.vstack(list(arrays))\n\n    def kc_distance(self, other, lambda_=0.0):\n        \"\"\"\n        Returns the average :meth:`Tree.kc_distance` between pairs of trees along\n        the sequence whose intervals overlap. 
The average is weighted by the\n        fraction of the sequence on which each pair of trees overlap.\n\n        :param TreeSequence other: The other tree sequence to compare to.\n        :param float lambda_: The KC metric lambda parameter determining the\n            relative weight of topology and branch length.\n        :return: The computed KC distance between this tree sequence and other.\n        :rtype: float\n        \"\"\"\n        return self._ll_tree_sequence.get_kc_distance(other._ll_tree_sequence, lambda_)\n\n    def count_topologies(self, sample_sets=None):\n        \"\"\"\n        Returns a generator that produces the same distribution of topologies as\n        :meth:`Tree.count_topologies` but sequentially for every tree in a tree\n        sequence. For use on a tree sequence this method is much faster than\n        computing the result independently per tree.\n\n        .. warning:: The interface for this method is preliminary and may be subject to\n            backwards incompatible changes in the near future.\n\n        :param list sample_sets: A list of lists of Node IDs, specifying the\n            groups of nodes to compute the statistic with.\n        :rtype: iter(:class:`tskit.TopologyCounter`)\n        :raises ValueError: If nodes in ``sample_sets`` are invalid or are\n            internal samples.\n        \"\"\"\n        if sample_sets is None:\n            sample_sets = [self.samples(population=pop.id) for pop in self.populations()]\n\n        yield from combinatorics.treeseq_count_topologies(self, sample_sets)\n\n    def ibd_segments(\n        self,\n        *,\n        within=None,\n        between=None,\n        max_time=None,\n        min_span=None,\n        store_pairs=None,\n        store_segments=None,\n    ):\n        \"\"\"\n        Finds pairs of samples that are identical by descent (IBD) and returns\n        the result as an :class:`.IdentitySegments` instance. 
The information\n        stored in this object is controlled by the ``store_pairs`` and\n        ``store_segments`` parameters. By default only total counts and other\n        statistics of the IBD segments are stored (i.e.,\n        ``store_pairs=False``), since storing pairs and segments has a\n        substantial CPU and memory overhead. Please see the\n        :ref:`sec_identity` section for more details on how to access the\n        information stored in the :class:`.IdentitySegments`.\n\n        If ``within`` is specified, only IBD segments for pairs of nodes within\n        that set will be recorded. If ``between`` is specified, only IBD\n        segments from pairs that are in one or other of the specified sample\n        sets will be reported. Note that ``within`` and ``between`` are\n        mutually exclusive.\n\n        A pair of nodes ``(u, v)`` has an IBD segment with a left and right\n        coordinate ``[left, right)`` and ancestral node ``a`` iff the most\n        recent common ancestor of the segment ``[left, right)`` in nodes ``u``\n        and ``v`` is ``a``, and the segment has been inherited along the same\n        genealogical path (i.e., it has not been broken by recombination). The\n        segments returned are the longest possible ones.\n\n        Note that this definition is purely genealogical --- allelic states\n        *are not* considered here. If used without time or length thresholds, the\n        segments returned for a given pair will partition the span of the contig\n        represented by the tree sequence.\n\n        :param list within: A list of node IDs defining the set of nodes that\n            we find IBD segments for. If not specified, this defaults to\n            all samples in the tree sequence.\n        :param list[list] between: A list of lists of sample node IDs. 
Given\n            two sample sets A and B, only IBD segments will be returned such\n            that one of the samples is an element of A and the other is\n            an element of B. Cannot be specified with ``within``.\n        :param float max_time: Only segments inherited from common\n            ancestors whose node times are more recent than the specified time\n            will be returned. Specifying a maximum time is strongly recommended when\n            working with large tree sequences.\n        :param float min_span: Only segments in which the difference between\n            the right and left genome coordinates (i.e., the span of the\n            segment) is greater than this value will be included. (Default=0)\n        :param bool store_pairs: If True store information separately for each\n            pair of samples ``(a, b)`` that are found to be IBD. Otherwise\n            store summary information about all sample pairs. (Default=False)\n        :param bool store_segments: If True store each IBD segment\n            ``(left, right, c)`` and associate it with the corresponding\n            sample pair ``(a, b)``. 
If True, implies ``store_pairs``.\n            (Default=False).\n        :return: An :class:`.IdentitySegments` object containing the recorded\n            IBD information.\n        :rtype: IdentitySegments\n        \"\"\"\n        return self.dump_tables().ibd_segments(\n            within=within,\n            between=between,\n            max_time=max_time,\n            min_span=min_span,\n            store_segments=store_segments,\n            store_pairs=store_pairs,\n        )\n\n    def pair_coalescence_counts(\n        self,\n        sample_sets=None,\n        indexes=None,\n        windows=None,\n        span_normalise=True,\n        pair_normalise=False,\n        time_windows=\"nodes\",\n    ):\n        \"\"\"\n        Calculate the number of coalescing sample pairs per node, summed over\n        trees and weighted by tree span.\n\n        The number of coalescing pairs may be calculated within or between the\n        non-overlapping lists of samples contained in `sample_sets`. In the\n        latter case, pairs are counted if they have exactly one member in each\n        of two sample sets. If `sample_sets` is omitted, a single set\n        containing all samples is assumed.\n\n        The argument `indexes` may be used to specify which pairs of sample\n        sets to compute the statistic between, and in what order. If\n        `indexes=None`, then `indexes` is assumed to equal `[(0,0)]` for a\n        single sample set and `[(0,1)]` for two sample sets. 
For more than two\n        sample sets, `indexes` must be explicitly passed.\n\n        The argument `time_windows` may be used to count coalescence\n        events within time intervals (if an array of breakpoints is supplied)\n        rather than for individual nodes (the default).\n\n        The output array has dimension `(windows, indexes, nodes)` with\n        dimensions dropped when the corresponding argument is set to None.\n\n        :param list sample_sets: A list of lists of Node IDs, specifying the\n            groups of nodes to compute the statistic with, or None.\n        :param list indexes: A list of 2-tuples, or None.\n        :param list windows: An increasing list of breakpoints between the\n            sequence windows to compute the statistic in, or None.\n        :param bool span_normalise: Whether to divide the result by the span of\n            non-missing sequence in the window (defaults to True).\n        :param bool pair_normalise: Whether to divide the result by the total\n            number of pairs for a given index (defaults to False).\n        :param time_windows: Either a string \"nodes\" or an increasing\n            list of breakpoints between time intervals.\n        \"\"\"\n\n        if sample_sets is None:\n            sample_sets = [list(self.samples())]\n\n        drop_middle_dimension = False\n        if indexes is None:\n            drop_middle_dimension = True\n            if len(sample_sets) == 1:\n                indexes = [(0, 0)]\n            elif len(sample_sets) == 2:\n                indexes = [(0, 1)]\n            else:\n                raise ValueError(\n                    \"Must specify indexes if there are more than two sample sets\"\n                )\n        num_indexes = len(indexes)\n\n        drop_left_dimension = False\n        if windows is None:\n            drop_left_dimension = True\n            windows = np.array([0.0, self.sequence_length])\n        num_windows = len(windows) - 1\n\n        if 
isinstance(time_windows, str) and time_windows == \"nodes\":\n            num_time_windows = self.num_nodes\n            node_bin_map = np.arange(num_time_windows, dtype=np.int32)\n        else:\n            if self.time_units == tskit.TIME_UNITS_UNCALIBRATED:\n                raise ValueError(\"Time windows require calibrated node times\")\n            num_time_windows = len(time_windows) - 1\n            node_bin_map = np.digitize(self.nodes_time, time_windows) - 1\n            node_bin_map[node_bin_map == num_time_windows] = tskit.NULL\n            node_bin_map = node_bin_map.astype(np.int32)\n        num_bins = node_bin_map.max() + 1\n\n        sample_set_sizes = np.array([len(s) for s in sample_sets], dtype=np.uint32)\n        sample_sets = util.safe_np_int_cast(np.hstack(sample_sets), np.int32)\n\n        coalescing_pairs = np.zeros((num_windows, num_indexes, num_time_windows))\n        coalescing_pairs[..., :num_bins] = self.ll_tree_sequence.pair_coalescence_counts(\n            sample_sets=sample_sets,\n            sample_set_sizes=sample_set_sizes,\n            windows=windows,\n            indexes=indexes,\n            node_bin_map=node_bin_map,\n            span_normalise=span_normalise,\n            pair_normalise=pair_normalise,\n        )\n\n        if drop_middle_dimension:\n            coalescing_pairs = np.squeeze(coalescing_pairs, axis=1)\n        if drop_left_dimension:\n            coalescing_pairs = np.squeeze(coalescing_pairs, axis=0)\n\n        return coalescing_pairs\n\n    def pair_coalescence_quantiles(\n        self,\n        quantiles,\n        sample_sets=None,\n        indexes=None,\n        windows=None,\n    ):\n        \"\"\"\n        Estimate quantiles of pair coalescence times by inverting the empirical\n        CDF. 
This is equivalent to the \"inverted_cdf\" method of\n        `numpy.quantile` applied to node times, with weights proportional to\n        the number of coalescing pairs per node (averaged over trees, see\n        `TreeSequence.pair_coalescence_counts`).\n\n        Quantiles of pair coalescence times may be calculated within or\n        between the non-overlapping lists of samples contained in `sample_sets`. In\n        the latter case, pairs are counted if they have exactly one member in each\n        of two sample sets. If `sample_sets` is omitted, a single set containing\n        all samples is assumed.\n\n        The argument `indexes` may be used to specify which pairs of sample sets to\n        compute coalescences between, and in what order. If `indexes=None`, then\n        `indexes` is assumed to equal `[(0,0)]` for a single sample set and\n        `[(0,1)]` for two sample sets. For more than two sample sets, `indexes`\n        must be explicitly passed.\n\n        The output array has dimension `(windows, indexes, quantiles)` with\n        dimensions dropped when the corresponding argument is set to None.\n\n        :param quantiles: A list of increasing breakpoints between [0, 1].\n        :param list sample_sets: A list of lists of Node IDs, specifying the\n            groups of nodes to compute the statistic with, or None.\n        :param list indexes: A list of 2-tuples, or None.\n        :param list windows: An increasing list of breakpoints between the\n            sequence windows to compute the statistic in, or None.\n        \"\"\"\n\n        if sample_sets is None:\n            sample_sets = [list(self.samples())]\n\n        drop_middle_dimension = False\n        if indexes is None:\n            drop_middle_dimension = True\n            if len(sample_sets) == 1:\n                indexes = [(0, 0)]\n            elif len(sample_sets) == 2:\n                indexes = [(0, 1)]\n            else:\n                raise ValueError(\n                   
 \"Must specify indexes if there are more than two sample sets\"\n                )\n\n        if self.time_units == tskit.TIME_UNITS_UNCALIBRATED:\n            raise ValueError(\"Pair coalescence quantiles require calibrated node times\")\n\n        drop_left_dimension = False\n        if windows is None:\n            drop_left_dimension = True\n            windows = np.array([0.0, self.sequence_length])\n\n        sample_set_sizes = np.array([len(s) for s in sample_sets], dtype=np.uint32)\n        sample_sets = util.safe_np_int_cast(np.hstack(sample_sets), np.int32)\n        _, node_bin_map = np.unique(self.nodes_time, return_inverse=True)\n        node_bin_map = util.safe_np_int_cast(node_bin_map, np.int32)\n\n        coalescence_times = self.ll_tree_sequence.pair_coalescence_quantiles(\n            sample_sets=sample_sets,\n            sample_set_sizes=sample_set_sizes,\n            windows=windows,\n            indexes=indexes,\n            node_bin_map=node_bin_map,\n            quantiles=quantiles,\n        )\n\n        if drop_middle_dimension:\n            coalescence_times = np.squeeze(coalescence_times, axis=1)\n        if drop_left_dimension:\n            coalescence_times = np.squeeze(coalescence_times, axis=0)\n\n        return coalescence_times\n\n    def pair_coalescence_rates(\n        self,\n        time_windows,\n        sample_sets=None,\n        indexes=None,\n        windows=None,\n    ):\n        \"\"\"\n        Estimate the rate at which pairs of samples coalesce within time\n        windows, using the empirical cumulative distribution function (ecdf) of\n        pair coalescence times.  
Assuming that pair coalescence events follow a\n        nonhomogeneous Poisson process, the empirical rate for a time window\n        :math:`[a, b)` where :math:`ecdf(b) < 1` is,\n\n        .. math::\n\n            log(1 - \\\\frac{ecdf(b) - ecdf(a)}{1 - ecdf(a)}) / (a - b)\n\n        If the last coalescence event is within :math:`[a, b)`, so that\n        :math:`ecdf(b) = 1`, then an estimate of the empirical rate is\n\n        .. math::\n\n            (\\\\mathbb{E}[t | t > a] - a)^{-1}\n\n        where :math:`\\\\mathbb{E}[t | t > a]` is the average pair coalescence time\n        conditional on coalescence after the start of the last epoch.\n\n        The first breakpoint in `time_windows` must start at the age of the\n        samples, and the last must end at infinity. In the output array, any\n        time windows where all pairs have coalesced by the start of the window will\n        contain `NaN` values.\n\n        Pair coalescence rates may be calculated within or between the\n        non-overlapping lists of samples contained in `sample_sets`. In the\n        latter case, pairs are counted if they have exactly one member in each\n        of two sample sets. If `sample_sets` is omitted, a single group\n        containing all samples is assumed.\n\n        The argument `indexes` may be used to specify which pairs of sample\n        sets to compute the statistic between, and in what order. If\n        `indexes=None`, then `indexes` is assumed to equal `[(0,0)]` for a\n        single sample set and `[(0,1)]` for two sample sets. 
For more than two\n        sample sets, `indexes` must be explicitly passed.\n\n        The output array has dimension `(windows, indexes, time_windows)` with\n        dimensions dropped when the corresponding argument is set to None.\n\n        :param time_windows: An increasing list of breakpoints between time\n            intervals, starting at the age of the samples and ending at\n            infinity.\n        :param list sample_sets: A list of lists of Node IDs, specifying the\n            groups of nodes to compute the statistic with, or None.\n        :param list indexes: A list of 2-tuples, or None.\n        :param list windows: An increasing list of breakpoints between the\n            sequence windows to compute the statistic in, or None.\n        \"\"\"\n\n        if sample_sets is None:\n            sample_sets = [list(self.samples())]\n\n        drop_middle_dimension = False\n        if indexes is None:\n            drop_middle_dimension = True\n            if len(sample_sets) == 1:\n                indexes = [(0, 0)]\n            elif len(sample_sets) == 2:\n                indexes = [(0, 1)]\n            else:\n                raise ValueError(\n                    \"Must specify indexes if there are more than two sample sets\"\n                )\n\n        if self.time_units == tskit.TIME_UNITS_UNCALIBRATED:\n            raise ValueError(\"Pair coalescence rates require calibrated node times\")\n\n        drop_left_dimension = False\n        if windows is None:\n            drop_left_dimension = True\n            windows = np.array([0.0, self.sequence_length])\n\n        sample_set_sizes = np.array([len(s) for s in sample_sets], dtype=np.uint32)\n        sample_sets = util.safe_np_int_cast(np.hstack(sample_sets), np.int32)\n        node_bin_map = np.digitize(self.nodes_time, time_windows) - 1\n        node_bin_map[node_bin_map == time_windows.size - 1] = tskit.NULL\n        node_bin_map = node_bin_map.astype(np.int32)\n\n        coalescence_rates = 
self.ll_tree_sequence.pair_coalescence_rates(\n            sample_sets=sample_sets,\n            sample_set_sizes=sample_set_sizes,\n            windows=windows,\n            indexes=indexes,\n            node_bin_map=node_bin_map,\n            time_windows=time_windows,\n        )\n\n        if drop_middle_dimension:\n            coalescence_rates = np.squeeze(coalescence_rates, axis=1)\n        if drop_left_dimension:\n            coalescence_rates = np.squeeze(coalescence_rates, axis=0)\n\n        return coalescence_rates\n\n    def impute_unknown_mutations_time(\n        self,\n        method=None,\n    ):\n        \"\"\"\n        Returns an array of mutation times, where any unknown times are\n        imputed from the times of associated nodes. Not to be confused with\n        :meth:`TableCollection.compute_mutation_times`, which modifies the\n        ``time`` column of the mutations table in place.\n\n        :param str method: The method used to impute the unknown mutation times.\n            Currently only \"min\" is supported, which uses the time of the node\n            below the mutation as the mutation time. 
The \"min\" method can also\n            be specified by ``method=None`` (Default: ``None``).\n        :return: An array of length equal to the number of mutations in the\n            tree sequence.\n        \"\"\"\n        allowed_methods = [\"min\"]\n        if method is None:\n            method = \"min\"\n        if method not in allowed_methods:\n            raise ValueError(\n                f\"Mutations time imputation method must be chosen from {allowed_methods}\"\n            )\n        if method == \"min\":\n            mutations_time = self.mutations_time.copy()\n            unknown = tskit.is_unknown_time(mutations_time)\n            mutations_time[unknown] = self.nodes_time[self.mutations_node[unknown]]\n            return mutations_time\n\n    def ld_matrix(\n        self,\n        sample_sets=None,\n        mode=\"site\",\n        stat=\"r2\",\n        sites=None,\n        positions=None,\n        indexes=None,\n    ):\n        r\"\"\"\n\n        Returns a matrix of the specified two-locus statistic (default\n        :math:`r^2`) computed from sample allelic states or branch lengths.\n        The resulting linkage disequilibrium (LD) matrix represents either the\n        two-locus statistic as computed between all pairs of specified\n        ``sites`` (``\"site\"`` mode, producing a\n        ``len(sites)``-by-``len(sites)`` sized matrix), or as computed from the\n        branch structures at marginal trees between pairs of trees at all\n        specified ``positions`` (``\"branch\"`` mode, producing a\n        ``len(positions)``-by-``len(positions)`` sized matrix).\n\n        The sites considered for ``\"site\"`` mode defaults to all sites (which may\n        result in a very large matrix!), but can be restricted using\n        the ``sites`` argument. 
Sites must be passed as a list of lists,\n        specifying the ``[row_sites, col_sites]``, resulting in a\n        rectangular matrix, or by specifying a single list of ``[sites]``, in\n        which case a square matrix will be produced (see\n        :ref:`sec_stats_two_locus_site` for examples). Here, ``sites``,\n        ``row_sites``, and ``col_sites`` are each lists of site indexes.\n\n        Similarly, in the ``\"branch\"`` mode, the ``positions`` argument specifies\n        genomic coordinates at which the expectation for the two-locus statistic\n        is computed, given the local tree structure.\n        (See :ref:`sec_stats_two_locus_branch` for explanation of in what sense\n        this is an expectation.) This defaults to computing\n        the LD for each pair of distinct trees (this is equivalent to passing in\n        the leftmost coordinates of each tree's span, since intervals are closed on\n        the left and open on the right). Similar to the site mode, a nested list\n        of row and column positions can be specified separately (resulting in a\n        rectangular matrix) or a single list of specified positions results\n        in a square matrix (see :ref:`sec_stats_two_locus_branch` for\n        examples). Like ``sites``, the ``positions`` must be specified as a list\n        of lists.\n\n        Some LD statistics are defined for both within a single set of samples\n        and for two sample sets. If the ``indexes`` argument is specified, then\n        ``indexes`` specifies the indexes of the sample sets in the\n        ``sample_sets`` list between which to compute LD. 
For instance, this\n        results in a 3D array whose ``[k,:,:]``-th slice contains LD values\n        between ``sample_sets[i]`` and ``sample_sets[j]``, where ``(i, j)`` is\n        the ``k``-th element of ``indexes``.\n\n        For more on how the ``indexes`` and ``sample_sets`` interact with the\n        output dimensions, see the :ref:`sec_stats_two_locus_sample_sets`\n        section. Statistics are defined in the\n        :ref:`sec_stats_two_locus_summary_functions_two_way` section.\n\n        **Available Stats** (use ``Stat Name`` in the ``stat`` keyword\n        argument). Statistics marked as \"multi sample set\" allow\n        (but do not require) computation from two sample sets\n        via the ``indexes`` argument.\n\n        ======================= ========== ================ ==============\n        Stat                     Polarised Multi Sample Set Stat Name\n        ======================= ========== ================ ==============\n        :math:`r^2`              n          y               \"r2\"\n        :math:`r`                y          n               \"r\"\n        :math:`D^2`              n          y               \"D2\"\n        :math:`D`                y          n               \"D\"\n        :math:`D'`               y          n               \"D_prime\"\n        :math:`D_z`              n          n               \"Dz\"\n        :math:`\\pi_2`            n          n               \"pi2\"\n        :math:`\\widehat{D^2}`    n          y               \"D2_unbiased\"\n        :math:`\\widehat{D_z}`    n          n               \"Dz_unbiased\"\n        :math:`\\widehat{\\pi_2}`  n          n               \"pi2_unbiased\"\n        ======================= ========== ================ ==============\n\n        :param list sample_sets: A list, or a list of lists of sample node IDs,\n            specifying the groups of nodes to compute the statistic with. 
Defaults\n            to all samples.\n        :param str mode: A string giving the \"type\" of the statistic to be\n            computed. Defaults to \"site\", can be \"site\" or \"branch\".\n        :param str stat: A string giving the selected two-locus statistic to\n            compute. Defaults to \"r2\".\n        :param list sites: A list of lists of sites over which to compute an\n            LD matrix. Can be specified as a list of lists to control the row\n            and column sites. Only available in \"site\" mode. Specify as\n            ``[row_sites, col_sites]`` or ``[all_sites]``.\n            Defaults to all sites.\n        :param list positions: A list of lists of genomic positions where\n            expected LD is computed based on tree topologies and branch\n            lengths. Only applicable in \"branch\" mode. Specify as a list of\n            two lists to control the row and column positions, as\n            ``[row_positions, col_positions]``, or ``[all_positions]``.\n            Defaults to the leftmost coordinates of all trees and computes\n            LD between all pairs of trees.\n        :param list indexes: A list of 2-tuples or a single 2-tuple, specifying\n            the indexes of two sample sets over which to compute a two-way LD\n            statistic. 
Only :math:`r^2`, :math:`D^2`, and :math:`\\widehat{D^2}`\n            are implemented for two-way statistics.\n        :return: A 2D or 3D array of LD matrices.\n        :rtype: numpy.ndarray\n        \"\"\"\n        one_way_stats = {\n            \"D\": self._ll_tree_sequence.D_matrix,\n            \"D2\": self._ll_tree_sequence.D2_matrix,\n            \"r2\": self._ll_tree_sequence.r2_matrix,\n            \"D_prime\": self._ll_tree_sequence.D_prime_matrix,\n            \"r\": self._ll_tree_sequence.r_matrix,\n            \"Dz\": self._ll_tree_sequence.Dz_matrix,\n            \"pi2\": self._ll_tree_sequence.pi2_matrix,\n            \"Dz_unbiased\": self._ll_tree_sequence.Dz_unbiased_matrix,\n            \"D2_unbiased\": self._ll_tree_sequence.D2_unbiased_matrix,\n            \"pi2_unbiased\": self._ll_tree_sequence.pi2_unbiased_matrix,\n        }\n        two_way_stats = {\n            \"D2\": self._ll_tree_sequence.D2_ij_matrix,\n            \"D2_unbiased\": self._ll_tree_sequence.D2_ij_unbiased_matrix,\n            \"r2\": self._ll_tree_sequence.r2_ij_matrix,\n        }\n        stats = one_way_stats if indexes is None else two_way_stats\n        try:\n            stat_func = stats[stat]\n        except KeyError:\n            raise ValueError(\n                f\"Unknown two-locus statistic '{stat}', we support: {list(stats.keys())}\"\n            )\n\n        if indexes is not None:\n            return self.__k_way_two_locus_sample_set_stat(\n                stat_func,\n                2,\n                sample_sets,\n                indexes=indexes,\n                sites=sites,\n                positions=positions,\n                mode=mode,\n            )\n\n        return self.__two_locus_sample_set_stat(\n            stat_func, sample_sets, sites=sites, positions=positions, mode=mode\n        )\n\n    def sample_nodes_by_ploidy(self, ploidy):\n        \"\"\"\n        Returns a 2D array of node IDs, where each row has length `ploidy`.\n        This is 
useful when individuals are not defined in the tree sequence\n        so `TreeSequence.individuals_nodes` cannot be used. The samples are\n        placed in the array in the order which they are found in the node\n        table. The number of sample nodes must be a multiple of ploidy.\n\n        :param int ploidy: The number of samples per individual.\n        :return: A 2D array of node IDs, where each row has length `ploidy`.\n        :rtype: numpy.ndarray\n        \"\"\"\n        if ploidy <= 0 or ploidy != int(ploidy):\n            raise ValueError(\"Ploidy must be a positive integer\")\n        sample_node_ids = np.flatnonzero(self.nodes_flags & tskit.NODE_IS_SAMPLE)\n        num_samples = len(sample_node_ids)\n        if num_samples == 0:\n            raise ValueError(\"No sample nodes in tree sequence\")\n        if num_samples % ploidy != 0:\n            raise ValueError(\n                f\"Number of sample nodes {num_samples} is not a multiple \"\n                f\"of ploidy {ploidy}\"\n            )\n        num_samples_per_individual = num_samples // ploidy\n        sample_node_ids = sample_node_ids.reshape((num_samples_per_individual, ploidy))\n        return sample_node_ids\n\n    def map_to_vcf_model(\n        self,\n        individuals=None,\n        ploidy=None,\n        name_metadata_key=None,\n        individual_names=None,\n        include_non_sample_nodes=None,\n        position_transform=None,\n        contig_id=None,\n        isolated_as_missing=None,\n    ):\n        \"\"\"\n        Maps the sample nodes in this tree sequence to a representation suitable for\n        VCF output, using the individuals if present.\n\n        Creates a VcfModelMapping object that contains a nodes-to-individual\n        mapping as a 2D array of (individuals, nodes), the individual names and VCF\n        compatible site positions and contig length. 
The\n        mapping is created by first checking if the tree sequence contains individuals.\n        If it does, the mapping is created using the individuals in the tree sequence.\n        By default only the sample nodes of the individuals are included in the mapping,\n        unless ``include_non_sample_nodes`` is set to True, in which case all nodes\n        belonging to the individuals are included. Any individuals without any nodes\n        will have no nodes in their row of the mapping, being essentially of zero ploidy.\n        If no individuals are present, the mapping is created using only the sample nodes\n        and the specified ploidy.\n\n        As the tskit data model allows non-integer positions, site positions and contig\n        length are transformed to integer values suitable for VCF output. The\n        transformation is done using the ``position_transform`` function, which must\n        return an integer numpy array the same dimension as the input. By default,\n        this is set to ``numpy.round()`` which will round values to the nearest integer.\n\n        If neither ``name_metadata_key`` nor ``individual_names`` is specified, the\n        individual names are set to ``\"tsk_{individual_id}\"`` for each individual. If\n        no individuals are present, the individual names are set to ``\"tsk_{i}\"`` with\n        ``0 <= i < num_sample_nodes/ploidy``.\n\n        A warning is emitted if any sample nodes do not have an individual ID.\n\n        :param list individuals: Specific individual IDs to include in the VCF. 
If not\n            specified and the tree sequence contains individuals, all individuals are\n            included that are associated with at least one sample node (or at least one\n            of any node if ``include_non_sample_nodes`` is True), and the mapping arrays\n            will be in ascending order of the ID of the individual in the tree sequence.\n        :param int ploidy: The ploidy, or number of nodes per individual. Only used when\n            the tree sequence does not contain individuals. Cannot be used if the tree\n            sequence contains individuals. Defaults to 1 if not specified.\n        :param str name_metadata_key: The key in the individual metadata to use\n            for individual names. Cannot be specified simultaneously with\n            individual_names.\n        :param list individual_names: The names to use for each individual. Cannot\n            be specified simultaneously with name_metadata_key.\n        :param bool include_non_sample_nodes: If True, include all nodes belonging to\n            the individuals in the mapping. If False, only include sample nodes.\n            Defaults to False.\n        :param position_transform: A callable that transforms the\n            site position values into integer valued coordinates suitable for\n            VCF. The function takes a single positional parameter x and must\n            return an integer numpy array the same dimension as x. By default,\n            this is set to ``numpy.round()`` which will round values to the\n            nearest integer. 
If the string \"legacy\" is provided here, the\n            pre 0.2.0 legacy behaviour of rounding values to the nearest integer\n            (starting from 1) and avoiding the output of identical positions\n            by incrementing is used.\n            See the :ref:`sec_export_vcf_modifying_coordinates` for examples\n            and more information.\n        :param str contig_id: The ID of the contig to use in the VCF output.\n            Defaults to \"1\" if not specified.\n        :param bool isolated_as_missing: If True, isolated samples without mutations\n            will be considered as missing data in the VCF output. If False, these samples\n            will have the ancestral state in the VCF output.\n            Default: True.\n        :return: A VcfModelMapping containing the node-to-individual mapping,\n            individual names, transformed positions, and transformed contig length.\n        :raises ValueError: If both name_metadata_key and individual_names are specified,\n            if ploidy is specified when individuals are present, if an invalid individual\n            ID is specified, if a specified individual has no nodes, or if the number of\n            individuals doesn't match the number of names.\n        \"\"\"\n        if include_non_sample_nodes is None:\n            include_non_sample_nodes = False\n\n        if contig_id is None:\n            contig_id = \"1\"\n\n        if isolated_as_missing is None:\n            isolated_as_missing = True\n\n        if name_metadata_key is not None and individual_names is not None:\n            raise ValueError(\n                \"Cannot specify both name_metadata_key and individual_names\"\n            )\n\n        if self.num_individuals > 0 and ploidy is not None:\n            raise ValueError(\n                \"Cannot specify ploidy when individuals are present in the tree sequence\"\n            )\n\n        if self.num_individuals == 0 and include_non_sample_nodes:\n            raise 
ValueError(\n                \"Cannot include non-sample nodes when individuals are not present in \"\n                \"the tree sequence\"\n            )\n\n        if self.num_individuals > 0 and np.any(\n            np.logical_and(\n                self.nodes_individual == tskit.NULL,\n                self.nodes_flags & tskit.NODE_IS_SAMPLE,\n            )\n        ):\n            warnings.warn(\n                \"At least one sample node does not have an individual ID.\", stacklevel=1\n            )\n\n        if self.num_individuals == 0 and individuals is None:\n            if ploidy is None:\n                ploidy = 1\n            individuals_nodes = self.sample_nodes_by_ploidy(ploidy)\n            if individual_names is None:\n                individual_names = [f\"tsk_{i}\" for i in range(len(individuals_nodes))]\n        else:\n            if individuals is None:\n                individuals = np.arange(self.num_individuals, dtype=np.int32)\n            if len(individuals) == 0:\n                raise ValueError(\"No individuals specified\")\n            if min(individuals) < 0 or max(individuals) >= self.num_individuals:\n                raise ValueError(\"Invalid individual ID\")\n\n            individuals_nodes = self.individuals_nodes[individuals]\n            non_sample_nodes = np.logical_not(\n                self.nodes_flags[individuals_nodes] & tskit.NODE_IS_SAMPLE\n            )\n            if np.any(non_sample_nodes) and not include_non_sample_nodes:\n                individuals_nodes[non_sample_nodes] = -1\n                rows_to_reorder = np.any(non_sample_nodes, axis=1)\n                for i in np.where(rows_to_reorder)[0]:\n                    row = individuals_nodes[i]\n                    individuals_nodes[i] = np.concatenate(\n                        [row[row != -1], row[row == -1]]\n                    )\n\n            if individual_names is None:\n                if name_metadata_key is not None:\n                    
individual_names = [\n                        self.individual(i).metadata[name_metadata_key]\n                        for i in individuals\n                    ]\n                else:\n                    individual_names = [f\"tsk_{i}\" for i in individuals]\n\n        individual_names = np.array(individual_names, dtype=object)\n\n        if len(individuals_nodes) != len(individual_names):\n            raise ValueError(\n                \"The number of individuals does not match the number of names\"\n            )\n\n        def legacy_position_transform(positions):\n            \"\"\"\n            Transforms positions in the tree sequence into VCF coordinates under\n            the pre 0.2.0 legacy rule.\n            \"\"\"\n            last_pos = 0\n            transformed = []\n            for pos in positions:\n                pos = int(round(pos))\n                if pos <= last_pos:\n                    pos = last_pos + 1\n                transformed.append(pos)\n                last_pos = pos\n            return transformed\n\n        if position_transform is None:\n            position_transform = np.round\n        elif position_transform == \"legacy\":\n            position_transform = legacy_position_transform\n        transformed_positions = np.array(\n            position_transform(self.sites_position), dtype=int\n        )\n        if transformed_positions.shape != (self.num_sites,):\n            raise ValueError(\n                \"Position transform must return an array of the same length\"\n            )\n        contig_length = max(1, int(position_transform([self.sequence_length])[0]))\n\n        return VcfModelMapping(\n            individuals_nodes,\n            individual_names,\n            transformed_positions,\n            contig_length,\n            contig_id,\n            isolated_as_missing,\n        )\n\n    ############################################\n    #\n    # Deprecated APIs. 
These are either already unsupported, or will be unsupported in a\n    # later release.\n    #\n    ############################################\n\n    def get_pairwise_diversity(self, samples=None):\n        # Deprecated alias for self.pairwise_diversity\n        return self.pairwise_diversity(samples)\n\n    def pairwise_diversity(self, samples=None):\n        \"\"\"\n        Returns the pairwise nucleotide site diversity, the average number of sites\n        that differ between every possible pair of distinct samples.  If `samples` is\n        specified, calculate the diversity within this set.\n\n         .. deprecated:: 0.2.0\n             please use :meth:`.diversity` instead. Since version 0.2.0 the error\n             semantics have also changed slightly. It is no longer an error\n             when there is one sample and a tskit.LibraryError is raised\n             when non-sample IDs are provided rather than a ValueError. It is\n             also no longer an error to compute pairwise diversity at sites\n             with multiple mutations.\n\n        :param list samples: The set of samples within which we calculate\n            the diversity. If None, calculate diversity within the entire sample.\n        :return: The pairwise nucleotide site diversity.\n        :rtype: float\n        \"\"\"\n        if samples is None:\n            samples = self.samples()\n        return float(\n            self.diversity(\n                [samples], windows=[0, self.sequence_length], span_normalise=False\n            )[0][0]\n        )\n\n    def get_time(self, u):\n        # Deprecated. Use ts.node(u).time\n        if u < 0 or u >= self.get_num_nodes():\n            raise ValueError(\"ID out of bounds\")\n        node = self.node(u)\n        return node.time\n\n    def get_population(self, u):\n        # Deprecated. 
Use ts.node(u).population\n        if u < 0 or u >= self.get_num_nodes():\n            raise ValueError(\"ID out of bounds\")\n        node = self.node(u)\n        return node.population\n\n    def records(self):\n        # Deprecated. Use either ts.edges() or ts.edgesets().\n        t = [node.time for node in self.nodes()]\n        pop = [node.population for node in self.nodes()]\n        for e in self.edgesets():\n            yield CoalescenceRecord(\n                e.left, e.right, e.parent, e.children, t[e.parent], pop[e.parent]\n            )\n\n    # Unsupported old methods.\n\n    def get_num_records(self):\n        raise NotImplementedError(\n            \"This method is no longer supported. Please use the \"\n            \"TreeSequence.num_edges if possible to work with edges rather \"\n            \"than coalescence records. If not, please use len(list(ts.edgesets())) \"\n            \"which should return the number of coalescence records, as previously \"\n            \"defined. Please open an issue on GitHub if this is \"\n            \"important for your workflow.\"\n        )\n\n    def diffs(self):\n        raise NotImplementedError(\n            \"This method is no longer supported. Please use the \"\n            \"TreeSequence.edge_diffs() method instead\"\n        )\n\n    def newick_trees(self, precision=3, breakpoints=None, Ne=1):\n        raise NotImplementedError(\n            \"This method is no longer supported. Please use the Tree.newick\"\n            \" method instead\"\n        )\n\n    def to_nexus(self, precision=14):\n        raise NotImplementedError(\n            \"This method is no longer supported since 0.4.0. 
Please use the as_nexus \"\n            \"or write_nexus methods instead\"\n        )\n\n\n# TODO move to \"text_formats.py\"\ndef write_ms(\n    tree_sequence,\n    output,\n    print_trees=False,\n    precision=4,\n    num_replicates=1,\n    write_header=True,\n):\n    \"\"\"\n    Write ``ms`` formatted output from the genotypes of a tree sequence\n    or an iterator over tree sequences. Usage:\n\n    .. code-block:: python\n\n        import tskit as ts\n\n        tree_sequence = msprime.simulate(\n            sample_size=sample_size,\n            Ne=Ne,\n            length=length,\n            mutation_rate=mutation_rate,\n            recombination_rate=recombination_rate,\n            random_seed=random_seed,\n            num_replicates=num_replicates,\n        )\n        with open(\"output.ms\", \"w\") as ms_file:\n            ts.write_ms(tree_sequence, ms_file)\n\n    :param ts tree_sequence: The tree sequence (or iterator over tree sequences) to\n        write to ms file\n    :param io.IOBase output: The file-like object to write the ms-style output\n    :param bool print_trees: Boolean parameter to write out newick format trees\n        to output [optional]\n    :param int precision: Numerical precision with which to write the ms\n        output [optional]\n    :param bool write_header: Boolean parameter to write out the header. [optional]\n    :param int num_replicates: Number of replicates simulated [required if\n        num_replicates used in simulation]\n\n    The first line of this ms-style output file written has two arguments which\n    are sample size and number of replicates. 
The second line has a 0 as a substitute\n    for the random seed.\n    \"\"\"\n    if not isinstance(tree_sequence, collections.abc.Iterable):\n        tree_sequence = [tree_sequence]\n\n    i = 0\n    for tree_seq in tree_sequence:\n        if i > 0:\n            write_header = False\n        i = i + 1\n\n        if write_header is True:\n            print(\n                f\"ms {tree_seq.sample_size} {num_replicates}\",\n                file=output,\n            )\n            print(\"0\", file=output)\n\n        print(file=output)\n        print(\"//\", file=output)\n        if print_trees is True:\n            \"\"\"\n            Print out the trees in ms-format from the specified tree sequence.\n            \"\"\"\n            if len(tree_seq.trees()) == 1:\n                tree = next(tree_seq.trees())\n                newick = tree.newick(precision=precision)\n                print(newick, file=output)\n            else:\n                for tree in tree_seq.trees():\n                    newick = tree.newick(precision=precision)\n                    print(f\"[{tree.span:.{precision}f}]\", newick, file=output)\n\n        else:\n            s = tree_seq.get_num_sites()\n            print(\"segsites:\", s, file=output)\n            if s != 0:\n                print(\"positions: \", end=\"\", file=output)\n                positions = [\n                    variant.position / (tree_seq.sequence_length)\n                    for variant in tree_seq.variants()\n                ]\n                for position in positions:\n                    print(\n                        f\"{position:.{precision}f}\",\n                        end=\" \",\n                        file=output,\n                    )\n                print(file=output)\n\n                genotypes = tree_seq.genotype_matrix()\n                for k in range(tree_seq.num_samples):\n                    tmp_str = \"\".join(map(str, genotypes[:, k]))\n                    if set(tmp_str).issubset({\"0\", 
\"1\", \"-\"}):\n                        print(tmp_str, file=output)\n                    else:\n                        raise ValueError(\n                            \"This tree sequence contains non-biallelic\"\n                            \"SNPs and is incompatible with the ms format!\"\n                        )\n            else:\n                print(file=output)\n\n\n@dataclass\nclass PCAResult:\n    \"\"\"\n    The result of a call to TreeSequence.pca() capturing the output values\n    and algorithm convergence details.\n\n\n    \"\"\"\n\n    factors: np.ndarray\n    \"\"\"\n    The principal component factors (or scores).\n    Columns are orthogonal, with one entry per sample\n    or individual (see :meth:`pca <.TreeSequence.pca>`).\n    This is the same as the loadings because the GRM is symmetric.\n    \"\"\"\n    eigenvalues: np.ndarray\n    \"\"\"\n    Eigenvalues of the genetic relatedness matrix.\n    \"\"\"\n    range_sketch: np.ndarray\n    \"\"\"\n    Range sketch matrix. Can be used as an input for\n    :meth:`pca <.TreeSequence.pca>` option to further improve precision.\n    \"\"\"\n    error_bound: np.ndarray\n    \"\"\"\n    An estimate of the error resulting from the randomized algorithm (experimental).\n    Eigenvalues should be correct to within (roughly) this additive factor,\n    and factors should be correct to within (roughly) this factor divided by the\n    next-largest eigenvalue in the Euclidean norm. These estimates are obtained from\n    a bound on the expected L2 operator norm between the true GRM and its\n    low-dimensional approximation, from equation 1.11 in\n    https://arxiv.org/pdf/0909.4061 .\n    \"\"\"\n"
  },
  {
    "path": "python/tskit/util.py",
    "content": "# MIT License\n#\n# Copyright (c) 2018-2025 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nModule responsible for various utility functions used in other modules.\n\"\"\"\n\nimport dataclasses\nimport datetime\nimport html\nimport io\nimport itertools\nimport json\nimport numbers\nimport os\nimport textwrap\n\nimport numpy as np\n\nimport tskit\nfrom tskit import UNKNOWN_TIME\n\n\n# Extra methods for dataclasses\nclass Dataclass:\n    def replace(self, **kwargs):\n        \"\"\"\n        Return a new instance of this dataclass, with the specified attributes\n        overwritten by new values.\n\n        :return: A new instance of the same type\n        \"\"\"\n        return dataclasses.replace(self, **kwargs)\n\n    def asdict(self, **kwargs):\n        \"\"\"\n        Return a new dict which maps field names to their corresponding values\n        in this dataclass.\n        \"\"\"\n        return 
dataclasses.asdict(self, **kwargs)\n\n\ndef canonical_json(obj):\n    \"\"\"\n    Returns string of encoded JSON with keys sorted and whitespace removed to enable\n    byte-level comparison of encoded data.\n\n    :param Any obj: Python object to encode\n    :return: The encoded string\n    :rtype: str\n    \"\"\"\n    return json.dumps(obj, sort_keys=True, separators=(\",\", \":\"))\n\n\ndef is_unknown_time(time):\n    \"\"\"\n    As the default unknown mutation time (:const:`UNKNOWN_TIME`) is a specific NAN value,\n    equality always fails (A NAN value is not equal to itself by definition).\n    This method compares the bitfield such that unknown times can be detected. Either\n    single floats can be passed or lists/arrays.\n\n    Note that NANs are a set of floating-point values. `tskit.UNKNOWN_TIME` is a specific\n    value in this set. `np.nan` is a differing value, but both are NAN.\n    See https://en.wikipedia.org/wiki/NaN\n\n    This function only returns true for ``tskit.is_unknown_time(tskit.UNKNOWN_TIME)``\n    and will return false for ``tskit.is_unknown_time(np.nan)`` or any other NAN or\n    non-NAN value.\n\n    :param time: Value or array to check.\n    :type time: float | array-like\n    :return: A single boolean or array of booleans the same shape as ``time``.\n    :rtype: bool | numpy.ndarray[bool]\n    \"\"\"\n    return np.asarray(time, dtype=np.float64).view(np.uint64) == np.float64(\n        UNKNOWN_TIME\n    ).view(np.uint64)\n\n\ndef safe_np_int_cast(int_array, dtype, copy=False):\n    \"\"\"\n    A few functions require arrays of certain dtypes (e.g. node indices are np.int32,\n    genotypes are np.int8, etc.). Standard numpy integer arrays are of (dtype=np.int64),\n    so need casting. This function casts but checks bounds to avoid wrap-around\n    conversion errors. 
Strangely, numpy seems not to have this functionality built-in.\n\n    If copy=False, and the original array is a numpy array of exactly the same dtype\n    required, simply return the original rather than making a copy (same as the numpy\n    .astype(copy=...) function)\n    \"\"\"\n    if not isinstance(int_array, np.ndarray):\n        int_array = np.array(int_array)\n        # Since this is a new numpy array anyway, it's always a copy, so economize by\n        # setting copy=False\n        copy = False\n    if int_array.size == 0:\n        return int_array.astype(dtype, copy=copy)  # Allow empty arrays of any type\n    try:\n        return int_array.astype(dtype, casting=\"safe\", copy=copy)\n    except TypeError:\n        if int_array.dtype == np.dtype(\"O\"):\n            # this occurs e.g. if we're passed a list of lists of different lengths\n            raise TypeError(\"Cannot convert to a rectangular array.\")\n        bounds = np.iinfo(dtype)\n        if np.any(int_array < bounds.min) or np.any(int_array > bounds.max):\n            raise OverflowError(f\"Cannot convert safely to {dtype} type\")\n        if int_array.dtype.kind == \"i\" and np.dtype(dtype).kind == \"u\":\n            # Allow casting from int to unsigned int, since we have checked bounds\n            casting = \"unsafe\"\n        else:\n            # Raise a TypeError when we try to convert from, e.g., a float.\n            casting = \"same_kind\"\n        return int_array.astype(dtype, casting=casting, copy=copy)\n\n\n#\n# Pack/unpack lists of data into flattened numpy arrays.\n#\n\n\ndef pack_bytes(data):\n    \"\"\"\n    Packs the specified list of bytes into a flattened numpy array of 8 bit integers\n    and corresponding offsets. 
See :ref:`sec_encoding_ragged_columns` for details\n    of this encoding.\n\n    :param list[bytes] data: The list of bytes values to encode.\n    :return: The tuple (packed, offset) of numpy arrays representing the flattened\n        input data and offsets.\n    :rtype: numpy.ndarray (dtype=np.int8), numpy.ndarray (dtype=np.uint32)\n    \"\"\"\n    n = len(data)\n    offsets = np.zeros(n + 1, dtype=np.uint32)\n    for j in range(n):\n        offsets[j + 1] = offsets[j] + len(data[j])\n    column = np.zeros(offsets[-1], dtype=np.int8)\n    for j, value in enumerate(data):\n        column[offsets[j] : offsets[j + 1]] = bytearray(value)\n    return column, offsets\n\n\ndef unpack_bytes(packed, offset):\n    \"\"\"\n    Unpacks a list of bytes from the specified numpy arrays of packed byte\n    data and corresponding offsets. See :ref:`sec_encoding_ragged_columns` for details\n    of this encoding.\n\n    :param numpy.ndarray packed: The flattened array of byte values.\n    :param numpy.ndarray offset: The array of offsets into the ``packed`` array.\n    :return: The list of bytes values unpacked from the parameter arrays.\n    :rtype: list[bytes]\n    \"\"\"\n    # This could be done a lot more efficiently...\n    ret = []\n    for j in range(offset.shape[0] - 1):\n        raw = packed[offset[j] : offset[j + 1]].tobytes()\n        ret.append(raw)\n    return ret\n\n\ndef pack_strings(strings, encoding=\"utf8\"):\n    \"\"\"\n    Packs the specified list of strings into a flattened numpy array of 8 bit integers\n    and corresponding offsets using the specified text encoding.\n    See :ref:`sec_encoding_ragged_columns` for details of this encoding of\n    columns of variable length data.\n\n    :param list[str] data: The list of strings to encode.\n    :param str encoding: The text encoding to use when converting string data\n        to bytes. 
See the :mod:`codecs` module for information on available\n        string encodings.\n    :return: The tuple (packed, offset) of numpy arrays representing the flattened\n        input data and offsets.\n    :rtype: numpy.ndarray (dtype=np.int8), numpy.ndarray (dtype=np.uint32)\n    \"\"\"\n    return pack_bytes([bytearray(s.encode(encoding)) for s in strings])\n\n\ndef unpack_strings(packed, offset, encoding=\"utf8\"):\n    \"\"\"\n    Unpacks a list of strings from the specified numpy arrays of packed byte\n    data and corresponding offsets using the specified text encoding.\n    See :ref:`sec_encoding_ragged_columns` for details of this encoding of\n    columns of variable length data.\n\n    :param numpy.ndarray packed: The flattened array of byte values.\n    :param numpy.ndarray offset: The array of offsets into the ``packed`` array.\n    :param str encoding: The text encoding to use when converting string data\n        to bytes. See the :mod:`codecs` module for information on available\n        string encodings.\n    :return: The list of strings unpacked from the parameter arrays.\n    :rtype: list[str]\n    \"\"\"\n    return [b.decode(encoding) for b in unpack_bytes(packed, offset)]\n\n\ndef pack_arrays(list_of_lists, dtype=np.float64):\n    \"\"\"\n    Packs the specified list of numeric lists into a flattened numpy array\n    of the specified dtype with corresponding offsets. 
See\n    :ref:`sec_encoding_ragged_columns` for details of this encoding of columns\n    of variable length data.\n\n    :param list[list] list_of_lists: The list of numeric lists to encode.\n    :param dtype: The dtype for the packed array, defaults to float64\n    :return: The tuple (packed, offset) of numpy arrays representing the flattened\n        input data and offsets.\n    :rtype: numpy.array (dtype=dtype), numpy.array (dtype=np.uint32)\n    \"\"\"\n    # TODO must be possible to do this more efficiently with numpy\n    n = len(list_of_lists)\n    offset = np.zeros(n + 1, dtype=np.uint32)\n    for j in range(n):\n        offset[j + 1] = offset[j] + len(list_of_lists[j])\n    data = np.empty(offset[-1], dtype=dtype)\n    for j in range(n):\n        data[offset[j] : offset[j + 1]] = list_of_lists[j]\n    return data, offset\n\n\ndef unpack_arrays(packed, offset):\n    \"\"\"\n    Unpacks a list of arrays from the specified numpy array of packed\n    data and its associated offset array. 
See\n    :ref:`sec_encoding_ragged_columns` for details of how columns\n    of variable length data are encoded in this way.\n\n    :param numpy.ndarray packed: The flattened array of data.\n    :param numpy.ndarray offset: The array of offsets into the ``packed`` array.\n    :return: A list of numpy arrays unpacked from the flattened ``packed`` array.\n    :rtype: list[numpy.ndarray]\n    \"\"\"\n    ret = []\n    for j in range(offset.shape[0] - 1):\n        ret.append(packed[offset[j] : offset[j + 1]])\n    return ret\n\n\n#\n# Interval utilities\n#\n\n\ndef intervals_to_np_array(intervals, start, end):\n    \"\"\"\n    Converts the specified intervals to a numpy array and checks for\n    errors.\n    \"\"\"\n    intervals = np.array(intervals, dtype=np.float64)\n    # Special case the empty list of intervals\n    if len(intervals) == 0:\n        intervals = np.zeros((0, 2), dtype=np.float64)\n    if len(intervals.shape) != 2:\n        raise ValueError(\"Intervals must be a 2D numpy array\")\n    if intervals.shape[1] != 2:\n        raise ValueError(\"Intervals array shape must be (N, 2)\")\n    # TODO do this with numpy operations.\n    last_right = start\n    for left, right in intervals:\n        if left < start or right > end:\n            raise ValueError(f\"Intervals must be within {start} and {end}\")\n        if right <= left:\n            raise ValueError(\"Bad interval: right <= left\")\n        if left < last_right:\n            raise ValueError(\"Intervals must be disjoint.\")\n        last_right = right\n    return intervals\n\n\ndef negate_intervals(intervals, start, end):\n    \"\"\"\n    Returns the set of intervals *not* covered by the specified set of\n    disjoint intervals in the specified range.\n    \"\"\"\n    intervals = intervals_to_np_array(intervals, start, end)\n    other_intervals = []\n    last_right = start\n    for left, right in intervals:\n        if left != last_right:\n            other_intervals.append((last_right, left))\n   
     last_right = right\n    if last_right != end:\n        other_intervals.append((last_right, end))\n    return np.array(other_intervals)\n\n\ndef naturalsize(value):\n    \"\"\"\n    Format a number of bytes like a human readable filesize (e.g. 10 kiB)\n    \"\"\"\n    # Taken from https://github.com/jmoiron/humanize\n    suffix = (\"KiB\", \"MiB\", \"GiB\", \"TiB\", \"PiB\", \"EiB\", \"ZiB\", \"YiB\")\n    base = 1024\n    format_ = \"%.1f\"\n\n    bytes_ = float(value)\n    abs_bytes = abs(bytes_)\n\n    if abs_bytes == 1:\n        return \"%d Byte\" % bytes_\n    elif abs_bytes < base:\n        return \"%d Bytes\" % bytes_\n\n    for i, s in enumerate(suffix):\n        unit = base ** (i + 2)\n        if abs_bytes < unit:\n            return (format_ + \" %s\") % ((base * bytes_ / unit), s)\n    return (format_ + \" %s\") % ((base * bytes_ / unit), s)\n\n\ndef obj_to_collapsed_html(d, name=None, open_depth=0, max_items=30, max_item_len=100):\n    \"\"\"\n    Recursively make an HTML representation of python objects.\n\n    :param str name: Name for this object\n    :param int open_depth: By default sub-sections are collapsed. If this number is\n        non-zero the first layers up to open_depth will be opened.\n    :param int max_items: Maximum number of items to display per collection\n    :return: The HTML as a string\n    :rtype: str\n    \"\"\"\n    opened = \"open\" if open_depth > 0 else \"\"\n    open_depth -= 1\n    name = f\"{str(name)}:\" if name is not None else \"\"\n    if isinstance(d, dict):\n        items = list(d.items())\n        more = len(items) - max_items\n        display_items = items[:max_items] if more > 0 else items\n        inner_html = \"\".join(\n            f\"{obj_to_collapsed_html(val, key, open_depth, max_items)}<br/>\"\n            for key, val in display_items\n        )\n        if more > 0:\n            inner_html += f\"... 
and {more} more\"\n        return f\"\"\"\n            <div>\n                <span class=\"tskit-details-label\">{name}</span>\n                <details {opened}>\n                    <summary>dict</summary>\n                    {inner_html}\n                </details>\n            </div>\n            \"\"\"\n    elif isinstance(d, list):\n        items = d\n        more = len(items) - max_items\n        display_items = items[:max_items] if more > 0 else items\n        inner_html = \"\".join(\n            f\"{obj_to_collapsed_html(val, None, open_depth, max_items)}<br/>\"\n            for val in display_items\n        )\n        if more > 0:\n            inner_html += f\"... and {more} more\"\n        return f\"\"\"\n            <div>\n                <span class=\"tskit-details-label\">{name}</span>\n                <details {opened}>\n                    <summary>list</summary>\n                    {inner_html}\n                </details>\n            </div>\n            \"\"\"\n    else:\n        d_str = str(d)\n        if len(d_str) > max_item_len:\n            d_str = d_str[:max_item_len] + \"...\"\n        d_str = textwrap.fill(d_str, width=30)\n        d_str = f\"{name} {html.escape(str(d_str))}\"\n        d_str = d_str.replace(\"\\n\", \"<br/>\")\n        return d_str\n\n\ndef truncate_string_end(string, length):\n    \"\"\"\n    If a string is longer than \"length\" then snip out the middle and replace with an\n    ellipsis.\n    \"\"\"\n    if len(string) <= length:\n        return string\n    return f\"{string[: length - 3]}...\"\n\n\ndef render_metadata(md, length=40):\n    if md == b\"\":\n        return \"\"\n    return truncate_string_end(str(md), length)\n\n\ndef unicode_table(\n    rows, *, title=None, header=None, row_separator=True, column_alignments=None\n):\n    \"\"\"\n    Convert a table (list of lists) of strings to a unicode table. 
If a row contains\n    the string \"__skipped__NNN\" then \"skipped N rows\" is displayed.\n\n    :param list[list[str]] rows: List of rows, each of which is a list of strings for\n        each cell. Each row must have the same number of cells.\n    :param str title: If specified the first output row will be a single cell\n        containing this string, left-justified. [optional]\n    :param list[str] header: Specifies a row above the main rows which will be in double\n        lined borders and left justified. Must be same length as each row. [optional]\n    :param boolean row_separator: If True add lines between each row. [Default: True]\n    :param column_alignments str: A string of the same length as the number of cells in\n        a row (i.e. columns) where each character specifies an alignment such as ``<``,\n        ``>`` or ``^`` as used in Python's string formatting mini-language. If ``None``,\n        set the first column to be left justified and the remaining columns to be right\n        justified [Default: ``None``]\n    :return: The table as a string\n    :rtype: str\n    \"\"\"\n    if header is not None:\n        all_rows = [header] + rows\n    else:\n        all_rows = rows\n    widths = [\n        max(len(row[i_col]) for row in all_rows) for i_col in range(len(all_rows[0]))\n    ]\n    if column_alignments is None:\n        column_alignments = \"<\" + \">\" * (len(widths) - 1)\n    out = []\n    inner_width = sum(widths) + len(header or rows[0]) - 1\n    if title is not None:\n        out += [\n            f\"╔{'═' * inner_width}╗\\n║{title.ljust(inner_width)}║\\n\",\n            f\"╠{'╤'.join('═' * w for w in widths)}╣\\n\",\n        ]\n    if header is not None:\n        out += [\n            f\"╔{'╤'.join('═' * w for w in widths)}╗\\n\",\n            f\"║{'│'.join(cell.ljust(w) for cell, w in zip(header, widths))}║\\n\",\n            f\"╠{'╪'.join('═' * w for w in widths)}╣\\n\",\n        ]\n    last_skipped = False\n    for i, row in 
enumerate(rows):\n        if \"__skipped__\" in row:\n            msg = f\"{row[11:]} rows skipped (tskit.set_print_options)\"[\n                :inner_width\n            ].center(inner_width)\n            row_str = f\"║{msg}║\\n\"\n            if row_separator:\n                out += [\n                    f\"╟{'┴'.join('─' * w for w in widths)}╢\\n\" + row_str,\n                    f\"╟{'┬'.join('─' * w for w in widths)}╢\\n\",\n                ]\n            else:\n                out.append(row_str)\n            last_skipped = True\n        else:\n            if i != 0 and not last_skipped and row_separator:\n                out.append(f\"╟{'┼'.join('─' * w for w in widths)}╢\\n\")\n\n            out.append(\n                \"║\"\n                + \"│\".join(\n                    f\"{r:{a}{w}}\" for r, w, a in zip(row, widths, column_alignments)\n                )\n                + \"║\\n\"\n            )\n            last_skipped = False\n\n    out.append(f\"╚{'╧'.join('═' * w for w in widths)}╝\\n\")\n    return \"\".join(out)\n\n\ndef format_number(number, sig_digits=8, sep=\"\\u2009\"):\n    \"\"\"\n    Format a number with with a separator to indicate thousands\n    and up to `sig_digits` significant digits using 'g' format.\n\n    number: int, float, or a numeric string.\n    sig_digits: int, number of significant digits to display.\n    sep: str, the separator to use for thousands, default is a thin space.\n    Returns a string.\n    \"\"\"\n    if isinstance(number, str):\n        try:\n            number = float(number)\n        except ValueError:\n            raise TypeError(\"The string cannot be converted to a number\")\n\n    fmt = f\",.{sig_digits}g\"\n    return format(number, fmt).replace(\",\", sep)\n\n\ndef html_table(rows, *, header):\n    headers = \"\".join(f\"<th>{h}</th>\" for h in header)\n    rows = (\n        (\n            f'<td style=\"text-align: center;\" colspan=\"{len(headers)}\"><em>{row[11:]}'\n            f\" rows skipped 
(tskit.set_print_options)</em></td>\"\n            if \"__skipped__\" in row\n            else \"\".join(f\"<td>{cell}</td>\" for cell in row)\n        )\n        for row in rows\n    )\n    rows = \"\".join(f\"<tr>{row}</tr>\\n\" for row in rows)\n    return f\"\"\"\n        <div>\n            <style scoped=\"\">\n                .tskit-table tbody tr th:only-of-type {{vertical-align: middle;}}\n                .tskit-table tbody tr th {{vertical-align: top;}}\n                .tskit-table tbody td {{text-align: right;padding: 0.5em 0.5em;}}\n                .tskit-table tbody th {{padding: 0.5em 0.5em;}}\n            </style>\n            <table border=\"1\" class=\"tskit-table\">\n                <thead>\n                    <tr>\n                        {headers}\n                    </tr>\n                </thead>\n                <tbody>\n                    {rows}\n                </tbody>\n            </table>\n        </div>\n    \"\"\"\n\n\ndef tree_sequence_html(ts):\n    table_rows = \"\".join(\n        f\"\"\"\n            <tr>\n                <td>{name.capitalize()}</td>\n                <td>{format_number(table.num_rows)}</td>\n                <td>{naturalsize(table.nbytes)}</td>\n                <td style=\"text-align: center;\">\n                    {\n            \"✅\" if hasattr(table, \"metadata\") and len(table.metadata) > 0 else \"\"\n        }\n                </td>\n            </tr>\n        \"\"\"\n        for name, table in ts.tables.table_name_map.items()\n    )\n\n    provenance_rows = \"\"\n    provenances = list(ts.provenances())\n    # Detail the most recent 10 provenances, and collapse the rest\n    display_provenances = provenances[-10:]\n    extra_provenances = provenances[0:-10]\n    for prov in reversed(display_provenances):\n        try:\n            timestamp = datetime.datetime.fromisoformat(prov.timestamp).strftime(\n                \"%d %B, %Y at %I:%M:%S %p\"\n            )\n            record = json.loads(prov.record)\n  
          software_name = record.get(\"software\", {}).get(\"name\", \"Unknown\")\n            software_version = record.get(\"software\", {}).get(\"version\", \"Unknown\")\n            command = record.get(\"parameters\", {}).get(\"command\", \"Unknown\")\n            details = obj_to_collapsed_html(record, None, 0)\n            provenance_rows += f\"\"\"\n                <tr>\n                    <td>{timestamp}</td>\n                    <td>{software_name}</td>\n                    <td>{software_version}</td>\n                    <td>{command}</td>\n                    <td>\n                        <details>\n                            <summary>Details</summary>\n                            {details}\n                        </details>\n                    </td>\n                </tr>\n            \"\"\"\n        except Exception as e:\n            provenance_rows += (\n                f\"\"\"Could not parse provenance record: \"\"\"\n                f\"\"\"{e.__class__.__name__} {str(e)}\"\"\"\n            )\n    if len(extra_provenances) > 0:\n        provenance_rows += f\"\"\"\n            <tr>\n                <td colspan=\"5\"><i>... 
{len(extra_provenances)} more</i></td>\n            </tr>\n        \"\"\"\n    md = (\n        obj_to_collapsed_html(ts.metadata, None, 1)\n        if len(ts.tables.metadata_bytes) > 0\n        else \"No Metadata\"\n    )\n    return f\"\"\"\n        <div>\n            <style>\n                .tskit-table thead tr th {{text-align: left;padding: 0.5em 0.5em;}}\n                .tskit-table tbody tr td {{padding: 0.5em 0.5em;}}\n                .tskit-table tbody tr td:first-of-type {{text-align: left;}}\n                .tskit-details-label {{vertical-align: top; padding-right:5px;}}\n                .tskit-table-set {{display: inline-flex;flex-wrap: wrap;margin: -12px 0 0 -12px;width: calc(100% + 12px);}}\n                .tskit-table-set-table {{margin: 12px 0 0 12px;}}\n                details {{display: inline-block;}}\n                summary {{cursor: pointer; outline: 0; display: list-item;}}\n            </style>\n            <div class=\"tskit-table-set\">\n                <div class=\"tskit-table-set-table\">\n                    <table class=\"tskit-table\">\n                        <thead>\n                            <tr>\n                                <th style=\"padding:0;line-height:21px;\">\n                                    <img style=\"height: 32px;display: inline-block;padding: 3px 5px 3px 0;\" src=\"https://raw.githubusercontent.com/tskit-dev/administrative/main/tskit_logo.svg\"/>\n                                    <a target=\"_blank\" href=\"https://tskit.dev/tskit/docs/latest/python-api.html#the-treesequence-class\"> Tree Sequence </a>\n                                </th>\n                            </tr>\n                        </thead>\n                        <tbody>\n                            <tr><td>Trees</td><td>{format_number(ts.num_trees)}</td></tr>\n                            <tr><td>Sequence Length</td><td>{format_number(ts.sequence_length)}</td></tr>\n                            <tr><td>Time 
Units</td><td>{ts.time_units}</td></tr>\n                            <tr><td>Sample Nodes</td><td>{format_number(ts.num_samples)}</td></tr>\n                            <tr><td>Total Size</td><td>{naturalsize(ts.nbytes)}</td></tr>\n                            <tr>\n                                <td>Metadata</td><td style=\"text-align: left;\">{md}</td>\n                            </tr>\n                        </tbody>\n                    </table>\n                </div>\n                <div class=\"tskit-table-set-table\">\n                    <table class=\"tskit-table\">\n                        <thead>\n                            <tr>\n                                <th style=\"line-height:21px;\">Table</th>\n                                <th>Rows</th>\n                                <th>Size</th>\n                                <th>Has Metadata</th>\n                            </tr>\n                        </thead>\n                        <tbody>\n                            {table_rows}\n                        </tbody>\n                    </table>\n                </div>\n                <div class=\"tskit-table-set-table\">\n                    <table class=\"tskit-table\">\n                        <thead>\n                            <tr>\n                                <th>Provenance Timestamp</th>\n                                <th>Software Name</th>\n                                <th>Version</th>\n                                <th>Command</th>\n                                <th>Full record</th>\n                            </tr>\n                        </thead>\n                        <tbody>\n                            {provenance_rows}\n                        </tbody>\n                    </table>\n                </div>\n            </div>\n            <div style=\"margin-top: 1em; font-size: 0.8em; text-align: center;\">\n                To cite this software, please consult the citation manual: <a 
href=\"https://tskit.dev/citation/\" target=\"_blank\">https://tskit.dev/citation/</a>\n            </div>\n        </div>\n    \"\"\"  # noqa: E501\n\n\ndef tree_html(tree):\n    return f\"\"\"\n            <div>\n              <style>\n                .tskit-table thead tr th {{text-align: left;padding: 0.5em 0.5em;}}\n                .tskit-table tbody tr td {{padding: 0.5em 0.5em;}}\n                .tskit-table tbody tr td:first-of-type {{text-align: left;}}\n                .tskit-details-label {{vertical-align: top; padding-right:5px;}}\n                .tskit-table-set {{display: inline-flex;flex-wrap: wrap;margin: -12px 0 0 -12px;width: calc(100% + 12px);}}\n                .tskit-table-set-table {{margin: 12px 0 0 12px;}}\n                details {{display: inline-block;}}\n                summary {{cursor: pointer; outline: 0; display: list-item;}}\n              </style>\n              <div class=\"tskit-table-set\">\n                <div class=\"tskit-table-set-table\">\n                  <table class=\"tskit-table\">\n                    <thead>\n                      <tr>\n                        <th style=\"padding:0;line-height:21px;\">\n                          <img style=\"height: 32px;display: inline-block;padding: 3px 5px 3px 0;\" src=\"https://raw.githubusercontent.com/tskit-dev/administrative/main/tskit_logo.svg\"/>\n                          <a target=\"_blank\" href=\"https://tskit.dev/tskit/docs/latest/python-api.html#the-tree-class\"> Tree </a>\n                        </th>\n                      </tr>\n                    </thead>\n                    <tbody>\n                      <tr><td>Index</td><td>{format_number(tree.index)}</td></tr>\n                      <tr><td>Interval</td><td>{format_number(tree.interval.left)}-{format_number(tree.interval.right)} ({format_number(tree.span)})</td></tr>\n                      <tr><td>Roots</td><td>{format_number(tree.num_roots)}</td></tr>\n                      
<tr><td>Nodes</td><td>{format_number(len(tree.preorder()))}</td></tr>\n                      <tr><td>Sites</td><td>{format_number(tree.num_sites)}</td></tr>\n                      <tr><td>Mutations</td><td>{format_number(tree.num_mutations)}</td></tr>\n                      <tr><td>Total Branch Length</td><td>{format_number(tree.total_branch_length)}</td></tr>\n                    </tbody>\n                  </table>\n                </div>\n              </div>\n            </div>\n            \"\"\"  # noqa: E501\n\n\ndef variant_html(variant):\n    class_type = \"Variant\"\n\n    url_tskit_logo = (\n        \"https://raw.githubusercontent.com/tskit-dev/administrative/main/tskit_logo.svg\"\n    )\n    url_variant_class_doc = (\n        \"https://tskit.dev/tskit/docs/latest/python-api.html#the-variant-class\"\n    )\n\n    html_body_head = f\"\"\"\n        <div>\n            <style>\n                .tskit-table thead tr th {{text-align: left;padding: 0.5em 0.5em;}}\n                .tskit-table tbody tr td {{padding: 0.5em 0.5em;}}\n                .tskit-table tbody tr td:first-of-type {{text-align: left;}}\n                .tskit-details-label {{vertical-align: top; padding-right:5px;}}\n                .tskit-table-set {{display: inline-flex;flex-wrap: wrap;margin: -12px 0 0 -12px;width: calc(100% + 12px);}}\n                .tskit-table-set-table {{margin: 12px 0 0 12px;}}\n                details {{display: inline-block;}}\n                summary {{cursor: pointer; outline: 0; display: list-item;}}\n            </style>\n            <div class=\"tskit-table-set\">\n                <div class=\"tskit-table-set-table\">\n                <table class=\"tskit-table\">\n                    <thead>\n                    <tr>\n                        <th style=\"padding:0;line-height:21px;\">\n                        <img style=\"height: 32px;display: inline-block;padding: 3px 5px 3px 0;\" src=\"{url_tskit_logo}\"/>\n                        <a target=\"_blank\" 
href=\"{url_variant_class_doc}\"> {class_type} </a>\n                        </th>\n                    </tr>\n                    </thead>\n                    <tbody>\n        \"\"\"  # noqa: E501\n\n    html_body_tail = \"\"\"\n                    </tbody>\n            </table>\n            </div>\n        </div>\n        </div>\n        \"\"\"\n\n    try:\n        site_id = variant.site.id\n        site_position = variant.site.position\n        num_samples = len(variant.samples)\n        num_alleles = variant.num_alleles\n        has_missing_data = str(variant.has_missing_data)\n        isolated_as_missing = str(bool(variant.isolated_as_missing))\n\n        counts = variant.counts()\n        freqs = variant.frequencies()\n\n        return (\n            html_body_head\n            + f\"\"\"\n                <tr><td>Site Id</td><td>{format_number(site_id)}</td></tr>\n                <tr><td>Site Position</td><td>{format_number(site_position)}</td></tr>\n                <tr><td>Number of Nodes</td><td>{format_number(num_samples)}</td></tr>\n                <tr><td>Number of Alleles</td><td>{format_number(num_alleles)}</td></tr>\n            \"\"\"\n            + \"\\n\".join(\n                [\n                    f\"\"\"<tr><td>Nodes with Allele {\n                        \"missing\" if k is None else \"'\" + k + \"'\"\n                    }</td><td>\"\"\"\n                    + f\"{format_number(counts[k])}\"\n                    + \" \"\n                    + f\"({format_number(freqs[k] * 100, 2)}%)\"\n                    + \"</td></tr>\"\n                    for k in variant.alleles\n                ]\n            )\n            + f\"\"\"\n                <tr><td>Has Missing Data</td><td>{has_missing_data}</td></tr>\n                <tr><td>Isolated as Missing</td><td>{isolated_as_missing}</td></tr>\n            \"\"\"\n            + html_body_tail\n        )\n    except ValueError as err:\n        return (\n            html_body_head\n            + 
f\"\"\"\n                        <tr><td>Error</td><td>{str(err)}</td></tr>\n            \"\"\"\n            + html_body_tail\n        )\n\n\ndef convert_file_like_to_open_file(file_like, mode):\n    # Get ourselves a local version of the file. The semantics here are complex\n    # because need to support a range of inputs and the free behaviour is\n    # slightly different on each.\n    _file = None\n    local_file = True\n    try:\n        # First, see if we can interpret the argument as a pathlike object.\n        path = os.fspath(file_like)\n        _file = open(path, mode)\n    except TypeError:\n        pass\n    if _file is None:\n        # Now we try to open file. If it's not a pathlike object, it could be\n        # an integer fd or object with a fileno method. In this case we\n        # must make sure that close is **not** called on the fd.\n        try:\n            _file = open(file_like, mode, closefd=False, buffering=0)\n        except TypeError:\n            pass\n    if _file is None:\n        # Assume that this is a file **but** we haven't opened it, so we must\n        # not close it.\n        if mode == \"wb\" and not hasattr(file_like, \"write\"):\n            raise TypeError(\"file object must have a write method\")\n        _file = file_like\n        local_file = False\n    return _file, local_file\n\n\ndef set_print_options(*, max_lines=40):\n    \"\"\"\n    Set the options for printing to strings and HTML\n\n    :param integer max_lines: The maximum number of lines to print from a table, beyond\n    this number the middle of the table will be skipped.\n    \"\"\"\n    tskit._print_options = {\"max_lines\": max_lines}\n\n\ndef truncate_rows(num_rows, limit=None):\n    \"\"\"\n    Return a list of indexes into a set of rows, but if a ``limit`` is set, truncate the\n    number of rows and place a single ``-1`` entry, instead of the intermediate indexes\n    \"\"\"\n    if limit is None or num_rows <= limit:\n        return range(num_rows)\n    
return itertools.chain(\n        range(limit // 2),\n        [-1],\n        range(num_rows - (limit - (limit // 2)), num_rows),\n    )\n\n\ndef random_nucleotides(length: numbers.Number, *, seed: int | None = None) -> str:\n    \"\"\"\n    Returns a random string of nucleotides of the specified length. Characters\n    are drawn uniformly from the alphabet \"ACTG\".\n\n    :param int length: The length of the random sequence.\n    :return: A string of the specified length consisting of random nucleotide\n       characters.\n    :rtype: str\n    \"\"\"\n    if int(length) != length:\n        raise ValueError(\"length must be an integer\")\n    rng = np.random.RandomState(seed)\n    encoded_nucleotides = np.array(list(map(ord, \"ACTG\")), dtype=np.int8)\n    a = rng.choice(encoded_nucleotides, size=int(length))\n    return a.tobytes().decode(\"ascii\")\n\n\ndef raise_known_file_format_errors(open_file, existing_exception):\n    \"\"\"\n    Sniffs the file for pk-zip or hdf header bytes, then raises an exception\n    if these are detected, if not raises the existing exception.\n    \"\"\"\n    # Check for HDF5 header bytes\n    try:\n        open_file.seek(0)\n        header = open_file.read(4)\n    except io.UnsupportedOperation:\n        # If we can't seek, we can't sniff the file.\n        raise existing_exception\n    if header == b\"\\x89HDF\":\n        raise tskit.FileFormatError(\n            \"The specified file appears to be in HDF5 format. This file \"\n            \"may have been generated by msprime < 0.6.0 (June 2018) which \"\n            \"can no longer be read directly. Please convert to the new \"\n            \"kastore format using the ``tskit upgrade`` command from tskit version<0.6.2\"\n        ) from existing_exception\n    if header[:2] == b\"\\x50\\x4b\":\n        raise tskit.FileFormatError(\n            \"The specified file appears to be in zip format, so may be a compressed \"\n            \"tree sequence. 
Try using the tszip module to decompress this file before \"\n            \"loading. `pip install tszip; tsunzip <filename>` or use \"\n            \"`tszip.decompress` in Python code.\"\n        ) from existing_exception\n    raise existing_exception\n"
  },
  {
    "path": "python/tskit/vcf.py",
    "content": "#\n# MIT License\n#\n# Copyright (c) 2019-2024 Tskit Developers\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\"\"\"\nConvert tree sequences to VCF.\n\"\"\"\n\nimport numpy as np\n\nfrom . 
import provenance\n\n\nclass VcfWriter:\n    \"\"\"\n    Writes a VCF representation of the genotypes tree sequence to a\n    file-like object.\n    \"\"\"\n\n    def __init__(\n        self,\n        tree_sequence,\n        *,\n        ploidy,\n        contig_id,\n        individuals,\n        individual_names,\n        position_transform,\n        site_mask,\n        sample_mask,\n        isolated_as_missing,\n        allow_position_zero,\n        include_non_sample_nodes,\n    ):\n        self.tree_sequence = tree_sequence\n\n        vcf_model = tree_sequence.map_to_vcf_model(\n            individuals=individuals,\n            ploidy=ploidy,\n            individual_names=individual_names,\n            include_non_sample_nodes=include_non_sample_nodes,\n            position_transform=position_transform,\n            contig_id=contig_id,\n            isolated_as_missing=isolated_as_missing,\n        )\n\n        # We now make some tweaks to the VCF model required for\n        # writing the VCF in text format\n\n        # Remove individuals with zero ploidy as these cannot be\n        # represented in VCF.\n        to_keep = (vcf_model.individuals_nodes != -1).any(axis=1)\n        vcf_model.individuals_nodes = vcf_model.individuals_nodes[to_keep]\n        vcf_model.individual_names = vcf_model.individuals_name[to_keep]\n        self.individual_ploidies = [\n            len(nodes[nodes >= 0]) for nodes in vcf_model.individuals_nodes\n        ]\n        self.num_individuals = len(vcf_model.individual_names)\n\n        if len(vcf_model.individuals_nodes) == 0:\n            raise ValueError(\"No samples in resulting VCF model\")\n\n        if len(vcf_model.transformed_positions) > 0:\n            # Arguably this should be last_pos + 1, but if we hit this\n            # condition the coordinate systems are all muddled up anyway\n            # so it's simpler to stay with this rule that was inherited\n            # from the legacy VCF output code.\n            
vcf_model.contig_length = max(\n                vcf_model.transformed_positions[-1], vcf_model.contig_length\n            )\n\n        # Flatten the array of node IDs, filtering out the -1 padding values\n        self.samples = []\n        for row in vcf_model.individuals_nodes:\n            for node_id in row:\n                if node_id != -1:\n                    self.samples.append(node_id)\n\n        if site_mask is None:\n            site_mask = np.zeros(tree_sequence.num_sites, dtype=bool)\n        self.site_mask = np.array(site_mask, dtype=bool)\n        if self.site_mask.shape != (tree_sequence.num_sites,):\n            raise ValueError(\"Site mask must be 1D a boolean array of length num_sites\")\n\n        # The VCF spec does not allow for positions to be 0, so we error if one of the\n        # transformed positions is 0 and allow_position_zero is False.\n        if not allow_position_zero and np.any(\n            vcf_model.transformed_positions[~site_mask] == 0\n        ):\n            raise ValueError(\n                \"A variant position of 0 was found in the VCF output, this is not \"\n                \"fully compliant with the VCF spec. If you still wish to write the VCF \"\n                'please use the \"allow_position_zero\" argument to write_vcf. 
'\n                \"Alternatively, you can increment all the positions by one using \"\n                '\"position_transform = lambda x: 1 + x\" or coerce the zero to one with '\n                '\"position_transform = lambda x: np.fmax(1, x)\"'\n            )\n\n        self.sample_mask = sample_mask\n        if sample_mask is not None:\n            if not callable(sample_mask):\n                sample_mask = np.array(sample_mask, dtype=bool)\n                self.sample_mask = lambda _: sample_mask\n\n        self.vcf_model = vcf_model\n\n    def __write_header(self, output):\n        print(\"##fileformat=VCFv4.2\", file=output)\n        print(f\"##source=tskit {provenance.__version__}\", file=output)\n        print('##FILTER=<ID=PASS,Description=\"All filters passed\">', file=output)\n        print(\n            f\"##contig=<ID={self.vcf_model.contig_id},length={self.vcf_model.contig_length}>\",\n            file=output,\n        )\n        print(\n            '##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">', file=output\n        )\n        vcf_samples = \"\\t\".join(self.vcf_model.individual_names)\n        print(\n            \"#CHROM\",\n            \"POS\",\n            \"ID\",\n            \"REF\",\n            \"ALT\",\n            \"QUAL\",\n            \"FILTER\",\n            \"INFO\",\n            \"FORMAT\",\n            vcf_samples,\n            sep=\"\\t\",\n            file=output,\n        )\n\n    def write(self, output):\n        self.__write_header(output)\n\n        # Build the array for hold the text genotype VCF data and the indexes into\n        # this array for when we're updating it.\n        gt_array = []\n        indexes = []\n        for ploidy in self.individual_ploidies:\n            for _ in range(ploidy):\n                indexes.append(len(gt_array))\n                # First element here is a placeholder that we'll write the actual\n                # genotypes into when for each variant.\n                
gt_array.extend([0, ord(\"|\")])\n            gt_array[-1] = ord(\"\\t\")\n        gt_array[-1] = ord(\"\\n\")\n        gt_array = np.array(gt_array, dtype=np.int8)\n        # TODO Unclear here whether using int64 or int32 will be faster for this index\n        # array. Test it out.\n        indexes = np.array(indexes, dtype=int)\n\n        for variant in self.tree_sequence.variants(\n            samples=self.samples, isolated_as_missing=self.vcf_model.isolated_as_missing\n        ):\n            site_id = variant.site.id\n            # We check the mask before we do any checks so we can use this as a\n            # way of skipping problematic sites.\n            if self.site_mask[site_id]:\n                continue\n\n            if variant.num_alleles > 9:\n                raise ValueError(\n                    \"More than 9 alleles not currently supported. Please open an issue \"\n                    \"on GitHub if this limitation affects you.\"\n                )\n            pos = self.vcf_model.transformed_positions[variant.index]\n            ref = variant.alleles[0]\n            alt = \".\"\n            if variant.num_alleles > 1:\n                alt = \",\".join(variant.alleles[1 : variant.num_alleles])\n            print(\n                self.vcf_model.contig_id,\n                pos,\n                site_id,\n                ref,\n                alt,\n                \".\",\n                \"PASS\",\n                \".\",\n                \"GT\",\n                sep=\"\\t\",\n                end=\"\\t\",\n                file=output,\n            )\n            genotypes = variant.genotypes\n            gt_array[indexes] = genotypes + ord(\"0\")\n            if self.sample_mask is not None:\n                genotypes = genotypes.copy()\n                sample_mask = np.array(self.sample_mask(variant), dtype=bool)\n                if sample_mask.shape != genotypes.shape:\n                    raise ValueError(\n                        \"Sample mask 
must be a numpy array of size num_samples\"\n                    )\n                genotypes[sample_mask] = -1\n            if self.sample_mask is not None or variant.has_missing_data:\n                missing = genotypes == -1\n                gt_array[indexes[missing]] = ord(\".\")\n            g_bytes = memoryview(gt_array).tobytes()\n            g_str = g_bytes.decode()\n            print(g_str, end=\"\", file=output)\n"
  }
]